diff options
Diffstat (limited to 'drivers/gpu/drm/amd')
304 files changed, 9346 insertions, 5261 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 653726588956..7fedbb725e17 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -45,7 +45,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \ - amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \ + amdgpu_gem.o amdgpu_ring.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b85b67a88a3d..d8b854fcbffa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -458,7 +458,6 @@ struct amdgpu_flip_work { uint64_t base; struct drm_pending_vblank_event *event; struct amdgpu_bo *old_abo; - struct dma_fence *excl; unsigned shared_count; struct dma_fence **shared; struct dma_fence_cb cb; @@ -813,6 +812,7 @@ struct amd_powerplay { #define AMDGPU_RESET_MAGIC_NUM 64 #define AMDGPU_MAX_DF_PERFMONS 4 +#define AMDGPU_PRODUCT_NAME_LEN 64 struct amdgpu_device { struct device *dev; struct pci_dev *pdev; @@ -1077,13 +1077,14 @@ struct amdgpu_device { bool runpm; bool in_runpm; bool has_pr3; + bool is_fw_fb; bool pm_sysfs_en; bool ucode_sysfs_en; /* Chip product information */ char product_number[16]; - char product_name[32]; + char product_name[AMDGPU_PRODUCT_NAME_LEN]; char serial[20]; atomic_t throttling_logging_enabled; @@ -1096,7 +1097,9 @@ struct amdgpu_device { pci_channel_state_t pci_channel_state; struct amdgpu_reset_control *reset_cntl; - uint32_t ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE]; + uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE]; + + bool ram_is_direct_mapped; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1317,6 +1320,8 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); +void amdgpu_device_halt(struct amdgpu_device *adev); + /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) void amdgpu_register_atpx_handler(void); @@ -1360,8 +1365,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); int amdgpu_enable_vblank_kms(struct drm_crtc *crtc); void amdgpu_disable_vblank_kms(struct drm_crtc *crtc); -long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg); int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7077f21f0021..6ca1db3c243f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -72,7 +72,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) if (!kfd_initialized) return; - adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, vf); + adev->kfd.dev = kgd2kfd_probe(adev, vf); if (adev->kfd.dev) amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -233,19 +233,16 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) return r; } -void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) +void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (amdgpu_device_should_recover_gpu(adev)) amdgpu_device_gpu_recover(adev, NULL); } -int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, +int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool cp_mqd_gfx9) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; struct amdgpu_bo_param bp; int r; @@ -314,7 +311,7 @@ allocate_mem_reserve_bo_failed: return r; } -void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) { struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; @@ -325,10 +322,9 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } -int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, +int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; struct amdgpu_bo_user *ubo; struct amdgpu_bo_param bp; @@ -355,18 +351,16 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, return 0; } -void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) +void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj) { struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; amdgpu_bo_unref(&bo); } -uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, +uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, enum kgd_engine_type type) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - switch (type) { case KGD_ENGINE_PFP: return adev->gfx.pfp_fw_version; @@ -399,11 +393,9 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, return 0; } -void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, +void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, struct kfd_local_mem_info *mem_info) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - memset(mem_info, 0, sizeof(*mem_info)); mem_info->local_mem_size_public = adev->gmc.visible_vram_size; @@ -428,19 +420,15 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, mem_info->mem_clk_max = 100; } -uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->gfx.funcs->get_gpu_clock_counter) return adev->gfx.funcs->get_gpu_clock_counter(adev); return 0; } -uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - /* the sclk is in quantas of 10kHz */ if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; @@ -450,9 +438,8 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) return 100; } -void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_cu_info acu_info = adev->gfx.cu_info; memset(cu_info, 0, sizeof(*cu_info)); @@ -473,13 +460,12 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) cu_info->lds_size = acu_info.lds_size; } -int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, - struct kgd_dev **dma_buf_kgd, +int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, + struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct dma_buf *dma_buf; struct drm_gem_object *obj; struct amdgpu_bo *bo; @@ -507,8 +493,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, goto out_put; r = 0; - if (dma_buf_kgd) - *dma_buf_kgd = (struct kgd_dev *)adev; + if (dmabuf_adev) + *dmabuf_adev = adev; if (bo_size) *bo_size = amdgpu_bo_size(bo); if (metadata_buffer) @@ -528,32 +514,11 @@ out_put: return r; } -uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, + struct amdgpu_device *src) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); - - return amdgpu_vram_mgr_usage(vram_man); -} - -uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->gmc.xgmi.hive_id; -} - -uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->unique_id; -} - -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) -{ - struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; - struct amdgpu_device *adev = (struct amdgpu_device *)dst; + struct amdgpu_device *peer_adev = src; + struct amdgpu_device *adev = dst; int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); if (ret < 0) { @@ -565,16 +530,18 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s return (uint8_t)ret; } -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min) +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, + struct amdgpu_device *src, + bool is_min) { - struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev; + struct amdgpu_device *adev = dst, *peer_adev; int num_links; if (adev->asic_type != CHIP_ALDEBARAN) return 0; if (src) - peer_adev = (struct amdgpu_device *)src; + peer_adev = src; /* num links returns 0 for indirect peers since indirect route is unknown. */ num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); @@ -589,9 +556,8 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev return (num_links * 16 * 25000)/BITS_PER_BYTE; } -int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min) +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min) { - struct amdgpu_device *adev = (struct amdgpu_device *)dev; int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : fls(adev->pm.pcie_mlw_mask)) - 1; int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask & @@ -647,39 +613,11 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min) return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE; } -uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->rmmio_remap.bus_addr; -} - -uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->gds.gws_size; -} - -uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->rev_id; -} - -int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->gmc.noretry; -} - -int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, +int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, + enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_job *job; struct amdgpu_ib *ib; struct amdgpu_ring *ring; @@ -730,10 +668,8 @@ err: return ret; } -void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) +void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); @@ -747,10 +683,9 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, + uint16_t vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->family == AMDGPU_FAMILY_AI) { int i; @@ -763,10 +698,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) return 0; } -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type) +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, enum TLB_FLUSH_TYPE flush_type) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; bool all_hub = false; if (adev->family == AMDGPU_FAMILY_AI) @@ -775,21 +709,18 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); } -bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) +bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - return adev->have_atomics_support; } -void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd) +void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct ras_err_data err_data = {0, 0, 0, NULL}; /* CPU MCA will handle page retirement if connected_to_cpu is 1 */ if (!adev->gmc.xgmi.connected_to_cpu) - amdgpu_umc_process_ras_data_cb(adev, &err_data, NULL); - else - amdgpu_amdkfd_gpu_reset(kgd); + amdgpu_umc_poison_handler(adev, &err_data, reset); + else if (reset) + amdgpu_amdkfd_gpu_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a15a4787c7ee..ac841ae8f5cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -144,14 +144,16 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev); -int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, +int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, + enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); -void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); -bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type); +void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle); +bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, + uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, enum TLB_FLUSH_TYPE flush_type); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -159,7 +161,7 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); -void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); +void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev); int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); @@ -198,37 +200,35 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) } #endif /* Shared API */ -int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, +int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); -void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); -void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj); +int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, + void **mem_obj); +void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem); int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); -uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, +uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, enum kgd_engine_type type); -void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, +void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, struct kfd_local_mem_info *mem_info); -uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev); -uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); -void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); -int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, - struct kgd_dev **dmabuf_kgd, +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev); +void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, + struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, + struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags); -uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); -uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); -uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd); -uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); -uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); -uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); -int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min); -int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min); +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, + struct amdgpu_device *src); +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, + struct amdgpu_device *src, + bool is_min); +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -258,45 +258,55 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min); (&((struct amdgpu_fpriv *) \ ((struct drm_file *)(drm_priv))->driver_priv)->vm) -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, struct file *filp, u32 pasid, void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv); +void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, + void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - struct kgd_dev *kgd, uint64_t va, uint64_t size, + struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed); + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, + bool *table_freed); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, struct kgd_mem *mem, void **kptr, uint64_t *size); -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem); +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, + struct kgd_mem *mem); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); -int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, struct dma_buf *dmabuf, uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); -int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); -void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd); +void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, + bool reset); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm); + +/** + * @amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released + * + * Allows KFD to release its resources associated with the GEM object. + */ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo); void amdgpu_amdkfd_reserve_system_mem(uint64_t size); #else @@ -324,7 +334,7 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, #if IS_ENABLED(CONFIG_HSA_AMD) int kgd2kfd_init(void); void kgd2kfd_exit(void); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf); +struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); @@ -348,7 +358,7 @@ static inline void kgd2kfd_exit(void) } static inline -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) +struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) { return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 5a7f680bcb3f..abe93b3ff765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -57,11 +57,6 @@ (*dump)[i++][1] = RREG32(addr); \ } while (0) -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) { return (struct v9_sdma_mqd *)mqd; @@ -123,10 +118,9 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, return sdma_rlc_reg_offset; } -int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -193,11 +187,10 @@ int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd, +int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -225,9 +218,9 @@ int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev, + void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -244,10 +237,9 @@ bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h index ce08131b7b5f..756c1a5679c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h @@ -20,11 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm); -int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd, +int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); -bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev, + void *mqd); +int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 960acf68150a..7b7f4b2764c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -39,37 +39,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -81,33 +70,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -150,22 +135,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, * but still works */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -218,12 +202,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v10_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_compute_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; @@ -231,7 +214,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -296,16 +279,15 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; @@ -313,7 +295,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -349,16 +331,15 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + release_queue(adev); return r; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -372,13 +353,13 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -386,10 +367,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -456,11 +436,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -488,15 +467,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -506,13 +485,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -529,12 +507,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; @@ -548,7 +525,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, int retry; #endif - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -633,20 +610,19 @@ loop: break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -683,11 +659,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); @@ -696,12 +671,12 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +static int kgd_address_watch_disable(struct amdgpu_device *adev) { return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +static int kgd_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -710,11 +685,10 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -735,18 +709,16 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", vmid); @@ -757,12 +729,10 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } -static void program_trap_handler_settings(struct kgd_dev *kgd, +static void program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); /* * Program TBA registers @@ -781,7 +751,7 @@ static void program_trap_handler_settings(struct kgd_dev *kgd, WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), upper_32_bits(tma_addr >> 8)); - unlock_srbm(kgd); + unlock_srbm(adev); } const struct kfd2kgd_calls gfx_v10_kfd2kgd = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index dac0d751d5af..1f37d3574001 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -38,37 +38,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -80,34 +69,30 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid, +static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } /* ATC is defeatured on Sienna_Cichlid */ -static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid, +static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT; /* Mapping vmid to pasid also for IH block */ @@ -118,22 +103,21 @@ static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id) +static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -188,12 +172,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v10_sdma_mqd *)mqd; } -static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_compute_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; @@ -201,7 +184,7 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HIQ is set during driver init period with vmid set to 0*/ if (m->cp_hqd_vmid == 0) { @@ -281,16 +264,15 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd, +static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; @@ -298,7 +280,7 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -334,16 +316,15 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + release_queue(adev); return r; } -static int hqd_dump_v10_3(struct kgd_dev *kgd, +static int hqd_dump_v10_3(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -357,13 +338,13 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -371,10 +352,9 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd, return 0; } -static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd, +static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -441,11 +421,10 @@ static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd, return 0; } -static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd, +static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -473,15 +452,15 @@ static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd, return 0; } -static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -491,13 +470,13 @@ static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd) +static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev, + void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -514,18 +493,17 @@ static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd) return false; } -static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd, +static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; struct v10_compute_mqd *m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -555,20 +533,19 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd, if (time_after(jiffies, end_jiffies)) { pr_err("cp queue pipe %d queue %d preemption failed\n", pipe_id, queue_id); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd, +static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -606,12 +583,12 @@ static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd, } -static int address_watch_disable_v10_3(struct kgd_dev *kgd) +static int address_watch_disable_v10_3(struct amdgpu_device *adev) { return 0; } -static int address_watch_execute_v10_3(struct kgd_dev *kgd, +static int address_watch_execute_v10_3(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -620,11 +597,10 @@ static int address_watch_execute_v10_3(struct kgd_dev *kgd, return 0; } -static int wave_control_execute_v10_3(struct kgd_dev *kgd, +static int wave_control_execute_v10_3(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -645,28 +621,24 @@ static int wave_control_execute_v10_3(struct kgd_dev *kgd, return 0; } -static uint32_t address_watch_get_offset_v10_3(struct kgd_dev *kgd, +static uint32_t address_watch_get_offset_v10_3(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* SDMA is on gfxhub as well for Navi1* series */ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } -static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, +static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); /* * Program TBA registers @@ -685,15 +657,14 @@ static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), upper_32_bits(tma_addr >> 8)); - unlock_srbm(kgd); + unlock_srbm(adev); } #if 0 -uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, +uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev, uint32_t trap_debug_wave_launch_mode, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; uint32_t orig_wave_cntl_value; uint32_t orig_stall_vmid; @@ -720,10 +691,8 @@ uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, return 0; } -uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd) +uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->grbm_idx_mutex); WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); @@ -733,11 +702,10 @@ uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd) return 0; } -uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd, +uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev, uint32_t trap_override, uint32_t trap_mask) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -762,11 +730,10 @@ uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd, return 0; } -uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd, +uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev, uint8_t wave_launch_mode, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; bool is_stall_mode; bool is_mode_set; @@ -805,16 +772,14 @@ uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd, * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. * deq_retry_wait_time -- Wait Count for Global Wave Syncs. */ -void get_iq_wait_times_v10_3(struct kgd_dev *kgd, +void get_iq_wait_times_v10_3(struct amdgpu_device *adev, uint32_t *wait_times) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); } -void build_grace_period_packet_info_v10_3(struct kgd_dev *kgd, +void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b91d27e39bad..36528dad7684 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -82,68 +82,54 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); mutex_lock(&adev->srbm_mutex); WREG32(mmSRBM_GFX_CNTL, value); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - WREG32(mmSRBM_GFX_CNTL, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_MEM_CONFIG, sh_mem_config); WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); WREG32(mmSH_MEM_BASES, sh_mem_bases); - unlock_srbm(kgd); + unlock_srbm(adev); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -165,21 +151,20 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, return 0; } -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -207,12 +192,11 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) return (struct cik_sdma_rlc_registers *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_mqd *m; uint32_t *mqd_hqd; uint32_t reg, wptr_val, data; @@ -220,7 +204,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -239,25 +223,24 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, * release srbm_mutex to avoid circular dependency between * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex. */ - release_queue(kgd); + release_queue(adev); valid_wptr = read_user_wptr(mm, wptr, wptr_val); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (valid_wptr) WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32(mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS (35+4) #define DUMP_REG(addr) do { \ @@ -271,7 +254,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); @@ -281,7 +264,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -289,10 +272,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; unsigned long end_jiffies; uint32_t sdma_rlc_reg_offset; @@ -345,11 +327,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; uint32_t i = 0, reg; @@ -372,15 +353,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32(mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -390,13 +371,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32(mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -412,12 +392,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; enum hqd_dequeue_request_type type; unsigned long flags, end_jiffies; @@ -426,7 +405,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); switch (reset_type) { @@ -504,20 +483,19 @@ loop: break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -551,9 +529,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +static int kgd_address_watch_disable(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); union TCP_WATCH_CNTL_BITS cntl; unsigned int i; @@ -571,13 +548,12 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +static int kgd_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, uint32_t addr_lo) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); union TCP_WATCH_CNTL_BITS cntl; cntl.u32All = cntl_val; @@ -602,11 +578,10 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data; mutex_lock(&adev->grbm_idx_mutex); @@ -627,18 +602,17 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; @@ -646,21 +620,17 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static void set_scratch_backing_va(struct kgd_dev *kgd, +static void set_scratch_backing_va(struct amdgpu_device *adev, uint64_t va, uint32_t vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); - unlock_srbm(kgd); + unlock_srbm(adev); } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID\n"); return; @@ -676,10 +646,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, * @vmid: vmid pointer * read vmid from register (CIK). */ -static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) +static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 5ce0ce704a21..52832cd69a93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -39,68 +39,54 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); mutex_lock(&adev->srbm_mutex); WREG32(mmSRBM_GFX_CNTL, value); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - WREG32(mmSRBM_GFX_CNTL, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_MEM_CONFIG, sh_mem_config); WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); WREG32(mmSH_MEM_BASES, sh_mem_bases); - unlock_srbm(kgd); + unlock_srbm(adev); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -123,21 +109,20 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, return 0; } -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -165,12 +150,11 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) return (struct vi_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_mqd *m; uint32_t *mqd_hqd; uint32_t reg, wptr_val, data; @@ -178,7 +162,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HIQ is set during driver init period with vmid set to 0*/ if (m->cp_hqd_vmid == 0) { @@ -206,7 +190,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, * on ASICs that do not support context-save. * EOP writes/reads can start anywhere in the ring. */ - if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { + if (adev->asic_type != CHIP_TONGA) { WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); @@ -226,25 +210,24 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, * release srbm_mutex to avoid circular dependency between * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex. */ - release_queue(kgd); + release_queue(adev); valid_wptr = read_user_wptr(mm, wptr, wptr_val); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (valid_wptr) WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32(mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS (54+4) #define DUMP_REG(addr) do { \ @@ -258,7 +241,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); @@ -268,7 +251,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -276,10 +259,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; unsigned long end_jiffies; uint32_t sdma_rlc_reg_offset; @@ -331,11 +313,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + queue_id * KFD_VI_SDMA_QUEUE_OFFSET; uint32_t i = 0, reg; @@ -367,15 +348,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32(mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -385,13 +366,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32(mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -407,12 +387,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; enum hqd_dequeue_request_type type; unsigned long flags, end_jiffies; @@ -422,7 +401,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); @@ -502,20 +481,19 @@ loop: break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -549,11 +527,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; @@ -561,12 +538,12 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +static int kgd_address_watch_disable(struct amdgpu_device *adev) { return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +static int kgd_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -575,11 +552,10 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -600,28 +576,24 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_scratch_backing_va(struct kgd_dev *kgd, +static void set_scratch_backing_va(struct amdgpu_device *adev, uint64_t va, uint32_t vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); - unlock_srbm(kgd); + unlock_srbm(adev); } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID\n"); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index bcc1cbeb8799..1abf662a0e91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -46,37 +46,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -88,33 +77,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } -int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -171,22 +156,21 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, * but still works */ -int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -233,19 +217,18 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -296,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, lower_32_bits((uintptr_t)wptr)); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1, (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } @@ -308,16 +291,15 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); - release_queue(kgd); + release_queue(adev); return 0; } -int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v9_mqd *m; uint32_t mec, pipe; @@ -325,7 +307,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -361,16 +343,15 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + release_queue(adev); return r; } -int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -384,13 +365,13 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -398,10 +379,9 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -468,11 +448,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -500,31 +479,30 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); - act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + acquire_queue(adev, pipe_id, queue_id); + act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); high = upper_32_bits(queue_address >> 8); - if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && - high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) && + high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -541,12 +519,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; @@ -555,7 +532,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -579,25 +556,24 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { - temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -634,11 +610,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); @@ -647,12 +622,12 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) +int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev) { return 0; } -int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -661,17 +636,16 @@ int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, return 0; } -int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); @@ -686,18 +660,16 @@ int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, return 0; } -uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, +void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", vmid); @@ -750,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); - reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; if (*wave_cnt != 0) @@ -804,7 +776,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * * Reading registers referenced above involves programming GRBM appropriately */ -void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu) { int qidx; @@ -818,10 +790,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, int pasid_tmp; int max_queue_cnt; int vmid_wave_cnt = 0; - struct amdgpu_device *adev; DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); - adev = get_amdgpu_device(kgd); lock_spi_csq_mutexes(adev); soc15_grbm_select(adev, 1, 0, 0, 0); @@ -839,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); - queue_map = RREG32(SOC15_REG_OFFSET(GC, 0, - mmSPI_CSQ_WF_ACTIVE_STATUS)); + queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS); /* * Assumption: queue map encodes following schema: four @@ -882,30 +851,28 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, adev->gfx.cu_info.max_waves_per_simd; } -void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, +void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); /* * Program TBA registers */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO, lower_32_bits(tba_addr >> 8)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI, upper_32_bits(tba_addr >> 8)); /* * Program TMA registers */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO, lower_32_bits(tma_addr >> 8)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI, upper_32_bits(tma_addr >> 8)); - unlock_srbm(kgd); + unlock_srbm(adev); } const struct kfd2kgd_calls gfx_v9_kfd2kgd = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index c63591106879..24be49df26fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -22,48 +22,49 @@ -void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); -int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid); -int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id); +int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); -int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off); -int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); -bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, +bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id); +int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); -int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); -int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev); +int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, uint32_t addr_lo); -int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); -uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset); -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, +void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); -void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu); -void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, +void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6348559608ce..f9bab963a948 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -60,12 +60,6 @@ static const char * const domain_bit_to_string[] = { static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); - -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - static bool kfd_mem_is_attached(struct amdgpu_vm *avm, struct kgd_mem *mem) { @@ -126,8 +120,19 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size) PAGE_ALIGN(size); } +/** + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size + * of buffer including any reserved for control structures + * + * @adev: Device to which allocated BO belongs to + * @size: Size of buffer, in bytes, encapsulated by B0. This should be + * equivalent to amdgpu_bo_size(BO) + * @alloc_flag: Flag used in allocating a BO as noted above + * + * Return: returns -ENOMEM in case of error, ZERO otherwise + */ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain, bool sg) + uint64_t size, u32 alloc_flag) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); @@ -137,20 +142,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, acc_size = amdgpu_amdkfd_acc_size(size); vram_needed = 0; - if (domain == AMDGPU_GEM_DOMAIN_GTT) { - /* TTM GTT memory */ + if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size + size; - } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { - /* Userptr */ + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + system_mem_needed = acc_size; + ttm_mem_needed = acc_size; + vram_needed = size; + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size; - } else { - /* VRAM and SG */ + } else if (alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { system_mem_needed = acc_size; ttm_mem_needed = acc_size; - if (domain == AMDGPU_GEM_DOMAIN_VRAM) - vram_needed = size; + } else { + pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); + return -ENOMEM; } spin_lock(&kfd_mem_limit.mem_limit_lock); @@ -166,64 +175,72 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (adev->kfd.vram_used + vram_needed > adev->gmc.real_vram_size - reserved_for_pt)) { ret = -ENOMEM; - } else { - kfd_mem_limit.system_mem_used += system_mem_needed; - kfd_mem_limit.ttm_mem_used += ttm_mem_needed; - adev->kfd.vram_used += vram_needed; + goto release; } + /* Update memory accounting by decreasing available system + * memory, TTM memory and GPU memory as computed above + */ + adev->kfd.vram_used += vram_needed; + kfd_mem_limit.system_mem_used += system_mem_needed; + kfd_mem_limit.ttm_mem_used += ttm_mem_needed; + +release: spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } static void unreserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain, bool sg) + uint64_t size, u32 alloc_flag) { size_t acc_size; acc_size = amdgpu_amdkfd_acc_size(size); spin_lock(&kfd_mem_limit.mem_limit_lock); - if (domain == AMDGPU_GEM_DOMAIN_GTT) { + + if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + kfd_mem_limit.system_mem_used -= acc_size; + kfd_mem_limit.ttm_mem_used -= acc_size; + adev->kfd.vram_used -= size; + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + } else if (alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; - if (domain == AMDGPU_GEM_DOMAIN_VRAM) { - adev->kfd.vram_used -= size; - WARN_ONCE(adev->kfd.vram_used < 0, - "kfd VRAM memory accounting unbalanced"); - } + } else { + pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); + goto release; } - WARN_ONCE(kfd_mem_limit.system_mem_used < 0, - "kfd system memory accounting unbalanced"); + + WARN_ONCE(adev->kfd.vram_used < 0, + "KFD VRAM memory accounting unbalanced"); WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, - "kfd TTM memory accounting unbalanced"); + "KFD TTM memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "KFD system memory accounting unbalanced"); +release: spin_unlock(&kfd_mem_limit.mem_limit_lock); } void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u32 domain = bo->preferred_domains; - bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); + u32 alloc_flags = bo->kfd_bo->alloc_flags; + u64 size = amdgpu_bo_size(bo); - if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) { - domain = AMDGPU_GEM_DOMAIN_CPU; - sg = false; - } - - unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); + unreserve_mem_limit(adev, size, alloc_flags); kfree(bo->kfd_bo); } - /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's * reservation object. * @@ -691,10 +708,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, va + bo_size, vm); - if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && - amdgpu_xgmi_same_hive(adev, bo_adev))) { - /* Mappings on the local GPU and VRAM mappings in the - * local hive share the original BO + if (adev == bo_adev || + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || + (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) { + /* Mappings on the local GPU, or VRAM mappings in the + * local hive, or userptr mapping IOMMU direct map mode + * share the original BO */ attachment[i]->type = KFD_MEM_ATT_SHARED; bo[i] = mem->bo; @@ -1272,12 +1291,60 @@ create_evict_fence_fail: return ret; } -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, +/** + * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria + * @bo: Handle of buffer object being pinned + * @domain: Domain into which BO should be pinned + * + * - USERPTR BOs are UNPINNABLE and will return error + * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their + * PIN count incremented. It is valid to PIN a BO multiple times + * + * Return: ZERO if successful in pinning, Non-Zero in case of error. + */ +static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) +{ + int ret = 0; + + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret)) + return ret; + + ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); + if (ret) + pr_err("Error in Pinning BO to domain: %d\n", domain); + + amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); + amdgpu_bo_unreserve(bo); + + return ret; +} + +/** + * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria + * @bo: Handle of buffer object being unpinned + * + * - Is a illegal request for USERPTR BOs and is ignored + * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their + * PIN count decremented. Calls to UNPIN must balance calls to PIN + */ +static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) +{ + int ret = 0; + + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret)) + return; + + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); +} + +int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, struct file *filp, u32 pasid, void **process_info, struct dma_fence **ef) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_fpriv *drv_priv; struct amdgpu_vm *avm; int ret; @@ -1353,12 +1420,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, } } -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv) +void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, + void *drm_priv) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm; - if (WARN_ON(!kgd || !drm_priv)) + if (WARN_ON(!adev || !drm_priv)) return; avm = drm_priv_to_vm(drm_priv); @@ -1386,11 +1453,10 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) } int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - struct kgd_dev *kgd, uint64_t va, uint64_t size, + struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; @@ -1454,7 +1520,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags); if (ret) { pr_debug("Insufficient memory\n"); goto err_reserve_limit; @@ -1495,6 +1561,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ret = init_user_pages(*mem, user_addr); if (ret) goto allocate_init_user_pages_failed; + } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { + ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT); + if (ret) { + pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n"); + goto err_pin_bo; + } + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; } if (offset) @@ -1503,13 +1578,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return 0; allocate_init_user_pages_failed: +err_pin_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_mem_limit(adev, size, alloc_domain, !!sg); + unreserve_mem_limit(adev, size, flags); err_reserve_limit: mutex_destroy(&(*mem)->lock); if (gobj) @@ -1525,7 +1601,7 @@ err: } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; @@ -1538,6 +1614,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( bool is_imported = false; mutex_lock(&mem->lock); + + /* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */ + if (mem->alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); + } + mapped_to_gpu_memory = mem->mapped_to_gpu_memory; is_imported = mem->is_imported; mutex_unlock(&mem->lock); @@ -1617,10 +1701,9 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, bool *table_freed) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; struct amdgpu_bo *bo; @@ -1747,7 +1830,7 @@ out: } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct amdkfd_process_info *process_info = avm->process_info; @@ -1808,7 +1891,7 @@ out: } int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) + struct amdgpu_device *adev, struct kgd_mem *mem, bool intr) { struct amdgpu_sync sync; int ret; @@ -1824,7 +1907,7 @@ int amdgpu_amdkfd_gpuvm_sync_memory( return ret; } -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, struct kgd_mem *mem, void **kptr, uint64_t *size) { int ret; @@ -1880,7 +1963,8 @@ bo_reserve_failed: return ret; } -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem) +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, + struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; @@ -1890,12 +1974,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kg amdgpu_bo_unreserve(bo); } -int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, - struct kfd_vm_fault_info *mem) +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, + struct kfd_vm_fault_info *mem) { - struct amdgpu_device *adev; - - adev = (struct amdgpu_device *)kgd; if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; mb(); @@ -1904,13 +1985,12 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, return 0; } -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, struct dma_buf *dma_buf, uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct drm_gem_object *obj; struct amdgpu_bo *bo; @@ -2537,11 +2617,9 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) } /* Returns GPU-specific tiling mode information */ -int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - config->gb_addr_config = adev->gfx.config.gb_addr_config; config->tile_config_ptr = adev->gfx.config.tile_mode_array; config->num_tile_configs = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 97178b307ed6..4d4ddf026faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -470,8 +470,8 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade /** * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS - * adev: amdgpu_device pointer - * i2c_address: pointer to u8; if not NULL, will contain + * @adev: amdgpu_device pointer + * @i2c_address: pointer to u8; if not NULL, will contain * the RAS EEPROM address if the function returns true * * Return true if VBIOS supports RAS EEPROM address reporting, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 7abe9500c0c6..d6d986be906a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/delay.h> +#include "amdgpu.h" #include "amd_acpi.h" #define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0) @@ -165,7 +166,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas } /** - * amdgpu_atpx_validate_functions - validate ATPX functions + * amdgpu_atpx_validate - validate ATPX functions * * @atpx: amdgpu atpx struct * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 0de66f59adb8..c16a2704ced6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -108,7 +108,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_HDMIB: if (amdgpu_connector->use_digital) { - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -116,7 +116,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) break; case DRM_MODE_CONNECTOR_DVID: case DRM_MODE_CONNECTOR_HDMIA: - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -125,7 +125,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) dig_connector = amdgpu_connector->con_priv; if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) || - drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + connector->display_info.is_hdmi) { if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -149,7 +149,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) break; } - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { /* * Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make * much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at @@ -315,8 +315,10 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector) if (!amdgpu_connector->edid) { /* some laptops provide a hardcoded edid in rom for LCDs */ if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) || - (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) + (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) { amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev); + drm_connector_update_edid_property(connector, amdgpu_connector->edid); + } } } @@ -326,6 +328,7 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; + drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) @@ -387,6 +390,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -401,6 +407,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } @@ -1171,7 +1180,7 @@ static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) || (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) { return MODE_OK; - } else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + } else if (connector->display_info.is_hdmi) { /* HDMI 1.3+ supports max clock of 340 Mhz */ if (mode->clock > 340000) return MODE_CLOCK_HIGH; @@ -1463,7 +1472,7 @@ static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) { return amdgpu_atombios_dp_mode_valid_helper(connector, mode); } else { - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { /* HDMI 1.3+ supports max clock of 340 Mhz */ if (mode->clock > 340000) return MODE_CLOCK_HIGH; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0311d799a010..06d07502a1f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -298,7 +298,6 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, { s64 time_us, increment_us; u64 free_vram, total_vram, used_vram; - struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); /* Allow a maximum of 200 accumulated ms. This is basically per-IB * throttling. * @@ -315,7 +314,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, } total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); - used_vram = amdgpu_vram_mgr_usage(vram_man); + used_vram = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; spin_lock(&adev->mm_stats.lock); @@ -362,7 +361,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) { u64 total_vis_vram = adev->gmc.visible_vram_size; u64 used_vis_vram = - amdgpu_vram_mgr_vis_usage(vram_man); + amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); if (used_vis_vram < total_vis_vram) { u64 free_vis_vram = total_vis_vram - used_vis_vram; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 164d6a9e9fbb..25e2e5bf90eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1618,6 +1618,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (!debugfs_initialized()) return 0; + debugfs_create_x32("amdgpu_smu_debug", 0600, root, + &adev->pm.smu_debug_mask); + ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev, &fops_ib_preempt); if (IS_ERR(ent)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e651b959141..cf7fad88c138 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/console.h> #include <linux/slab.h> +#include <linux/iommu.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_probe_helper.h> @@ -331,7 +332,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, } /** - * amdgpu_device_vram_access - access vram by vram aperature + * amdgpu_device_aper_access - access vram by vram aperature * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -550,11 +551,11 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, trace_amdgpu_device_wreg(adev->pdev->device, reg, v); } -/* - * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range +/** + * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range * * this function is invoked only the debugfs register access - * */ + */ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { @@ -566,6 +567,8 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, adev->gfx.rlc.funcs->is_rlcg_access_range) { if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0); + } else if ((reg * 4) >= adev->rmmio_size) { + adev->pcie_wreg(adev, reg * 4, v); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); } @@ -1100,7 +1103,7 @@ static void amdgpu_device_wb_fini(struct amdgpu_device *adev) } /** - * amdgpu_device_wb_init- Init Writeback driver info and allocate memory + * amdgpu_device_wb_init - Init Writeback driver info and allocate memory * * @adev: amdgpu_device pointer * @@ -1447,7 +1450,7 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; break; default: - return -EINVAL; + break; } return 0; @@ -2316,6 +2319,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + /* Try to reserve bad pages early */ + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_exchange_data(adev); + r = amdgpu_device_vram_scratch_init(adev); if (r) { DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); @@ -2347,7 +2354,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } if (amdgpu_sriov_vf(adev)) - amdgpu_virt_init_data_exchange(adev); + amdgpu_virt_exchange_data(adev); r = amdgpu_ib_pool_init(adev); if (r) { @@ -2614,11 +2621,10 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (r) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); - /* For XGMI + passthrough configuration on arcturus, enable light SBR */ - if (adev->asic_type == CHIP_ARCTURUS && - amdgpu_passthrough(adev) && - adev->gmc.xgmi.num_physical_nodes > 1) - smu_set_light_sbr(&adev->smu, true); + /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ + if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)|| + adev->asic_type == CHIP_ALDEBARAN )) + smu_handle_passthrough_sbr(&adev->smu, true); if (adev->gmc.xgmi.num_physical_nodes > 1) { mutex_lock(&mgpu_info.mutex); @@ -2657,6 +2663,36 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_device_smu_fini_early - smu hw_fini wrapper + * + * @adev: amdgpu_device pointer + * + * For ASICs need to disable SMC first + */ +static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) +{ + int i, r; + + if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) + return; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.hw) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); + /* XXX handle errors */ + if (r) { + DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + } + adev->ip_blocks[i].status.hw = false; + break; + } + } +} + static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) { int i, r; @@ -2677,21 +2713,8 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - /* need to disable SMC first */ - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.hw) - continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); - /* XXX handle errors */ - if (r) { - DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - adev->ip_blocks[i].status.hw = false; - break; - } - } + /* Workaroud for ASICs need to disable SMC first */ + amdgpu_device_smu_fini_early(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.hw) @@ -2733,8 +2756,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) amdgpu_virt_release_ras_err_handler_data(adev); - amdgpu_ras_pre_fini(adev); - if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); @@ -3166,6 +3187,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) { switch (asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + case CHIP_TOPAZ: + /* chips with no display hardware */ + return false; #if defined(CONFIG_DRM_AMD_DC) case CHIP_TAHITI: case CHIP_PITCAIRN: @@ -3367,6 +3394,22 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) return ret; } +/** + * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU + * + * @adev: amdgpu_device pointer + * + * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode + */ +static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(adev->dev); + if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) + adev->ram_is_direct_mapped = true; +} + static const struct attribute *amdgpu_dev_attributes[] = { &dev_attr_product_name.attr, &dev_attr_product_number.attr, @@ -3455,9 +3498,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); - r = amdgpu_device_init_apu_flags(adev); - if (r) - return r; + amdgpu_device_init_apu_flags(adev); r = amdgpu_device_check_arguments(adev); if (r) @@ -3541,6 +3582,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Need to get xgmi info early to decide the reset behavior*/ + if (adev->gmc.xgmi.supported) { + r = adev->gfxhub.funcs->get_xgmi_info(adev); + if (r) + return r; + } + /* enable PCIE atomic ops */ if (amdgpu_sriov_vf(adev)) adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) @@ -3687,8 +3735,6 @@ fence_driver_init: /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); - amdgpu_fbdev_init(adev); - r = amdgpu_pm_sysfs_init(adev); if (r) { adev->pm_sysfs_en = false; @@ -3772,6 +3818,8 @@ fence_driver_init: queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); + amdgpu_device_check_iommu_direct_map(adev); + return 0; release_ras_con: @@ -3785,6 +3833,7 @@ failed: static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) { + /* Clear all CPU mappings pointing to this device */ unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); @@ -3805,7 +3854,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) } /** - * amdgpu_device_fini - tear down the driver + * amdgpu_device_fini_hw - tear down the driver * * @adev: amdgpu_device pointer * @@ -3846,21 +3895,27 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_ucode_sysfs_fini(adev); sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); - amdgpu_fbdev_fini(adev); + /* disable ras feature must before hw fini */ + amdgpu_ras_pre_fini(adev); amdgpu_device_ip_fini_early(adev); amdgpu_irq_fini_hw(adev); - ttm_device_clear_dma_mappings(&adev->mman.bdev); + if (adev->mman.initialized) + ttm_device_clear_dma_mappings(&adev->mman.bdev); amdgpu_gart_dummy_page_fini(adev); - amdgpu_device_unmap_mmio(adev); + if (drm_dev_is_unplugged(adev_to_drm(adev))) + amdgpu_device_unmap_mmio(adev); + } void amdgpu_device_fini_sw(struct amdgpu_device *adev) { + int idx; + amdgpu_fence_driver_sw_fini(adev); amdgpu_device_ip_fini(adev); release_firmware(adev->firmware.gpu_info_fw); @@ -3885,6 +3940,14 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) vga_client_unregister(adev->pdev); + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + + iounmap(adev->rmmio); + adev->rmmio = NULL; + amdgpu_device_doorbell_fini(adev); + drm_dev_exit(idx); + } + if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); if (adev->mman.discovery_bin) @@ -3905,8 +3968,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) */ static void amdgpu_device_evict_resources(struct amdgpu_device *adev) { - /* No need to evict vram on APUs for suspend to ram */ - if (adev->in_s3 && (adev->flags & AMD_IS_APU)) + /* No need to evict vram on APUs for suspend to ram or s2idle */ + if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) return; if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM)) @@ -3942,7 +4005,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_kms_helper_poll_disable(dev); if (fbcon) - amdgpu_fbdev_set_suspend(adev, 1); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -3953,16 +4016,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - /* First evict vram memory */ amdgpu_device_evict_resources(adev); amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); - /* This second call to evict device resources is to evict - * the gart page table using the CPU. - */ - amdgpu_device_evict_resources(adev); return 0; } @@ -4019,7 +4077,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) flush_delayed_work(&adev->delayed_init_work); if (fbcon) - amdgpu_fbdev_set_suspend(adev, 0); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); drm_kms_helper_poll_enable(dev); @@ -4288,6 +4346,9 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, bool from_hypervisor) { int r; + struct amdgpu_hive_info *hive = NULL; + + amdgpu_amdkfd_pre_reset(adev); amdgpu_amdkfd_pre_reset(adev); @@ -4304,8 +4365,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, goto error; amdgpu_virt_init_data_exchange(adev); - /* we need recover gart prior to run SMC/CP/SDMA resume */ - amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)); r = amdgpu_device_fw_loading(adev); if (r) @@ -4316,9 +4375,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) goto error; - amdgpu_irq_gpu_reset_resume_helper(adev); - r = amdgpu_ib_ring_tests(adev); - amdgpu_amdkfd_post_reset(adev); + hive = amdgpu_get_xgmi_hive(adev); + /* Update PSP FW topology after reset */ + if (hive && adev->gmc.xgmi.num_physical_nodes > 1) + r = amdgpu_xgmi_update_topology(hive, adev); + + if (hive) + amdgpu_put_xgmi_hive(hive); + + if (!r) { + amdgpu_irq_gpu_reset_resume_helper(adev); + r = amdgpu_ib_ring_tests(adev); + amdgpu_amdkfd_post_reset(adev); + } error: if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { @@ -4461,7 +4530,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, j, r = 0; + int i, r = 0; struct amdgpu_job *job = NULL; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -4483,15 +4552,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, /*clear job fence from fence drv to avoid force_completion *leave NULL and vm flush fence in fence drv */ - for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) { - struct dma_fence *old, **ptr; + amdgpu_fence_driver_clear_job_fences(ring); - ptr = &ring->fence_drv.fences[j]; - old = rcu_dereference_protected(*ptr, 1); - if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) { - RCU_INIT_POINTER(*ptr, NULL); - } - } /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } @@ -4622,10 +4684,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_inc_vram_lost(tmp_adev); } - r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT)); - if (r) - goto out; - r = amdgpu_device_fw_loading(tmp_adev); if (r) return r; @@ -4651,7 +4709,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) goto out; - amdgpu_fbdev_set_suspend(tmp_adev, 0); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false); /* * The GPU enters bad state once faulty pages @@ -4750,7 +4808,7 @@ static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgp { struct amdgpu_device *tmp_adev = NULL; - if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { if (!hive) { dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes"); return -ENODEV; @@ -4962,7 +5020,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * We always reset all schedulers for device and all devices for XGMI * hive so that should take care of them too. */ - hive = amdgpu_get_xgmi_hive(adev); + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); if (hive) { if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) { DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", @@ -5003,7 +5062,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * to put adev in the 1st position. */ INIT_LIST_HEAD(&device_list); - if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) list_add_tail(&tmp_adev->reset_list, &device_list); if (!list_is_first(&adev->reset_list, &device_list)) @@ -5042,7 +5101,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); - amdgpu_fbdev_set_suspend(tmp_adev, 1); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); /* disable ras on ALL IPs */ if (!need_emergency_restart && @@ -5637,3 +5696,42 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, amdgpu_asic_invalidate_hdp(adev, ring); } + +/** + * amdgpu_device_halt() - bring hardware to some kind of halt state + * + * @adev: amdgpu_device pointer + * + * Bring hardware to some kind of halt state so that no one can touch it + * any more. It will help to maintain error context when error occurred. + * Compare to a simple hang, the system will keep stable at least for SSH + * access. Then it should be trivial to inspect the hardware state and + * see what's going on. Implemented as following: + * + * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc), + * clears all CPU mappings to device, disallows remappings through page faults + * 2. amdgpu_irq_disable_all() disables all interrupts + * 3. amdgpu_fence_driver_hw_fini() signals all HW fences + * 4. set adev->no_hw_access to avoid potential crashes after setp 5 + * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings + * 6. pci_disable_device() and pci_wait_for_pending_transaction() + * flush any in flight DMA operations + */ +void amdgpu_device_halt(struct amdgpu_device *adev) +{ + struct pci_dev *pdev = adev->pdev; + struct drm_device *ddev = adev_to_drm(adev); + + drm_dev_unplug(ddev); + + amdgpu_irq_disable_all(adev); + + amdgpu_fence_driver_hw_fini(adev); + + adev->no_hw_access = true; + + amdgpu_device_unmap_mmio(adev); + + pci_disable_device(pdev); + pci_wait_for_pending_transaction(pdev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ea00090b3fb3..be45650250fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -67,7 +67,8 @@ #include "smuio_v11_0_6.h" #include "smuio_v13_0.h" -MODULE_FIRMWARE("amdgpu/ip_discovery.bin"); +#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" +MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); #define mmRCC_CONFIG_MEMSIZE 0xde3 #define mmMM_INDEX 0x0 @@ -179,7 +180,7 @@ static int hw_id_map[MAX_HWIP] = { [DCI_HWIP] = DCI_HWID, }; -static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) +static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary) { uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; @@ -189,6 +190,34 @@ static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *bin return 0; } +static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) +{ + const struct firmware *fw; + const char *fw_name; + int r; + + switch (amdgpu_discovery) { + case 2: + fw_name = FIRMWARE_IP_DISCOVERY; + break; + default: + dev_warn(adev->dev, "amdgpu_discovery is not set properly\n"); + return -EINVAL; + } + + r = request_firmware(&fw, fw_name, adev->dev); + if (r) { + dev_err(adev->dev, "can't load firmware \"%s\"\n", + fw_name); + return r; + } + + memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size); + release_firmware(fw); + + return 0; +} + static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size) { uint16_t checksum = 0; @@ -206,13 +235,20 @@ static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size return !!(amdgpu_discovery_calculate_checksum(data, size) == expected); } +static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary) +{ + struct binary_header *bhdr; + bhdr = (struct binary_header *)binary; + + return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE); +} + static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct gpu_info_header *ghdr; - const struct firmware *fw; uint16_t offset; uint16_t size; uint16_t checksum; @@ -223,31 +259,32 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (!adev->mman.discovery_bin) return -ENOMEM; - if (amdgpu_discovery == 2) { - r = request_firmware(&fw, "amdgpu/ip_discovery.bin", adev->dev); - if (r) - goto get_from_vram; - dev_info(adev->dev, "Using IP discovery from file\n"); - memcpy((u8 *)adev->mman.discovery_bin, (u8 *)fw->data, - adev->mman.discovery_tmr_size); - release_firmware(fw); - } else { -get_from_vram: - r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin); + r = amdgpu_discovery_read_binary_from_vram(adev, adev->mman.discovery_bin); + if (r) { + dev_err(adev->dev, "failed to read ip discovery binary from vram\n"); + r = -EINVAL; + goto out; + } + + if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { + dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); + /* retry read ip discovery binary from file */ + r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); if (r) { - DRM_ERROR("failed to read ip discovery binary\n"); + dev_err(adev->dev, "failed to read ip discovery binary from file\n"); + r = -EINVAL; + goto out; + } + /* check the ip discovery binary signature */ + if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { + dev_warn(adev->dev, "get invalid ip discovery binary signature from file\n"); + r = -EINVAL; goto out; } } bhdr = (struct binary_header *)adev->mman.discovery_bin; - if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) { - DRM_ERROR("invalid ip discovery binary signature\n"); - r = -EINVAL; - goto out; - } - offset = offsetof(struct binary_header, binary_checksum) + sizeof(bhdr->binary_checksum); size = le16_to_cpu(bhdr->binary_size) - offset; @@ -255,7 +292,7 @@ get_from_vram: if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, size, checksum)) { - DRM_ERROR("invalid ip discovery binary checksum\n"); + dev_err(adev->dev, "invalid ip discovery binary checksum\n"); r = -EINVAL; goto out; } @@ -266,14 +303,14 @@ get_from_vram: ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset); if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { - DRM_ERROR("invalid ip discovery data table signature\n"); + dev_err(adev->dev, "invalid ip discovery data table signature\n"); r = -EINVAL; goto out; } if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, le16_to_cpu(ihdr->size), checksum)) { - DRM_ERROR("invalid ip discovery data table checksum\n"); + dev_err(adev->dev, "invalid ip discovery data table checksum\n"); r = -EINVAL; goto out; } @@ -285,7 +322,7 @@ get_from_vram: if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, le32_to_cpu(ghdr->size), checksum)) { - DRM_ERROR("invalid gc data table checksum\n"); + dev_err(adev->dev, "invalid gc data table checksum\n"); r = -EINVAL; goto out; } @@ -379,8 +416,18 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) ip->major, ip->minor, ip->revision); - if (le16_to_cpu(ip->hw_id) == VCN_HWID) + if (le16_to_cpu(ip->hw_id) == VCN_HWID) { + /* Bit [5:0]: original revision value + * Bit [7:6]: en/decode capability: + * 0b00 : VCN function normally + * 0b10 : encode is disabled + * 0b01 : decode is disabled + */ + adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = + ip->revision & 0xc0; + ip->revision &= ~0xc0; adev->vcn.num_vcn_inst++; + } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || le16_to_cpu(ip->hw_id) == SDMA2_HWID || @@ -472,14 +519,6 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n return -EINVAL; } - -int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance, - int *major, int *minor, int *revision) -{ - return amdgpu_discovery_get_ip_version(adev, VCN_HWID, - vcn_instance, major, minor, revision); -} - void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { struct binary_header *bhdr; @@ -511,7 +550,8 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) } /* some IP discovery tables on Navy Flounder don't have this set correctly */ if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) && - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2)) && + (adev->pdev->revision != 0xFF)) adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; if (vcn_harvest_count == adev->vcn.num_vcn_inst) { adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; @@ -526,10 +566,15 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) } } +union gc_info { + struct gc_info_v1_0 v1; + struct gc_info_v2_0 v2; +}; + int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { struct binary_header *bhdr; - struct gc_info_v1_0 *gc_info; + union gc_info *gc_info; if (!adev->mman.discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); @@ -537,28 +582,55 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) } bhdr = (struct binary_header *)adev->mman.discovery_bin; - gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin + + gc_info = (union gc_info *)(adev->mman.discovery_bin + le16_to_cpu(bhdr->table_list[GC].offset)); - - adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); - adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + - le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); - adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); - adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); - adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); - adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); - adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); - adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); - adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); - adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); - adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); - adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); - adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); - adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / - le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); - + switch (gc_info->v1.header.version_major) { + case 1: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se); + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) + + le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa)); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); + break; + case 2: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); + break; + default: + dev_err(adev->dev, + "Unhandled GC info table %d.%d\n", + gc_info->v1.header.version_major, + gc_info->v1.header.version_minor); + return -EINVAL; + } return 0; } @@ -917,7 +989,6 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 16): - case IP_VERSION(3, 0, 64): case IP_VERSION(3, 1, 1): case IP_VERSION(3, 0, 2): case IP_VERSION(3, 0, 192): @@ -954,7 +1025,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; default: - break;; + break; } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 0ea029e3b850..14537cec19db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -33,8 +33,6 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev); int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance, int *major, int *minor, int *revision); -int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance, - int *major, int *minor, int *revision); int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index dc50c05f23fc..82011e75ed85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -83,9 +83,6 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work) unsigned i; int vpos, hpos; - if (amdgpu_display_flip_handle_fence(work, &work->excl)) - return; - for (i = 0; i < work->shared_count; ++i) if (amdgpu_display_flip_handle_fence(work, &work->shared[i])) return; @@ -203,7 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - r = dma_resv_get_fences(new_abo->tbo.base.resv, &work->excl, + r = dma_resv_get_fences(new_abo->tbo.base.resv, NULL, &work->shared_count, &work->shared); if (unlikely(r != 0)) { DRM_ERROR("failed to get fences for buffer\n"); @@ -253,7 +250,6 @@ unreserve: cleanup: amdgpu_bo_unref(&work->old_abo); - dma_fence_put(work->excl); for (i = 0; i < work->shared_count; ++i) dma_fence_put(work->shared[i]); kfree(work->shared); @@ -1364,7 +1360,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) && ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && - drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + connector->display_info.is_hdmi && amdgpu_display_is_hdtv_mode(mode)))) { if (amdgpu_encoder->underscan_hborder != 0) amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; @@ -1603,13 +1599,10 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - /* don't unpin kernel fb objects */ - if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); - } + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ae6ab93c868b..579adfafe4d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -61,9 +61,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0) attach->peer2peer = false; - if (attach->dev->driver == adev->dev->driver) - return 0; - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) goto out; @@ -384,7 +381,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; - if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM) + if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; r = ttm_bo_validate(&bo->tbo, &placement, &ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ad95de6399af..c610e2794c18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -31,7 +31,6 @@ #include "amdgpu_drv.h" #include <drm/drm_pciids.h> -#include <linux/console.h> #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> @@ -39,6 +38,7 @@ #include <linux/mmu_notifier.h> #include <linux/suspend.h> #include <linux/cc_platform.h> +#include <linux/fb.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -314,9 +314,12 @@ module_param_named(dpm, amdgpu_dpm, int, 0444); /** * DOC: fw_load_type (int) - * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto). + * Set different firmware loading type for debugging, if supported. + * Set to 0 to force direct loading if supported by the ASIC. Set + * to -1 to select the default loading mode for the ASIC, as defined + * by the driver. The default is -1 (auto). */ -MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)"); +MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = force direct if supported, -1 = auto)"); module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444); /** @@ -328,10 +331,11 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); /** * DOC: runpm (int) - * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down - * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. + * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down + * the dGPUs when they are idle if supported. The default is -1 (auto enable). + * Setting the value to 0 disables this functionality. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -1889,6 +1893,26 @@ MODULE_DEVICE_TABLE(pci, pciidlist); static const struct drm_driver amdgpu_kms_driver; +static bool amdgpu_is_fw_framebuffer(resource_size_t base, + resource_size_t size) +{ + bool found = false; +#if IS_REACHABLE(CONFIG_FB) + struct apertures_struct *a; + + a = alloc_apertures(1); + if (!a) + return false; + + a->ranges[0].base = base; + a->ranges[0].size = size; + + found = is_firmware_framebuffer(a); + kfree(a); +#endif + return found; +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1897,6 +1921,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, unsigned long flags = ent->driver_data; int ret, retry = 0, i; bool supports_atomic = false; + bool is_fw_fb; + resource_size_t base, size; /* skip devices which are owned by radeon */ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { @@ -1965,6 +1991,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + is_fw_fb = amdgpu_is_fw_framebuffer(base, size); + /* Get rid of things like offb */ ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); if (ret) @@ -1977,6 +2007,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, adev->dev = &pdev->dev; adev->pdev = pdev; ddev = adev_to_drm(adev); + adev->is_fw_fb = is_fw_fb; if (!supports_atomic) ddev->driver_features &= ~DRIVER_ATOMIC; @@ -2002,6 +2033,19 @@ retry_init: goto err_pci; } + /* + * 1. don't init fbdev on hw without DCE + * 2. don't init fbdev if there are no connectors + */ + if (adev->mode_info.mode_config_initialized && + !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) { + /* select 8 bpp console on low vram cards */ + if (adev->gmc.real_vram_size <= (32*1024*1024)) + drm_fbdev_generic_setup(adev_to_drm(adev), 8); + else + drm_fbdev_generic_setup(adev_to_drm(adev), 32); + } + ret = amdgpu_debugfs_init(adev); if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); @@ -2150,10 +2194,13 @@ static int amdgpu_pmops_suspend(struct device *dev) if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; - adev->in_s3 = true; + else + adev->in_s3 = true; r = amdgpu_device_suspend(drm_dev, true); - adev->in_s3 = false; - + if (r) + return r; + if (!adev->in_s0ix) + r = amdgpu_asic_reset(adev); return r; } @@ -2170,6 +2217,8 @@ static int amdgpu_pmops_resume(struct device *dev) r = amdgpu_device_resume(drm_dev, true); if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = false; + else + adev->in_s3 = false; return r; } @@ -2234,12 +2283,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* + * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some + * proper cleanups and put itself into a state ready for PNP. That + * can address some random resuming failure observed on BOCO capable + * platforms. + * TODO: this may be also needed for PX capable platform. + */ + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; return ret; } + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; + if (amdgpu_device_supports_px(drm_dev)) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. @@ -2516,10 +2580,8 @@ static int __init amdgpu_init(void) { int r; - if (vgacon_text_force()) { - DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); + if (drm_firmware_drivers_only()) return -EINVAL; - } r = amdgpu_sync_init(); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h index e3a4f7048042..8178323e4bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h @@ -45,4 +45,7 @@ long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long amdgpu_kms_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index af4ef84e27a7..c96e458ed088 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -222,7 +222,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, case DRM_MODE_CONNECTOR_HDMIB: if (amdgpu_connector->use_digital) { /* HDMI 1.3 supports up to 340 Mhz over single link */ - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (pixel_clock > 340000) return true; else @@ -244,7 +244,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, return false; else { /* HDMI 1.3 supports up to 340 Mhz over single link */ - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (pixel_clock > 340000) return true; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c deleted file mode 100644 index cd0acbea75da..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Copyright © 2007 David Airlie - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * David Airlie - */ - -#include <linux/module.h> -#include <linux/pm_runtime.h> -#include <linux/slab.h> -#include <linux/vga_switcheroo.h> - -#include <drm/amdgpu_drm.h> -#include <drm/drm_crtc.h> -#include <drm/drm_crtc_helper.h> -#include <drm/drm_fb_helper.h> -#include <drm/drm_fourcc.h> - -#include "amdgpu.h" -#include "cikd.h" -#include "amdgpu_gem.h" - -#include "amdgpu_display.h" - -/* object hierarchy - - this contains a helper + a amdgpu fb - the helper contains a pointer to amdgpu framebuffer baseclass. -*/ - -static int -amdgpufb_open(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - int ret = pm_runtime_get_sync(fb_helper->dev->dev); - if (ret < 0 && ret != -EACCES) { - pm_runtime_mark_last_busy(fb_helper->dev->dev); - pm_runtime_put_autosuspend(fb_helper->dev->dev); - return ret; - } - return 0; -} - -static int -amdgpufb_release(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - - pm_runtime_mark_last_busy(fb_helper->dev->dev); - pm_runtime_put_autosuspend(fb_helper->dev->dev); - return 0; -} - -static const struct fb_ops amdgpufb_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_open = amdgpufb_open, - .fb_release = amdgpufb_release, - .fb_fillrect = drm_fb_helper_cfb_fillrect, - .fb_copyarea = drm_fb_helper_cfb_copyarea, - .fb_imageblit = drm_fb_helper_cfb_imageblit, -}; - - -int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int cpp, bool tiled) -{ - int aligned = width; - int pitch_mask = 0; - - switch (cpp) { - case 1: - pitch_mask = 255; - break; - case 2: - pitch_mask = 127; - break; - case 3: - case 4: - pitch_mask = 63; - break; - } - - aligned += pitch_mask; - aligned &= ~pitch_mask; - return aligned * cpp; -} - -static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj) -{ - struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - int ret; - - ret = amdgpu_bo_reserve(abo, true); - if (likely(ret == 0)) { - amdgpu_bo_kunmap(abo); - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - drm_gem_object_put(gobj); -} - -static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object **gobj_p) -{ - const struct drm_format_info *info; - struct amdgpu_device *adev = rfbdev->adev; - struct drm_gem_object *gobj = NULL; - struct amdgpu_bo *abo = NULL; - bool fb_tiled = false; /* useful for testing */ - u32 tiling_flags = 0, domain; - int ret; - int aligned_size, size; - int height = mode_cmd->height; - u32 cpp; - u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; - - info = drm_get_format_info(adev_to_drm(adev), mode_cmd); - cpp = info->cpp[0]; - - /* need to align pitch with crtc limits */ - mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, - fb_tiled); - domain = amdgpu_display_supported_domains(adev, flags); - height = ALIGN(mode_cmd->height, 8); - size = mode_cmd->pitches[0] * height; - aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, - ttm_bo_type_device, NULL, &gobj); - if (ret) { - pr_err("failed to allocate framebuffer (%d)\n", aligned_size); - return -ENOMEM; - } - abo = gem_to_amdgpu_bo(gobj); - - if (fb_tiled) - tiling_flags = AMDGPU_TILING_SET(ARRAY_MODE, GRPH_ARRAY_2D_TILED_THIN1); - - ret = amdgpu_bo_reserve(abo, false); - if (unlikely(ret != 0)) - goto out_unref; - - if (tiling_flags) { - ret = amdgpu_bo_set_tiling_flags(abo, - tiling_flags); - if (ret) - dev_err(adev->dev, "FB failed to set tiling flags\n"); - } - - ret = amdgpu_bo_pin(abo, domain); - if (ret) { - amdgpu_bo_unreserve(abo); - goto out_unref; - } - - ret = amdgpu_ttm_alloc_gart(&abo->tbo); - if (ret) { - amdgpu_bo_unreserve(abo); - dev_err(adev->dev, "%p bind failed\n", abo); - goto out_unref; - } - - ret = amdgpu_bo_kmap(abo, NULL); - amdgpu_bo_unreserve(abo); - if (ret) { - goto out_unref; - } - - *gobj_p = gobj; - return 0; -out_unref: - amdgpufb_destroy_pinned_object(gobj); - *gobj_p = NULL; - return ret; -} - -static int amdgpufb_create(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct amdgpu_fbdev *rfbdev = (struct amdgpu_fbdev *)helper; - struct amdgpu_device *adev = rfbdev->adev; - struct fb_info *info; - struct drm_framebuffer *fb = NULL; - struct drm_mode_fb_cmd2 mode_cmd; - struct drm_gem_object *gobj = NULL; - struct amdgpu_bo *abo = NULL; - int ret; - - memset(&mode_cmd, 0, sizeof(mode_cmd)); - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - ret = amdgpufb_create_pinned_object(rfbdev, &mode_cmd, &gobj); - if (ret) { - DRM_ERROR("failed to create fbcon object %d\n", ret); - return ret; - } - - abo = gem_to_amdgpu_bo(gobj); - - /* okay we have an object now allocate the framebuffer */ - info = drm_fb_helper_alloc_fbi(helper); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto out; - } - - ret = amdgpu_display_gem_fb_init(adev_to_drm(adev), &rfbdev->rfb, - &mode_cmd, gobj); - if (ret) { - DRM_ERROR("failed to initialize framebuffer %d\n", ret); - goto out; - } - - fb = &rfbdev->rfb.base; - - /* setup helper */ - rfbdev->helper.fb = fb; - - info->fbops = &amdgpufb_ops; - - info->fix.smem_start = amdgpu_gmc_vram_cpu_pa(adev, abo); - info->fix.smem_len = amdgpu_bo_size(abo); - info->screen_base = amdgpu_bo_kptr(abo); - info->screen_size = amdgpu_bo_size(abo); - - drm_fb_helper_fill_info(info, &rfbdev->helper, sizes); - - /* setup aperture base/size for vesafb takeover */ - info->apertures->ranges[0].base = adev_to_drm(adev)->mode_config.fb_base; - info->apertures->ranges[0].size = adev->gmc.aper_size; - - /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - - if (info->screen_base == NULL) { - ret = -ENOSPC; - goto out; - } - - DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start); - DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base); - DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo)); - DRM_INFO("fb depth is %d\n", fb->format->depth); - DRM_INFO(" pitch is %d\n", fb->pitches[0]); - - vga_switcheroo_client_fb_set(adev->pdev, info); - return 0; - -out: - if (fb && ret) { - drm_gem_object_put(gobj); - drm_framebuffer_unregister_private(fb); - drm_framebuffer_cleanup(fb); - kfree(fb); - } - return ret; -} - -static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev) -{ - struct amdgpu_framebuffer *rfb = &rfbdev->rfb; - int i; - - drm_fb_helper_unregister_fbi(&rfbdev->helper); - - if (rfb->base.obj[0]) { - for (i = 0; i < rfb->base.format->num_planes; i++) - drm_gem_object_put(rfb->base.obj[0]); - amdgpufb_destroy_pinned_object(rfb->base.obj[0]); - rfb->base.obj[0] = NULL; - drm_framebuffer_unregister_private(&rfb->base); - drm_framebuffer_cleanup(&rfb->base); - } - drm_fb_helper_fini(&rfbdev->helper); - - return 0; -} - -static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = { - .fb_probe = amdgpufb_create, -}; - -int amdgpu_fbdev_init(struct amdgpu_device *adev) -{ - struct amdgpu_fbdev *rfbdev; - int bpp_sel = 32; - int ret; - - /* don't init fbdev on hw without DCE */ - if (!adev->mode_info.mode_config_initialized) - return 0; - - /* don't init fbdev if there are no connectors */ - if (list_empty(&adev_to_drm(adev)->mode_config.connector_list)) - return 0; - - /* select 8 bpp console on low vram cards */ - if (adev->gmc.real_vram_size <= (32*1024*1024)) - bpp_sel = 8; - - rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL); - if (!rfbdev) - return -ENOMEM; - - rfbdev->adev = adev; - adev->mode_info.rfbdev = rfbdev; - - drm_fb_helper_prepare(adev_to_drm(adev), &rfbdev->helper, - &amdgpu_fb_helper_funcs); - - ret = drm_fb_helper_init(adev_to_drm(adev), &rfbdev->helper); - if (ret) { - kfree(rfbdev); - return ret; - } - - /* disable all the possible outputs/crtcs before entering KMS mode */ - if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display) - drm_helper_disable_unused_functions(adev_to_drm(adev)); - - drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel); - return 0; -} - -void amdgpu_fbdev_fini(struct amdgpu_device *adev) -{ - if (!adev->mode_info.rfbdev) - return; - - amdgpu_fbdev_destroy(adev_to_drm(adev), adev->mode_info.rfbdev); - kfree(adev->mode_info.rfbdev); - adev->mode_info.rfbdev = NULL; -} - -void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state) -{ - if (adev->mode_info.rfbdev) - drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper, - state); -} - -int amdgpu_fbdev_total_size(struct amdgpu_device *adev) -{ - struct amdgpu_bo *robj; - int size = 0; - - if (!adev->mode_info.rfbdev) - return 0; - - robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]); - size += amdgpu_bo_size(robj); - return size; -} - -bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) -{ - if (!adev->mode_info.rfbdev) - return false; - if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0])) - return true; - return false; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3b7e86ea7167..45977a72b5dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void) * Cast helper */ static const struct dma_fence_ops amdgpu_fence_ops; +static const struct dma_fence_ops amdgpu_job_fence_ops; static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) { struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); - if (__f->base.ops == &amdgpu_fence_ops) + if (__f->base.ops == &amdgpu_fence_ops || + __f->base.ops == &amdgpu_job_fence_ops) return __f; return NULL; @@ -158,19 +160,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd } seq = ++ring->fence_drv.sync_seq; - if (job != NULL && job->job_run_counter) { + if (job && job->job_run_counter) { /* reinit seq for resubmitted jobs */ fence->seqno = seq; } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, - seq); - } - - if (job != NULL) { - /* mark this fence has a parent job */ - set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags); + if (job) + dma_fence_init(fence, &amdgpu_job_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); + else + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, @@ -546,9 +547,6 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) - drm_sched_stop(&ring->sched, NULL); - /* You can't wait for HW to signal if it's gone */ if (!drm_dev_is_unplugged(adev_to_drm(adev))) r = amdgpu_fence_wait_empty(ring); @@ -608,11 +606,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) { - drm_sched_resubmit_jobs(&ring->sched); - drm_sched_start(&ring->sched, true); - } - /* enable the interrupt */ if (ring->fence_drv.irq_src) amdgpu_irq_get(adev, ring->fence_drv.irq_src, @@ -621,6 +614,25 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) } /** + * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring + * + * @ring: fence of the ring to be cleared + * + */ +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) +{ + int i; + struct dma_fence *old, **ptr; + + for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { + ptr = &ring->fence_drv.fences[i]; + old = rcu_dereference_protected(*ptr, 1); + if (old && old->ops == &amdgpu_job_fence_ops) + RCU_INIT_POINTER(*ptr, NULL); + } +} + +/** * amdgpu_fence_driver_force_completion - force signal latest fence of ring * * @ring: fence of the ring to signal @@ -643,16 +655,14 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence) static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_ring *ring; + return (const char *)to_amdgpu_fence(f)->ring->name; +} - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); +static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } - return (const char *)ring->name; + return (const char *)to_amdgpu_ring(job->base.sched)->name; } /** @@ -665,18 +675,25 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) */ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_ring *ring; + if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + return true; +} - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } +/** + * amdgpu_job_fence_enable_signaling - enable signalling on job fence + * @f: fence + * + * This is the simliar function with amdgpu_fence_enable_signaling above, it + * only handles the job embedded fence. + */ +static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - if (!timer_pending(&ring->fence_drv.fallback_timer)) - amdgpu_fence_schedule_fallback(ring); + if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); return true; } @@ -692,19 +709,23 @@ static void amdgpu_fence_free(struct rcu_head *rcu) { struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - /* free job if fence has a parent job */ - struct amdgpu_job *job; - - job = container_of(f, struct amdgpu_job, hw_fence); - kfree(job); - } else { /* free fence_slab if it's separated fence*/ - struct amdgpu_fence *fence; + kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); +} - fence = to_amdgpu_fence(f); - kmem_cache_free(amdgpu_fence_slab, fence); - } +/** + * amdgpu_job_fence_free - free up the job with embedded fence + * + * @rcu: RCU callback head + * + * Free up the job with embedded fence after the RCU grace period. + */ +static void amdgpu_job_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); + + /* free job if fence has a parent job */ + kfree(container_of(f, struct amdgpu_job, hw_fence)); } /** @@ -720,6 +741,19 @@ static void amdgpu_fence_release(struct dma_fence *f) call_rcu(&f->rcu, amdgpu_fence_free); } +/** + * amdgpu_job_fence_release - callback that job embedded fence can be freed + * + * @f: fence + * + * This is the simliar function with amdgpu_fence_release above, it + * only handles the job embedded fence. + */ +static void amdgpu_job_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_job_fence_free); +} + static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, @@ -727,6 +761,12 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .release = amdgpu_fence_release, }; +static const struct dma_fence_ops amdgpu_job_fence_ops = { + .get_driver_name = amdgpu_fence_get_driver_name, + .get_timeline_name = amdgpu_job_fence_get_timeline_name, + .enable_signaling = amdgpu_job_fence_enable_signaling, + .release = amdgpu_job_fence_release, +}; /* * Fence debugfs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 7709caeb233d..2a786e788627 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -56,6 +56,9 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) return true; else return false; + case CHIP_ALDEBARAN: + /* All Aldebaran SKUs have the FRU */ + return true; default: return false; } @@ -88,13 +91,17 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { - unsigned char buff[34]; + unsigned char buff[AMDGPU_PRODUCT_NAME_LEN+2]; u32 addrptr; int size, len; + int offset = 2; if (!is_fru_eeprom_supported(adev)) return 0; + if (adev->asic_type == CHIP_ALDEBARAN) + offset = 0; + /* If algo exists, it means that the i2c_adapter's initialized */ if (!adev->pm.smu_i2c.algo) { DRM_WARN("Cannot access FRU, EEPROM accessor not initialized"); @@ -131,15 +138,13 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) } len = size; - /* Product name should only be 32 characters. Any more, - * and something could be wrong. Cap it at 32 to be safe - */ - if (len >= sizeof(adev->product_name)) { - DRM_WARN("FRU Product Number is larger than 32 characters. This is likely a mistake"); - len = sizeof(adev->product_name) - 1; + if (len >= AMDGPU_PRODUCT_NAME_LEN) { + DRM_WARN("FRU Product Name is larger than %d characters. This is likely a mistake", + AMDGPU_PRODUCT_NAME_LEN); + len = AMDGPU_PRODUCT_NAME_LEN - 1; } /* Start at 2 due to buff using fields 0 and 1 for the address */ - memcpy(adev->product_name, &buff[2], len); + memcpy(adev->product_name, &buff[offset], len); adev->product_name[len] = '\0'; addrptr += size + 1; @@ -157,7 +162,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake"); len = sizeof(adev->product_number) - 1; } - memcpy(adev->product_number, &buff[2], len); + memcpy(adev->product_number, &buff[offset], len); adev->product_number[len] = '\0'; addrptr += size + 1; @@ -184,7 +189,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake"); len = sizeof(adev->serial) - 1; } - memcpy(adev->serial, &buff[2], len); + memcpy(adev->serial, &buff[offset], len); adev->serial[len] = '\0'; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d3e4203f6217..645950a653a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -114,80 +114,12 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) */ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { - int r; - - if (adev->gart.bo == NULL) { - struct amdgpu_bo_param bp; - - memset(&bp, 0, sizeof(bp)); - bp.size = adev->gart.table_size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &adev->gart.bo); - if (r) { - return r; - } - } - return 0; -} - -/** - * amdgpu_gart_table_vram_pin - pin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Pin the GART page table in vram so it will not be moved - * by the memory manager (pcie r4xx, r5xx+). These asics require the - * gart table to be in video memory. - * Returns 0 for success, error for failure. - */ -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gart.bo, false); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM); - if (r) { - amdgpu_bo_unreserve(adev->gart.bo); - return r; - } - r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr); - if (r) - amdgpu_bo_unpin(adev->gart.bo); - amdgpu_bo_unreserve(adev->gart.bo); - return r; -} - -/** - * amdgpu_gart_table_vram_unpin - unpin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Unpin the GART page table in vram (pcie r4xx, r5xx+). - * These asics require the gart table to be in video memory. - */ -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) -{ - int r; + if (adev->gart.bo != NULL) + return 0; - if (adev->gart.bo == NULL) { - return; - } - r = amdgpu_bo_reserve(adev->gart.bo, true); - if (likely(r == 0)) { - amdgpu_bo_kunmap(adev->gart.bo); - amdgpu_bo_unpin(adev->gart.bo); - amdgpu_bo_unreserve(adev->gart.bo); - adev->gart.ptr = NULL; - } + return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, + NULL, (void *)&adev->gart.ptr); } /** @@ -201,11 +133,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) */ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) { - if (adev->gart.bo == NULL) { - return; - } - amdgpu_bo_unref(&adev->gart.bo); - adev->gart.ptr = NULL; + amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr); } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index a1e63ba4c54a..c0d8f40a5b45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -877,6 +877,32 @@ out: return r; } +static int amdgpu_gem_align_pitch(struct amdgpu_device *adev, + int width, + int cpp, + bool tiled) +{ + int aligned = width; + int pitch_mask = 0; + + switch (cpp) { + case 1: + pitch_mask = 255; + break; + case 2: + pitch_mask = 127; + break; + case 3: + case 4: + pitch_mask = 63; + break; + } + + aligned += pitch_mask; + aligned &= ~pitch_mask; + return aligned * cpp; +} + int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) @@ -885,7 +911,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_gem_object *gobj; uint32_t handle; u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; u32 domain; int r; @@ -897,8 +924,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, if (adev->mman.buffer_funcs_enabled) flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; - args->pitch = amdgpu_align_pitch(adev, args->width, - DIV_ROUND_UP(args->bpp, 8), 0); + args->pitch = amdgpu_gem_align_pitch(adev, args->width, + DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_domain(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 08478fce00f2..2430d6223c2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -350,6 +350,7 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid) * amdgpu_gmc_filter_faults - filter VM faults * * @adev: amdgpu device structure + * @ih: interrupt ring that the fault received from * @addr: address of the VM fault * @pasid: PASID of the process causing the fault * @timestamp: timestamp of the fault @@ -358,7 +359,8 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid) * True if the fault was filtered and should not be processed further. * False if the fault is a new one and needs to be handled. */ -bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, uint64_t addr, uint16_t pasid, uint64_t timestamp) { struct amdgpu_gmc *gmc = &adev->gmc; @@ -366,6 +368,10 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, struct amdgpu_gmc_fault *fault; uint32_t hash; + /* Stale retry fault if timestamp goes backward */ + if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp)) + return true; + /* If we don't have space left in the ring buffer return immediately */ stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) - AMDGPU_GMC_FAULT_TIMEOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e55201134a01..8458cebc6d5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -316,7 +316,8 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); -bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, uint64_t addr, uint16_t pasid, uint64_t timestamp); void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 675a72ef305d..72022df264f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -77,10 +77,8 @@ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man; - man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man)); + return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr)); } static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, @@ -206,30 +204,27 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, /** * amdgpu_gtt_mgr_usage - return usage of GTT domain * - * @man: TTM memory type manager + * @mgr: amdgpu_gtt_mgr pointer * * Return how many bytes are used in the GTT domain */ -uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man) +uint64_t amdgpu_gtt_mgr_usage(struct amdgpu_gtt_mgr *mgr) { - struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); - return atomic64_read(&mgr->used) * PAGE_SIZE; } /** * amdgpu_gtt_mgr_recover - re-init gart * - * @man: TTM memory type manager + * @mgr: amdgpu_gtt_mgr pointer * * Re-init the gart for each known BO in the GTT. */ -int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man) +int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr) { - struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); - struct amdgpu_device *adev; struct amdgpu_gtt_node *node; struct drm_mm_node *mm_node; + struct amdgpu_device *adev; int r = 0; adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 0c7963dfacad..3df146579ad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -164,52 +164,32 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, } } -/* Waiter helper that checks current rptr matches or passes checkpoint wptr */ -static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih, - uint32_t checkpoint_wptr, - uint32_t *prev_rptr) -{ - uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask); - - /* rptr has wrapped. */ - if (cur_rptr < *prev_rptr) - cur_rptr += ih->ptr_mask + 1; - *prev_rptr = cur_rptr; - - /* check ring is empty to workaround missing wptr overflow flag */ - return cur_rptr >= checkpoint_wptr || - (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); -} - /** - * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint + * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint * * @adev: amdgpu_device pointer * @ih: ih ring to process * * Used to ensure ring has processed IVs up to the checkpoint write pointer. */ -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - uint32_t checkpoint_wptr, rptr; + uint32_t checkpoint_wptr; + uint64_t checkpoint_ts; + long timeout = HZ; if (!ih->enabled || adev->shutdown) return -ENODEV; checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); - /* Order wptr with rptr. */ + /* Order wptr with ring data. */ rmb(); - rptr = READ_ONCE(ih->rptr); - - /* wptr has wrapped. */ - if (rptr > checkpoint_wptr) - checkpoint_wptr += ih->ptr_mask + 1; + checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); - return wait_event_interruptible(ih->wait_process, - amdgpu_ih_has_checkpoint_processed(adev, ih, - checkpoint_wptr, &rptr)); + return wait_event_interruptible_timeout(ih->wait_process, + amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) || + ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout); } /** @@ -299,3 +279,18 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, /* wptr/rptr are in bytes! */ ih->rptr += 32; } + +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, + signed int offset) +{ + uint32_t iv_size = 32; + uint32_t ring_index; + uint32_t dw1, dw2; + + rptr += iv_size * offset; + ring_index = (rptr & ih->ptr_mask) >> 2; + + dw1 = le32_to_cpu(ih->ring[ring_index + 1]); + dw2 = le32_to_cpu(ih->ring[ring_index + 2]); + return dw1 | ((u64)(dw2 & 0xffff) << 32); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 0649b59830a5..dd1c2eded6b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -68,20 +68,30 @@ struct amdgpu_ih_ring { /* For waiting on IH processing at checkpoint. */ wait_queue_head_t wait_process; + uint64_t processed_timestamp; }; +/* return true if time stamp t2 is after t1 with 48bit wrap around */ +#define amdgpu_ih_ts_after(t1, t2) \ + (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL) + /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, struct amdgpu_iv_entry *entry); + uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr, + signed int offset); void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); }; #define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih)) #define amdgpu_ih_decode_iv(adev, iv) \ (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv)) +#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \ + (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \ + (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset))) #define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih)) int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, @@ -89,10 +99,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, unsigned int num_dw); -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih); +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih); int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, struct amdgpu_iv_entry *entry); +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, + signed int offset); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c index 5cf142e849bb..a1cbd7c3deb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c @@ -1,4 +1,4 @@ -/** +/* * \file amdgpu_ioc32.c * * 32-bit ioctl compatibility routines for the AMDGPU DRM. @@ -37,12 +37,9 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { unsigned int nr = DRM_IOCTL_NR(cmd); - int ret; if (nr < DRM_COMMAND_BASE) return drm_compat_ioctl(filp, cmd, arg); - ret = amdgpu_drm_ioctl(filp, cmd, arg); - - return ret; + return amdgpu_drm_ioctl(filp, cmd, arg); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index cc2e0c9cfe0a..f5cbc2747ac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -333,7 +333,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) /* Disable vblank IRQs aggressively for power-saving */ - /* XXX: can this be enabled for DC? */ adev_to_drm(adev)->vblank_disable_immediate = true; r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); @@ -391,7 +390,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev) } /** - * amdgpu_irq_fini - shut down interrupt handling + * amdgpu_irq_fini_sw - shut down interrupt handling * * @adev: amdgpu device pointer * @@ -529,6 +528,9 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, /* Send it to amdkfd as well if it isn't already handled */ if (!handled) amdgpu_amdkfd_interrupt(adev, entry.iv_entry); + + if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp)) + ih->processed_timestamp = entry.timestamp; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 651c7abfde03..1ebb91db2274 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -206,6 +206,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) adev->runpm = true; break; } + /* XXX: disable runtime pm if we are the primary adapter + * to avoid displays being re-enabled after DPMS. + * This needs to be sorted out and fixed properly. + */ + if (adev->is_fw_fb) + adev->runpm = false; if (adev->runpm) dev_info(adev->dev, "Using BACO for runtime pm\n"); } @@ -672,13 +678,13 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = amdgpu_vram_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM)); + ui64 = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: - ui64 = amdgpu_vram_mgr_vis_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM)); + ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: - ui64 = amdgpu_gtt_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)); + ui64 = amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GDS_CONFIG: { struct drm_amdgpu_info_gds gds_info; @@ -709,8 +715,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } case AMDGPU_INFO_MEMORY: { struct drm_amdgpu_memory_info mem; - struct ttm_resource_manager *vram_man = - ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); struct ttm_resource_manager *gtt_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); memset(&mem, 0, sizeof(mem)); @@ -719,7 +723,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) atomic64_read(&adev->vram_pin_size) - AMDGPU_VM_RESERVED_VRAM; mem.vram.heap_usage = - amdgpu_vram_mgr_usage(vram_man); + amdgpu_vram_mgr_usage(&adev->mman.vram_mgr); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -729,7 +733,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) atomic64_read(&adev->visible_pin_size), mem.vram.usable_heap_size); mem.cpu_accessible_vram.heap_usage = - amdgpu_vram_mgr_vis_usage(vram_man); + amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); mem.cpu_accessible_vram.max_allocation = mem.cpu_accessible_vram.usable_heap_size * 3 / 4; @@ -738,7 +742,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.gtt.usable_heap_size = mem.gtt.total_heap_size - atomic64_read(&adev->gart_pin_size); mem.gtt.heap_usage = - amdgpu_gtt_mgr_usage(gtt_man); + amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr); mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; return copy_to_user(out, &mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 89fb372ed49c..6043bf6fd414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -232,8 +232,6 @@ struct amdgpu_i2c_chan { struct mutex mutex; }; -struct amdgpu_fbdev; - struct amdgpu_afmt { bool enabled; int offset; @@ -309,13 +307,6 @@ struct amdgpu_framebuffer { uint64_t address; }; -struct amdgpu_fbdev { - struct drm_fb_helper helper; - struct amdgpu_framebuffer rfb; - struct list_head fbdev_list; - struct amdgpu_device *adev; -}; - struct amdgpu_mode_info { struct atom_context *atom_context; struct card_info *atom_card_info; @@ -341,8 +332,6 @@ struct amdgpu_mode_info { struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; - /* pointer to fbdev info structure */ - struct amdgpu_fbdev *rfbdev; /* firmware flags */ u32 firmware_flags; /* pointer to backlight encoder */ @@ -631,15 +620,6 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); -/* fbdev layer */ -int amdgpu_fbdev_init(struct amdgpu_device *adev); -void amdgpu_fbdev_fini(struct amdgpu_device *adev); -void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state); -int amdgpu_fbdev_total_size(struct amdgpu_device *adev); -bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj); - -int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled); - /* amdgpu_display.c */ void amdgpu_display_print_display_setup(struct drm_device *dev); int amdgpu_display_modeset_create_props(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 4fcfc2313b8c..5661b82d84d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/dma-buf.h> +#include <drm/drm_drv.h> #include <drm/amdgpu_drm.h> #include <drm/drm_cache.h> #include "amdgpu.h" @@ -1032,9 +1033,14 @@ int amdgpu_bo_init(struct amdgpu_device *adev) /* On A+A platform, VRAM can be mapped as WB */ if (!adev->gmc.xgmi.connected_to_cpu) { /* reserve PAT memory space to WC for VRAM */ - arch_io_reserve_memtype_wc(adev->gmc.aper_base, + int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); + if (r) { + DRM_ERROR("Unable to set WC memtype for the aperture base\n"); + return r; + } + /* Add an MTRR for the VRAM */ adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base, adev->gmc.aper_size); @@ -1056,7 +1062,18 @@ int amdgpu_bo_init(struct amdgpu_device *adev) */ void amdgpu_bo_fini(struct amdgpu_device *adev) { + int idx; + amdgpu_ttm_fini(adev); + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + + if (!adev->gmc.xgmi.connected_to_cpu) { + arch_phys_wc_del(adev->gmc.vram_mtrr); + arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); + } + drm_dev_exit(idx); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 4eaec446b49d..0bb2466d539a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -69,6 +69,7 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den, /** * amdgpu_pll_get_fb_ref_div - feedback and ref divider calculation * + * @adev: amdgpu_device pointer * @nom: nominator * @den: denominator * @post_div: post divider @@ -106,6 +107,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n /** * amdgpu_pll_compute - compute PLL paramaters * + * @adev: amdgpu_device pointer * @pll: information about the PLL * @freq: requested frequency * @dot_clock_p: resulting pixel clock diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 82e9ecf84352..71ee361d0972 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -233,6 +233,10 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_start)) + return; + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; @@ -268,6 +272,10 @@ static void amdgpu_perf_read(struct perf_event *event) pmu); u64 count, prev; + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_get_count)) + return; + do { prev = local64_read(&hwc->prev_count); @@ -297,6 +305,10 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_stop)) + return; + switch (hwc->config_base) { case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: @@ -326,6 +338,10 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) struct amdgpu_pmu_entry, pmu); + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_start)) + return -EINVAL; + switch (pe->pmu_perf_type) { case AMDGPU_PMU_PERF_TYPE_DF: hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF; @@ -371,6 +387,9 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) struct amdgpu_pmu_entry *pe = container_of(event->pmu, struct amdgpu_pmu_entry, pmu); + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_stop)) + return; amdgpu_perf_stop(event, PERF_EF_UPDATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c index d02c8637f909..786afe4f58f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -59,7 +59,7 @@ static DEVICE_ATTR_RO(mem_info_preempt_used); * @man: TTM memory type manager * @tbo: TTM BO we need this range for * @place: placement flags and restrictions - * @mem: the resulting mem object + * @res: TTM memory object * * Dummy, just count the space used without allocating resources or any limit. */ @@ -85,7 +85,7 @@ static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man, * amdgpu_preempt_mgr_del - free ranges * * @man: TTM memory type manager - * @mem: TTM memory object + * @res: TTM memory object * * Free the allocated GTT again. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c641f84649d6..dee17a0e1187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -518,7 +518,7 @@ static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp) return cmd; } -void release_psp_cmd_buf(struct psp_context *psp) +static void release_psp_cmd_buf(struct psp_context *psp) { mutex_unlock(&psp->mutex); } @@ -2017,12 +2017,16 @@ static int psp_hw_start(struct psp_context *psp) return ret; } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) + goto skip_pin_bo; + ret = psp_tmr_init(psp); if (ret) { DRM_ERROR("PSP tmr init failed!\n"); return ret; } +skip_pin_bo: /* * For ASICs with DF Cstate management centralized * to PMFW, TMR setup should be performed after PMFW @@ -2452,6 +2456,18 @@ skip_memalloc: return ret; } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) { + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp, false, true); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } + } + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 08133de21fdd..8f47c14ecbc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -867,9 +867,9 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, /* feature ctl end */ -void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_err_data *err_data) +static void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_err_data *err_data) { switch (ras_block->sub_block_index) { case AMDGPU_RAS_MCA_BLOCK__MP0: @@ -892,6 +892,38 @@ void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, } } +static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; + + /* + * choosing right query method according to + * whether smu support query error information + */ + ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc)); + if (ret == -EOPNOTSUPP) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, err_data); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address) + adev->umc.ras_funcs->query_ras_error_address(adev, err_data); + } else if (!ret) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_count) + adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_address) + adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data); + } +} + /* query/inject/cure begin */ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) @@ -905,15 +937,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, switch (info->head.block) { case AMDGPU_RAS_BLOCK__UMC: - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, &err_data); - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address) - adev->umc.ras_funcs->query_ras_error_address(adev, &err_data); + amdgpu_ras_get_ecc_info(adev, &err_data); break; case AMDGPU_RAS_BLOCK__SDMA: if (adev->sdma.funcs->query_ras_error_count) { @@ -1137,9 +1161,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, /** * amdgpu_ras_query_error_count -- Get error counts of all IPs - * adev: pointer to AMD GPU device - * ce_count: pointer to an integer to be set to the count of correctible errors. - * ue_count: pointer to an integer to be set to the count of uncorrectible + * @adev: pointer to AMD GPU device + * @ce_count: pointer to an integer to be set to the count of correctible errors. + * @ue_count: pointer to an integer to be set to the count of uncorrectible * errors. * * If set, @ce_count or @ue_count, count and return the corresponding @@ -1568,6 +1592,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) /* Let IP handle its data, maybe we need get the output * from the callback to udpate the error type/count, etc */ + memset(&err_data, 0, sizeof(err_data)); ret = data->cb(obj->adev, &err_data, &entry); /* ue will trigger an interrupt, and in that case * we need do a reset to recovery the whole system. @@ -1723,6 +1748,16 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF) continue; + /* + * this is a workaround for aldebaran, skip send msg to + * smu to get ecc_info table due to smu handle get ecc + * info table failed temporarily. + * should be removed until smu fix handle ecc_info table. + */ + if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) && + (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))) + continue; + amdgpu_ras_query_error_status(adev, &info); } } @@ -1804,8 +1839,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; - status = amdgpu_vram_mgr_query_page_status( - ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), + status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, data->bps[i].retired_page); if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; @@ -1906,8 +1940,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, goto out; } - amdgpu_vram_mgr_reserve_range( - ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), + amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE); @@ -1935,9 +1968,11 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) if (!con || !con->eh_data) return 0; + mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; save_count = data->count - control->ras_num_recs; + mutex_unlock(&con->recovery_lock); /* only new entries are saved */ if (save_count > 0) { if (amdgpu_ras_eeprom_append(control, @@ -2336,7 +2371,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } /* Init poison supported flag, the default value is false */ - if (adev->df.funcs && + if (adev->gmc.xgmi.connected_to_cpu) { + /* enabled by default when GPU is connected to CPU */ + con->poison_supported = true; + } + else if (adev->df.funcs && adev->df.funcs->query_ras_poison_mode && adev->umc.ras_funcs && adev->umc.ras_funcs->query_ras_poison_mode) { @@ -2477,7 +2516,6 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev, amdgpu_ras_sysfs_remove(adev, ras_block); if (ih_info->cb) amdgpu_ras_interrupt_remove_handler(adev, ih_info); - amdgpu_ras_feature_enable(adev, ras_block, 0); } /* do some init work after IP late init as dependence. @@ -2647,7 +2685,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, * and error occurred in DramECC (Extended error code = 0) then only * process the error, else bail out. */ - if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) && + if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) && (XEC(m->status, 0x3f) == 0x0))) return NOTIFY_DONE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e36f4de9fa55..1c708122d492 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -319,6 +319,19 @@ struct ras_common_if { char name[32]; }; +#define MAX_UMC_CHANNEL_NUM 32 + +struct ecc_info_per_ch { + uint16_t ce_count_lo_chip; + uint16_t ce_count_hi_chip; + uint64_t mca_umc_status; + uint64_t mca_umc_addr; +}; + +struct umc_ecc_info { + struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM]; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -358,6 +371,9 @@ struct amdgpu_ras { struct delayed_work ras_counte_delay_work; atomic_t ras_ue_count; atomic_t ras_ce_count; + + /* record umc error info queried from smu */ + struct umc_ecc_info umc_ecc; }; struct ras_fs_data { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4d380e79752c..fae7d185ad0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -53,9 +53,6 @@ enum amdgpu_ring_priority_level { #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) -/* fence flag bit to indicate the face is embedded in job*/ -#define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1) - #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) #define AMDGPU_IB_POOL_SIZE (1024 * 1024) @@ -114,6 +111,7 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 862eb3c1c4c5..f7d8487799b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -252,41 +252,25 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner) { - struct dma_resv_list *flist; + struct dma_resv_iter cursor; struct dma_fence *f; - unsigned i; - int r = 0; + int r; if (resv == NULL) return -EINVAL; - /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - dma_fence_chain_for_each(f, f) { - struct dma_fence_chain *chain = to_dma_fence_chain(f); - - if (amdgpu_sync_test_fence(adev, mode, owner, chain ? - chain->fence : f)) { - r = amdgpu_sync_fence(sync, f); - dma_fence_put(f); - if (r) - return r; - break; - } - } - - flist = dma_resv_shared_list(resv); - if (!flist) - return 0; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); - - if (amdgpu_sync_test_fence(adev, mode, owner, f)) { - r = amdgpu_sync_fence(sync, f); - if (r) - return r; + dma_resv_for_each_fence(&cursor, resv, true, f) { + dma_fence_chain_for_each(f, f) { + struct dma_fence_chain *chain = to_dma_fence_chain(f); + + if (amdgpu_sync_test_fence(adev, mode, owner, chain ? + chain->fence : f)) { + r = amdgpu_sync_fence(sync, f); + dma_fence_put(f); + if (r) + return r; + break; + } } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c875f1cdd2af..5c3f24069f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include <linux/sizes.h> #include <linux/module.h> +#include <drm/drm_drv.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> @@ -116,17 +117,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, abo = ttm_to_amdgpu_bo(bo); if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { - struct dma_fence *fence; - struct dma_resv *resv = &bo->base._resv; - - rcu_read_lock(); - fence = rcu_dereference(resv->fence_excl); - if (fence && !fence->ops->signaled) - dma_fence_enable_sw_signaling(fence); - placement->num_placement = 0; placement->num_busy_placement = 0; - rcu_read_unlock(); return; } @@ -922,11 +914,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, ttm->num_pages, bo_mem, ttm); } - if (bo_mem->mem_type == AMDGPU_PL_GDS || - bo_mem->mem_type == AMDGPU_PL_GWS || - bo_mem->mem_type == AMDGPU_PL_OA) - return -EINVAL; - if (bo_mem->mem_type != TTM_PL_TT || !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { gtt->offset = AMDGPU_BO_INVALID_OFFSET; @@ -1353,10 +1340,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { unsigned long num_pages = bo->resource->num_pages; + struct dma_resv_iter resv_cursor; struct amdgpu_res_cursor cursor; - struct dma_resv_list *flist; struct dma_fence *f; - int i; /* Swapout? */ if (bo->resource->mem_type == TTM_PL_SYSTEM) @@ -1370,14 +1356,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = dma_resv_shared_list(bo->base.resv); - if (flist) { - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(bo->base.resv)); - if (amdkfd_fence_check_mm(f, current->mm)) - return false; - } + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + if (amdkfd_fence_check_mm(f, current->mm)) + return false; } switch (bo->resource->mem_type) { @@ -1824,6 +1805,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) */ void amdgpu_ttm_fini(struct amdgpu_device *adev) { + int idx; if (!adev->mman.initialized) return; @@ -1838,6 +1820,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + + if (adev->mman.aper_base_kaddr) + iounmap(adev->mman.aper_base_kaddr); + adev->mman.aper_base_kaddr = NULL; + + drm_dev_exit(idx); + } + amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); amdgpu_preempt_mgr_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 7346ecff4438..f8f48be16d80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -114,8 +114,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev); void amdgpu_vram_mgr_fini(struct amdgpu_device *adev); bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem); -uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man); -int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man); +uint64_t amdgpu_gtt_mgr_usage(struct amdgpu_gtt_mgr *mgr); +int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr); uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man); @@ -129,11 +129,11 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, void amdgpu_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, struct sg_table *sgt); -uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man); -uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man); -int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, +uint64_t amdgpu_vram_mgr_usage(struct amdgpu_vram_mgr *mgr); +uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr); +int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, uint64_t start, uint64_t size); -int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, +int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); int amdgpu_ttm_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index a90029ee9733..46264a4002f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -23,6 +23,120 @@ #include "amdgpu_ras.h" +static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry, + bool reset) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc)); + if (ret == -EOPNOTSUPP) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + dev_warn(adev->dev, "Failed to alloc memory for " + "umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); + } + } else if (!ret) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_count) + adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + dev_warn(adev->dev, "Failed to alloc memory for " + "umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status); + } + } + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) { + dev_info(adev->dev, "%ld uncorrectable hardware errors " + "detected in UMC block\n", + err_data->ue_count); + + if ((amdgpu_bad_page_threshold != 0) && + err_data->err_addr_cnt) { + amdgpu_ras_add_bad_pages(adev, err_data->err_addr, + err_data->err_addr_cnt); + amdgpu_ras_save_bad_pages(adev); + + if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) + adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); + } + + if (reset) + amdgpu_ras_reset_gpu(adev); + } + + kfree(err_data->err_addr); + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + void *ras_error_status, + bool reset) +{ + int ret; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + + ret = + amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); + + if (ret == AMDGPU_RAS_SUCCESS && obj) { + obj->err_data.ue_count += err_data->ue_count; + obj->err_data.ce_count += err_data->ce_count; + } + + return ret; +} + +static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry) +{ + return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); +} + int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) { int r; @@ -88,61 +202,6 @@ void amdgpu_umc_ras_fini(struct amdgpu_device *adev) } } -int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, - void *ras_error_status, - struct amdgpu_iv_entry *entry) -{ - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); - - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address && - adev->umc.max_ras_err_cnt_per_query) { - err_data->err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); - - /* still call query_ras_error_address to clear error status - * even NOMEM error is encountered - */ - if(!err_data->err_addr) - dev_warn(adev->dev, "Failed to alloc memory for " - "umc error address record!\n"); - - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); - } - - /* only uncorrectable error needs gpu reset */ - if (err_data->ue_count) { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in UMC block\n", - err_data->ue_count); - - if ((amdgpu_bad_page_threshold != 0) && - err_data->err_addr_cnt) { - amdgpu_ras_add_bad_pages(adev, err_data->err_addr, - err_data->err_addr_cnt); - amdgpu_ras_save_bad_pages(adev); - - if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) - adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); - } - - amdgpu_ras_reset_gpu(adev); - } - - kfree(err_data->err_addr); - return AMDGPU_RAS_SUCCESS; -} - int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 1f5fe2315236..b72194e8bfe5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -49,6 +49,10 @@ struct amdgpu_umc_ras_funcs { void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); + void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); }; struct amdgpu_umc_funcs { @@ -74,9 +78,9 @@ struct amdgpu_umc { int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); void amdgpu_umc_ras_fini(struct amdgpu_device *adev); -int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, void *ras_error_status, - struct amdgpu_iv_entry *entry); + bool reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 688bef1649b5..344f711ad144 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -434,7 +434,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * * @ring: ring we should submit the msg to * @handle: VCE session handle to use - * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 585961c2f5f2..9a19a6a57b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -286,20 +286,13 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; + int vcn_config = adev->vcn.vcn_config[vcn_instance]; - int major; - int minor; - int revision; - - /* if cannot find IP data, then this VCN does not exist */ - if (amdgpu_discovery_get_vcn_version(adev, vcn_instance, &major, &minor, &revision) != 0) - return true; - - if ((type == VCN_ENCODE_RING) && (revision & VCN_BLOCK_ENCODE_DISABLE_MASK)) { + if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) { ret = true; - } else if ((type == VCN_DECODE_RING) && (revision & VCN_BLOCK_DECODE_DISABLE_MASK)) { + } else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) { ret = true; - } else if ((type == VCN_UNIFIED_RING) && (revision & VCN_BLOCK_QUEUE_DISABLE_MASK)) { + } else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) { ret = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index bfa27ea94804..5d3728b027d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -235,6 +235,7 @@ struct amdgpu_vcn { uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES]; struct amdgpu_vcn_reg internal; struct mutex vcn_pg_lock; struct mutex vcn1_jpeg1_workaround; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 04cf9b207e62..894444ab0032 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -283,17 +283,15 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL); if (!*data) - return -ENOMEM; + goto data_failure; bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); - bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); + if (!bps) + goto bps_failure; - if (!bps || !bps_bo) { - kfree(bps); - kfree(bps_bo); - kfree(*data); - return -ENOMEM; - } + bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); + if (!bps_bo) + goto bps_bo_failure; (*data)->bps = bps; (*data)->bps_bo = bps_bo; @@ -303,6 +301,13 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) virt->ras_init_done = true; return 0; + +bps_bo_failure: + kfree(bps); +bps_failure: + kfree(*data); +data_failure: + return -ENOMEM; } static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) @@ -548,7 +553,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) { struct amd_sriov_msg_vf2pf_info *vf2pf_info; - struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf; @@ -571,8 +575,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->driver_cert = 0; vf2pf_info->os_info.all = 0; - vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(vram_man) >> 20; - vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(vram_man) >> 20; + vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr) >> 20; + vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20; vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20; vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20; @@ -617,19 +621,37 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev) void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) { - uint64_t bp_block_offset = 0; - uint32_t bp_block_size = 0; - struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; - adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; - if (adev->mman.fw_vram_usage_va != NULL) { + if (adev->bios != NULL) { adev->virt.vf2pf_update_interval_ms = 2000; adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) + (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + + amdgpu_virt_read_pf2vf_data(adev); + } + + if (adev->virt.vf2pf_update_interval_ms != 0) { + INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item); + schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms)); + } +} + + +void amdgpu_virt_exchange_data(struct amdgpu_device *adev) +{ + uint64_t bp_block_offset = 0; + uint32_t bp_block_size = 0; + struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; + + if (adev->mman.fw_vram_usage_va != NULL) { + + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) @@ -658,16 +680,10 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); amdgpu_virt_read_pf2vf_data(adev); - - return; - } - - if (adev->virt.vf2pf_update_interval_ms != 0) { - INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item); - schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms); } } + void amdgpu_detect_virtualization(struct amdgpu_device *adev) { uint32_t reg; @@ -710,6 +726,10 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) vi_set_virt_ops(adev); break; case CHIP_VEGA10: + soc15_set_virt_ops(adev); + /* send a dummy GPU_INIT_DATA request to host on vega10 */ + amdgpu_virt_request_init_data(adev); + break; case CHIP_VEGA20: case CHIP_ARCTURUS: case CHIP_ALDEBARAN: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 8d4c20bb71c5..9adfb8d63280 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -308,6 +308,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); +void amdgpu_virt_exchange_data(struct amdgpu_device *adev); void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); void amdgpu_detect_virtualization(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index ac9a8cd21c4b..d99c8779b51e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -16,6 +16,8 @@ #include "ivsrcid/ivsrcid_vislands30.h" #include "amdgpu_vkms.h" #include "amdgpu_display.h" +#include "atom.h" +#include "amdgpu_irq.h" /** * DOC: amdgpu_vkms @@ -41,16 +43,16 @@ static const u32 amdgpu_vkms_formats[] = { static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer) { - struct amdgpu_vkms_output *output = container_of(timer, - struct amdgpu_vkms_output, - vblank_hrtimer); - struct drm_crtc *crtc = &output->crtc; + struct amdgpu_crtc *amdgpu_crtc = container_of(timer, struct amdgpu_crtc, vblank_timer); + struct drm_crtc *crtc = &amdgpu_crtc->base; + struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc); u64 ret_overrun; bool ret; - ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer, + ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer, output->period_ns); - WARN_ON(ret_overrun != 1); + if (ret_overrun != 1) + DRM_WARN("%s: vblank timer overrun\n", __func__); ret = drm_crtc_handle_vblank(crtc); if (!ret) @@ -65,22 +67,21 @@ static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc) unsigned int pipe = drm_crtc_index(crtc); struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); drm_calc_timestamping_constants(crtc, &crtc->mode); - hrtimer_init(&out->vblank_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - out->vblank_hrtimer.function = &amdgpu_vkms_vblank_simulate; out->period_ns = ktime_set(0, vblank->framedur_ns); - hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL); + hrtimer_start(&amdgpu_crtc->vblank_timer, out->period_ns, HRTIMER_MODE_REL); return 0; } static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc) { - struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - hrtimer_cancel(&out->vblank_hrtimer); + hrtimer_cancel(&amdgpu_crtc->vblank_timer); } static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, @@ -92,13 +93,14 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, unsigned int pipe = crtc->index; struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc); struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); if (!READ_ONCE(vblank->enabled)) { *vblank_time = ktime_get(); return true; } - *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires); + *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires); if (WARN_ON(*vblank_time == vblank->time)) return true; @@ -142,15 +144,16 @@ static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc, static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { + unsigned long flags; if (crtc->state->event) { - spin_lock(&crtc->dev->event_lock); + spin_lock_irqsave(&crtc->dev->event_lock, flags); if (drm_crtc_vblank_get(crtc) != 0) drm_crtc_send_vblank_event(crtc, crtc->state->event); else drm_crtc_arm_vblank_event(crtc, crtc->state->event); - spin_unlock(&crtc->dev->event_lock); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); crtc->state->event = NULL; } @@ -165,6 +168,8 @@ static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = { static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor) { + struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int ret; ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor, @@ -176,6 +181,17 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs); + amdgpu_crtc->crtc_id = drm_crtc_index(crtc); + adev->mode_info.crtcs[drm_crtc_index(crtc)] = amdgpu_crtc; + + amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; + amdgpu_crtc->encoder = NULL; + amdgpu_crtc->connector = NULL; + amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; + + hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + amdgpu_crtc->vblank_timer.function = &amdgpu_vkms_vblank_simulate; + return ret; } @@ -375,6 +391,7 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, int index) { struct drm_plane *plane; + uint64_t modifiers[] = {DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID}; int ret; plane = kzalloc(sizeof(*plane), GFP_KERNEL); @@ -385,7 +402,7 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, &amdgpu_vkms_plane_funcs, amdgpu_vkms_formats, ARRAY_SIZE(amdgpu_vkms_formats), - NULL, type, NULL); + modifiers, type, NULL); if (ret) { kfree(plane); return ERR_PTR(ret); @@ -396,12 +413,12 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, return plane; } -int amdgpu_vkms_output_init(struct drm_device *dev, - struct amdgpu_vkms_output *output, int index) +static int amdgpu_vkms_output_init(struct drm_device *dev, struct + amdgpu_vkms_output *output, int index) { struct drm_connector *connector = &output->connector; struct drm_encoder *encoder = &output->encoder; - struct drm_crtc *crtc = &output->crtc; + struct drm_crtc *crtc = &output->crtc.base; struct drm_plane *primary, *cursor = NULL; int ret; @@ -465,6 +482,11 @@ static int amdgpu_vkms_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, + sizeof(struct amdgpu_vkms_output), GFP_KERNEL); + if (!adev->amdgpu_vkms_output) + return -ENOMEM; + adev_to_drm(adev)->max_vblank_count = 0; adev_to_drm(adev)->mode_config.funcs = &amdgpu_vkms_mode_funcs; @@ -481,10 +503,6 @@ static int amdgpu_vkms_sw_init(void *handle) if (r) return r; - adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, sizeof(struct amdgpu_vkms_output), GFP_KERNEL); - if (!adev->amdgpu_vkms_output) - return -ENOMEM; - /* allocate crtcs, encoders, connectors */ for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i); @@ -504,15 +522,16 @@ static int amdgpu_vkms_sw_fini(void *handle) int i = 0; for (i = 0; i < adev->mode_info.num_crtc; i++) - if (adev->amdgpu_vkms_output[i].vblank_hrtimer.function) - hrtimer_cancel(&adev->amdgpu_vkms_output[i].vblank_hrtimer); - - kfree(adev->mode_info.bios_hardcoded_edid); - kfree(adev->amdgpu_vkms_output); + if (adev->mode_info.crtcs[i]) + hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); drm_kms_helper_poll_fini(adev_to_drm(adev)); + drm_mode_config_cleanup(adev_to_drm(adev)); adev->mode_info.mode_config_initialized = false; + + kfree(adev->mode_info.bios_hardcoded_edid); + kfree(adev->amdgpu_vkms_output); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h index 97f1b79c0724..4f8722ff37c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h @@ -10,15 +10,14 @@ #define YRES_MAX 16384 #define drm_crtc_to_amdgpu_vkms_output(target) \ - container_of(target, struct amdgpu_vkms_output, crtc) + container_of(target, struct amdgpu_vkms_output, crtc.base) extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block; struct amdgpu_vkms_output { - struct drm_crtc crtc; + struct amdgpu_crtc crtc; struct drm_encoder encoder; struct drm_connector connector; - struct hrtimer vblank_hrtimer; ktime_t period_ns; struct drm_pending_vblank_event *event; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0e7dc23f78e7..b37fc7d7d2c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -53,7 +53,7 @@ * can be mapped as snooped (cached system pages) or unsnooped * (uncached system pages). * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When execting a command buffer, + * associated with each VMID. When executing a command buffer, * the kernel tells the the ring what VMID to use for that command * buffer. VMIDs are allocated dynamically as commands are submitted. * The userspace drivers maintain their own address space and the kernel @@ -2102,30 +2102,14 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct dma_resv *resv = vm->root.bo->tbo.base.resv; - struct dma_fence *excl, **shared; - unsigned i, shared_count; - int r; + struct dma_resv_iter cursor; + struct dma_fence *fence; - r = dma_resv_get_fences(resv, &excl, &shared_count, &shared); - if (r) { - /* Not enough memory to grab the fence list, as last resort - * block for all the fences to complete. - */ - dma_resv_wait_timeout(resv, true, false, - MAX_SCHEDULE_TIMEOUT); - return; - } - - /* Add a callback for each fence in the reservation object */ - amdgpu_vm_prt_get(adev); - amdgpu_vm_add_prt_cb(adev, excl); - - for (i = 0; i < shared_count; ++i) { + dma_resv_for_each_fence(&cursor, resv, true, fence) { + /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); - amdgpu_vm_add_prt_cb(adev, shared[i]); + amdgpu_vm_add_prt_cb(adev, fence); } - - kfree(shared); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 7b2b0980ec41..7a2b487db57c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -96,10 +96,9 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man; - man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); - return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man)); + return sysfs_emit(buf, "%llu\n", + amdgpu_vram_mgr_usage(&adev->mman.vram_mgr)); } /** @@ -116,10 +115,9 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man; - man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); - return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man)); + return sysfs_emit(buf, "%llu\n", + amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr)); } /** @@ -263,16 +261,15 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) /** * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM * - * @man: TTM memory type manager + * @mgr: amdgpu_vram_mgr pointer * @start: start address of the range in VRAM * @size: size of the range * - * Reserve memory from start addess with the specified size in VRAM + * Reserve memory from start address with the specified size in VRAM */ -int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, +int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, uint64_t start, uint64_t size) { - struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_vram_reservation *rsv; rsv = kzalloc(sizeof(*rsv), GFP_KERNEL); @@ -285,7 +282,7 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, spin_lock(&mgr->lock); list_add_tail(&mgr->reservations_pending, &rsv->node); - amdgpu_vram_mgr_do_reserve(man); + amdgpu_vram_mgr_do_reserve(&mgr->manager); spin_unlock(&mgr->lock); return 0; @@ -294,7 +291,7 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, /** * amdgpu_vram_mgr_query_page_status - query the reservation status * - * @man: TTM memory type manager + * @mgr: amdgpu_vram_mgr pointer * @start: start address of a page in VRAM * * Returns: @@ -302,10 +299,9 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, * 0: the page has been reserved * -ENOENT: the input page is not a reservation */ -int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, +int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start) { - struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_vram_reservation *rsv; int ret; @@ -632,28 +628,24 @@ void amdgpu_vram_mgr_free_sgt(struct device *dev, /** * amdgpu_vram_mgr_usage - how many bytes are used in this domain * - * @man: TTM memory type manager + * @mgr: amdgpu_vram_mgr pointer * * Returns how many bytes are used in this domain. */ -uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man) +uint64_t amdgpu_vram_mgr_usage(struct amdgpu_vram_mgr *mgr) { - struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - return atomic64_read(&mgr->usage); } /** * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part * - * @man: TTM memory type manager + * @mgr: amdgpu_vram_mgr pointer * * Returns how many bytes are used in the visible part of VRAM */ -uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man) +uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr) { - struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - return atomic64_read(&mgr->vis_usage); } @@ -675,8 +667,8 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, spin_unlock(&mgr->lock); drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", - man->size, amdgpu_vram_mgr_usage(man) >> 20, - amdgpu_vram_mgr_vis_usage(man) >> 20); + man->size, amdgpu_vram_mgr_usage(mgr) >> 20, + amdgpu_vram_mgr_vis_usage(mgr) >> 20); } static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 567df2db23ac..e8b8f28c2f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -208,6 +208,7 @@ static struct attribute *amdgpu_xgmi_hive_attrs[] = { &amdgpu_xgmi_hive_id, NULL }; +ATTRIBUTE_GROUPS(amdgpu_xgmi_hive); static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj, struct attribute *attr, char *buf) @@ -237,7 +238,7 @@ static const struct sysfs_ops amdgpu_xgmi_hive_ops = { struct kobj_type amdgpu_xgmi_hive_type = { .release = amdgpu_xgmi_hive_release, .sysfs_ops = &amdgpu_xgmi_hive_ops, - .default_attrs = amdgpu_xgmi_hive_attrs, + .default_groups = amdgpu_xgmi_hive_groups, }; static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, @@ -265,6 +266,11 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); + if ((!adev->df.funcs) || + (!adev->df.funcs->get_fica) || + (!adev->df.funcs->set_fica)) + return -EINVAL; + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); if (fica_out != 0x1f) pr_err("xGMI error counters not enabled!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 6134ed964027..a92d86e12718 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -469,7 +469,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) if (amdgpu_connector->use_digital && (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)) return ATOM_ENCODER_MODE_HDMI; - else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + else if (connector->display_info.is_hdmi && (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else if (amdgpu_connector->use_digital) @@ -488,7 +488,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) if (amdgpu_audio != 0) { if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE) return ATOM_ENCODER_MODE_HDMI; - else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + else if (connector->display_info.is_hdmi && (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else @@ -506,7 +506,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) } else if (amdgpu_audio != 0) { if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE) return ATOM_ENCODER_MODE_HDMI; - else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + else if (connector->display_info.is_hdmi && (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index b200b9e722d9..8318ee8339f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2092,22 +2092,18 @@ static int dce_v8_0_pick_dig_encoder(struct drm_encoder *encoder) return 1; else return 0; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: if (dig->linkb) return 3; else return 2; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: if (dig->linkb) return 5; else return 4; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: return 6; - break; default: DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b305fd39874f..9189fb85a4dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -63,6 +63,13 @@ #define mmGCEA_PROBE_MAP 0x070c #define mmGCEA_PROBE_MAP_BASE_IDX 0 +#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28) +#define GFX9_RLCG_GC_WRITE (0x0 << 28) +#define GFX9_RLCG_GC_READ (0x1 << 28) +#define GFX9_RLCG_VFGATE_DISABLED 0x4000000 +#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x2000000 +#define GFX9_RLCG_NOT_IN_RANGE 0x1000000 + MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); MODULE_FIRMWARE("amdgpu/vega10_me.bin"); @@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, }; -static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag) +static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag) { static void *scratch_reg0; static void *scratch_reg1; @@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 f static void *spare_int; static uint32_t grbm_cntl; static uint32_t grbm_idx; + uint32_t i = 0; + uint32_t retries = 50000; + u32 ret = 0; + u32 tmp; scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; - scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; - scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4; + scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4; + scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4; spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; - if (amdgpu_sriov_runtime(adev)) { - pr_err("shouldn't call rlcg write register during runtime\n"); - return; - } - if (offset == grbm_cntl || offset == grbm_idx) { if (offset == grbm_cntl) writel(v, scratch_reg2); @@ -771,41 +777,95 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 f writel(v, ((void __iomem *)adev->rmmio) + (offset * 4)); } else { - uint32_t i = 0; - uint32_t retries = 50000; - + /* + * SCRATCH_REG0 = read/write value + * SCRATCH_REG1[30:28] = command + * SCRATCH_REG1[19:0] = address in dword + * SCRATCH_REG1[26:24] = Error reporting + */ writel(v, scratch_reg0); - writel(offset | 0x80000000, scratch_reg1); + writel(offset | flag, scratch_reg1); writel(1, spare_int); - for (i = 0; i < retries; i++) { - u32 tmp; + for (i = 0; i < retries; i++) { tmp = readl(scratch_reg1); - if (!(tmp & 0x80000000)) + if (!(tmp & flag)) break; udelay(10); } - if (i >= retries) - pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); + + if (i >= retries) { + if (amdgpu_sriov_reg_indirect_gc(adev)) { + if (tmp & GFX9_RLCG_VFGATE_DISABLED) + pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset); + else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE) + pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset); + else if (tmp & GFX9_RLCG_NOT_IN_RANGE) + pr_err("The register is not in range, program reg:0x%05x failed!\n", offset); + else + pr_err("Unknown error type, program reg:0x%05x failed!\n", offset); + } else + pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset); + } } + ret = readl(scratch_reg0); + + return ret; +} + +static bool gfx_v9_0_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, + int write, u32 *rlcg_flag) +{ + + switch (hwip) { + case GC_HWIP: + if (amdgpu_sriov_reg_indirect_gc(adev)) { + *rlcg_flag = write ? GFX9_RLCG_GC_WRITE : GFX9_RLCG_GC_READ; + + return true; + /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */ + } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) { + *rlcg_flag = GFX9_RLCG_GC_WRITE_OLD; + return true; + } + + break; + default: + return false; + } + + return false; +} + +static u32 gfx_v9_0_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) +{ + u32 rlcg_flag; + + if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag)) + return gfx_v9_0_rlcg_rw(adev, offset, 0, rlcg_flag); + + if (acc_flags & AMDGPU_REGS_NO_KIQ) + return RREG32_NO_KIQ(offset); + else + return RREG32(offset); } static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset, - u32 v, u32 acc_flags, u32 hwip) + u32 value, u32 acc_flags, u32 hwip) { - if ((acc_flags & AMDGPU_REGS_RLC) && - amdgpu_sriov_fullaccess(adev)) { - gfx_v9_0_rlcg_w(adev, offset, v, acc_flags); + u32 rlcg_flag; + if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) { + gfx_v9_0_rlcg_rw(adev, offset, value, rlcg_flag); return; } if (acc_flags & AMDGPU_REGS_NO_KIQ) - WREG32_NO_KIQ(offset, v); + WREG32_NO_KIQ(offset, value); else - WREG32(offset, v); + WREG32(offset, value); } #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 @@ -3070,8 +3130,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } @@ -5135,7 +5195,7 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); else - data = RREG32(reg); + data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; @@ -5191,6 +5251,7 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .start = gfx_v9_0_rlc_start, .update_spm_vmid = gfx_v9_0_update_spm_vmid, .sriov_wreg = gfx_v9_0_sriov_wreg, + .sriov_rreg = gfx_v9_0_sriov_rreg, .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; @@ -5796,16 +5857,16 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 480e41847d7c..ec4d5e15b766 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -162,7 +162,6 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 14c1c1a297dd..6e0ace2fbfab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -196,7 +196,6 @@ static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index e80d1dc43079..b4eddf6e98a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -197,7 +197,6 @@ static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 3ec5ff5a6dbe..38bb42727715 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -107,7 +107,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Process it onyl if it's the first fault for this address */ if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, entry->timestamp)) return 1; @@ -914,12 +914,6 @@ static int gmc_v10_0_sw_init(void *handle) return r; } - if (adev->gmc.xgmi.supported) { - r = adev->gfxhub.funcs->get_xgmi_info(adev); - if (r) - return r; - } - r = gmc_v10_0_mc_init(adev); if (r) return r; @@ -992,10 +986,14 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_gart_table_vram_pin(adev); + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) + goto skip_pin_bo; + + r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); if (r) return r; +skip_pin_bo: r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; @@ -1062,7 +1060,6 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) { adev->gfxhub.funcs->gart_disable(adev); adev->mmhub.funcs->gart_disable(adev); - amdgpu_gart_table_vram_unpin(adev); } static int gmc_v10_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 0fe714f54cca..cd6c38e083d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -476,7 +476,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } - r = amdgpu_gart_table_vram_pin(adev); + r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); if (r) return r; @@ -608,7 +608,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev) WREG32(mmVM_L2_CNTL3, VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | (0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); - amdgpu_gart_table_vram_unpin(adev); } static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 0a50fdaced7e..ab8adbff9e2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -620,7 +620,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } - r = amdgpu_gart_table_vram_pin(adev); + r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); if (r) return r; @@ -758,7 +758,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); WREG32(mmVM_L2_CNTL, tmp); WREG32(mmVM_L2_CNTL2, 0); - amdgpu_gart_table_vram_unpin(adev); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 492ebed2915b..054733838292 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -515,10 +515,10 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) static int gmc_v8_0_mc_init(struct amdgpu_device *adev) { int r; + u32 tmp; adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) { - u32 tmp; int chansize, numchan; /* Get VRAM informations */ @@ -562,8 +562,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize; } /* size in MB on si */ - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + tmp = RREG32(mmCONFIG_MEMSIZE); + /* some boards may have garbage in the upper 16 bits */ + if (tmp & 0xffff0000) { + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp); + if (tmp & 0xffff) + tmp &= 0xffff; + } + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); @@ -837,7 +844,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } - r = amdgpu_gart_table_vram_pin(adev); + r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); if (r) return r; @@ -992,7 +999,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); WREG32(mmVM_L2_CNTL, tmp); WREG32(mmVM_L2_CNTL2, 0); - amdgpu_gart_table_vram_unpin(adev); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cb82404df534..88c1eb9ad068 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -72,6 +72,9 @@ #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2 +#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea +#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2 + static const char *gfxhub_client_ids[] = { "CB", @@ -478,9 +481,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; - tmp = RREG32(reg); + + if (j == AMDGPU_GFXHUB_0) + tmp = RREG32_SOC15_IP(GC, reg); + else + tmp = RREG32_SOC15_IP(MMHUB, reg); + tmp &= ~bits; - WREG32(reg, tmp); + + if (j == AMDGPU_GFXHUB_0) + WREG32_SOC15_IP(GC, reg, tmp); + else + WREG32_SOC15_IP(MMHUB, reg, tmp); } } break; @@ -489,9 +501,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; - tmp = RREG32(reg); + + if (j == AMDGPU_GFXHUB_0) + tmp = RREG32_SOC15_IP(GC, reg); + else + tmp = RREG32_SOC15_IP(MMHUB, reg); + tmp |= bits; - WREG32(reg, tmp); + + if (j == AMDGPU_GFXHUB_0) + WREG32_SOC15_IP(GC, reg, tmp); + else + WREG32_SOC15_IP(MMHUB, reg, tmp); } } break; @@ -523,7 +544,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Process it onyl if it's the first fault for this address */ if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, entry->timestamp)) return 1; @@ -788,9 +809,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { - /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng); + /* a read return value of 1 means semaphore acquire */ + if (vmhub == AMDGPU_GFXHUB_0) + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); + else + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng); + if (tmp & 0x1) break; udelay(1); @@ -801,8 +825,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - WREG32_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng, inv_req); + if (vmhub == AMDGPU_GFXHUB_0) + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + else + WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to @@ -815,8 +841,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + - hub->eng_distance * eng); + if (vmhub == AMDGPU_GFXHUB_0) + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); + else + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng); + if (tmp & (1 << vmid)) break; udelay(1); @@ -827,13 +856,16 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } while (inv_req); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (use_semaphore) + if (use_semaphore) { /* * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, 0); + if (vmhub == AMDGPU_GFXHUB_0) + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + else + WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + } spin_unlock(&adev->gmc.invalidate_lock); @@ -1105,6 +1137,8 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); unsigned size; + /* TODO move to DC so GMC doesn't need to hard-code DCN registers */ + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = AMDGPU_VBIOS_VGA_ALLOCATION; } else { @@ -1113,7 +1147,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) switch (adev->ip_versions[DCE_HWIP][0]) { case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): - case IP_VERSION(2, 1, 0): viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); size = (REG_GET_FIELD(viewport, HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * @@ -1121,6 +1154,14 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * 4); break; + case IP_VERSION(2, 1, 0): + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2); + size = (REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * + 4); + break; default: viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * @@ -1294,7 +1335,8 @@ static int gmc_v9_0_late_init(void *handle) if (!amdgpu_sriov_vf(adev) && (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) { if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) { - if (adev->df.funcs->enable_ecc_force_par_wr_rmw) + if (adev->df.funcs && + adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } } @@ -1505,9 +1547,11 @@ static int gmc_v9_0_sw_init(void *handle) chansize = 64; else chansize = 128; - - numchan = adev->df.funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; + if (adev->df.funcs && + adev->df.funcs->get_hbm_channel_number) { + numchan = adev->df.funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } } adev->gmc.vram_type = vram_type; @@ -1596,12 +1640,6 @@ static int gmc_v9_0_sw_init(void *handle) } adev->need_swiotlb = drm_need_swiotlb(44); - if (adev->gmc.xgmi.supported) { - r = adev->gfxhub.funcs->get_xgmi_info(adev); - if (r) - return r; - } - r = gmc_v9_0_mc_init(adev); if (r) return r; @@ -1714,10 +1752,14 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_gart_table_vram_pin(adev); + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) + goto skip_pin_bo; + + r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); if (r) return r; +skip_pin_bo: r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; @@ -1742,7 +1784,7 @@ static int gmc_v9_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool value; - int r, i; + int i; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); @@ -1777,9 +1819,7 @@ static int gmc_v9_0_hw_init(void *handle) if (adev->umc.funcs && adev->umc.funcs->init_registers) adev->umc.funcs->init_registers(adev); - r = gmc_v9_0_gart_enable(adev); - - return r; + return gmc_v9_0_gart_enable(adev); } /** @@ -1793,7 +1833,6 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) { adev->gfxhub.funcs->gart_disable(adev); adev->mmhub.funcs->gart_disable(adev); - amdgpu_gart_table_vram_unpin(adev); } static int gmc_v9_0_hw_fini(void *handle) @@ -1808,6 +1847,14 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + /* + * Pair the operations did in gmc_v9_0_hw_init and thus maintain + * a correct cached state for GMC. Otherwise, the "gate" again + * operation on S3 resuming will fail due to wrong cached state. + */ + if (adev->mmhub.funcs->update_power_gating) + adev->mmhub.funcs->update_power_gating(adev, false); + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index a99953833820..1da2ec692057 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -145,7 +145,6 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); @@ -302,10 +301,10 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return; - if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); - - } + if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) + amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GMC, + enable); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index f80a14a1b82d..f5f7181f9af5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -165,7 +165,6 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 25f8e93e5ec3..3718ff610ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -267,7 +267,6 @@ static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index a11d60ec6321..9e16da28505a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -194,7 +194,6 @@ static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c4ef822bbe8c..ff49eeaf7882 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -190,8 +190,6 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, 0); - tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 23b066bcffb2..56da5ab82987 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -180,6 +180,11 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2)); } + } else if (req == IDH_REQ_GPU_INIT_DATA){ + /* Dummy REQ_GPU_INIT_DATA handling */ + r = xgpu_ai_poll_msg(adev, IDH_REQ_GPU_INIT_DATA_READY); + /* version set to 0 since dummy */ + adev->virt.req_init_data_ver = 0; } return 0; @@ -252,11 +257,12 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_write_trylock(&adev->reset_sem)) + if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) return; + down_write(&adev->reset_sem); + amdgpu_virt_fini_data_exchange(adev); - atomic_set(&adev->in_gpu_reset, 1); xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); @@ -380,10 +386,16 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } +static int xgpu_ai_request_init_data(struct amdgpu_device *adev) +{ + return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA); +} + const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, .reset_gpu = xgpu_ai_request_reset, .wait_reset = NULL, .trans_msg = xgpu_ai_mailbox_trans_msg, + .req_init_data = xgpu_ai_request_init_data, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index bd3b23171579..fa7e13e0459e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -26,7 +26,7 @@ #define AI_MAILBOX_POLL_ACK_TIMEDOUT 500 #define AI_MAILBOX_POLL_MSG_TIMEDOUT 6000 -#define AI_MAILBOX_POLL_FLR_TIMEDOUT 5000 +#define AI_MAILBOX_POLL_FLR_TIMEDOUT 10000 #define AI_MAILBOX_POLL_MSG_REP_MAX 11 enum idh_request { @@ -35,6 +35,7 @@ enum idh_request { IDH_REQ_GPU_FINI_ACCESS, IDH_REL_GPU_FINI_ACCESS, IDH_REQ_GPU_RESET_ACCESS, + IDH_REQ_GPU_INIT_DATA, IDH_LOG_VF_ERROR = 200, IDH_READY_TO_RESET = 201, @@ -48,6 +49,7 @@ enum idh_event { IDH_SUCCESS, IDH_FAIL, IDH_QUERY_ALIVE, + IDH_REQ_GPU_INIT_DATA_READY, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index a35e6d87e537..477d0dde19c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -281,11 +281,12 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_write_trylock(&adev->reset_sem)) + if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) return; + down_write(&adev->reset_sem); + amdgpu_virt_fini_data_exchange(adev); - atomic_set(&adev->in_gpu_reset, 1); xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 38241cf0e1f1..8ce5b8ca1fd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -716,6 +716,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { static const struct amdgpu_ih_funcs navi10_ih_funcs = { .get_wptr = navi10_ih_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = navi10_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 2176ef85f137..d0e76b36d4ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -277,13 +277,15 @@ static bool psp_v11_0_is_sos_alive(struct psp_context *psp) return sol_reg != 0x0; } -static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) +static int psp_v11_0_bootloader_load_component(struct psp_context *psp, + struct psp_bin_desc *bin_desc, + enum psp_bootloader_cmd bl_cmd) { int ret; uint32_t psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - /* Check tOS sign of life register to confirm sys driver and sOS + /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. */ if (psp_v11_0_is_sos_alive(psp)) @@ -293,13 +295,13 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) if (ret) return ret; - /* Copy PSP KDB binary to memory */ - psp_copy_fw(psp, psp->kdb.start_addr, psp->kdb.size_bytes); + /* Copy PSP System Driver binary to memory */ + psp_copy_fw(psp, bin_desc->start_addr, bin_desc->size_bytes); - /* Provide the PSP KDB to bootloader */ + /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE; + psp_gfxdrv_command_reg = bl_cmd; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -308,69 +310,19 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) return ret; } -static int psp_v11_0_bootloader_load_spl(struct psp_context *psp) +static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) { - int ret; - uint32_t psp_gfxdrv_command_reg = 0; - struct amdgpu_device *adev = psp->adev; - - /* Check tOS sign of life register to confirm sys driver and sOS - * are already been loaded. - */ - if (psp_v11_0_is_sos_alive(psp)) - return 0; - - ret = psp_v11_0_wait_for_bootloader(psp); - if (ret) - return ret; - - /* Copy PSP SPL binary to memory */ - psp_copy_fw(psp, psp->spl.start_addr, psp->spl.size_bytes); - - /* Provide the PSP SPL to bootloader */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, - (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = PSP_BL__LOAD_TOS_SPL_TABLE; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, - psp_gfxdrv_command_reg); - - ret = psp_v11_0_wait_for_bootloader(psp); + return psp_v11_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE); +} - return ret; +static int psp_v11_0_bootloader_load_spl(struct psp_context *psp) +{ + return psp_v11_0_bootloader_load_component(psp, &psp->spl, PSP_BL__LOAD_TOS_SPL_TABLE); } static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) { - int ret; - uint32_t psp_gfxdrv_command_reg = 0; - struct amdgpu_device *adev = psp->adev; - - /* Check sOS sign of life register to confirm sys driver and sOS - * are already been loaded. - */ - if (psp_v11_0_is_sos_alive(psp)) - return 0; - - ret = psp_v11_0_wait_for_bootloader(psp); - if (ret) - return ret; - - /* Copy PSP System Driver binary to memory */ - psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); - - /* Provide the sys driver to bootloader */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, - (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, - psp_gfxdrv_command_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - ret = psp_v11_0_wait_for_bootloader(psp); - - return ret; + return psp_v11_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV); } static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 853d1511b889..81e033549dda 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -481,8 +481,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse * * @ring: amdgpu ring pointer - * @job: job to retrieve vmid from - * @ib: IB object to schedule * * flush the IB by graphics cache rinse. */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 4d4d1aa51b8a..d3d6d5b045b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -368,8 +368,6 @@ static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring, * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse * * @ring: amdgpu ring pointer - * @job: job to retrieve vmid from - * @ib: IB object to schedule * * flush the IB by graphics cache rinse. */ @@ -544,9 +542,6 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, - AUTO_CTXSW_ENABLE, enable ? 1 : 0); if (enable && amdgpu_sdma_phase_quantum) { WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), phase_quantum); @@ -555,7 +550,13 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), phase_quantum); } - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + + if (!amdgpu_sriov_vf(adev)) { + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, enable ? 1 : 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + } } } @@ -578,10 +579,12 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) sdma_v5_2_rlc_stop(adev); } - for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + if (!amdgpu_sriov_vf(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + } } } @@ -610,7 +613,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) ring = &adev->sdma.instance[i].ring; wb_offset = (ring->rptr_offs * 4); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + if (!amdgpu_sriov_vf(adev)) + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); @@ -685,32 +689,34 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) sdma_v5_2_ring_set_wptr(ring); /* set minor_ptr_update to 0 after wptr programed */ - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); - /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - - /* enable MCBP */ - temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); - - /* Set up RESP_MODE to non-copy addresses */ - temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); - - /* program default cache read and write policy */ - temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); - /* clean read policy and write policy bits */ - temp &= 0xFF0FFF; - temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | - (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | - SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + /* SRIOV VF has no control of any of registers below */ if (!amdgpu_sriov_vf(adev)) { + /* set utc l1 enable flag always to 1 */ + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + + /* enable MCBP */ + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | + SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + /* unhalt engine */ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); @@ -1438,13 +1444,14 @@ static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { u32 sdma_cntl; - u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL); - sdma_cntl = RREG32(reg_offset); - sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(reg_offset, sdma_cntl); + if (!amdgpu_sriov_vf(adev)) { + sdma_cntl = RREG32(reg_offset); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(reg_offset, sdma_cntl); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index de9b55383e9f..0fc1747e4a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -744,7 +744,7 @@ static void soc15_reg_base_init(struct amdgpu_device *adev) vega10_reg_base_init(adev); break; case CHIP_RENOIR: - /* It's safe to do ip discovery here for Renior, + /* It's safe to do ip discovery here for Renoir, * it doesn't support SRIOV. */ if (amdgpu_discovery) { r = amdgpu_discovery_reg_base_init(adev); @@ -1238,7 +1238,9 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); - adev->df.funcs->sw_init(adev); + if (adev->df.funcs && + adev->df.funcs->sw_init) + adev->df.funcs->sw_init(adev); return 0; } @@ -1250,7 +1252,10 @@ static int soc15_common_sw_fini(void *handle) if (adev->nbio.ras_funcs && adev->nbio.ras_funcs->ras_fini) adev->nbio.ras_funcs->ras_fini(adev); - adev->df.funcs->sw_fini(adev); + + if (adev->df.funcs && + adev->df.funcs->sw_fini) + adev->df.funcs->sw_fini(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 8a9ca87d8663..473767e03676 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -51,6 +51,8 @@ #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP) +#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP) + #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ AMDGPU_REGS_NO_KIQ, ip##_HWIP) @@ -65,6 +67,9 @@ #define WREG32_SOC15_IP(ip, reg, value) \ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP) +#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP) + #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ value, AMDGPU_REGS_NO_KIQ, ip##_HWIP) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index f7ec3fe134e5..6dd1e19e8d43 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -50,6 +50,165 @@ static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev, return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst; } +static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; +} + +static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t channel_index, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt; + uint64_t mc_umc_status; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + /* + * select the lower chip and check the error count + * skip add error count, calc error counter only from mca_umc_status + */ + ecc_err_cnt = ras->umc_ecc.ecc[channel_index].ce_count_lo_chip; + + /* + * select the higher chip and check the err counter + * skip add error count, calc error counter only from mca_umc_status + */ + ecc_err_cnt = ras->umc_ecc.ecc[channel_index].ce_count_hi_chip; + + /* check for SRAM correctable error + MCUMC_STATUS is a 64 bit register */ + mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t channel_index, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + /* check the MCUMC_STATUS */ + mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + uint32_t channel_index = 0; + + /*TODO: driver needs to toggle DF Cstate to ensure + * safe access of UMC registers. Will add the protection */ + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, + ch_inst); + channel_index = get_umc_v6_7_channel_index(adev, + umc_inst, + ch_inst); + umc_v6_7_ecc_info_query_correctable_error_count(adev, + channel_index, + &(err_data->ce_count)); + umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, + channel_index, + &(err_data->ue_count)); + } +} + +static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status, err_addr, retired_page; + struct eeprom_table_record *err_rec; + uint32_t channel_index; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) + return; + + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + + err_addr = ras->umc_ecc.ecc[channel_index].mca_umc_addr; + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* translate umc channel address to soc pa, 3 parts are included */ + retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) + == 1) { + err_rec->address = err_addr; + /* page frame address is saved */ + err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + err_rec->ts = (uint64_t)ktime_get_real_seconds(); + err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = channel_index; + err_rec->mcumc_id = umc_inst; + + err_data->err_addr_cnt++; + } + } +} + +static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + /*TODO: driver needs to toggle DF Cstate to ensure + * safe access of UMC resgisters. Will add the protection + * when firmware interface is ready */ + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, + ch_inst); + umc_v6_7_ecc_info_query_error_address(adev, + err_data, + umc_reg_offset, + ch_inst, + umc_inst); + } +} + static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) @@ -327,4 +486,6 @@ const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = { .query_ras_error_count = umc_v6_7_query_ras_error_count, .query_ras_error_address = umc_v6_7_query_ras_error_address, .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, + .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, + .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index d54d720b3cf6..3799226defc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -246,6 +246,13 @@ static int vcn_v1_0_suspend(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool idle_work_unexecuted; + + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); + if (idle_work_unexecuted) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + } r = vcn_v1_0_hw_fini(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index a9ca6988009e..3070466f54e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -640,6 +640,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega10_ih_funcs = { .get_wptr = vega10_ih_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = vega10_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index f51dfc38ac65..3b4eb8285943 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -688,6 +688,7 @@ const struct amd_ip_funcs vega20_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega20_ih_funcs = { .get_wptr = vega20_ih_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = vega20_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index f6233019f042..d60576ce10cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -43,15 +43,15 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, */ if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && - dev->device_info->asic_family == CHIP_HAWAII) { + dev->adev->asic_type == CHIP_HAWAII) { struct cik_ih_ring_entry *tmp_ihre = (struct cik_ih_ring_entry *)patched_ihre; *patched_flag = true; *tmp_ihre = *ihre; - vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); - ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid); + vmid = f2g->read_vmid_from_vmfault_reg(dev->adev); + ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid); tmp_ihre->ring_id &= 0x000000ff; tmp_ihre->ring_id |= vmid << 8; @@ -113,7 +113,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, kfd_process_vm_fault(dev->dqm, pasid); memset(&info, 0, sizeof(info)); - amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info); + amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info); if (!info.page_addr && !info.status) return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 24ebd61395d8..4bfc0c8ab764 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -321,7 +321,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Return gpu_id as doorbell offset for mmap usage */ args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); - if (KFD_IS_SOC15(dev->device_info->asic_family)) + if (KFD_IS_SOC15(dev)) /* On SOC15 ASICs, include the doorbell offset within the * process doorbell frame, which is 2 pages. */ @@ -580,7 +580,7 @@ static int kfd_ioctl_dbg_register(struct file *filep, if (!dev) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_register not supported on CZ\n"); return -EINVAL; } @@ -631,7 +631,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, if (!dev || !dev->dbgmgr) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n"); return -EINVAL; } @@ -676,7 +676,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, if (!dev) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); return -EINVAL; } @@ -784,7 +784,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep, if (!dev) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); return -EINVAL; } @@ -851,7 +851,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, dev = kfd_device_by_id(args->gpu_id); if (dev) /* Reading GPU clock counter from KGD */ - args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd); + args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->adev); else /* Node without GPU resource */ args->gpu_clock_counter = 0; @@ -1041,7 +1041,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, goto out_unlock; } - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev, mem, &kern_addr, &size); if (err) { pr_err("Failed to map event page to kernel\n"); @@ -1051,7 +1051,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, err = kfd_event_page_set(p, kern_addr, size); if (err) { pr_err("Failed to set event page\n"); - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem); goto out_unlock; } @@ -1137,7 +1137,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va) dev->kfd2kgd->set_scratch_backing_va( - dev->kgd, args->va_addr, pdd->qpd.vmid); + dev->adev, args->va_addr, pdd->qpd.vmid); return 0; @@ -1158,7 +1158,7 @@ static int kfd_ioctl_get_tile_config(struct file *filep, if (!dev) return -EINVAL; - amdgpu_amdkfd_get_tile_config(dev->kgd, &config); + amdgpu_amdkfd_get_tile_config(dev->adev, &config); args->gb_addr_config = config.gb_addr_config; args->num_banks = config.num_banks; @@ -1244,7 +1244,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) if (dev->use_iommu_v2) return false; - amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->adev, &mem_info); if (mem_info.local_mem_size_private == 0 && mem_info.local_mem_size_public > 0) return true; @@ -1313,7 +1313,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, err = -EINVAL; goto err_unlock; } - offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + offset = dev->adev->rmmio_remap.bus_addr; if (!offset) { err = -ENOMEM; goto err_unlock; @@ -1321,7 +1321,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, } err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - dev->kgd, args->va_addr, args->size, + dev->adev, args->va_addr, args->size, pdd->drm_priv, (struct kgd_mem **) &mem, &offset, flags); @@ -1353,7 +1353,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); @@ -1399,7 +1399,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, goto err_unlock; } - ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, + ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, pdd->drm_priv, &size); /* If freeing the buffer failed, leave the handle in place for @@ -1484,7 +1484,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - peer->kgd, (struct kgd_mem *)mem, + peer->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv, &table_freed); if (err) { pr_err("Failed to map to gpu %d/%d\n", @@ -1496,7 +1496,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, mutex_unlock(&p->mutex); - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true); + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; @@ -1593,7 +1593,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); + peer->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { pr_err("Failed to unmap from gpu %d/%d\n", i, args->n_devices); @@ -1603,8 +1603,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } mutex_unlock(&p->mutex); - if (dev->device_info->asic_family == CHIP_ALDEBARAN) { - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, + if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) { + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); @@ -1680,7 +1680,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, { struct kfd_ioctl_get_dmabuf_info_args *args = data; struct kfd_dev *dev = NULL; - struct kgd_dev *dma_buf_kgd; + struct amdgpu_device *dmabuf_adev; void *metadata_buffer = NULL; uint32_t flags; unsigned int i; @@ -1700,15 +1700,15 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, } /* Get dmabuf info from KGD */ - r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd, - &dma_buf_kgd, &args->size, + r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd, + &dmabuf_adev, &args->size, metadata_buffer, args->metadata_size, &args->metadata_size, &flags); if (r) goto exit; /* Reverse-lookup gpu_id from kgd pointer */ - dev = kfd_device_by_kgd(dma_buf_kgd); + dev = kfd_device_by_adev(dmabuf_adev); if (!dev) { r = -EINVAL; goto exit; @@ -1758,7 +1758,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, goto err_unlock; } - r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, + r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->adev, dmabuf, args->va_addr, pdd->drm_priv, (struct kgd_mem **)&mem, &size, NULL); @@ -1779,7 +1779,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); @@ -2066,7 +2066,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; - address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + address = dev->adev->rmmio_remap.bus_addr; vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cfedfb1e8596..9624bbe8b501 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1060,6 +1060,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, return -ENODEV; /* same everything but the other direction */ props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); + if (!props2) + return -ENOMEM; + props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; @@ -1340,7 +1343,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, int ret; unsigned int num_cu_shared; - switch (kdev->device_info->asic_family) { + switch (kdev->adev->asic_type) { case CHIP_KAVERI: pcache_info = kaveri_cache_info; num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); @@ -1377,67 +1380,71 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = vegam_cache_info; num_of_cache_types = ARRAY_SIZE(vegam_cache_info); break; - case CHIP_VEGA10: - pcache_info = vega10_cache_info; - num_of_cache_types = ARRAY_SIZE(vega10_cache_info); - break; - case CHIP_VEGA12: - pcache_info = vega12_cache_info; - num_of_cache_types = ARRAY_SIZE(vega12_cache_info); - break; - case CHIP_VEGA20: - case CHIP_ARCTURUS: - pcache_info = vega20_cache_info; - num_of_cache_types = ARRAY_SIZE(vega20_cache_info); - break; - case CHIP_ALDEBARAN: - pcache_info = aldebaran_cache_info; - num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); - break; - case CHIP_RAVEN: - pcache_info = raven_cache_info; - num_of_cache_types = ARRAY_SIZE(raven_cache_info); - break; - case CHIP_RENOIR: - pcache_info = renoir_cache_info; - num_of_cache_types = ARRAY_SIZE(renoir_cache_info); - break; - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_CYAN_SKILLFISH: - pcache_info = navi10_cache_info; - num_of_cache_types = ARRAY_SIZE(navi10_cache_info); - break; - case CHIP_NAVI14: - pcache_info = navi14_cache_info; - num_of_cache_types = ARRAY_SIZE(navi14_cache_info); - break; - case CHIP_SIENNA_CICHLID: - pcache_info = sienna_cichlid_cache_info; - num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); - break; - case CHIP_NAVY_FLOUNDER: - pcache_info = navy_flounder_cache_info; - num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); - break; - case CHIP_DIMGREY_CAVEFISH: - pcache_info = dimgrey_cavefish_cache_info; - num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); - break; - case CHIP_VANGOGH: - pcache_info = vangogh_cache_info; - num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); - break; - case CHIP_BEIGE_GOBY: - pcache_info = beige_goby_cache_info; - num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); - break; - case CHIP_YELLOW_CARP: - pcache_info = yellow_carp_cache_info; - num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); - break; default: - return -EINVAL; + switch(KFD_GC_VERSION(kdev)) { + case IP_VERSION(9, 0, 1): + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case IP_VERSION(9, 2, 1): + pcache_info = vega12_cache_info; + num_of_cache_types = ARRAY_SIZE(vega12_cache_info); + break; + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): + pcache_info = vega20_cache_info; + num_of_cache_types = ARRAY_SIZE(vega20_cache_info); + break; + case IP_VERSION(9, 4, 2): + pcache_info = aldebaran_cache_info; + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); + break; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 2): + pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; + case IP_VERSION(9, 3, 0): + pcache_info = renoir_cache_info; + num_of_cache_types = ARRAY_SIZE(renoir_cache_info); + break; + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 3): + pcache_info = navi10_cache_info; + num_of_cache_types = ARRAY_SIZE(navi10_cache_info); + break; + case IP_VERSION(10, 1, 1): + pcache_info = navi14_cache_info; + num_of_cache_types = ARRAY_SIZE(navi14_cache_info); + break; + case IP_VERSION(10, 3, 0): + pcache_info = sienna_cichlid_cache_info; + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); + break; + case IP_VERSION(10, 3, 2): + pcache_info = navy_flounder_cache_info; + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); + break; + case IP_VERSION(10, 3, 4): + pcache_info = dimgrey_cavefish_cache_info; + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); + break; + case IP_VERSION(10, 3, 1): + pcache_info = vangogh_cache_info; + num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); + break; + case IP_VERSION(10, 3, 5): + pcache_info = beige_goby_cache_info; + num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); + break; + case IP_VERSION(10, 3, 3): + pcache_info = yellow_carp_cache_info; + num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); + break; + default: + return -EINVAL; + } } *size_filled = 0; @@ -1963,8 +1970,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain) { - struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd; - *avail_size -= sizeof(struct crat_subtype_iolink); if (*avail_size < 0) return -ENOMEM; @@ -1981,7 +1986,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, /* Fill in IOLINK subtype. * TODO: Fill-in other fields of iolink subtype */ - if (adev->gmc.xgmi.connected_to_cpu) { + if (kdev->adev->gmc.xgmi.connected_to_cpu) { /* * with host gpu xgmi link, host can access gpu memory whether * or not pcie bar type is large, so always create bidirectional @@ -1990,19 +1995,19 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->num_hops_xgmi = 1; - if (adev->asic_type == CHIP_ALDEBARAN) { + if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) { sub_type_hdr->minimum_bandwidth_mbs = amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( - kdev->kgd, NULL, true); + kdev->adev, NULL, true); sub_type_hdr->maximum_bandwidth_mbs = sub_type_hdr->minimum_bandwidth_mbs; } } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; sub_type_hdr->minimum_bandwidth_mbs = - amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true); + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); sub_type_hdr->maximum_bandwidth_mbs = - amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false); + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); } sub_type_hdr->proximity_domain_from = proximity_domain; @@ -2044,11 +2049,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->proximity_domain_from = proximity_domain_from; sub_type_hdr->proximity_domain_to = proximity_domain_to; sub_type_hdr->num_hops_xgmi = - amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); + amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); sub_type_hdr->maximum_bandwidth_mbs = - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false); + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false); sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0; + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; return 0; } @@ -2114,7 +2119,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; cu->proximity_domain = proximity_domain; - amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); cu->num_simd_per_cu = cu_info.simd_per_cu; cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; cu->max_waves_simd = cu_info.max_waves_per_simd; @@ -2145,7 +2150,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. */ - amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(kdev->adev, &local_mem_info); sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 159add0f5aaa..1e30717b5253 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -41,7 +41,7 @@ static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) { - dev->kfd2kgd->address_watch_disable(dev->kgd); + dev->kfd2kgd->address_watch_disable(dev->adev); } static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, @@ -322,7 +322,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); pdd->dev->kfd2kgd->address_watch_execute( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, cntl.u32All, addrHi.u32All, @@ -420,7 +420,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_CNTL); @@ -431,7 +431,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_ADDR_HI); @@ -441,7 +441,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_ADDR_LO); @@ -457,7 +457,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_CNTL); @@ -752,7 +752,7 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, + return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->adev, reg_gfx_index.u32All, reg_sq_cmd.u32All); } @@ -784,7 +784,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info - (dev->kgd, vmid, &queried_pasid); + (dev->adev, vmid, &queried_pasid); if (status && queried_pasid == p->pasid) { pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", @@ -811,7 +811,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) /* for non DIQ we need to patch the VMID: */ reg_sq_cmd.bits.vm_id = vmid; - dev->kfd2kgd->wave_control_execute(dev->kgd, + dev->kfd2kgd->wave_control_execute(dev->adev, reg_gfx_index.u32All, reg_sq_cmd.u32All); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3b119db16003..2b65d0acae2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -53,770 +53,310 @@ extern const struct kfd2kgd_calls aldebaran_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; -#ifdef KFD_SUPPORT_IOMMU_V2 -static const struct kfd_device_info kaveri_device_info = { - .asic_family = CHIP_KAVERI, - .asic_name = "kaveri", - .gfx_target_version = 70000, - .max_pasid_bits = 16, - /* max num of queues for KV.TODO should be a dynamic value */ - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = true, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info carrizo_device_info = { - .asic_family = CHIP_CARRIZO, - .asic_name = "carrizo", - .gfx_target_version = 80001, - .max_pasid_bits = 16, - /* max num of queues for CZ.TODO should be a dynamic value */ - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = true, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info raven_device_info = { - .asic_family = CHIP_RAVEN, - .asic_name = "raven", - .gfx_target_version = 90002, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = true, - .needs_pci_atomics = true, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; -#endif - -#ifdef CONFIG_DRM_AMDGPU_CIK -static const struct kfd_device_info hawaii_device_info = { - .asic_family = CHIP_HAWAII, - .asic_name = "hawaii", - .gfx_target_version = 70001, - .max_pasid_bits = 16, - /* max num of queues for KV.TODO should be a dynamic value */ - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; -#endif - -static const struct kfd_device_info tonga_device_info = { - .asic_family = CHIP_TONGA, - .asic_name = "tonga", - .gfx_target_version = 80002, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info fiji_device_info = { - .asic_family = CHIP_FIJI, - .asic_name = "fiji", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info fiji_vf_device_info = { - .asic_family = CHIP_FIJI, - .asic_name = "fiji", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - - -static const struct kfd_device_info polaris10_device_info = { - .asic_family = CHIP_POLARIS10, - .asic_name = "polaris10", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris10_vf_device_info = { - .asic_family = CHIP_POLARIS10, - .asic_name = "polaris10", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris11_device_info = { - .asic_family = CHIP_POLARIS11, - .asic_name = "polaris11", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris12_device_info = { - .asic_family = CHIP_POLARIS12, - .asic_name = "polaris12", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vegam_device_info = { - .asic_family = CHIP_VEGAM, - .asic_name = "vegam", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega10_device_info = { - .asic_family = CHIP_VEGA10, - .asic_name = "vega10", - .gfx_target_version = 90000, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega10_vf_device_info = { - .asic_family = CHIP_VEGA10, - .asic_name = "vega10", - .gfx_target_version = 90000, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega12_device_info = { - .asic_family = CHIP_VEGA12, - .asic_name = "vega12", - .gfx_target_version = 90004, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega20_device_info = { - .asic_family = CHIP_VEGA20, - .asic_name = "vega20", - .gfx_target_version = 90006, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info arcturus_device_info = { - .asic_family = CHIP_ARCTURUS, - .asic_name = "arcturus", - .gfx_target_version = 90008, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 6, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info aldebaran_device_info = { - .asic_family = CHIP_ALDEBARAN, - .asic_name = "aldebaran", - .gfx_target_version = 90010, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 3, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info renoir_device_info = { - .asic_family = CHIP_RENOIR, - .asic_name = "renoir", - .gfx_target_version = 90012, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info navi10_device_info = { - .asic_family = CHIP_NAVI10, - .asic_name = "navi10", - .gfx_target_version = 100100, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 145, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navi12_device_info = { - .asic_family = CHIP_NAVI12, - .asic_name = "navi12", - .gfx_target_version = 100101, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 145, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navi14_device_info = { - .asic_family = CHIP_NAVI14, - .asic_name = "navi14", - .gfx_target_version = 100102, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 145, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info sienna_cichlid_device_info = { - .asic_family = CHIP_SIENNA_CICHLID, - .asic_name = "sienna_cichlid", - .gfx_target_version = 100300, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 4, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navy_flounder_device_info = { - .asic_family = CHIP_NAVY_FLOUNDER, - .asic_name = "navy_flounder", - .gfx_target_version = 100301, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info vangogh_device_info = { - .asic_family = CHIP_VANGOGH, - .asic_name = "vangogh", - .gfx_target_version = 100303, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info dimgrey_cavefish_device_info = { - .asic_family = CHIP_DIMGREY_CAVEFISH, - .asic_name = "dimgrey_cavefish", - .gfx_target_version = 100302, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info beige_goby_device_info = { - .asic_family = CHIP_BEIGE_GOBY, - .asic_name = "beige_goby", - .gfx_target_version = 100304, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info yellow_carp_device_info = { - .asic_family = CHIP_YELLOW_CARP, - .asic_name = "yellow_carp", - .gfx_target_version = 100305, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info cyan_skillfish_device_info = { - .asic_family = CHIP_CYAN_SKILLFISH, - .asic_name = "cyan_skillfish", - .gfx_target_version = 100103, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); static void kfd_gtt_sa_fini(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) { - struct kfd_dev *kfd; - const struct kfd_device_info *device_info; - const struct kfd2kgd_calls *f2g; - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0]; + + switch (sdma_version) { + case IP_VERSION(4, 0, 0):/* VEGA10 */ + case IP_VERSION(4, 0, 1):/* VEGA12 */ + case IP_VERSION(4, 1, 0):/* RAVEN */ + case IP_VERSION(4, 1, 1):/* RAVEN */ + case IP_VERSION(4, 1, 2):/* RENOIR */ + case IP_VERSION(5, 2, 1):/* VANGOGH */ + case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ + kfd->device_info.num_sdma_queues_per_engine = 2; + break; + case IP_VERSION(4, 2, 0):/* VEGA20 */ + case IP_VERSION(4, 2, 2):/* ARCTURUS */ + case IP_VERSION(4, 4, 0):/* ALDEBARAN */ + case IP_VERSION(5, 0, 0):/* NAVI10 */ + case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ + case IP_VERSION(5, 0, 2):/* NAVI14 */ + case IP_VERSION(5, 0, 5):/* NAVI12 */ + case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */ + case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */ + case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ + case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ + kfd->device_info.num_sdma_queues_per_engine = 8; + break; + default: + dev_warn(kfd_device, + "Default sdma queue per engine(8) is set due to " + "mismatch of sdma ip block(SDMA_HWIP:0x%x).\n", + sdma_version); + kfd->device_info.num_sdma_queues_per_engine = 8; + } +} + +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) +{ + uint32_t gc_version = KFD_GC_VERSION(kfd); + + switch (gc_version) { + case IP_VERSION(9, 0, 1): /* VEGA10 */ + case IP_VERSION(9, 1, 0): /* RAVEN */ + case IP_VERSION(9, 2, 1): /* VEGA12 */ + case IP_VERSION(9, 2, 2): /* RAVEN */ + case IP_VERSION(9, 3, 0): /* RENOIR */ + case IP_VERSION(9, 4, 0): /* VEGA20 */ + case IP_VERSION(9, 4, 1): /* ARCTURUS */ + case IP_VERSION(9, 4, 2): /* ALDEBARAN */ + case IP_VERSION(10, 3, 1): /* VANGOGH */ + case IP_VERSION(10, 3, 3): /* YELLOW_CARP */ + case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */ + case IP_VERSION(10, 1, 10): /* NAVI10 */ + case IP_VERSION(10, 1, 2): /* NAVI12 */ + case IP_VERSION(10, 1, 1): /* NAVI14 */ + case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */ + case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */ + case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */ + case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */ + kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; + break; + default: + dev_warn(kfd_device, "v9 event interrupt handler is set due to " + "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version); + kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; + } +} + +static void kfd_device_info_init(struct kfd_dev *kfd, + bool vf, uint32_t gfx_target_version) +{ + uint32_t gc_version = KFD_GC_VERSION(kfd); + uint32_t asic_type = kfd->adev->asic_type; + + kfd->device_info.max_pasid_bits = 16; + kfd->device_info.max_no_of_hqd = 24; + kfd->device_info.num_of_watch_points = 4; + kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED; + kfd->device_info.gfx_target_version = gfx_target_version; + + if (KFD_IS_SOC15(kfd)) { + kfd->device_info.doorbell_size = 8; + kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); + kfd->device_info.supports_cwsr = true; + + kfd_device_info_set_sdma_queue_num(kfd); + + kfd_device_info_set_event_interrupt_class(kfd); + + /* Raven */ + if (gc_version == IP_VERSION(9, 1, 0) || + gc_version == IP_VERSION(9, 2, 2)) + kfd->device_info.needs_iommu_device = true; + + if (gc_version < IP_VERSION(11, 0, 0)) { + /* Navi2x+, Navi1x+ */ + if (gc_version >= IP_VERSION(10, 3, 0)) + kfd->device_info.no_atomic_fw_version = 92; + else if (gc_version >= IP_VERSION(10, 1, 1)) + kfd->device_info.no_atomic_fw_version = 145; + + /* Navi1x+ */ + if (gc_version >= IP_VERSION(10, 1, 1)) + kfd->device_info.needs_pci_atomics = true; + } + } else { + kfd->device_info.doorbell_size = 4; + kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t); + kfd->device_info.event_interrupt_class = &event_interrupt_class_cik; + kfd->device_info.num_sdma_queues_per_engine = 2; + + if (asic_type != CHIP_KAVERI && + asic_type != CHIP_HAWAII && + asic_type != CHIP_TONGA) + kfd->device_info.supports_cwsr = true; + + if (asic_type == CHIP_KAVERI || + asic_type == CHIP_CARRIZO) + kfd->device_info.needs_iommu_device = true; + + if (asic_type != CHIP_HAWAII && !vf) + kfd->device_info.needs_pci_atomics = true; + } +} + +struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) +{ + struct kfd_dev *kfd = NULL; + const struct kfd2kgd_calls *f2g = NULL; struct pci_dev *pdev = adev->pdev; + uint32_t gfx_target_version = 0; switch (adev->asic_type) { #ifdef KFD_SUPPORT_IOMMU_V2 #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: - if (vf) - device_info = NULL; - else - device_info = &kaveri_device_info; - f2g = &gfx_v7_kfd2kgd; + gfx_target_version = 70000; + if (!vf) + f2g = &gfx_v7_kfd2kgd; break; #endif case CHIP_CARRIZO: - if (vf) - device_info = NULL; - else - device_info = &carrizo_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80001; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; #endif #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_HAWAII: - if (vf) - device_info = NULL; - else - device_info = &hawaii_device_info; - f2g = &gfx_v7_kfd2kgd; + gfx_target_version = 70001; + if (!amdgpu_exp_hw_support) + pr_info( + "KFD support on Hawaii is experimental. See modparam exp_hw_support\n" + ); + else if (!vf) + f2g = &gfx_v7_kfd2kgd; break; #endif case CHIP_TONGA: - if (vf) - device_info = NULL; - else - device_info = &tonga_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80002; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; case CHIP_FIJI: - if (vf) - device_info = &fiji_vf_device_info; - else - device_info = &fiji_device_info; + gfx_target_version = 80003; f2g = &gfx_v8_kfd2kgd; break; case CHIP_POLARIS10: - if (vf) - device_info = &polaris10_vf_device_info; - else - device_info = &polaris10_device_info; + gfx_target_version = 80003; f2g = &gfx_v8_kfd2kgd; break; case CHIP_POLARIS11: - if (vf) - device_info = NULL; - else - device_info = &polaris11_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80003; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; case CHIP_POLARIS12: - if (vf) - device_info = NULL; - else - device_info = &polaris12_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80003; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; case CHIP_VEGAM: - if (vf) - device_info = NULL; - else - device_info = &vegam_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80003; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; default: switch (adev->ip_versions[GC_HWIP][0]) { + /* Vega 10 */ case IP_VERSION(9, 0, 1): - if (vf) - device_info = &vega10_vf_device_info; - else - device_info = &vega10_device_info; + gfx_target_version = 90000; f2g = &gfx_v9_kfd2kgd; break; #ifdef KFD_SUPPORT_IOMMU_V2 + /* Raven */ case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): - if (vf) - device_info = NULL; - else - device_info = &raven_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90002; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; #endif + /* Vega12 */ case IP_VERSION(9, 2, 1): - if (vf) - device_info = NULL; - else - device_info = &vega12_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90004; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; + /* Renoir */ case IP_VERSION(9, 3, 0): - if (vf) - device_info = NULL; - else - device_info = &renoir_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90012; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; + /* Vega20 */ case IP_VERSION(9, 4, 0): - if (vf) - device_info = NULL; - else - device_info = &vega20_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90006; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; + /* Arcturus */ case IP_VERSION(9, 4, 1): - device_info = &arcturus_device_info; + gfx_target_version = 90008; f2g = &arcturus_kfd2kgd; break; + /* Aldebaran */ case IP_VERSION(9, 4, 2): - device_info = &aldebaran_device_info; + gfx_target_version = 90010; f2g = &aldebaran_kfd2kgd; break; + /* Navi10 */ case IP_VERSION(10, 1, 10): - if (vf) - device_info = NULL; - else - device_info = &navi10_device_info; - f2g = &gfx_v10_kfd2kgd; + gfx_target_version = 100100; + if (!vf) + f2g = &gfx_v10_kfd2kgd; break; + /* Navi12 */ case IP_VERSION(10, 1, 2): - device_info = &navi12_device_info; + gfx_target_version = 100101; f2g = &gfx_v10_kfd2kgd; break; + /* Navi14 */ case IP_VERSION(10, 1, 1): - if (vf) - device_info = NULL; - else - device_info = &navi14_device_info; - f2g = &gfx_v10_kfd2kgd; + gfx_target_version = 100102; + if (!vf) + f2g = &gfx_v10_kfd2kgd; break; + /* Cyan Skillfish */ case IP_VERSION(10, 1, 3): - if (vf) - device_info = NULL; - else - device_info = &cyan_skillfish_device_info; - f2g = &gfx_v10_kfd2kgd; + gfx_target_version = 100103; + if (!vf) + f2g = &gfx_v10_kfd2kgd; break; + /* Sienna Cichlid */ case IP_VERSION(10, 3, 0): - device_info = &sienna_cichlid_device_info; + gfx_target_version = 100300; f2g = &gfx_v10_3_kfd2kgd; break; + /* Navy Flounder */ case IP_VERSION(10, 3, 2): - device_info = &navy_flounder_device_info; + gfx_target_version = 100301; f2g = &gfx_v10_3_kfd2kgd; break; + /* Van Gogh */ case IP_VERSION(10, 3, 1): - if (vf) - device_info = NULL; - else - device_info = &vangogh_device_info; - f2g = &gfx_v10_3_kfd2kgd; + gfx_target_version = 100303; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; break; + /* Dimgrey Cavefish */ case IP_VERSION(10, 3, 4): - device_info = &dimgrey_cavefish_device_info; + gfx_target_version = 100302; f2g = &gfx_v10_3_kfd2kgd; break; + /* Beige Goby */ case IP_VERSION(10, 3, 5): - device_info = &beige_goby_device_info; + gfx_target_version = 100304; f2g = &gfx_v10_3_kfd2kgd; break; + /* Yellow Carp */ case IP_VERSION(10, 3, 3): - if (vf) - device_info = NULL; - else - device_info = &yellow_carp_device_info; - f2g = &gfx_v10_3_kfd2kgd; + gfx_target_version = 100305; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; break; default: - return NULL; + break; } break; } - if (!device_info || !f2g) { - dev_err(kfd_device, "%s %s not supported in kfd\n", - amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); + if (!f2g) { + if (adev->ip_versions[GC_HWIP][0]) + dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n", + adev->ip_versions[GC_HWIP][0], vf ? "VF" : ""); + else + dev_err(kfd_device, "%s %s not supported in kfd\n", + amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); return NULL; } @@ -824,8 +364,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) if (!kfd) return NULL; - kfd->kgd = kgd; - kfd->device_info = device_info; + kfd->adev = adev; + kfd_device_info_init(kfd, vf, gfx_target_version); kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; @@ -844,24 +384,24 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) static void kfd_cwsr_init(struct kfd_dev *kfd) { - if (cwsr_enable && kfd->device_info->supports_cwsr) { - if (kfd->device_info->asic_family < CHIP_VEGA10) { + if (cwsr_enable && kfd->device_info.supports_cwsr) { + if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx8_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); - } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_arcturus_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); - } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) { + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) { BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_aldebaran_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); - } else if (kfd->device_info->asic_family < CHIP_NAVI10) { + } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx9_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); - } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { + } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) { BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_nv1x_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); @@ -882,18 +422,17 @@ static int kfd_gws_init(struct kfd_dev *kfd) if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) return 0; - if (hws_gws_support - || (kfd->device_info->asic_family == CHIP_VEGA10 - && kfd->mec2_fw_version >= 0x81b3) - || (kfd->device_info->asic_family >= CHIP_VEGA12 - && kfd->device_info->asic_family <= CHIP_RAVEN - && kfd->mec2_fw_version >= 0x1b3) - || (kfd->device_info->asic_family == CHIP_ARCTURUS - && kfd->mec2_fw_version >= 0x30) - || (kfd->device_info->asic_family == CHIP_ALDEBARAN - && kfd->mec2_fw_version >= 0x28)) - ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, - amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); + if (hws_gws_support || (KFD_IS_SOC15(kfd) && + ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1) + && kfd->mec2_fw_version >= 0x81b3) || + (KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0) + && kfd->mec2_fw_version >= 0x1b3) || + (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) + && kfd->mec2_fw_version >= 0x30) || + (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) + && kfd->mec2_fw_version >= 0x28)))) + ret = amdgpu_amdkfd_alloc_gws(kfd->adev, + kfd->adev->gds.gws_size, &kfd->gws); return ret; } @@ -910,11 +449,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, unsigned int size, map_process_packet_size; kfd->ddev = ddev; - kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); - kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC2); - kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; @@ -927,16 +466,16 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * 32 and 64-bit requests are possible and must be * supported. */ - kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd); + kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev); if (!kfd->pci_atomic_requested && - kfd->device_info->needs_pci_atomics && - (!kfd->device_info->no_atomic_fw_version || - kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { + kfd->device_info.needs_pci_atomics && + (!kfd->device_info.no_atomic_fw_version || + kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) { dev_info(kfd_device, "skipped device %x:%x, PCI rejects atomics %d<%d\n", kfd->pdev->vendor, kfd->pdev->device, kfd->mec_fw_version, - kfd->device_info->no_atomic_fw_version); + kfd->device_info.no_atomic_fw_version); return false; } @@ -953,16 +492,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * - kfd->device_info->mqd_size_aligned; + kfd->device_info.mqd_size_aligned; /* * calculate max size of runlist packet. * There can be only 2 packets at once */ - map_process_packet_size = - kfd->device_info->asic_family == CHIP_ALDEBARAN ? + map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ? sizeof(struct pm4_mes_map_process_aldebaran) : - sizeof(struct pm4_mes_map_process); + sizeof(struct pm4_mes_map_process); size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size + max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) + sizeof(struct pm4_mes_runlist)) * 2; @@ -974,7 +512,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, size += 512 * 1024; if (amdgpu_amdkfd_alloc_gtt_mem( - kfd->kgd, size, &kfd->gtt_mem, + kfd->adev, size, &kfd->gtt_mem, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, false)) { dev_err(kfd_device, "Could not allocate %d bytes\n", size); @@ -995,9 +533,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_doorbell_error; } - kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd); + kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; - kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); + kfd->noretry = kfd->adev->gmc.noretry; if (kfd_interrupt_init(kfd)) { dev_err(kfd_device, "Error initializing interrupts\n"); @@ -1015,7 +553,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, */ if (kfd_gws_init(kfd)) { dev_err(kfd_device, "Could not allocate %d gws\n", - amdgpu_amdkfd_get_num_gws(kfd->kgd)); + kfd->adev->gds.gws_size); goto gws_error; } @@ -1030,7 +568,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd); - svm_migrate_init((struct amdgpu_device *)kfd->kgd); + svm_migrate_init(kfd->adev); if(kgd2kfd_resume_iommu(kfd)) goto device_iommu_error; @@ -1068,10 +606,10 @@ kfd_interrupt_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); alloc_gtt_mem_failure: if (kfd->gws) - amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); + amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -1088,9 +626,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); if (kfd->gws) - amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); + amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); } kfree(kfd); @@ -1229,7 +767,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) if (!kfd->init_complete) return; - if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { + if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) { dev_err_once(kfd_device, "Ring entry too small\n"); return; } @@ -1526,7 +1064,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) void kfd_inc_compute_active(struct kfd_dev *kfd) { if (atomic_inc_return(&kfd->compute_profile) == 1) - amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); + amdgpu_amdkfd_set_compute_idle(kfd->adev, false); } void kfd_dec_compute_active(struct kfd_dev *kfd) @@ -1534,7 +1072,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd) int count = atomic_dec_return(&kfd->compute_profile); if (count == 0) - amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); + amdgpu_amdkfd_set_compute_idle(kfd->adev, true); WARN_ONCE(count < 0, "Compute profile ref. count error"); } @@ -1544,6 +1082,26 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); } +/* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and + * kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA. + * When the device has more than two engines, we reserve two for PCIe to enable + * full-duplex and the rest are used as XGMI. + */ +unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev) +{ + /* If XGMI is not supported, all SDMA engines are PCIe */ + if (!kdev->adev->gmc.xgmi.supported) + return kdev->adev->sdma.num_instances; + + return min(kdev->adev->sdma.num_instances, 2); +} + +unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev) +{ + /* After reserved for PCIe, the rest of engines are XGMI */ + return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 93e33dd84dd4..4b6814949aad 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -47,7 +47,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, uint32_t filter_param); static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param); + uint32_t filter_param, bool reset); static int map_queues_cpsch(struct device_queue_manager *dqm); @@ -99,38 +99,29 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) return dqm->dev->shared_resources.num_pipe_per_mec; } -static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) -{ - return dqm->dev->device_info->num_sdma_engines; -} - -static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) -{ - return dqm->dev->device_info->num_xgmi_sdma_engines; -} - static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) { - return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm); + return kfd_get_num_sdma_engines(dqm->dev) + + kfd_get_num_xgmi_sdma_engines(dqm->dev); } unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { - return dqm->dev->device_info->num_sdma_engines - * dqm->dev->device_info->num_sdma_queues_per_engine; + return kfd_get_num_sdma_engines(dqm->dev) * + dqm->dev->device_info.num_sdma_queues_per_engine; } unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) { - return dqm->dev->device_info->num_xgmi_sdma_engines - * dqm->dev->device_info->num_sdma_queues_per_engine; + return kfd_get_num_xgmi_sdma_engines(dqm->dev) * + dqm->dev->device_info.num_sdma_queues_per_engine; } void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { return dqm->dev->kfd2kgd->program_sh_mem_settings( - dqm->dev->kgd, qpd->vmid, + dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, @@ -157,7 +148,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) { struct kfd_dev *dev = qpd->dqm->dev; - if (!KFD_IS_SOC15(dev->device_info->asic_family)) { + if (!KFD_IS_SOC15(dev)) { /* On pre-SOC15 chips we need to use the queue ID to * preserve the user mode ABI. */ @@ -202,7 +193,7 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, unsigned int old; struct kfd_dev *dev = qpd->dqm->dev; - if (!KFD_IS_SOC15(dev->device_info->asic_family) || + if (!KFD_IS_SOC15(dev) || q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) return; @@ -216,7 +207,7 @@ static void program_trap_handler_settings(struct device_queue_manager *dqm, { if (dqm->dev->kfd2kgd->program_trap_handler_settings) dqm->dev->kfd2kgd->program_trap_handler_settings( - dqm->dev->kgd, qpd->vmid, + dqm->dev->adev, qpd->vmid, qpd->tba_addr, qpd->tma_addr); } @@ -250,21 +241,20 @@ static int allocate_vmid(struct device_queue_manager *dqm, program_sh_mem_settings(dqm, qpd); - if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 && - dqm->dev->cwsr_enabled) + if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) program_trap_handler_settings(dqm, qpd); /* qpd->page_table_base is set earlier when register_process() * is called, i.e. when the first queue is created. */ - dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, + dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); if (dqm->dev->kfd2kgd->set_scratch_backing_va) - dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, + dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, qpd->sh_hidden_private_base, qpd->vmid); return 0; @@ -283,7 +273,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, if (ret) return ret; - return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, + return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, qpd->ib_base, (uint32_t *)qpd->ib_kaddr, pmf->release_mem_size / sizeof(uint32_t)); } @@ -293,7 +283,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct queue *q) { /* On GFX v7, CP doesn't flush TC at dequeue */ - if (q->device->device_info->asic_family == CHIP_HAWAII) + if (q->device->adev->asic_type == CHIP_HAWAII) if (flush_texture_cache_nocpsch(q->device, qpd)) pr_err("Failed to flush TC\n"); @@ -580,7 +570,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, /* Make sure the queue is unmapped before updating the MQD */ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { retval = unmap_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); if (retval) { pr_err("unmap queue failed\n"); goto out_unlock; @@ -776,7 +766,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, if (!list_empty(&qpd->queues_list)) { dqm->dev->kfd2kgd->set_vm_context_page_table_base( - dqm->dev->kgd, + dqm->dev->adev, qpd->vmid, qpd->page_table_base); kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); @@ -954,7 +944,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid) { return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( - dqm->dev->kgd, pasid, vmid); + dqm->dev->adev, pasid, vmid); } static void init_interrupts(struct device_queue_manager *dqm) @@ -963,7 +953,7 @@ static void init_interrupts(struct device_queue_manager *dqm) for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) if (is_pipe_enabled(dqm, 0, i)) - dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); + dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); } static int initialize_nocpsch(struct device_queue_manager *dqm) @@ -1014,19 +1004,22 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { + int r = 0; + pr_info("SW scheduler is used"); init_interrupts(dqm); - if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - return pm_init(&dqm->packet_mgr, dqm); - dqm->sched_running = true; + if (dqm->dev->adev->asic_type == CHIP_HAWAII) + r = pm_init(&dqm->packet_mgr, dqm); + if (!r) + dqm->sched_running = true; - return 0; + return r; } static int stop_nocpsch(struct device_queue_manager *dqm) { - if (dqm->dev->device_info->asic_family == CHIP_HAWAII) + if (dqm->dev->adev->asic_type == CHIP_HAWAII) pm_uninit(&dqm->packet_mgr, false); dqm->sched_running = false; @@ -1055,9 +1048,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, dqm->sdma_bitmap &= ~(1ULL << bit); q->sdma_id = bit; q->properties.sdma_engine_id = q->sdma_id % - get_num_sdma_engines(dqm); + kfd_get_num_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / - get_num_sdma_engines(dqm); + kfd_get_num_sdma_engines(dqm->dev); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { if (dqm->xgmi_sdma_bitmap == 0) { pr_err("No more XGMI SDMA queue to allocate\n"); @@ -1072,10 +1065,11 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, * assumes the first N engines are always * PCIe-optimized ones */ - q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + - q->sdma_id % get_num_xgmi_sdma_engines(dqm); + q->properties.sdma_engine_id = + kfd_get_num_sdma_engines(dqm->dev) + + q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / - get_num_xgmi_sdma_engines(dqm); + kfd_get_num_xgmi_sdma_engines(dqm->dev); } pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1132,7 +1126,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) res.queue_mask |= 1ull << amdgpu_queue_mask_bit_to_set_resource_bit( - (struct amdgpu_device *)dqm->dev->kgd, i); + dqm->dev->adev, i); } res.gws_mask = ~0ull; res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; @@ -1232,7 +1226,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) } if (!dqm->is_hws_hang) - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); hanging = dqm->is_hws_hang || dqm->is_resetting; dqm->sched_running = false; @@ -1428,7 +1422,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param) + uint32_t filter_param, bool reset) { int retval = 0; struct mqd_manager *mqd_mgr; @@ -1441,7 +1435,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return retval; retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE, - filter, filter_param, false, 0); + filter, filter_param, reset, 0); if (retval) return retval; @@ -1485,6 +1479,21 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return retval; } +/* only for compute queue */ +static int reset_queues_cpsch(struct device_queue_manager *dqm, + uint16_t pasid) +{ + int retval; + + dqm_lock(dqm); + + retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, + pasid, true); + + dqm_unlock(dqm); + return retval; +} + /* dqm->lock mutex has to be locked before calling this function */ static int execute_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -1494,7 +1503,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, if (dqm->is_hws_hang) return -EIO; - retval = unmap_queues_cpsch(dqm, filter, filter_param); + retval = unmap_queues_cpsch(dqm, filter, filter_param, false); if (retval) return retval; @@ -1847,10 +1856,10 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * get_num_all_sdma_engines(dqm) * - dev->device_info->num_sdma_queues_per_engine + + dev->device_info.num_sdma_queues_per_engine + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; - retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), (void *)&(mem_obj->cpu_ptr), false); @@ -1867,7 +1876,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) if (!dqm) return NULL; - switch (dev->device_info->asic_family) { + switch (dev->adev->asic_type) { /* HWS is not available on Hawaii. */ case CHIP_HAWAII: /* HWS depends on CWSR for timely dequeue. CWSR is not @@ -1905,6 +1914,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.evict_process_queues = evict_process_queues_cpsch; dqm->ops.restore_process_queues = restore_process_queues_cpsch; dqm->ops.get_wave_state = get_wave_state; + dqm->ops.reset_queues = reset_queues_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1930,7 +1940,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) goto out_free; } - switch (dev->device_info->asic_family) { + switch (dev->adev->asic_type) { case CHIP_CARRIZO: device_queue_manager_init_vi(&dqm->asic_ops); break; @@ -1952,31 +1962,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - device_queue_manager_init_v9(&dqm->asic_ops); - break; - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - device_queue_manager_init_v10_navi10(&dqm->asic_ops); - break; default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - goto out_free; + if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) + device_queue_manager_init_v10_navi10(&dqm->asic_ops); + else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) + device_queue_manager_init_v9(&dqm->asic_ops); + else { + WARN(1, "Unexpected ASIC family %u", + dev->adev->asic_type); + goto out_free; + } } if (init_mqd_managers(dqm)) @@ -2000,7 +1995,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, { WARN(!mqd, "No hiq sdma mqd trunk to free"); - amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); } void device_queue_manager_uninit(struct device_queue_manager *dqm) @@ -2031,7 +2026,7 @@ static void kfd_process_hw_exception(struct work_struct *work) { struct device_queue_manager *dqm = container_of(work, struct device_queue_manager, hw_exception_work); - amdgpu_amdkfd_gpu_reset(dqm->dev->kgd); + amdgpu_amdkfd_gpu_reset(dqm->dev->adev); } #if defined(CONFIG_DEBUG_FS) @@ -2070,7 +2065,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) return 0; } - r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); if (!r) { @@ -2092,7 +2087,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) continue; r = dqm->dev->kfd2kgd->hqd_dump( - dqm->dev->kgd, pipe, queue, &dump, &n_regs); + dqm->dev->adev, pipe, queue, &dump, &n_regs); if (r) break; @@ -2106,10 +2101,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { for (queue = 0; - queue < dqm->dev->device_info->num_sdma_queues_per_engine; + queue < dqm->dev->device_info.num_sdma_queues_per_engine; queue++) { r = dqm->dev->kfd2kgd->hqd_sdma_dump( - dqm->dev->kgd, pipe, queue, &dump, &n_regs); + dqm->dev->adev, pipe, queue, &dump, &n_regs); if (r) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 499fc0ea387f..e145e4deb53a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -81,6 +81,8 @@ struct device_process_node { * * @get_wave_state: Retrieves context save state and optionally copies the * control stack, if kept in the MQD, to the given userspace address. + * + * @reset_queues: reset queues which consume RAS poison */ struct device_queue_manager_ops { @@ -134,6 +136,9 @@ struct device_queue_manager_ops { void __user *ctl_stack, u32 *ctl_stack_used_size, u32 *save_area_used_size); + + int (*reset_queues)(struct device_queue_manager *dqm, + uint16_t pasid); }; struct device_queue_manager_asic_ops { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index b5c3d13643f1..f20434d9980e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -62,7 +62,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) { + if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) { /* Aldebaran can safely support different XNACK modes * per process */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 768d153acff4..0dbcf54657ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -48,7 +48,7 @@ /* # of doorbell bytes allocated for each process. */ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) { - return roundup(kfd->device_info->doorbell_size * + return roundup(kfd->device_info.doorbell_size * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, PAGE_SIZE); } @@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; - inx *= kfd->device_info->doorbell_size / sizeof(u32); + inx *= kfd->device_info.doorbell_size / sizeof(u32); /* * Calculating the kernel doorbell offset using the first @@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) unsigned int inx; inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) - * sizeof(u32) / kfd->device_info->doorbell_size; + * sizeof(u32) / kfd->device_info.doorbell_size; mutex_lock(&kfd->doorbell_mutex); __clear_bit(inx, kfd->doorbell_available_index); @@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, return kfd->doorbell_base_dw_offset + pdd->doorbell_index * kfd_doorbell_process_slice(kfd) / sizeof(u32) + - doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); + doorbell_id * kfd->device_info.doorbell_size / sizeof(u32); } uint64_t kfd_get_number_elems(struct kfd_dev *kfd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 3eea4edee355..afe72dd11325 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -935,8 +935,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid, /* Workaround on Raven to not kill the process when memory is freed * before IOMMU is able to finish processing all the excessive PPRs */ - if (dev->device_info->asic_family != CHIP_RAVEN && - dev->device_info->asic_family != CHIP_RENOIR) { + + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) { mutex_lock(&p->event_mutex); /* Lookup events by type and signal them */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index d1388896f9c1..2e2b7ceb71db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -394,7 +394,7 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - switch (dev->device_info->asic_family) { + switch (dev->adev->asic_type) { case CHIP_KAVERI: case CHIP_HAWAII: case CHIP_CARRIZO: @@ -406,29 +406,14 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_VEGAM: kfd_init_apertures_vi(pdd, id); break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - kfd_init_apertures_v9(pdd, id); - break; default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - return -EINVAL; + if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) + kfd_init_apertures_v9(pdd, id); + else { + WARN(1, "Unexpected ASIC family %u", + dev->adev->asic_type); + return -EINVAL; + } } if (!dev->use_iommu_v2) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 543e7ea75593..e8bc28009c22 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -89,6 +89,44 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000 #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 +static void event_interrupt_poison_consumption(struct kfd_dev *dev, + uint16_t pasid, uint16_t source_id) +{ + int ret = -EINVAL; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + + if (!p) + return; + + /* all queues of a process will be unmapped in one time */ + if (atomic_read(&p->poison)) { + kfd_unref_process(p); + return; + } + + atomic_set(&p->poison, 1); + kfd_unref_process(p); + + switch (source_id) { + case SOC15_INTSRC_SQ_INTERRUPT_MSG: + if (dev->dqm->ops.reset_queues) + ret = dev->dqm->ops.reset_queues(dev->dqm, pasid); + break; + case SOC15_INTSRC_SDMA_ECC: + default: + break; + } + + kfd_signal_poison_consumed_event(dev, pasid); + + /* resetting queue passes, do page retirement without gpu reset + resetting queue fails, fallback to gpu reset solution */ + if (!ret) + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); + else + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); +} + static bool event_interrupt_isr_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -135,7 +173,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, *patched_flag = true; memcpy(patched_ihre, ih_ring_entry, - dev->device_info->ih_ring_entry_size); + dev->device_info.ih_ring_entry_size); pasid = dev->dqm->vmid_pasid[vmid]; @@ -159,6 +197,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, */ return source_id == SOC15_INTSRC_CP_END_OF_PIPE || source_id == SOC15_INTSRC_SDMA_TRAP || + source_id == SOC15_INTSRC_SDMA_ECC || source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || source_id == SOC15_INTSRC_CP_BAD_OPCODE || ((client_id == SOC15_IH_CLIENTID_VMC || @@ -230,8 +269,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - kfd_signal_poison_consumed_event(dev, pasid); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd); + event_interrupt_poison_consumption(dev, pasid, source_id); return; } break; @@ -252,8 +290,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - kfd_signal_poison_consumed_event(dev, pasid); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd); + event_interrupt_poison_consumption(dev, pasid, source_id); return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index bc47f6a44456..81887c2013c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd) int r; r = kfifo_alloc(&kfd->ih_fifo, - KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, + KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size, GFP_KERNEL); if (r) { dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); @@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) int count; count = kfifo_in(&kfd->ih_fifo, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); - if (count != kfd->device_info->ih_ring_entry_size) { + kfd->device_info.ih_ring_entry_size); + if (count != kfd->device_info.ih_ring_entry_size) { dev_err_ratelimited(kfd_chardev(), "Interrupt ring overflow, dropping interrupt %d\n", count); @@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) int count; count = kfifo_out(&kfd->ih_fifo, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); + kfd->device_info.ih_ring_entry_size); - WARN_ON(count && count != kfd->device_info->ih_ring_entry_size); + WARN_ON(count && count != kfd->device_info.ih_ring_entry_size); - return count == kfd->device_info->ih_ring_entry_size; + return count == kfd->device_info.ih_ring_entry_size; } static void interrupt_wq(struct work_struct *work) @@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work) interrupt_work); uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; - if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) { + if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { dev_err_once(kfd_chardev(), "Ring entry too small\n"); return; } while (dequeue_ih_ring_entry(dev, ih_ring_entry)) - dev->device_info->event_interrupt_class->interrupt_wq(dev, + dev->device_info.event_interrupt_class->interrupt_wq(dev, ih_ring_entry); } @@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev, /* integer and bitwise OR so there is no boolean short-circuiting */ unsigned int wanted = 0; - wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, + wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev, ih_ring_entry, patched_ihre, flag); return wanted != 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 73f2257acc23..66ad8d0b8f7f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) } pasid_limit = min_t(unsigned int, - (unsigned int)(1 << kfd->device_info->max_pasid_bits), + (unsigned int)(1 << kfd->device_info.max_pasid_bits), iommu_info.max_pasids); if (!kfd_set_pasid_limit(pasid_limit)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 64b4ac339904..16f8bc4ca7f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -91,7 +91,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_gpu_addr = kq->pq->gpu_addr; /* For CIK family asics, kq->eop_mem is not needed */ - if (dev->device_info->asic_family > CHIP_MULLINS) { + if (dev->adev->asic_type > CHIP_MULLINS) { retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); if (retval != 0) goto err_eop_allocate_vidmem; @@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, + retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size, &kq->wptr_mem); if (retval != 0) @@ -297,7 +297,7 @@ void kq_submit_packet(struct kernel_queue *kq) } pr_debug("\n"); #endif - if (kq->dev->device_info->doorbell_size == 8) { + if (kq->dev->device_info.doorbell_size == 8) { *kq->wptr64_kernel = kq->pending_wptr64; write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, kq->pending_wptr64); @@ -310,7 +310,7 @@ void kq_submit_packet(struct kernel_queue *kq) void kq_rollback_packet(struct kernel_queue *kq) { - if (kq->dev->device_info->doorbell_size == 8) { + if (kq->dev->device_info.doorbell_size == 8) { kq->pending_wptr64 = *kq->wptr64_kernel; kq->pending_wptr = *kq->wptr_kernel % (kq->queue->properties.queue_size / 4); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 9b9c2b9bf2ef..ed5385137f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -108,8 +108,8 @@ error_free: * svm_migrate_copy_memory_gart - sdma copy data between ram and vram * * @adev: amdgpu device the sdma ring running - * @src: source page address array - * @dst: destination page address array + * @sys: system DMA pointer to be copied + * @vram: vram destination DMA pointer * @npages: number of pages to copy * @direction: enum MIGRATION_COPY_DIR * @mfence: output, sdma fence to signal after sdma is done @@ -549,7 +549,7 @@ static void svm_migrate_page_free(struct page *page) if (svm_bo) { pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref)); - svm_range_bo_unref(svm_bo); + svm_range_bo_unref_async(svm_bo); } } @@ -938,7 +938,7 @@ int svm_migrate_init(struct amdgpu_device *adev) void *r; /* Page migration works on Vega10 or newer */ - if (kfddev->device_info->asic_family < CHIP_VEGA10) + if (!KFD_IS_SOC15(kfddev)) return -EINVAL; pgmap = &kfddev->pgmap; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index c021519af810..e2825ad4d699 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -71,7 +71,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, return NULL; offset = (q->sdma_engine_id * - dev->device_info->num_sdma_queues_per_engine + + dev->device_info.num_sdma_queues_per_engine + q->sdma_queue_id) * dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; @@ -100,7 +100,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; int i, se, sh, cu; - amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); if (cu_mask_count > cu_info.cu_active_number) cu_mask_count = cu_info.cu_active_number; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 8128f4d312f1..e9a8e21e144e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -171,7 +171,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, wptr_mask, mms); } @@ -180,7 +180,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -276,7 +276,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, + return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -289,7 +289,7 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied(struct mqd_manager *mm, void *mqd, @@ -297,7 +297,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, + return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -306,7 +306,7 @@ static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 270160fc401b..d74d8a6ac27a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -148,7 +148,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, 0, mms); return r; @@ -158,7 +158,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id, queue_id, p->doorbell_off); } @@ -239,7 +239,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, mqd, type, timeout, + (mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -254,7 +254,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_is_occupied( - mm->dev->kgd, queue_address, + mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -320,7 +320,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -363,14 +363,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 4e5932f54b5a..326eb2285029 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -108,7 +108,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); if (!mqd_mem_obj) return NULL; - retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, + retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->adev, ALIGN(q->ctl_stack_size, PAGE_SIZE) + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), &(mqd_mem_obj->gtt_mem), @@ -199,7 +199,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, 0, mms); } @@ -208,7 +208,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id, queue_id, p->doorbell_off); } @@ -291,7 +291,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, mqd, type, timeout, + (mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -301,7 +301,7 @@ static void free_mqd(struct mqd_manager *mm, void *mqd, struct kfd_dev *kfd = mm->dev; if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); @@ -313,7 +313,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_is_occupied( - mm->dev->kgd, queue_address, + mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -375,7 +375,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -418,14 +418,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index cd9220eb8a7a..d456e950ce1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -162,7 +162,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, wptr_mask, mms); } @@ -265,7 +265,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, mqd, type, timeout, + (mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -280,7 +280,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_is_occupied( - mm->dev->kgd, queue_address, + mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -347,7 +347,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -389,14 +389,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e547f1f8c49f..1439420925a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -223,7 +223,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { - switch (dqm->dev->device_info->asic_family) { + switch (dqm->dev->adev->asic_type) { case CHIP_KAVERI: case CHIP_HAWAII: /* PM4 packet structures on CIK are the same as on VI */ @@ -236,31 +236,16 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_VEGAM: pm->pmf = &kfd_vi_pm_funcs; break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - pm->pmf = &kfd_v9_pm_funcs; - break; - case CHIP_ALDEBARAN: - pm->pmf = &kfd_aldebaran_pm_funcs; - break; default: - WARN(1, "Unexpected ASIC family %u", - dqm->dev->device_info->asic_family); - return -EINVAL; + if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) + pm->pmf = &kfd_aldebaran_pm_funcs; + else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) + pm->pmf = &kfd_v9_pm_funcs; + else { + WARN(1, "Unexpected ASIC family %u", + dqm->dev->adev->asic_type); + return -EINVAL; + } } pm->dqm = dqm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c index 08442e7d9944..3c0658e32e93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c @@ -110,8 +110,8 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, return 0; } -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, - struct scheduling_resources *res) +static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) { struct pm4_mes_set_resources *packet; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8fd48d0ed240..ea68f3b3a4e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -183,7 +183,8 @@ enum cache_policy { cache_policy_noncoherent }; -#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) +#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0]) +#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1))) struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, @@ -194,8 +195,6 @@ struct kfd_event_interrupt_class { }; struct kfd_device_info { - enum amd_asic_type asic_family; - const char *asic_name; uint32_t gfx_target_version; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; @@ -208,11 +207,12 @@ struct kfd_device_info { bool needs_iommu_device; bool needs_pci_atomics; uint32_t no_atomic_fw_version; - unsigned int num_sdma_engines; - unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; }; +unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev); +unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev); + struct kfd_mem_obj { uint32_t range_start; uint32_t range_end; @@ -228,9 +228,9 @@ struct kfd_vmid_info { }; struct kfd_dev { - struct kgd_dev *kgd; + struct amdgpu_device *adev; - const struct kfd_device_info *device_info; + struct kfd_device_info device_info; struct pci_dev *pdev; struct drm_device *ddev; @@ -856,6 +856,8 @@ struct kfd_process { struct svm_range_list svms; bool xnack_enabled; + + atomic_t poison; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -891,7 +893,7 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); -int kfd_process_gpuid_from_kgd(struct kfd_process *p, +int kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev, uint32_t *gpuid, uint32_t *gpuidx); static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, @@ -984,7 +986,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain( struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); -struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); +struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev); int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); int kfd_numa_node_to_apic_id(int numa_node_id); void kfd_double_confirm_iommu_support(struct kfd_dev *gpu); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b993011cfa64..d1145da5348f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -251,14 +251,13 @@ cleanup: } /** - * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device + * kfd_get_cu_occupancy - Collect number of waves in-flight on this device * by current process. Translates acquired wave count into number of compute units * that are occupied. * - * @atr: Handle of attribute that allows reporting of wave count. The attribute + * @attr: Handle of attribute that allows reporting of wave count. The attribute * handle encapsulates GPU device it is associated with, thereby allowing collection * of waves in flight, etc - * * @buffer: Handle of user provided buffer updated with wave count * * Return: Number of bytes written to user buffer or an error value @@ -288,7 +287,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) /* Collect wave count from device if it supports */ wave_cnt = 0; max_waves_per_cu = 0; - dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt, + dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, &max_waves_per_cu); /* Translate wave count to number of compute units */ @@ -462,6 +461,7 @@ static struct attribute *procfs_queue_attrs[] = { &attr_queue_gpuid, NULL }; +ATTRIBUTE_GROUPS(procfs_queue); static const struct sysfs_ops procfs_queue_ops = { .show = kfd_procfs_queue_show, @@ -469,7 +469,7 @@ static const struct sysfs_ops procfs_queue_ops = { static struct kobj_type procfs_queue_type = { .sysfs_ops = &procfs_queue_ops, - .default_attrs = procfs_queue_attrs, + .default_groups = procfs_queue_groups, }; static const struct sysfs_ops procfs_stats_ops = { @@ -692,12 +692,12 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, struct kfd_dev *dev = pdd->dev; if (kptr) { - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem); kptr = NULL; } - amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv, + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv, NULL); } @@ -714,24 +714,24 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, struct kfd_dev *kdev = pdd->dev; int err; - err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, + err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, pdd->drm_priv, mem, NULL, flags); if (err) goto err_alloc_mem; - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem, + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem, pdd->drm_priv, NULL); if (err) goto err_map_mem; - err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true); + err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } if (kptr) { - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd, + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev, (struct kgd_mem *)*mem, kptr, NULL); if (err) { pr_debug("Map GTT BO to kernel failed\n"); @@ -742,10 +742,10 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, return err; sync_memory_failed: - amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv); + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv); err_map_mem: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv, NULL); err_alloc_mem: *mem = NULL; @@ -940,10 +940,10 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) if (!peer_pdd->drm_priv) continue; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer_pdd->dev->kgd, mem, peer_pdd->drm_priv); + peer_pdd->dev->adev, mem, peer_pdd->drm_priv); } - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, pdd->drm_priv, NULL); kfd_process_device_remove_obj_handle(pdd, id); } @@ -974,7 +974,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p) if (!mem) goto out; - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem); out: mutex_unlock(&p->mutex); @@ -1003,7 +1003,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->drm_file) { amdgpu_amdkfd_gpuvm_release_process_vm( - pdd->dev->kgd, pdd->drm_priv); + pdd->dev->adev, pdd->drm_priv); fput(pdd->drm_file); } @@ -1011,7 +1011,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); - kfree(pdd->qpd.doorbell_bitmap); + bitmap_free(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index); @@ -1317,14 +1317,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * support the SVM APIs and don't need to be considered * for the XNACK mode selection. */ - if (dev->device_info->asic_family < CHIP_VEGA10) + if (!KFD_IS_SOC15(dev)) continue; /* Aldebaran can always support XNACK because it can support * per-process XNACK mode selection. But let the dev->noretry * setting still influence the default XNACK mode. */ - if (supported && - dev->device_info->asic_family == CHIP_ALDEBARAN) + if (supported && KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) continue; /* GFXv10 and later GPUs do not support shader preemption @@ -1332,7 +1331,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * management and memory-manager-related preemptions or * even deadlocks. */ - if (dev->device_info->asic_family >= CHIP_NAVI10) + if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) return false; if (dev->noretry) @@ -1431,12 +1430,11 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, int range_start = dev->shared_resources.non_cp_doorbells_start; int range_end = dev->shared_resources.non_cp_doorbells_end; - if (!KFD_IS_SOC15(dev->device_info->asic_family)) + if (!KFD_IS_SOC15(dev)) return 0; - qpd->doorbell_bitmap = - kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - BITS_PER_BYTE), GFP_KERNEL); + qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + GFP_KERNEL); if (!qpd->doorbell_bitmap) return -ENOMEM; @@ -1448,9 +1446,9 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) { if (i >= range_start && i <= range_end) { - set_bit(i, qpd->doorbell_bitmap); - set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, - qpd->doorbell_bitmap); + __set_bit(i, qpd->doorbell_bitmap); + __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, + qpd->doorbell_bitmap); } } @@ -1547,7 +1545,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, dev = pdd->dev; ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( - dev->kgd, drm_file, p->pasid, + dev->adev, drm_file, p->pasid, &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); @@ -1779,14 +1777,13 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) } int -kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, +kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev, uint32_t *gpuid, uint32_t *gpuidx) { - struct kgd_dev *kgd = (struct kgd_dev *)adev; int i; for (i = 0; i < p->n_pdds; i++) - if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) { + if (p->pdds[i] && p->pdds[i]->dev->adev == adev) { *gpuid = p->pdds[i]->dev->id; *gpuidx = i; return 0; @@ -1951,10 +1948,10 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) * only happens when the first queue is created. */ if (pdd->qpd.vmid) - amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev, pdd->qpd.vmid); } else { - amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev, pdd->process->pasid, type); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 3627e7ac161b..5e5c84a8e1ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -118,7 +118,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, return ret; pqn->q->gws = mem; - pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0; + pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, pqn->q, NULL); @@ -135,9 +135,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p) int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); - pqm->queue_slot_bitmap = - kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - BITS_PER_BYTE), GFP_KERNEL); + pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + GFP_KERNEL); if (!pqm->queue_slot_bitmap) return -ENOMEM; pqm->process = p; @@ -159,7 +158,7 @@ void pqm_uninit(struct process_queue_manager *pqm) kfree(pqn); } - kfree(pqm->queue_slot_bitmap); + bitmap_free(pqm->queue_slot_bitmap); pqm->queue_slot_bitmap = NULL; } @@ -220,7 +219,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, * Hence we also check the type as well */ if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) - max_queues = dev->device_info->max_no_of_hqd/2; + max_queues = dev->device_info.max_no_of_hqd/2; if (pdd->qpd.queue_count >= max_queues) return -ENOSPC; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ed4bc5f844ce..deae12dc777d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -207,7 +207,6 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint64_t throttle_bitmask) { - struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; /* * ThermalThrottle msg = throttle_bitmask(8): * thermal_interrupt_count(16): @@ -223,14 +222,13 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n", KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, - atomic64_read(&adev->smu.throttle_int_counter)); + atomic64_read(&dev->adev->smu.throttle_int_counter)); add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len); } void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) { - struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; struct amdgpu_task_info task_info; /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */ /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n + @@ -243,7 +241,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) return; memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, pasid, &task_info); + amdgpu_vm_get_task_info(dev->adev, pasid, &task_info); /* Report VM faults from user applications, not retry from kernel */ if (!task_info.pid) return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3cb4681c5f53..f2805ba74c80 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -107,7 +107,7 @@ static void svm_range_add_to_svms(struct svm_range *prange) pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); - list_add_tail(&prange->list, &prange->svms->list); + list_move_tail(&prange->list, &prange->svms->list); prange->it_node.start = prange->start; prange->it_node.last = prange->last; interval_tree_insert(&prange->it_node, &prange->svms->objects); @@ -193,7 +193,6 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; pr_debug("mapping to gpu idx 0x%x\n", gpuidx); pdd = kfd_process_device_from_gpuidx(p, gpuidx); @@ -201,9 +200,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - r = svm_range_dma_map_dev(adev, prange, offset, npages, + r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, hmm_pfns, gpuidx); if (r) break; @@ -297,8 +295,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, prange->last = last; INIT_LIST_HEAD(&prange->list); INIT_LIST_HEAD(&prange->update_list); - INIT_LIST_HEAD(&prange->remove_list); - INIT_LIST_HEAD(&prange->insert_list); INIT_LIST_HEAD(&prange->svm_bo_list); INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->child_list); @@ -334,6 +330,8 @@ static void svm_range_bo_release(struct kref *kref) struct svm_range_bo *svm_bo; svm_bo = container_of(kref, struct svm_range_bo, kref); + pr_debug("svm_bo 0x%p\n", svm_bo); + spin_lock(&svm_bo->list_lock); while (!list_empty(&svm_bo->range_list)) { struct svm_range *prange = @@ -367,12 +365,33 @@ static void svm_range_bo_release(struct kref *kref) kfree(svm_bo); } -void svm_range_bo_unref(struct svm_range_bo *svm_bo) +static void svm_range_bo_wq_release(struct work_struct *work) { - if (!svm_bo) - return; + struct svm_range_bo *svm_bo; - kref_put(&svm_bo->kref, svm_range_bo_release); + svm_bo = container_of(work, struct svm_range_bo, release_work); + svm_range_bo_release(&svm_bo->kref); +} + +static void svm_range_bo_release_async(struct kref *kref) +{ + struct svm_range_bo *svm_bo; + + svm_bo = container_of(kref, struct svm_range_bo, kref); + pr_debug("svm_bo 0x%p\n", svm_bo); + INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release); + schedule_work(&svm_bo->release_work); +} + +void svm_range_bo_unref_async(struct svm_range_bo *svm_bo) +{ + kref_put(&svm_bo->kref, svm_range_bo_release_async); +} + +static void svm_range_bo_unref(struct svm_range_bo *svm_bo) +{ + if (svm_bo) + kref_put(&svm_bo->kref, svm_range_bo_release); } static bool @@ -581,7 +600,7 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) return NULL; } - return (struct amdgpu_device *)pdd->dev->kgd; + return pdd->dev->adev; } struct kfd_process_device * @@ -593,7 +612,7 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx); + r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx); if (r) { pr_debug("failed to get device id by adev %p\n", adev); return NULL; @@ -706,6 +725,61 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, } } +static bool +svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + uint32_t i; + int gpuidx; + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + if (prange->preferred_loc != attrs[i].value) + return false; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + /* Prefetch should always trigger a migration even + * if the value of the attribute didn't change. + */ + return false; + case KFD_IOCTL_SVM_ATTR_ACCESS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + gpuidx = kfd_process_gpuidx_from_gpuid(p, + attrs[i].value); + if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { + if (test_bit(gpuidx, prange->bitmap_access) || + test_bit(gpuidx, prange->bitmap_aip)) + return false; + } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) { + if (!test_bit(gpuidx, prange->bitmap_access)) + return false; + } else { + if (!test_bit(gpuidx, prange->bitmap_aip)) + return false; + } + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + if ((prange->flags & attrs[i].value) != attrs[i].value) + return false; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + if ((prange->flags & attrs[i].value) != 0) + return false; + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + if (prange->granularity != attrs[i].value) + return false; + break; + default: + WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); + } + } + + return true; +} + /** * svm_range_debug_dump - print all range information from svms * @svms: svm range list header @@ -743,14 +817,6 @@ static void svm_range_debug_dump(struct svm_range_list *svms) } } -static bool -svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) -{ - return (old->prefetch_loc == new->prefetch_loc && - old->flags == new->flags && - old->granularity == new->granularity); -} - static int svm_range_split_array(void *ppnew, void *ppold, size_t size, uint64_t old_start, uint64_t old_n, @@ -943,26 +1009,26 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, } static int -svm_range_split_tail(struct svm_range *prange, struct svm_range *new, +svm_range_split_tail(struct svm_range *prange, uint64_t new_last, struct list_head *insert_list) { struct svm_range *tail; int r = svm_range_split(prange, prange->start, new_last, &tail); if (!r) - list_add(&tail->insert_list, insert_list); + list_add(&tail->list, insert_list); return r; } static int -svm_range_split_head(struct svm_range *prange, struct svm_range *new, +svm_range_split_head(struct svm_range *prange, uint64_t new_start, struct list_head *insert_list) { struct svm_range *head; int r = svm_range_split(prange, new_start, prange->last, &head); if (!r) - list_add(&head->insert_list, insert_list); + list_add(&head->list, insert_list); return r; } @@ -1053,8 +1119,8 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, if (domain == SVM_RANGE_VRAM_DOMAIN) bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); - switch (adev->asic_type) { - case CHIP_ARCTURUS: + switch (KFD_GC_VERSION(adev->kfd.dev)) { + case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? @@ -1070,7 +1136,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; - case CHIP_ALDEBARAN: + case IP_VERSION(9, 4, 2): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? @@ -1129,7 +1195,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); struct kfd_process_device *pdd; struct dma_fence *fence = NULL; - struct amdgpu_device *adev; struct kfd_process *p; uint32_t gpuidx; int r = 0; @@ -1145,9 +1210,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + r = svm_range_unmap_from_gpu(pdd->dev->adev, + drm_priv_to_vm(pdd->drm_priv), start, last, &fence); if (r) break; @@ -1159,7 +1224,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, + amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev, p->pasid, TLB_FLUSH_HEAVYWEIGHT); } @@ -1172,7 +1237,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned long npages, bool readonly, dma_addr_t *dma_addr, struct amdgpu_device *bo_adev, struct dma_fence **fence) { - struct amdgpu_bo_va bo_va; bool table_freed = false; uint64_t pte_flags; unsigned long last_start; @@ -1185,9 +1249,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, last_start, last_start + npages - 1, readonly); - if (prange->svm_bo && prange->ttm_res) - bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); - for (i = offset; i < offset + npages; i++) { last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; @@ -1243,8 +1304,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct kfd_process *p; p = container_of(prange->svms, struct kfd_process, svms); - amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, - p->pasid, TLB_FLUSH_LEGACY); + amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, TLB_FLUSH_LEGACY); } out: return r; @@ -1257,7 +1317,6 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, { struct kfd_process_device *pdd; struct amdgpu_device *bo_adev; - struct amdgpu_device *adev; struct kfd_process *p; struct dma_fence *fence = NULL; uint32_t gpuidx; @@ -1276,19 +1335,18 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; pdd = kfd_bind_process_to_device(pdd->dev, p); if (IS_ERR(pdd)) return -EINVAL; - if (bo_adev && adev != bo_adev && - !amdgpu_xgmi_same_hive(adev, bo_adev)) { + if (bo_adev && pdd->dev->adev != bo_adev && + !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) { pr_debug("cannot map to device idx %d\n", gpuidx); continue; } - r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + r = svm_range_map_to_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv), prange, offset, npages, readonly, prange->dma_addr[gpuidx], bo_adev, wait ? &fence : NULL); @@ -1322,7 +1380,6 @@ struct svm_validate_context { static int svm_range_reserve_bos(struct svm_validate_context *ctx) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; struct amdgpu_vm *vm; uint32_t gpuidx; int r; @@ -1334,7 +1391,6 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx) pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; vm = drm_priv_to_vm(pdd->drm_priv); ctx->tv[gpuidx].bo = &vm->root.bo->tbo; @@ -1356,9 +1412,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx) r = -EINVAL; goto unreserve_out; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv), + r = amdgpu_vm_validate_pt_bos(pdd->dev->adev, + drm_priv_to_vm(pdd->drm_priv), svm_range_bo_validate, NULL); if (r) { pr_debug("failed %d validate pt bos\n", r); @@ -1381,12 +1437,10 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx) static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; pdd = kfd_process_device_from_gpuidx(p, gpuidx); - adev = (struct amdgpu_device *)pdd->dev->kgd; - return SVM_ADEV_PGMAP_OWNER(adev); + return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev); } /* @@ -1660,6 +1714,10 @@ out_reschedule: /** * svm_range_evict - evict svm range + * @prange: svm range structure + * @mm: current process mm_struct + * @start: starting process queue number + * @last: last process queue number * * Stop all queues of the process to ensure GPU doesn't access the memory, then * return to let CPU evict the buffer and proceed CPU pagetable update. @@ -1764,46 +1822,49 @@ static struct svm_range *svm_range_clone(struct svm_range *old) } /** - * svm_range_handle_overlap - split overlap ranges - * @svms: svm range list header - * @new: range added with this attributes - * @start: range added start address, in pages - * @last: range last address, in pages - * @update_list: output, the ranges attributes are updated. For set_attr, this - * will do validation and map to GPUs. For unmap, this will be - * removed and unmap from GPUs - * @insert_list: output, the ranges will be inserted into svms, attributes are - * not changes. For set_attr, this will add into svms. - * @remove_list:output, the ranges will be removed from svms - * @left: the remaining range after overlap, For set_attr, this will be added - * as new range. + * svm_range_add - add svm range and handle overlap + * @p: the range add to this process svms + * @start: page size aligned + * @size: page size aligned + * @nattr: number of attributes + * @attrs: array of attributes + * @update_list: output, the ranges need validate and update GPU mapping + * @insert_list: output, the ranges need insert to svms + * @remove_list: output, the ranges are replaced and need remove from svms * - * Total have 5 overlap cases. + * Check if the virtual address range has overlap with any existing ranges, + * split partly overlapping ranges and add new ranges in the gaps. All changes + * should be applied to the range_list and interval tree transactionally. If + * any range split or allocation fails, the entire update fails. Therefore any + * existing overlapping svm_ranges are cloned and the original svm_ranges left + * unchanged. * - * This function handles overlap of an address interval with existing - * struct svm_ranges for applying new attributes. This may require - * splitting existing struct svm_ranges. All changes should be applied to - * the range_list and interval tree transactionally. If any split operation - * fails, the entire update fails. Therefore the existing overlapping - * svm_ranges are cloned and the original svm_ranges left unchanged. If the - * transaction succeeds, the modified clones are added and the originals - * freed. Otherwise the clones are removed and the old svm_ranges remain. + * If the transaction succeeds, the caller can update and insert clones and + * new ranges, then free the originals. * - * Context: The caller must hold svms->lock + * Otherwise the caller can free the clones and new ranges, while the old + * svm_ranges remain unchanged. + * + * Context: Process context, caller must hold svms->lock + * + * Return: + * 0 - OK, otherwise error code */ static int -svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, - unsigned long start, unsigned long last, - struct list_head *update_list, - struct list_head *insert_list, - struct list_head *remove_list, - unsigned long *left) +svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, + struct list_head *update_list, struct list_head *insert_list, + struct list_head *remove_list) { + unsigned long last = start + size - 1UL; + struct svm_range_list *svms = &p->svms; struct interval_tree_node *node; struct svm_range *prange; struct svm_range *tmp; int r = 0; + pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last); + INIT_LIST_HEAD(update_list); INIT_LIST_HEAD(insert_list); INIT_LIST_HEAD(remove_list); @@ -1811,37 +1872,44 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, node = interval_tree_iter_first(&svms->objects, start, last); while (node) { struct interval_tree_node *next; - struct svm_range *old; unsigned long next_start; pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start, node->last); - old = container_of(node, struct svm_range, it_node); + prange = container_of(node, struct svm_range, it_node); next = interval_tree_iter_next(node, start, last); next_start = min(node->last, last) + 1; - if (node->start < start || node->last > last) { - /* node intersects the updated range, clone+split it */ + if (svm_range_is_same_attrs(p, prange, nattr, attrs)) { + /* nothing to do */ + } else if (node->start < start || node->last > last) { + /* node intersects the update range and its attributes + * will change. Clone and split it, apply updates only + * to the overlapping part + */ + struct svm_range *old = prange; + prange = svm_range_clone(old); if (!prange) { r = -ENOMEM; goto out; } - list_add(&old->remove_list, remove_list); - list_add(&prange->insert_list, insert_list); + list_add(&old->update_list, remove_list); + list_add(&prange->list, insert_list); + list_add(&prange->update_list, update_list); if (node->start < start) { pr_debug("change old range start\n"); - r = svm_range_split_head(prange, new, start, + r = svm_range_split_head(prange, start, insert_list); if (r) goto out; } if (node->last > last) { pr_debug("change old range last\n"); - r = svm_range_split_tail(prange, new, last, + r = svm_range_split_tail(prange, last, insert_list); if (r) goto out; @@ -1850,22 +1918,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, /* The node is contained within start..last, * just update it */ - prange = old; - } - - if (!svm_range_is_same_attrs(prange, new)) list_add(&prange->update_list, update_list); + } /* insert a new node if needed */ if (node->start > start) { - prange = svm_range_new(prange->svms, start, - node->start - 1); + prange = svm_range_new(svms, start, node->start - 1); if (!prange) { r = -ENOMEM; goto out; } - list_add(&prange->insert_list, insert_list); + list_add(&prange->list, insert_list); list_add(&prange->update_list, update_list); } @@ -1873,12 +1937,20 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, start = next_start; } - if (left && start <= last) - *left = last - start + 1; + /* add a final range at the end if needed */ + if (start <= last) { + prange = svm_range_new(svms, start, last); + if (!prange) { + r = -ENOMEM; + goto out; + } + list_add(&prange->list, insert_list); + list_add(&prange->update_list, update_list); + } out: if (r) - list_for_each_entry_safe(prange, tmp, insert_list, insert_list) + list_for_each_entry_safe(prange, tmp, insert_list, list) svm_range_free(prange); return r; @@ -1962,7 +2034,6 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; struct kfd_process *p; int drain; uint32_t i; @@ -1980,9 +2051,9 @@ restart: continue; pr_debug("drain retry fault gpu %d svms %p\n", i, svms); - adev = (struct amdgpu_device *)pdd->dev->kgd; - amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1); + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, + &pdd->dev->adev->irq.ih1); pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) @@ -2172,6 +2243,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, /** * svm_range_cpu_invalidate_pagetables - interval notifier callback + * @mni: mmu_interval_notifier struct + * @range: mmu_notifier_range struct + * @cur_seq: value to pass to mmu_interval_set_seq() * * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it * is from migration, or CPU page invalidation callback. @@ -2201,8 +2275,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, start = mni->interval_tree.start; last = mni->interval_tree.last; - start = (start > range->start ? start : range->start) >> PAGE_SHIFT; - last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT; + start = max(start, range->start) >> PAGE_SHIFT; + last = min(last, range->end - 1) >> PAGE_SHIFT; pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n", start, last, range->start >> PAGE_SHIFT, (range->end - 1) >> PAGE_SHIFT, @@ -2304,7 +2378,7 @@ svm_range_best_restore_location(struct svm_range *prange, p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx); + r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx); if (r < 0) { pr_debug("failed to get gpuid from kgd\n"); return -1; @@ -2481,7 +2555,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, pr_debug("Failed to create prange in address [0x%llx]\n", addr); return NULL; } - if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) { + if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) { pr_debug("failed to get gpuid from kgd\n"); svm_range_free(prange); return NULL; @@ -2548,7 +2622,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, uint32_t gpuid; int r; - r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx); + r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx); if (r < 0) return; } @@ -2895,59 +2969,6 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size) } /** - * svm_range_add - add svm range and handle overlap - * @p: the range add to this process svms - * @start: page size aligned - * @size: page size aligned - * @nattr: number of attributes - * @attrs: array of attributes - * @update_list: output, the ranges need validate and update GPU mapping - * @insert_list: output, the ranges need insert to svms - * @remove_list: output, the ranges are replaced and need remove from svms - * - * Check if the virtual address range has overlap with the registered ranges, - * split the overlapped range, copy and adjust pages address and vram nodes in - * old and new ranges. - * - * Context: Process context, caller must hold svms->lock - * - * Return: - * 0 - OK, otherwise error code - */ -static int -svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, - uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, - struct list_head *update_list, struct list_head *insert_list, - struct list_head *remove_list) -{ - uint64_t last = start + size - 1UL; - struct svm_range_list *svms; - struct svm_range new = {0}; - struct svm_range *prange; - unsigned long left = 0; - int r = 0; - - pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last); - - svm_range_apply_attrs(p, &new, nattr, attrs); - - svms = &p->svms; - - r = svm_range_handle_overlap(svms, &new, start, last, update_list, - insert_list, remove_list, &left); - if (r) - return r; - - if (left) { - prange = svm_range_new(svms, last - left + 1, last); - list_add(&prange->insert_list, insert_list); - list_add(&prange->update_list, update_list); - } - - return 0; -} - -/** * svm_range_best_prefetch_location - decide the best prefetch location * @prange: svm range structure * @@ -2980,7 +3001,6 @@ svm_range_best_prefetch_location(struct svm_range *prange) uint32_t best_loc = prange->prefetch_loc; struct kfd_process_device *pdd; struct amdgpu_device *bo_adev; - struct amdgpu_device *adev; struct kfd_process *p; uint32_t gpuidx; @@ -3008,12 +3028,11 @@ svm_range_best_prefetch_location(struct svm_range *prange) pr_debug("failed to get device by idx 0x%x\n", gpuidx); continue; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - if (adev == bo_adev) + if (pdd->dev->adev == bo_adev) continue; - if (!amdgpu_xgmi_same_hive(adev, bo_adev)) { + if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) { best_loc = 0; break; } @@ -3215,7 +3234,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, goto out; } /* Apply changes as a transaction */ - list_for_each_entry_safe(prange, next, &insert_list, insert_list) { + list_for_each_entry_safe(prange, next, &insert_list, list) { svm_range_add_to_svms(prange); svm_range_add_notifier_locked(mm, prange); } @@ -3223,8 +3242,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, svm_range_apply_attrs(p, prange, nattr, attrs); /* TODO: unmap ranges from GPU that lost access */ } - list_for_each_entry_safe(prange, next, &remove_list, - remove_list) { + list_for_each_entry_safe(prange, next, &remove_list, update_list) { pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 6dc91c33e80f..949b477e2f4c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -48,6 +48,7 @@ struct svm_range_bo { struct work_struct eviction_work; struct svm_range_list *svms; uint32_t evicting; + struct work_struct release_work; }; enum svm_work_list_ops { @@ -75,8 +76,6 @@ struct svm_work_list_item { * aligned, page size is (last - start + 1) * @list: link list node, used to scan all ranges of svms * @update_list:link list node used to add to update_list - * @remove_list:link list node used to add to remove list - * @insert_list:link list node used to add to insert list * @mapping: bo_va mapping structure to create and update GPU page table * @npages: number of pages * @dma_addr: dma mapping address on each GPU for system memory physical page @@ -112,8 +111,6 @@ struct svm_range { struct interval_tree_node it_node; struct list_head list; struct list_head update_list; - struct list_head remove_list; - struct list_head insert_list; uint64_t npages; dma_addr_t *dma_addr[MAX_GPU_INSTANCE]; struct ttm_resource *ttm_res; @@ -195,7 +192,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s */ #define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0) -void svm_range_bo_unref(struct svm_range_bo *svm_bo); +void svm_range_bo_unref_async(struct svm_range_bo *svm_bo); #else struct kfd_process; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index dd593ad0614a..948fbb39336e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -113,7 +113,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) +struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev) { struct kfd_topology_device *top_dev; struct kfd_dev *device = NULL; @@ -121,7 +121,7 @@ struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu && top_dev->gpu->kgd == kgd) { + if (top_dev->gpu && top_dev->gpu->adev == adev) { device = top_dev->gpu; break; } @@ -503,7 +503,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (dev->gpu) { log_max_watch_addr = - __ilog2_u32(dev->gpu->device_info->num_of_watch_points); + __ilog2_u32(dev->gpu->device_info.num_of_watch_points); if (log_max_watch_addr) { dev->node_props.capability |= @@ -515,7 +515,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); } - if (dev->gpu->device_info->asic_family == CHIP_TONGA) + if (dev->gpu->adev->asic_type == CHIP_TONGA) dev->node_props.capability |= HSA_CAP_AQL_QUEUE_DOUBLE_MAP; @@ -531,7 +531,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", dev->gpu->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", - amdgpu_amdkfd_get_unique_id(dev->gpu->kgd)); + dev->gpu->adev->unique_id); } @@ -1106,7 +1106,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) if (!gpu) return 0; - amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(gpu->adev, &local_mem_info); local_mem_size = local_mem_info.local_mem_size_private + local_mem_info.local_mem_size_public; @@ -1189,7 +1189,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * for APUs - If CRAT from ACPI reports more than one bank, then * all the banks will report the same mem_clk_max information */ - amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; @@ -1217,8 +1217,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, /* set gpu (dev) flags. */ } else { if (!dev->gpu->pci_atomic_requested || - dev->gpu->device_info->asic_family == - CHIP_HAWAII) + dev->gpu->adev->asic_type == CHIP_HAWAII) link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; } @@ -1239,7 +1238,7 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, */ if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) { + KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; } @@ -1286,7 +1285,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) void *crat_image = NULL; size_t image_size = 0; int proximity_domain; - struct amdgpu_device *adev; + int i; + const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; INIT_LIST_HEAD(&temp_topology_device_list); @@ -1296,10 +1296,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); - adev = (struct amdgpu_device *)(gpu->kgd); - /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */ - if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) { + if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) { struct kfd_topology_device *top_dev; down_read(&topology_lock); @@ -1372,45 +1370,48 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * needed for the topology */ - amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info); - strncpy(dev->node_props.name, gpu->device_info->asic_name, - KFD_TOPOLOGY_PUBLIC_NAME_SIZE); + for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { + dev->node_props.name[i] = __tolower(asic_name[i]); + if (asic_name[i] == '\0') + break; + } + dev->node_props.name[i] = '\0'; dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; - dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version; + dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; dev->node_props.capability |= - ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) << - HSA_CAP_ASIC_REVISION_SHIFT) & + ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->pdev); dev->node_props.domain = pci_domain_nr(gpu->pdev->bus); dev->node_props.max_engine_clk_fcompute = - amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); + amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); dev->node_props.max_engine_clk_ccompute = cpufreq_quick_get_max(0) / 1000; dev->node_props.drm_render_minor = gpu->shared_resources.drm_render_minor; dev->node_props.hive_id = gpu->hive_id; - dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; + dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); dev->node_props.num_sdma_xgmi_engines = - gpu->device_info->num_xgmi_sdma_engines; + kfd_get_num_xgmi_sdma_engines(gpu); dev->node_props.num_sdma_queues_per_engine = - gpu->device_info->num_sdma_queues_per_engine; + gpu->device_info.num_sdma_queues_per_engine; dev->node_props.num_gws = (dev->gpu->gws && dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? - amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; + dev->gpu->adev->gds.gws_size : 0; dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); kfd_fill_mem_clk_max_info(dev); kfd_fill_iolink_non_crat_info(dev); - switch (dev->gpu->device_info->asic_family) { + switch (dev->gpu->adev->asic_type) { case CHIP_KAVERI: case CHIP_HAWAII: case CHIP_TONGA: @@ -1429,30 +1430,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << - HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & - HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); - break; default: - WARN(1, "Unexpected ASIC family %u", - dev->gpu->device_info->asic_family); + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1)) + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + else + WARN(1, "Unexpected ASIC family %u", + dev->gpu->adev->asic_type); } /* @@ -1469,7 +1454,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * because it doesn't consider masked out CUs * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd */ - if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { + if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { dev->node_props.simd_count = cu_info.simd_per_cu * cu_info.cu_active_number; dev->node_props.max_waves_per_simd = 10; @@ -1477,16 +1462,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu) /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ dev->node_props.capability |= - ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? + ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? HSA_CAP_SRAM_EDCSUPPORTED : 0; - dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? + dev->node_props.capability |= + ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? HSA_CAP_MEM_EDCSUPPORTED : 0; - if (adev->asic_type != CHIP_VEGA10) - dev->node_props.capability |= (adev->ras_enabled != 0) ? + if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) + dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; - if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) + if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev)) dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; kfd_debug_print_topology(); @@ -1592,7 +1578,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu) gpu->use_iommu_v2 = false; - if (!gpu->device_info->needs_iommu_device) + if (!gpu->device_info.needs_iommu_device) return; down_read(&topology_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index a8db017c9b8e..f0cc59d2fd5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -25,38 +25,11 @@ #include <linux/types.h> #include <linux/list.h> +#include <linux/kfd_sysfs.h> #include "kfd_crat.h" #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32 -#define HSA_CAP_HOT_PLUGGABLE 0x00000001 -#define HSA_CAP_ATS_PRESENT 0x00000002 -#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004 -#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008 -#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010 -#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020 -#define HSA_CAP_VA_LIMIT 0x00000040 -#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 - -#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 -#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 -#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 -#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 - -#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 /* Old buggy user mode depends on this being 0 */ -#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 -#define HSA_CAP_RASEVENTNOTIFY 0x00200000 -#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 -#define HSA_CAP_ASIC_REVISION_SHIFT 22 -#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 -#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 -#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 -#define HSA_CAP_RESERVED 0xe00f8000 - struct kfd_node_properties { uint64_t hive_id; uint32_t cpu_cores_count; @@ -93,17 +66,6 @@ struct kfd_node_properties { char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; -#define HSA_MEM_HEAP_TYPE_SYSTEM 0 -#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1 -#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2 -#define HSA_MEM_HEAP_TYPE_GPU_GDS 3 -#define HSA_MEM_HEAP_TYPE_GPU_LDS 4 -#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 - -#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 -#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 -#define HSA_MEM_FLAGS_RESERVED 0xfffffffc - struct kfd_mem_properties { struct list_head list; uint32_t heap_type; @@ -116,12 +78,6 @@ struct kfd_mem_properties { struct attribute attr; }; -#define HSA_CACHE_TYPE_DATA 0x00000001 -#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 -#define HSA_CACHE_TYPE_CPU 0x00000004 -#define HSA_CACHE_TYPE_HSACU 0x00000008 -#define HSA_CACHE_TYPE_RESERVED 0xfffffff0 - struct kfd_cache_properties { struct list_head list; uint32_t processor_id_low; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 122dae1a1813..7f9773f8dab6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -624,7 +624,7 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) #endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */ /** - * dmub_aux_setconfig_reply_callback - Callback for AUX or SET_CONFIG command. + * dmub_aux_setconfig_callback - Callback for AUX or SET_CONFIG command. * @adev: amdgpu_device pointer * @notify: dmub notification structure * @@ -632,7 +632,8 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) * Copies dmub notification to DM which is to be read by AUX command. * issuing thread and also signals the event to wake up the thread. */ -void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notification *notify) +static void dmub_aux_setconfig_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) { if (adev->dm.dmub_notify) memcpy(adev->dm.dmub_notify, notify, sizeof(struct dmub_notification)); @@ -648,7 +649,8 @@ void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notific * Dmub Hpd interrupt processing callback. Gets displayindex through the * ink index and calls helper to do the processing. */ -void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *notify) +static void dmub_hpd_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) { struct amdgpu_dm_connector *aconnector; struct amdgpu_dm_connector *hpd_aconnector = NULL; @@ -656,7 +658,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not struct drm_connector_list_iter iter; struct dc_link *link; uint8_t link_index = 0; - struct drm_device *dev = adev->dm.ddev; + struct drm_device *dev; if (adev == NULL) return; @@ -673,6 +675,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not link_index = notify->link_index; link = adev->dm.dc->links[link_index]; + dev = adev->dm.ddev; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -705,8 +708,10 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not * to dmub interrupt handling thread * Return: true if successfully registered, false if there is existing registration */ -bool register_dmub_notify_callback(struct amdgpu_device *adev, enum dmub_notification_type type, -dmub_notify_interrupt_callback_t callback, bool dmub_int_thread_offload) +static bool register_dmub_notify_callback(struct amdgpu_device *adev, + enum dmub_notification_type type, + dmub_notify_interrupt_callback_t callback, + bool dmub_int_thread_offload) { if (callback != NULL && type < ARRAY_SIZE(adev->dm.dmub_thread_offload)) { adev->dm.dmub_callback[type] = callback; @@ -790,8 +795,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) plink = adev->dm.dc->links[notify.link_index]; if (plink) { plink->hpd_status = - notify.hpd_status == - DP_HPD_PLUG ? true : false; + notify.hpd_status == DP_HPD_PLUG; } } queue_work(adev->dm.delayed_hpd_wq, &dmub_hpd_wrk->handle_hpd_work); @@ -1051,6 +1055,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; } + /* Reset DMCUB if it was previously running - before we overwrite its memory. */ + status = dmub_srv_hw_reset(dmub_srv); + if (status != DMUB_STATUS_OK) + DRM_WARN("Error resetting DMUB HW: %d\n", status); + hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; fw_inst_const = dmub_fw->data + @@ -1153,6 +1162,32 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; } +static void dm_dmub_hw_resume(struct amdgpu_device *adev) +{ + struct dmub_srv *dmub_srv = adev->dm.dmub_srv; + enum dmub_status status; + bool init; + + if (!dmub_srv) { + /* DMUB isn't supported on the ASIC. */ + return; + } + + status = dmub_srv_is_hw_init(dmub_srv, &init); + if (status != DMUB_STATUS_OK) + DRM_WARN("DMUB hardware init check failed: %d\n", status); + + if (status == DMUB_STATUS_OK && init) { + /* Wait for firmware load to finish. */ + status = dmub_srv_wait_for_auto_load(dmub_srv, 100000); + if (status != DMUB_STATUS_OK) + DRM_WARN("Wait for DMUB auto-load failed: %d\n", status); + } else { + /* Perform the full hardware initialization. */ + dm_dmub_hw_init(adev); + } +} + #if defined(CONFIG_DRM_AMD_DC_DCN) static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config) { @@ -1454,8 +1489,21 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_feature_mask & DC_EDP_NO_POWER_SEQUENCING) init_data.flags.edp_no_power_sequencing = true; +#ifdef CONFIG_DRM_AMD_DC_DCN + if (amdgpu_dc_feature_mask & DC_DISABLE_LTTPR_DP1_4A) + init_data.flags.allow_lttpr_non_transparent_mode.bits.DP1_4A = true; + if (amdgpu_dc_feature_mask & DC_DISABLE_LTTPR_DP2_0) + init_data.flags.allow_lttpr_non_transparent_mode.bits.DP2_0 = true; +#endif + init_data.flags.power_down_display_on_boot = true; + if (check_seamless_boot_capability(adev)) { + init_data.flags.power_down_display_on_boot = false; + init_data.flags.allow_seamless_boot_optimization = true; + DRM_INFO("Seamless boot condition check passed\n"); + } + INIT_LIST_HEAD(&adev->dm.da_list); /* Display Core create. */ adev->dm.dc = dc_create(&init_data); @@ -1480,8 +1528,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER) adev->dm.dc->debug.disable_stutter = true; - if (amdgpu_dc_debug_mask & DC_DISABLE_DSC) + if (amdgpu_dc_debug_mask & DC_DISABLE_DSC) { adev->dm.dc->debug.disable_dsc = true; + adev->dm.dc->debug.disable_dsc_edp = true; + } if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING) adev->dm.dc->debug.disable_clock_gate = true; @@ -2304,14 +2354,6 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) goto fail; } - - res = dc_validate_global_state(dc, context, false); - - if (res != DC_OK) { - DRM_ERROR("%s:resource validation failed, dc_status:%d\n", __func__, res); - goto fail; - } - res = dc_commit_state(dc, context); fail: @@ -2595,15 +2637,6 @@ static int dm_resume(void *handle) = 0xffffffff; } } -#if defined(CONFIG_DRM_AMD_DC_DCN) - /* - * Resource allocation happens for link encoders for newer ASIC in - * dc_validate_global_state, so we need to revalidate it. - * - * This shouldn't fail (it passed once before), so warn if it does. - */ - WARN_ON(dc_validate_global_state(dm->dc, dc_state, false) != DC_OK); -#endif WARN_ON(!dc_commit_state(dm->dc, dc_state)); @@ -2631,9 +2664,7 @@ static int dm_resume(void *handle) amdgpu_dm_outbox_init(adev); /* Before powering on DC we need to re-initialize DMUB. */ - r = dm_dmub_hw_init(adev); - if (r) - DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + dm_dmub_hw_resume(adev); /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); @@ -2960,13 +2991,12 @@ void amdgpu_dm_update_connector_after_detect( aconnector->edid = (struct edid *)sink->dc_edid.raw_edid; - drm_connector_update_edid_property(connector, - aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, aconnector->edid); } + drm_connector_update_edid_property(connector, aconnector->edid); amdgpu_dm_update_freesync_caps(connector, aconnector->edid); update_connector_ext_caps(aconnector); } else { @@ -3034,7 +3064,7 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector) drm_modeset_unlock_all(dev); if (aconnector->base.force == DRM_FORCE_UNSPECIFIED) - drm_kms_helper_hotplug_event(dev); + drm_kms_helper_connector_hotplug_event(connector); } else if (dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD)) { if (new_connection_type == dc_connection_none && @@ -3049,7 +3079,7 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector) drm_modeset_unlock_all(dev); if (aconnector->base.force == DRM_FORCE_UNSPECIFIED) - drm_kms_helper_hotplug_event(dev); + drm_kms_helper_connector_hotplug_event(connector); } mutex_unlock(&aconnector->hpd_lock); @@ -3243,7 +3273,7 @@ out: dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); - drm_kms_helper_hotplug_event(dev); + drm_kms_helper_connector_hotplug_event(connector); } else if (dc_link_detect(dc_link, DETECT_REASON_HPDRX)) { if (aconnector->fake_enable) @@ -3256,7 +3286,7 @@ out: dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); - drm_kms_helper_hotplug_event(dev); + drm_kms_helper_connector_hotplug_event(connector); } } #ifdef CONFIG_DRM_AMD_DC_HDCP @@ -4276,6 +4306,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } + /* + * Disable vblank IRQs aggressively for power-saving. + * + * TODO: Fix vblank control helpers to delay PSR entry to allow this when PSR + * is also supported. + */ + adev_to_drm(adev)->vblank_disable_immediate = !psr_feature_enabled; + /* Software is initialized. Now we can register interrupt handlers. */ switch (adev->asic_type) { #if defined(CONFIG_DRM_AMD_DC_SI) @@ -6060,12 +6098,74 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, struct dsc_dec_dpcd_caps *dsc_caps) { stream->timing.flags.DSC = 0; + dsc_caps->is_dsc_supported = false; + + if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || + sink->sink_signal == SIGNAL_TYPE_EDP)) { + if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE || + sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) + dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, + aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw, + aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw, + dsc_caps); + } +} + +static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps, + uint32_t max_dsc_target_bpp_limit_override) +{ + const struct dc_link_settings *verified_link_cap = NULL; + uint32_t link_bw_in_kbps; + uint32_t edp_min_bpp_x16, edp_max_bpp_x16; + struct dc *dc = sink->ctx->dc; + struct dc_dsc_bw_range bw_range = {0}; + struct dc_dsc_config dsc_cfg = {0}; + + verified_link_cap = dc_link_get_link_cap(stream->link); + link_bw_in_kbps = dc_link_bandwidth_kbps(stream->link, verified_link_cap); + edp_min_bpp_x16 = 8 * 16; + edp_max_bpp_x16 = 8 * 16; + + if (edp_max_bpp_x16 > dsc_caps->edp_max_bits_per_pixel) + edp_max_bpp_x16 = dsc_caps->edp_max_bits_per_pixel; + + if (edp_max_bpp_x16 < edp_min_bpp_x16) + edp_min_bpp_x16 = edp_max_bpp_x16; + + if (dc_dsc_compute_bandwidth_range(dc->res_pool->dscs[0], + dc->debug.dsc_min_slice_height_override, + edp_min_bpp_x16, edp_max_bpp_x16, + dsc_caps, + &stream->timing, + &bw_range)) { + + if (bw_range.max_kbps < link_bw_in_kbps) { + if (dc_dsc_compute_config(dc->res_pool->dscs[0], + dsc_caps, + dc->debug.dsc_min_slice_height_override, + max_dsc_target_bpp_limit_override, + 0, + &stream->timing, + &dsc_cfg)) { + stream->timing.dsc_cfg = dsc_cfg; + stream->timing.flags.DSC = 1; + stream->timing.dsc_cfg.bits_per_pixel = edp_max_bpp_x16; + } + return; + } + } - if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { - dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, - aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw, - aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw, - dsc_caps); + if (dc_dsc_compute_config(dc->res_pool->dscs[0], + dsc_caps, + dc->debug.dsc_min_slice_height_override, + max_dsc_target_bpp_limit_override, + link_bw_in_kbps, + &stream->timing, + &dsc_cfg)) { + stream->timing.dsc_cfg = dsc_cfg; + stream->timing.flags.DSC = 1; } } @@ -6076,6 +6176,9 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, struct drm_connector *drm_connector = &aconnector->base; uint32_t link_bandwidth_kbps; uint32_t max_dsc_target_bpp_limit_override = 0; + struct dc *dc = sink->ctx->dc; + uint32_t max_supported_bw_in_kbps, timing_bw_in_kbps; + uint32_t dsc_max_supported_bw_in_kbps; link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, dc_link_get_link_cap(aconnector->dc_link)); @@ -6088,17 +6191,43 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_dsc_policy_set_enable_dsc_when_not_needed( aconnector->dsc_settings.dsc_force_enable == DSC_CLK_FORCE_ENABLE); - if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { + if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_EDP && !dc->debug.disable_dsc_edp && + dc->caps.edp_dsc_support && aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE) { + + apply_dsc_policy_for_edp(aconnector, sink, stream, dsc_caps, max_dsc_target_bpp_limit_override); - if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0], + } else if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { + if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) { + if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0], dsc_caps, aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override, max_dsc_target_bpp_limit_override, link_bandwidth_kbps, &stream->timing, &stream->timing.dsc_cfg)) { - stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); + stream->timing.flags.DSC = 1; + DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", + __func__, drm_connector->name); + } + } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { + timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing); + max_supported_bw_in_kbps = link_bandwidth_kbps; + dsc_max_supported_bw_in_kbps = link_bandwidth_kbps; + + if (timing_bw_in_kbps > max_supported_bw_in_kbps && + max_supported_bw_in_kbps > 0 && + dsc_max_supported_bw_in_kbps > 0) + if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0], + dsc_caps, + aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override, + max_dsc_target_bpp_limit_override, + dsc_max_supported_bw_in_kbps, + &stream->timing, + &stream->timing.dsc_cfg)) { + stream->timing.flags.DSC = 1; + DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from DP-HDMI PCON\n", + __func__, drm_connector->name); + } } } @@ -8242,15 +8371,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, break; case DRM_MODE_CONNECTOR_DisplayPort: aconnector->base.polled = DRM_CONNECTOR_POLL_HPD; - if (link->is_dig_mapping_flexible && - link->dc->res_pool->funcs->link_encs_assign) { - link->link_enc = - link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link); - if (!link->link_enc) - link->link_enc = - link_enc_cfg_get_next_avail_link_enc(link->ctx->dc); - } - + link->link_enc = dp_get_link_enc(link); + ASSERT(link->link_enc); if (link->link_enc) aconnector->base.ycbcr_420_allowed = link->link_enc->features.dp_ycbcr420_supported ? true : false; @@ -10641,6 +10763,8 @@ static int dm_update_plane_state(struct dc *dc, dm_new_plane_state->dc_state = dc_new_plane_state; + dm_new_crtc_state->mpo_requested |= (plane->type == DRM_PLANE_TYPE_OVERLAY); + /* Tell DC to do a full surface update every time there * is a plane change. Inefficient, but works for now. */ @@ -10653,6 +10777,24 @@ static int dm_update_plane_state(struct dc *dc, return ret; } +static void dm_get_oriented_plane_size(struct drm_plane_state *plane_state, + int *src_w, int *src_h) +{ + switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_90: + case DRM_MODE_ROTATE_270: + *src_w = plane_state->src_h >> 16; + *src_h = plane_state->src_w >> 16; + break; + case DRM_MODE_ROTATE_0: + case DRM_MODE_ROTATE_180: + default: + *src_w = plane_state->src_w >> 16; + *src_h = plane_state->src_h >> 16; + break; + } +} + static int dm_check_crtc_cursor(struct drm_atomic_state *state, struct drm_crtc *crtc, struct drm_crtc_state *new_crtc_state) @@ -10661,6 +10803,8 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, struct drm_plane_state *new_cursor_state, *new_underlying_state; int i; int cursor_scale_w, cursor_scale_h, underlying_scale_w, underlying_scale_h; + int cursor_src_w, cursor_src_h; + int underlying_src_w, underlying_src_h; /* On DCE and DCN there is no dedicated hardware cursor plane. We get a * cursor per pipe but it's going to inherit the scaling and @@ -10672,10 +10816,9 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, return 0; } - cursor_scale_w = new_cursor_state->crtc_w * 1000 / - (new_cursor_state->src_w >> 16); - cursor_scale_h = new_cursor_state->crtc_h * 1000 / - (new_cursor_state->src_h >> 16); + dm_get_oriented_plane_size(new_cursor_state, &cursor_src_w, &cursor_src_h); + cursor_scale_w = new_cursor_state->crtc_w * 1000 / cursor_src_w; + cursor_scale_h = new_cursor_state->crtc_h * 1000 / cursor_src_h; for_each_new_plane_in_state_reverse(state, underlying, new_underlying_state, i) { /* Narrow down to non-cursor planes on the same CRTC as the cursor */ @@ -10686,10 +10829,10 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, if (!new_underlying_state->fb) continue; - underlying_scale_w = new_underlying_state->crtc_w * 1000 / - (new_underlying_state->src_w >> 16); - underlying_scale_h = new_underlying_state->crtc_h * 1000 / - (new_underlying_state->src_h >> 16); + dm_get_oriented_plane_size(new_underlying_state, + &underlying_src_w, &underlying_src_h); + underlying_scale_w = new_underlying_state->crtc_w * 1000 / underlying_src_w; + underlying_scale_h = new_underlying_state->crtc_h * 1000 / underlying_src_h; if (cursor_scale_w != underlying_scale_w || cursor_scale_h != underlying_scale_h) { @@ -10774,7 +10917,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, enum dc_status status; int ret, i; bool lock_and_validation_needed = false; - struct dm_crtc_state *dm_old_crtc_state; + struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_mst_fairness_vars vars[MAX_PIPES]; struct drm_dp_mst_topology_state *mst_state; @@ -10784,8 +10927,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, trace_amdgpu_dm_atomic_check_begin(state); ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_helper_check_modeset() failed\n"); goto fail; + } /* Check connector changes */ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { @@ -10801,6 +10946,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_crtc_state = drm_atomic_get_crtc_state(state, new_con_state->crtc); if (IS_ERR(new_crtc_state)) { + DRM_DEBUG_DRIVER("drm_atomic_get_crtc_state() failed\n"); ret = PTR_ERR(new_crtc_state); goto fail; } @@ -10815,8 +10961,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = add_affected_mst_dsc_crtcs(state, crtc); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("add_affected_mst_dsc_crtcs() failed\n"); goto fail; + } } } } @@ -10831,19 +10979,25 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, continue; ret = amdgpu_dm_verify_lut_sizes(new_crtc_state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("amdgpu_dm_verify_lut_sizes() failed\n"); goto fail; + } if (!new_crtc_state->enable) continue; ret = drm_atomic_add_affected_connectors(state, crtc); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_add_affected_connectors() failed\n"); goto fail; + } ret = drm_atomic_add_affected_planes(state, crtc); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_add_affected_planes() failed\n"); goto fail; + } if (dm_old_crtc_state->dsc_force_changed) new_crtc_state->mode_changed = true; @@ -10880,6 +11034,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (IS_ERR(new_plane_state)) { ret = PTR_ERR(new_plane_state); + DRM_DEBUG_DRIVER("new_plane_state is BAD\n"); goto fail; } } @@ -10892,8 +11047,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_plane_state, false, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_update_plane_state() failed\n"); goto fail; + } } /* Disable all crtcs which require disable */ @@ -10903,8 +11060,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_crtc_state, false, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("DISABLE: dm_update_crtc_state() failed\n"); goto fail; + } } /* Enable all crtcs which require enable */ @@ -10914,8 +11073,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_crtc_state, true, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("ENABLE: dm_update_crtc_state() failed\n"); goto fail; + } } /* Add new/modified planes */ @@ -10925,20 +11086,32 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_plane_state, true, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_update_plane_state() failed\n"); goto fail; + } } /* Run this here since we want to validate the streams we created */ ret = drm_atomic_helper_check_planes(dev, state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_helper_check_planes() failed\n"); goto fail; + } + + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + if (dm_new_crtc_state->mpo_requested) + DRM_DEBUG_DRIVER("MPO enablement requested on crtc:[%p]\n", crtc); + } /* Check cursor planes scaling */ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { ret = dm_check_crtc_cursor(state, crtc, new_crtc_state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_check_crtc_cursor() failed\n"); goto fail; + } } if (state->legacy_cursor_update) { @@ -11025,20 +11198,28 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, */ if (lock_and_validation_needed) { ret = dm_atomic_get_state(state, &dm_state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_atomic_get_state() failed\n"); goto fail; + } ret = do_aquire_global_lock(dev, state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("do_aquire_global_lock() failed\n"); goto fail; + } #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) + if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); goto fail; + } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_update_mst_vcpi_slots_for_dsc() failed\n"); goto fail; + } #endif /* @@ -11048,12 +11229,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, * to get stuck in an infinite loop and hang eventually. */ ret = drm_dp_mst_atomic_check(state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n"); goto fail; - status = dc_validate_global_state(dc, dm_state->context, false); + } + status = dc_validate_global_state(dc, dm_state->context, true); if (status != DC_OK) { - drm_dbg_atomic(dev, - "DC global validation failure: %s (%d)", + DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)", dc_status_to_str(status), status); ret = -EINVAL; goto fail; @@ -11175,7 +11357,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, sizeof(cmd.edid_cea) - sizeof(cmd.edid_cea.header); input->offset = offset; input->length = length; - input->total_length = total_length; + input->cea_total_length = total_length; memcpy(input->payload, data, length); res = dc_dmub_srv_cmd_with_reply_data(dm->dc->ctx->dmub_srv, &cmd); @@ -11482,8 +11664,10 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address, return value; } -int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, struct dc_context *ctx, - uint8_t status_type, uint32_t *operation_result) +static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, + struct dc_context *ctx, + uint8_t status_type, + uint32_t *operation_result) { struct amdgpu_device *adev = ctx->driver_context; int return_status = -1; @@ -11554,3 +11738,24 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, (uint32_t *)operation_result); } + +/* + * Check whether seamless boot is supported. + * + * So far we only support seamless boot on CHIP_VANGOGH. + * If everything goes well, we may consider expanding + * seamless boot to other ASICs. + */ +bool check_seamless_boot_capability(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VANGOGH: + if (!adev->mman.keep_stolen_vga_memory) + return true; + break; + default: + break; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 37e61a88d49e..b9a69b0cef23 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,9 +50,9 @@ #define AMDGPU_DMUB_NOTIFICATION_MAX 5 -/** +/* * DMUB Async to Sync Mechanism Status - **/ + */ #define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 #define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 #define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 @@ -626,6 +626,8 @@ struct dm_crtc_state { bool cm_has_degamma; bool cm_is_degamma_srgb; + bool mpo_requested; + int update_type; int active_planes; @@ -731,4 +733,7 @@ extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx, unsigned int link_index, void *payload, void *operation_result); + +bool check_seamless_boot_capability(struct amdgpu_device *adev); + #endif /* __AMDGPU_DM_H__ */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a022e5bb30a5..a71177305bcd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -285,8 +285,12 @@ static int __set_input_tf(struct dc_transfer_func *func, } /** + * amdgpu_dm_verify_lut_sizes + * @crtc_state: the DRM CRTC state + * * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of * the expected size. + * * Returns 0 on success. */ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 9d43ecb1f692..26719efa5396 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -824,6 +824,48 @@ static int dmub_fw_state_show(struct seq_file *m, void *data) return seq_write(m, state_base, state_size); } +/* psr_capability_show() - show eDP panel PSR capability + * + * The read function: sink_psr_capability_show + * Shows if sink has PSR capability or not. + * If yes - the PSR version is appended + * + * cat /sys/kernel/debug/dri/0/eDP-X/psr_capability + * + * Expected output: + * "Sink support: no\n" - if panel doesn't support PSR + * "Sink support: yes [0x01]\n" - if panel supports PSR1 + * "Driver support: no\n" - if driver doesn't support PSR + * "Driver support: yes [0x01]\n" - if driver supports PSR1 + */ +static int psr_capability_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct dc_link *link = aconnector->dc_link; + + if (!link) + return -ENODEV; + + if (link->type == dc_connection_none) + return -ENODEV; + + if (!(link->connector_signal & SIGNAL_TYPE_EDP)) + return -ENODEV; + + seq_printf(m, "Sink support: %s", yesno(link->dpcd_caps.psr_caps.psr_version != 0)); + if (link->dpcd_caps.psr_caps.psr_version) + seq_printf(m, " [0x%02x]", link->dpcd_caps.psr_caps.psr_version); + seq_puts(m, "\n"); + + seq_printf(m, "Driver support: %s", yesno(link->psr_settings.psr_feature_enabled)); + if (link->psr_settings.psr_version) + seq_printf(m, " [0x%02x]", link->psr_settings.psr_version); + seq_puts(m, "\n"); + + return 0; +} + /* * Returns the current and maximum output bpc for the connector. * Example usage: cat /sys/kernel/debug/dri/0/DP-1/output_bpc @@ -1243,7 +1285,7 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); - drm_kms_helper_hotplug_event(dev); + drm_kms_helper_connector_hotplug_event(connector); } else if (param[0] == 0) { if (!aconnector->dc_link) goto unlock; @@ -1265,7 +1307,7 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); - drm_kms_helper_hotplug_event(dev); + drm_kms_helper_connector_hotplug_event(connector); } unlock: @@ -2467,6 +2509,7 @@ DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status); DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); #endif DEFINE_SHOW_ATTRIBUTE(internal_display); +DEFINE_SHOW_ATTRIBUTE(psr_capability); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2712,6 +2755,138 @@ static const struct { {"internal_display", &internal_display_fops} }; +/* + * Returns supported customized link rates by this eDP panel. + * Example usage: cat /sys/kernel/debug/dri/0/eDP-x/ilr_setting + */ +static int edp_ilr_show(struct seq_file *m, void *unused) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private); + struct dc_link *link = aconnector->dc_link; + uint8_t supported_link_rates[16]; + uint32_t link_rate_in_khz; + uint32_t entry = 0; + uint8_t dpcd_rev; + + memset(supported_link_rates, 0, sizeof(supported_link_rates)); + dm_helpers_dp_read_dpcd(link->ctx, link, DP_SUPPORTED_LINK_RATES, + supported_link_rates, sizeof(supported_link_rates)); + + dpcd_rev = link->dpcd_caps.dpcd_rev.raw; + + if (dpcd_rev >= DP_DPCD_REV_13 && + (supported_link_rates[entry+1] != 0 || supported_link_rates[entry] != 0)) { + + for (entry = 0; entry < 16; entry += 2) { + link_rate_in_khz = (supported_link_rates[entry+1] * 0x100 + + supported_link_rates[entry]) * 200; + seq_printf(m, "[%d] %d kHz\n", entry/2, link_rate_in_khz); + } + } else { + seq_printf(m, "ILR is not supported by this eDP panel.\n"); + } + + return 0; +} + +/* + * Set supported customized link rate to eDP panel. + * + * echo <lane_count> <link_rate option> > ilr_setting + * + * for example, supported ILR : [0] 1620000 kHz [1] 2160000 kHz [2] 2430000 kHz ... + * echo 4 1 > /sys/kernel/debug/dri/0/eDP-x/ilr_setting + * to set 4 lanes and 2.16 GHz + */ +static ssize_t edp_ilr_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_dm_connector *connector = file_inode(f)->i_private; + struct dc_link *link = connector->dc_link; + struct amdgpu_device *adev = drm_to_adev(connector->base.dev); + struct dc *dc = (struct dc *)link->dc; + struct dc_link_settings prefer_link_settings; + char *wr_buf = NULL; + const uint32_t wr_buf_size = 40; + /* 0: lane_count; 1: link_rate */ + int max_param_num = 2; + uint8_t param_nums = 0; + long param[2]; + bool valid_input = true; + + if (size == 0) + return -EINVAL; + + wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL); + if (!wr_buf) + return -ENOMEM; + + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, + (long *)param, buf, + max_param_num, + ¶m_nums)) { + kfree(wr_buf); + return -EINVAL; + } + + if (param_nums <= 0) { + kfree(wr_buf); + return -EINVAL; + } + + switch (param[0]) { + case LANE_COUNT_ONE: + case LANE_COUNT_TWO: + case LANE_COUNT_FOUR: + break; + default: + valid_input = false; + break; + } + + if (param[1] >= link->dpcd_caps.edp_supported_link_rates_count) + valid_input = false; + + if (!valid_input) { + kfree(wr_buf); + DRM_DEBUG_DRIVER("Invalid Input value. No HW will be programmed\n"); + prefer_link_settings.use_link_rate_set = false; + dc_link_set_preferred_training_settings(dc, NULL, NULL, link, true); + return size; + } + + /* save user force lane_count, link_rate to preferred settings + * spread spectrum will not be changed + */ + prefer_link_settings.link_spread = link->cur_link_settings.link_spread; + prefer_link_settings.lane_count = param[0]; + prefer_link_settings.use_link_rate_set = true; + prefer_link_settings.link_rate_set = param[1]; + prefer_link_settings.link_rate = link->dpcd_caps.edp_supported_link_rates[param[1]]; + + mutex_lock(&adev->dm.dc_lock); + dc_link_set_preferred_training_settings(dc, &prefer_link_settings, + NULL, link, false); + mutex_unlock(&adev->dm.dc_lock); + + kfree(wr_buf); + return size; +} + +static int edp_ilr_open(struct inode *inode, struct file *file) +{ + return single_open(file, edp_ilr_show, inode->i_private); +} + +static const struct file_operations edp_ilr_debugfs_fops = { + .owner = THIS_MODULE, + .open = edp_ilr_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = edp_ilr_write +}; + void connector_debugfs_init(struct amdgpu_dm_connector *connector) { int i; @@ -2726,11 +2901,14 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) } } if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) { + debugfs_create_file_unsafe("psr_capability", 0444, dir, connector, &psr_capability_fops); debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops); debugfs_create_file("amdgpu_current_backlight_pwm", 0444, dir, connector, ¤t_backlight_fops); debugfs_create_file("amdgpu_target_backlight_pwm", 0444, dir, connector, &target_backlight_fops); + debugfs_create_file("ilr_setting", 0644, dir, connector, + &edp_ilr_debugfs_fops); } for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) { @@ -2909,10 +3087,13 @@ static int crc_win_update_set(void *data, u64 val) struct amdgpu_device *adev = drm_to_adev(new_crtc->dev); struct crc_rd_work *crc_rd_wrk = adev->dm.crc_rd_wrk; + if (!crc_rd_wrk) + return 0; + if (val) { spin_lock_irq(&adev_to_drm(adev)->event_lock); spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock); - if (crc_rd_wrk && crc_rd_wrk->crtc) { + if (crc_rd_wrk->crtc) { old_crtc = crc_rd_wrk->crtc; old_acrtc = to_amdgpu_crtc(old_crtc); } @@ -3190,6 +3371,32 @@ static int disable_hpd_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(disable_hpd_ops, disable_hpd_get, disable_hpd_set, "%llu\n"); +#if defined(CONFIG_DRM_AMD_DC_DCN) +/* + * Temporary w/a to force sst sequence in M42D DP2 mst receiver + * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dp_set_mst_en_for_sst + */ +static int dp_force_sst_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + + adev->dm.dc->debug.set_mst_en_for_sst = val; + + return 0; +} + +static int dp_force_sst_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = data; + + *val = adev->dm.dc->debug.set_mst_en_for_sst; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(dp_set_mst_en_for_sst_ops, dp_force_sst_get, + dp_force_sst_set, "%llu\n"); +#endif + /* * Sets the DC visual confirm debug option from the given string. * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_visual_confirm @@ -3299,6 +3506,10 @@ void dtn_debugfs_init(struct amdgpu_device *adev) adev, &mst_topo_fops); debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev, &dtn_log_fops); +#if defined(CONFIG_DRM_AMD_DC_DCN) + debugfs_create_file("amdgpu_dm_dp_set_mst_en_for_sst", 0644, root, adev, + &dp_set_mst_en_for_sst_ops); +#endif debugfs_create_file_unsafe("amdgpu_dm_visual_confirm", 0644, root, adev, &visual_confirm_fops); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 8cbeeb7c986d..29f07c26d080 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -83,16 +83,17 @@ static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps) * void * */ enum dc_edid_status dm_helpers_parse_edid_caps( - struct dc_context *ctx, + struct dc_link *link, const struct dc_edid *edid, struct dc_edid_caps *edid_caps) { + struct amdgpu_dm_connector *aconnector = link->priv; + struct drm_connector *connector = &aconnector->base; struct edid *edid_buf = (struct edid *) edid->raw_edid; struct cea_sad *sads; int sad_count = -1; int sadb_count = -1; int i = 0; - int j = 0; uint8_t *sadb = NULL; enum dc_edid_status result = EDID_OK; @@ -111,23 +112,11 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->manufacture_week = edid_buf->mfg_week; edid_caps->manufacture_year = edid_buf->mfg_year; - /* One of the four detailed_timings stores the monitor name. It's - * stored in an array of length 13. */ - for (i = 0; i < 4; i++) { - if (edid_buf->detailed_timings[i].data.other_data.type == 0xfc) { - while (j < 13 && edid_buf->detailed_timings[i].data.other_data.data.str.str[j]) { - if (edid_buf->detailed_timings[i].data.other_data.data.str.str[j] == '\n') - break; - - edid_caps->display_name[j] = - edid_buf->detailed_timings[i].data.other_data.data.str.str[j]; - j++; - } - } - } + drm_edid_get_monitor_name(edid_buf, + edid_caps->display_name, + AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS); - edid_caps->edid_hdmi = drm_detect_hdmi_monitor( - (struct edid *) edid->raw_edid); + edid_caps->edid_hdmi = connector->display_info.is_hdmi; sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) @@ -584,9 +573,18 @@ bool dm_helpers_dp_write_dsc_enable( ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); } - if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT) { - ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); - DC_LOG_DC("Send DSC %s to sst display\n", enable_dsc ? "enable" : "disable"); + if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) { +#endif + ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Send DSC %s to SST RX\n", enable_dsc ? "enable" : "disable"); +#if defined(CONFIG_DRM_AMD_DC_DCN) + } else if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { + ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Send DSC %s to DP-HDMI PCON\n", enable_dsc ? "enable" : "disable"); + } +#endif } return (ret > 0); @@ -650,14 +648,8 @@ enum dc_edid_status dm_helpers_read_local_edid( /* We don't need the original edid anymore */ kfree(edid); - /* connector->display_info will be parsed from EDID and saved - * into drm_connector->display_info from edid by call stack - * below: - * drm_parse_ycbcr420_deep_color_info - * drm_parse_hdmi_forum_vsdb - * drm_parse_cea_ext - * drm_add_display_info - * drm_connector_update_edid_property + /* connector->display_info is parsed from EDID and saved + * into drm_connector->display_info * * drm_connector->display_info will be used by amdgpu_dm funcs, * like fill_stream_properties_from_drm_display_mode @@ -665,7 +657,7 @@ enum dc_edid_status dm_helpers_read_local_edid( amdgpu_dm_update_connector_after_detect(aconnector); edid_status = dm_helpers_parse_edid_caps( - ctx, + link, &sink->dc_edid, &sink->edid_caps); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index c022e56f9459..c510638b4f99 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -26,6 +26,73 @@ #include "amdgpu_dm_psr.h" #include "dc.h" #include "dm_helpers.h" +#include "amdgpu_dm.h" + +static bool link_get_psr_caps(struct dc_link *link) +{ + uint8_t psr_dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE]; + uint8_t edp_rev_dpcd_data; + + + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT, + psr_dpcd_data, sizeof(psr_dpcd_data))) + return false; + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_EDP_DPCD_REV, + &edp_rev_dpcd_data, sizeof(edp_rev_dpcd_data))) + return false; + + link->dpcd_caps.psr_caps.psr_version = psr_dpcd_data[0]; + link->dpcd_caps.psr_caps.edp_revision = edp_rev_dpcd_data; + +#ifdef CONFIG_DRM_AMD_DC_DCN + if (link->dpcd_caps.psr_caps.psr_version > 0x1) { + uint8_t alpm_dpcd_data; + uint8_t su_granularity_dpcd_data; + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_RECEIVER_ALPM_CAP, + &alpm_dpcd_data, sizeof(alpm_dpcd_data))) + return false; + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_PSR2_SU_Y_GRANULARITY, + &su_granularity_dpcd_data, sizeof(su_granularity_dpcd_data))) + return false; + + link->dpcd_caps.psr_caps.y_coordinate_required = psr_dpcd_data[1] & DP_PSR2_SU_Y_COORDINATE_REQUIRED; + link->dpcd_caps.psr_caps.su_granularity_required = psr_dpcd_data[1] & DP_PSR2_SU_GRANULARITY_REQUIRED; + + link->dpcd_caps.psr_caps.alpm_cap = alpm_dpcd_data & DP_ALPM_CAP; + link->dpcd_caps.psr_caps.standby_support = alpm_dpcd_data & (1 << 1); + + link->dpcd_caps.psr_caps.su_y_granularity = su_granularity_dpcd_data; + } +#endif + return true; +} + +#ifdef CONFIG_DRM_AMD_DC_DCN +static bool link_supports_psrsu(struct dc_link *link) +{ + struct dc *dc = link->ctx->dc; + + if (!dc->caps.dmcub_support) + return false; + + if (dc->ctx->dce_version < DCN_VERSION_3_1) + return false; + + if (!link->dpcd_caps.psr_caps.alpm_cap || + !link->dpcd_caps.psr_caps.y_coordinate_required) + return false; + + if (link->dpcd_caps.psr_caps.su_granularity_required && + !link->dpcd_caps.psr_caps.su_y_granularity) + return false; + + return true; +} +#endif /* * amdgpu_dm_set_psr_caps() - set link psr capabilities @@ -34,26 +101,34 @@ */ void amdgpu_dm_set_psr_caps(struct dc_link *link) { - uint8_t dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE]; - if (!(link->connector_signal & SIGNAL_TYPE_EDP)) return; + if (link->type == dc_connection_none) return; - if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT, - dpcd_data, sizeof(dpcd_data))) { - link->dpcd_caps.psr_caps.psr_version = dpcd_data[0]; - - if (dpcd_data[0] == 0) { - link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; - link->psr_settings.psr_feature_enabled = false; - } else { + + if (!link_get_psr_caps(link)) { + DRM_ERROR("amdgpu: Failed to read PSR Caps!\n"); + return; + } + + if (link->dpcd_caps.psr_caps.psr_version == 0) { + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; + link->psr_settings.psr_feature_enabled = false; + + } else { +#ifdef CONFIG_DRM_AMD_DC_DCN + if (link_supports_psrsu(link)) + link->psr_settings.psr_version = DC_PSR_VERSION_SU_1; + else +#endif link->psr_settings.psr_version = DC_PSR_VERSION_1; - link->psr_settings.psr_feature_enabled = true; - } - DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled); + link->psr_settings.psr_feature_enabled = true; } + + DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled); + } /* diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index a4bef4364afd..1e385d55e7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2995,7 +2995,7 @@ static bool bios_parser2_construct( &bp->object_info_tbl.revision); if (bp->object_info_tbl.revision.major == 1 - && bp->object_info_tbl.revision.minor >= 4) { + && bp->object_info_tbl.revision.minor == 4) { struct display_object_info_table_v1_4 *tbl_v1_4; tbl_v1_4 = GET_IMAGE(struct display_object_info_table_v1_4, @@ -3004,8 +3004,10 @@ static bool bios_parser2_construct( return false; bp->object_info_tbl.v1_4 = tbl_v1_4; - } else + } else { + ASSERT(0); return false; + } dal_firmware_parser_init_cmd_tbl(bp); dal_bios_parser_init_cmd_tbl_helper2(&bp->cmd_helper, dce_version); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 6b248cd2a461..ec19678a0702 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -739,7 +739,9 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz); } -unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, uint32_t hw_internal_rev, uint32_t pci_revision_id) +static unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, + uint32_t hw_internal_rev, + uint32_t pci_revision_id) { /* for low power RV2 variants, the highest voltage level we want is 0 */ if ((chip_family == FAMILY_RV) && @@ -763,7 +765,7 @@ unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, uint32_t hw return 4; } -bool dcn_validate_bandwidth( +bool dcn10_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 26f96ee32472..9200c8ce02ba 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -308,8 +308,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base) case FAMILY_NV: if (ASICREV_IS_SIENNA_CICHLID_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { dcn3_clk_mgr_destroy(clk_mgr); - } - if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { + } else if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { dcn3_clk_mgr_destroy(clk_mgr); } if (ASICREV_IS_BEIGE_GOBY_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c index 76ec8ec92efd..60761ff3cbf1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c @@ -34,7 +34,7 @@ #include "rv1_clk_mgr_vbios_smu.h" #include "rv1_clk_mgr_clk.h" -void rv1_init_clocks(struct clk_mgr *clk_mgr) +static void rv1_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c index fe18bb9e19aa..06bab24d8e27 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c @@ -28,6 +28,8 @@ #include "reg_helper.h" #include <linux/delay.h> +#include "rv1_clk_mgr_vbios_smu.h" + #define MAX_INSTANCE 5 #define MAX_SEGMENT 5 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 2108bff49d4e..9f35f2e8f971 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -409,7 +409,7 @@ void dcn2_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.prev_p_state_change_support = true; } -void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base) +static void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct pp_smu_funcs_nv *pp_smu = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c index db9950244c7b..fbdd0a92d146 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c @@ -74,42 +74,6 @@ static const struct clk_mgr_mask clk_mgr_mask = { CLK_COMMON_MASK_SH_LIST_DCN201_BASE(_MASK) }; -void dcn201_update_clocks_vbios(struct clk_mgr *clk_mgr, - struct dc_state *context, - bool safe_to_lower) -{ - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; - - bool update_dppclk = false; - bool update_dispclk = false; - - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->clks.dppclk_khz)) { - clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz; - update_dppclk = true; - } - - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr->clks.dispclk_khz)) { - clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; - update_dispclk = true; - } - - if (update_dppclk || update_dispclk) { - struct bp_set_dce_clock_parameters dce_clk_params; - struct dc_bios *bp = clk_mgr->ctx->dc_bios; - - if (update_dispclk) { - memset(&dce_clk_params, 0, sizeof(dce_clk_params)); - dce_clk_params.target_clock_frequency = new_clocks->dispclk_khz; - dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; - dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK; - bp->funcs->set_dce_clock(bp, &dce_clk_params); - } - /* currently there is no DCECLOCK_TYPE_DPPCLK type defined in VBIOS interface. - * vbios program DPPCLK to the same DispCLK limitation - */ - } -} - static void dcn201_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); @@ -126,10 +90,8 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; bool update_dppclk = false; bool update_dispclk = false; - bool enter_display_off = false; bool dpp_clock_lowered = false; bool force_reset = false; bool p_state_change_support; @@ -145,10 +107,7 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base, dcn2_read_clocks_from_hw_dentist(clk_mgr_base); } - display_count = clk_mgr_helper_get_active_display_cnt(dc, context); - - if (display_count == 0) - enter_display_off = true; + clk_mgr_helper_get_active_display_cnt(dc, context); if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index ac2d4c4f04e4..fbda42313bfe 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -56,9 +56,7 @@ /* TODO: evaluate how to lower or disable all dcn clocks in screen off case */ -int rn_get_active_display_cnt_wa( - struct dc *dc, - struct dc_state *context) +static int rn_get_active_display_cnt_wa(struct dc *dc, struct dc_state *context) { int i, display_count; bool tmds_present = false; @@ -77,7 +75,8 @@ int rn_get_active_display_cnt_wa( const struct dc_link *link = dc->links[i]; /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */ - if (link->link_enc->funcs->is_dig_enabled(link->link_enc)) + if (link->link_enc->funcs->is_dig_enabled && + link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } @@ -88,7 +87,7 @@ int rn_get_active_display_cnt_wa( return display_count; } -void rn_set_low_power_state(struct clk_mgr *clk_mgr_base) +static void rn_set_low_power_state(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); @@ -122,7 +121,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, } -void rn_update_clocks(struct clk_mgr *clk_mgr_base, +static void rn_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) { @@ -437,25 +436,14 @@ static void rn_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an } } -/* This function produce translated logical clk state values*/ -void rn_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s) -{ - struct clk_state_registers_and_bypass sb = { 0 }; - struct clk_log_info log_info = { 0 }; - - rn_dump_clk_registers(&sb, clk_mgr_base, &log_info); - - s->dprefclk_khz = sb.dprefclk * 1000; -} - -void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base) +static void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); rn_vbios_smu_enable_pme_wa(clk_mgr); } -void rn_init_clocks(struct clk_mgr *clk_mgr) +static void rn_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 9f7eed6688c4..8161a6ae410d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -33,6 +33,8 @@ #include "mp/mp_12_0_0_offset.h" #include "mp/mp_12_0_0_sh_mask.h" +#include "rn_clk_mgr_vbios_smu.h" + #define REG(reg_name) \ (MP0_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) @@ -86,7 +88,9 @@ static uint32_t rn_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsig } -int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param) +static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + unsigned int msg_id, + unsigned int param) { uint32_t result; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 1861a147a7fa..f977f29907df 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -252,6 +252,7 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, bool update_dispclk = false; bool enter_display_off = false; bool dpp_clock_lowered = false; + bool update_pstate_unsupported_clk = false; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; bool force_reset = false; bool update_uclk = false; @@ -299,13 +300,28 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0); - if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { + + // invalidate the current P-State forced min in certain dc_mode_softmax situations + if (dc->clk_mgr->dc_mode_softmax_enabled && safe_to_lower && !p_state_change_support) { + if ((new_clocks->dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) != + (clk_mgr_base->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000)) + update_pstate_unsupported_clk = true; + } + + if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support) || + update_pstate_unsupported_clk) { clk_mgr_base->clks.p_state_change_support = p_state_change_support; /* to disable P-State switching, set UCLK min = max */ - if (!clk_mgr_base->clks.p_state_change_support) - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + if (!clk_mgr_base->clks.p_state_change_support) { + if (dc->clk_mgr->dc_mode_softmax_enabled && + new_clocks->dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + dc->clk_mgr->bw_params->dc_mode_softmax_memclk); + else + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries - 1].memclk_mhz); + } } /* Always update saved value, even if new value not set due to P-State switching unsupported */ @@ -421,6 +437,24 @@ static void dcn3_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries - 1].memclk_mhz); } +static void dcn3_set_max_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present) + return; + + dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz); +} +static void dcn3_set_min_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present) + return; + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz); +} + /* Get current memclk states, update bounding box */ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) { @@ -436,6 +470,8 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &num_levels); clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; + clk_mgr_base->bw_params->dc_mode_softmax_memclk = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); + /* Refresh bounding box */ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); @@ -505,6 +541,8 @@ static struct clk_mgr_funcs dcn3_funcs = { .notify_wm_ranges = dcn3_notify_wm_ranges, .set_hard_min_memclk = dcn3_set_hard_min_memclk, .set_hard_max_memclk = dcn3_set_hard_max_memclk, + .set_max_memclk = dcn3_set_max_memclk, + .set_min_memclk = dcn3_set_min_memclk, .get_memclk_states_from_smu = dcn3_get_memclk_states_from_smu, .are_clock_states_equal = dcn3_are_clock_states_equal, .enable_pme_wa = dcn3_enable_pme_wa, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c index 6ea642615854..d9920d91838d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c @@ -88,9 +88,9 @@ static uint32_t dcn301_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, u return res_val; } -int dcn301_smu_send_msg_with_param( - struct clk_mgr_internal *clk_mgr, - unsigned int msg_id, unsigned int param) +static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + unsigned int msg_id, + unsigned int param) { uint32_t result; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 3eee32faa208..48005def1164 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -89,9 +89,9 @@ static int vg_get_active_display_cnt_wa( return display_count; } -void vg_update_clocks(struct clk_mgr *clk_mgr_base, - struct dc_state *context, - bool safe_to_lower) +static void vg_update_clocks(struct clk_mgr *clk_mgr_base, + struct dc_state *context, + bool safe_to_lower) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; @@ -367,18 +367,6 @@ static void vg_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an } } -/* This function produce translated logical clk state values*/ -void vg_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s) -{ - - struct clk_state_registers_and_bypass sb = { 0 }; - struct clk_log_info log_info = { 0 }; - - vg_dump_clk_registers(&sb, clk_mgr_base, &log_info); - - s->dprefclk_khz = sb.dprefclk * 1000; -} - static void vg_enable_pme_wa(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); @@ -386,7 +374,7 @@ static void vg_enable_pme_wa(struct clk_mgr *clk_mgr_base) dcn301_smu_enable_pme_wa(clk_mgr); } -void vg_init_clocks(struct clk_mgr *clk_mgr) +static void vg_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate @@ -753,7 +741,7 @@ void vg_clk_mgr_construct( sizeof(struct watermarks), &clk_mgr->smu_wm_set.mc_address.quad_part); - if (clk_mgr->smu_wm_set.wm_set == 0) { + if (!clk_mgr->smu_wm_set.wm_set) { clk_mgr->smu_wm_set.wm_set = &dummy_wms; clk_mgr->smu_wm_set.mc_address.quad_part = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index f4c9a458ace8..4162ce40089b 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -66,7 +66,7 @@ #define TO_CLK_MGR_DCN31(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn31, base) -int dcn31_get_active_display_cnt_wa( +static int dcn31_get_active_display_cnt_wa( struct dc *dc, struct dc_state *context) { @@ -118,7 +118,7 @@ static void dcn31_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) } } -static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, +void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) { @@ -158,6 +158,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, union display_idle_optimization_u idle_info = { 0 }; idle_info.idle_info.df_request_disabled = 1; idle_info.idle_info.phy_ref_clk_off = 1; + idle_info.idle_info.s0i2_rdy = 1; dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); /* update power state */ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; @@ -284,7 +285,7 @@ static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base) dcn31_smu_enable_pme_wa(clk_mgr); } -static void dcn31_init_clocks(struct clk_mgr *clk_mgr) +void dcn31_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate @@ -294,7 +295,7 @@ static void dcn31_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; } -static bool dcn31_are_clock_states_equal(struct dc_clocks *a, +bool dcn31_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b) { if (a->dispclk_khz != b->dispclk_khz) @@ -540,10 +541,9 @@ static unsigned int find_clk_for_voltage( return clock; } -void dcn31_clk_mgr_helper_populate_bw_params( - struct clk_mgr_internal *clk_mgr, - struct integrated_info *bios_info, - const DpmClocks_t *clock_table) +static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, + struct integrated_info *bios_info, + const DpmClocks_t *clock_table) { int i, j; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h index f8f100535526..961b10a49486 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h @@ -39,6 +39,13 @@ struct clk_mgr_dcn31 { struct dcn31_smu_watermark_set smu_wm_set; }; +bool dcn31_are_clock_states_equal(struct dc_clocks *a, + struct dc_clocks *b); +void dcn31_init_clocks(struct clk_mgr *clk_mgr); +void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, + struct dc_state *context, + bool safe_to_lower); + void dcn31_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_dcn31 *clk_mgr, struct pp_smu_funcs *pp_smu, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c index 8c2b77eb9459..a1011f3273f3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c @@ -95,9 +95,9 @@ static uint32_t dcn31_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un return res_val; } -int dcn31_smu_send_msg_with_param( - struct clk_mgr_internal *clk_mgr, - unsigned int msg_id, unsigned int param) +static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + unsigned int msg_id, + unsigned int param) { uint32_t result; @@ -119,6 +119,12 @@ int dcn31_smu_send_msg_with_param( result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000); + if (result == VBIOSSMC_Result_Failed) { + ASSERT(0); + REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK); + return -1; + } + if (IS_SMU_TIMEOUT(result)) { ASSERT(0); dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 0ded4decee05..01c8849b9db2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -221,9 +221,9 @@ static bool create_links( link = link_create(&link_init_params); if (link) { - dc->links[dc->link_count] = link; - link->dc = dc; - ++dc->link_count; + dc->links[dc->link_count] = link; + link->dc = dc; + ++dc->link_count; } } @@ -274,24 +274,6 @@ static bool create_links( goto failed_alloc; } -#if defined(CONFIG_DRM_AMD_DC_DCN) - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && - dc->caps.dp_hpo && - link->dc->res_pool->res_cap->num_hpo_dp_link_encoder > 0) { - /* FPGA case - Allocate HPO DP link encoder */ - if (i < link->dc->res_pool->res_cap->num_hpo_dp_link_encoder) { - link->hpo_dp_link_enc = link->dc->res_pool->hpo_dp_link_enc[i]; - - if (link->hpo_dp_link_enc == NULL) { - BREAK_TO_DEBUGGER(); - goto failed_alloc; - } - link->hpo_dp_link_enc->hpd_source = link->link_enc->hpd_source; - link->hpo_dp_link_enc->transmitter = link->link_enc->transmitter; - } - } -#endif - link->link_status.dpcd_caps = &link->dpcd_caps; enc_init.ctx = dc->ctx; @@ -808,6 +790,10 @@ void dc_stream_set_static_screen_params(struct dc *dc, static void dc_destruct(struct dc *dc) { + // reset link encoder assignment table on destruct + if (dc->res_pool && dc->res_pool->funcs->link_encs_assign) + link_enc_cfg_init(dc, dc->current_state); + if (dc->current_state) { dc_release_state(dc->current_state); dc->current_state = NULL; @@ -1016,8 +1002,6 @@ static bool dc_construct(struct dc *dc, goto fail; } - dc_resource_state_construct(dc, dc->current_state); - if (!create_links(dc, init_params->num_virtual_links)) goto fail; @@ -1027,8 +1011,7 @@ static bool dc_construct(struct dc *dc, if (!create_link_encoders(dc)) goto fail; - /* Initialise DIG link encoder resource tracking variables. */ - link_enc_cfg_init(dc, dc->current_state); + dc_resource_state_construct(dc, dc->current_state); return true; @@ -1830,6 +1813,19 @@ bool dc_commit_state(struct dc *dc, struct dc_state *context) dc_stream_log(dc, stream); } + /* + * Previous validation was perfomred with fast_validation = true and + * the full DML state required for hardware programming was skipped. + * + * Re-validate here to calculate these parameters / watermarks. + */ + result = dc_validate_global_state(dc, context, false); + if (result != DC_OK) { + DC_LOG_ERROR("DC commit global validation failure: %s (%d)", + dc_status_to_str(result), result); + return result; + } + result = dc_commit_state_no_check(dc, context); return (result == DC_OK); @@ -2870,7 +2866,8 @@ static void commit_planes_for_stream(struct dc *dc, #endif if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) - if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { + if (top_pipe_to_program && + top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { if (should_use_dmub_lock(stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; @@ -2979,12 +2976,12 @@ static void commit_planes_for_stream(struct dc *dc, #ifdef CONFIG_DRM_AMD_DC_DCN if (dc->debug.validate_dml_output) { for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx cur_pipe = context->res_ctx.pipe_ctx[i]; - if (cur_pipe.stream == NULL) + struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i]; + if (cur_pipe->stream == NULL) continue; - cur_pipe.plane_res.hubp->funcs->validate_dml_output( - cur_pipe.plane_res.hubp, dc->ctx, + cur_pipe->plane_res.hubp->funcs->validate_dml_output( + cur_pipe->plane_res.hubp, dc->ctx, &context->res_ctx.pipe_ctx[i].rq_regs, &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs); @@ -3426,7 +3423,7 @@ struct dc_sink *dc_link_add_remote_sink( goto fail_add_sink; edid_status = dm_helpers_parse_edid_caps( - link->ctx, + link, &dc_sink->dc_edid, &dc_sink->edid_caps); @@ -3583,6 +3580,98 @@ void dc_lock_memory_clock_frequency(struct dc *dc) core_link_enable_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); } +static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memclk_mhz) +{ + struct dc_state *context = dc->current_state; + struct hubp *hubp; + struct pipe_ctx *pipe; + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream != NULL) { + dc->hwss.disable_pixel_data(dc, pipe, true); + + // wait for double buffer + pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VACTIVE); + pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK); + pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VACTIVE); + + hubp = pipe->plane_res.hubp; + hubp->funcs->set_blank_regs(hubp, true); + } + } + + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); + dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream != NULL) { + dc->hwss.disable_pixel_data(dc, pipe, false); + + hubp = pipe->plane_res.hubp; + hubp->funcs->set_blank_regs(hubp, false); + } + } +} + + +/** + * dc_enable_dcmode_clk_limit() - lower clocks in dc (battery) mode + * @dc: pointer to dc of the dm calling this + * @enable: True = transition to DC mode, false = transition back to AC mode + * + * Some SoCs define additional clock limits when in DC mode, DM should + * invoke this function when the platform undergoes a power source transition + * so DC can apply/unapply the limit. This interface may be disruptive to + * the onscreen content. + * + * Context: Triggered by OS through DM interface, or manually by escape calls. + * Need to hold a dclock when doing so. + * + * Return: none (void function) + * + */ +void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) +{ + uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; + unsigned int softMax, maxDPM, funcMin; + bool p_state_change_support; + + if (!ASICREV_IS_BEIGE_GOBY_P(hw_internal_rev)) + return; + + softMax = dc->clk_mgr->bw_params->dc_mode_softmax_memclk; + maxDPM = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz; + funcMin = (dc->clk_mgr->clks.dramclk_khz + 999) / 1000; + p_state_change_support = dc->clk_mgr->clks.p_state_change_support; + + if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) { + if (p_state_change_support) { + if (funcMin <= softMax) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax); + // else: No-Op + } else { + if (funcMin <= softMax) + blank_and_force_memclk(dc, true, softMax); + // else: No-Op + } + } else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) { + if (p_state_change_support) { + if (funcMin <= softMax) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM); + // else: No-Op + } else { + if (funcMin <= softMax) + blank_and_force_memclk(dc, true, maxDPM); + // else: No-Op + } + } + dc->clk_mgr->dc_mode_softmax_enabled = enable; +} bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc, struct dc_plane_state *plane, struct dc_cursor_attributes *cursor_attr) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 21be2a684393..643762542e4d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -422,6 +422,8 @@ char *dc_status_to_str(enum dc_status status) return "The operation is not supported."; case DC_UNSUPPORTED_VALUE: return "The value specified is not supported."; + case DC_NO_LINK_ENC_RESOURCE: + return "No link encoder resource"; case DC_ERROR_UNEXPECTED: return "Unexpected error"; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index c8457babfdea..b5e570d33ca9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -66,31 +66,6 @@ /******************************************************************************* * Private functions ******************************************************************************/ -#if defined(CONFIG_DRM_AMD_DC_DCN) -static bool add_dp_hpo_link_encoder_to_link(struct dc_link *link) -{ - struct hpo_dp_link_encoder *enc = resource_get_unused_hpo_dp_link_encoder( - link->dc->res_pool); - - if (!link->hpo_dp_link_enc && enc) { - link->hpo_dp_link_enc = enc; - link->hpo_dp_link_enc->transmitter = link->link_enc->transmitter; - link->hpo_dp_link_enc->hpd_source = link->link_enc->hpd_source; - } - - return (link->hpo_dp_link_enc != NULL); -} - -static void remove_dp_hpo_link_encoder_from_link(struct dc_link *link) -{ - if (link->hpo_dp_link_enc) { - link->hpo_dp_link_enc->hpd_source = HPD_SOURCEID_UNKNOWN; - link->hpo_dp_link_enc->transmitter = TRANSMITTER_UNKNOWN; - link->hpo_dp_link_enc = NULL; - } -} -#endif - static void dc_link_destruct(struct dc_link *link) { int i; @@ -118,12 +93,6 @@ static void dc_link_destruct(struct dc_link *link) link->link_enc->funcs->destroy(&link->link_enc); } -#if defined(CONFIG_DRM_AMD_DC_DCN) - if (link->hpo_dp_link_enc) { - remove_dp_hpo_link_encoder_from_link(link); - } -#endif - if (link->local_sink) dc_sink_release(link->local_sink); @@ -270,10 +239,10 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) /* Link may not have physical HPD pin. */ if (link->ep_type != DISPLAY_ENDPOINT_PHY) { - if (link->hpd_status) - *type = dc_connection_single; - else + if (link->is_hpd_pending || !link->hpd_status) *type = dc_connection_none; + else + *type = dc_connection_single; return true; } @@ -881,6 +850,7 @@ static bool dc_link_detect_helper(struct dc_link *link, enum dc_connection_type pre_connection_type = dc_connection_none; bool perform_dp_seamless_boot = false; const uint32_t post_oui_delay = 30; // 30ms + struct link_resource link_res = { 0 }; DC_LOGGER_INIT(link->ctx->logger); @@ -975,7 +945,10 @@ static bool dc_link_detect_helper(struct dc_link *link, #if defined(CONFIG_DRM_AMD_DC_DCN) if (dp_get_link_encoding_format(&link->reported_link_cap) == DP_128b_132b_ENCODING) - add_dp_hpo_link_encoder_to_link(link); + link_res.hpo_dp_link_enc = resource_get_hpo_dp_link_enc_for_det_lt( + &link->dc->current_state->res_ctx, + link->dc->res_pool, + link); #endif if (link->type == dc_connection_mst_branch) { @@ -986,7 +959,7 @@ static bool dc_link_detect_helper(struct dc_link *link, * empty which leads to allocate_mst_payload() has "0" * pbn_per_slot value leading to exception on dc_fixpt_div() */ - dp_verify_mst_link_cap(link); + dp_verify_mst_link_cap(link, &link_res); /* * This call will initiate MST topology discovery. Which @@ -1150,6 +1123,7 @@ static bool dc_link_detect_helper(struct dc_link *link, // verify link cap for SST non-seamless boot if (!perform_dp_seamless_boot) dp_verify_link_cap_with_retries(link, + &link_res, &link->reported_link_cap, LINK_TRAINING_MAX_VERIFY_RETRY); } else { @@ -1844,6 +1818,8 @@ static void enable_stream_features(struct pipe_ctx *pipe_ctx) union down_spread_ctrl old_downspread; union down_spread_ctrl new_downspread; + memset(&old_downspread, 0, sizeof(old_downspread)); + core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL, &old_downspread.raw, sizeof(old_downspread)); @@ -2015,6 +1991,57 @@ static enum dc_status enable_link_dp_mst( return enable_link_dp(state, pipe_ctx); } +void dc_link_blank_all_dp_displays(struct dc *dc) +{ + unsigned int i; + uint8_t dpcd_power_state = '\0'; + enum dc_status status = DC_ERROR_UNEXPECTED; + + for (i = 0; i < dc->link_count; i++) { + if ((dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) || + (dc->links[i]->priv == NULL) || (dc->links[i]->local_sink == NULL)) + continue; + + /* DP 2.0 spec requires that we read LTTPR caps first */ + dp_retrieve_lttpr_cap(dc->links[i]); + /* if any of the displays are lit up turn them off */ + status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, + &dpcd_power_state, sizeof(dpcd_power_state)); + + if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) + dc_link_blank_dp_stream(dc->links[i], true); + } + +} + +void dc_link_blank_dp_stream(struct dc_link *link, bool hw_init) +{ + unsigned int j; + struct dc *dc = link->ctx->dc; + enum signal_type signal = link->connector_signal; + + if ((signal == SIGNAL_TYPE_EDP) || + (signal == SIGNAL_TYPE_DISPLAY_PORT)) { + if (link->ep_type == DISPLAY_ENDPOINT_PHY && + link->link_enc->funcs->get_dig_frontend && + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { + unsigned int fe = link->link_enc->funcs->get_dig_frontend(link->link_enc); + + if (fe != ENGINE_ID_UNKNOWN) + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (fe == dc->res_pool->stream_enc[j]->id) { + dc->res_pool->stream_enc[j]->funcs->dp_blank(link, + dc->res_pool->stream_enc[j]); + break; + } + } + } + + if ((!link->wa_flags.dp_keep_receiver_powered) || hw_init) + dp_receiver_power_ctrl(link, false); + } +} + static bool get_ext_hdmi_settings(struct pipe_ctx *pipe_ctx, enum engine_id eng_id, struct ext_hdmi_settings *settings) @@ -2452,7 +2479,8 @@ static void write_i2c_redriver_setting( DC_LOG_DEBUG("Set redriver failed"); } -static void disable_link(struct dc_link *link, enum signal_type signal) +static void disable_link(struct dc_link *link, const struct link_resource *link_res, + enum signal_type signal) { /* * TODO: implement call for dp_set_hw_test_pattern @@ -2471,20 +2499,20 @@ static void disable_link(struct dc_link *link, enum signal_type signal) struct dc_link_settings link_settings = link->cur_link_settings; #endif if (dc_is_dp_sst_signal(signal)) - dp_disable_link_phy(link, signal); + dp_disable_link_phy(link, link_res, signal); else - dp_disable_link_phy_mst(link, signal); + dp_disable_link_phy_mst(link, link_res, signal); if (dc_is_dp_sst_signal(signal) || link->mst_stream_alloc_table.stream_count == 0) { #if defined(CONFIG_DRM_AMD_DC_DCN) if (dp_get_link_encoding_format(&link_settings) == DP_8b_10b_ENCODING) { dp_set_fec_enable(link, false); - dp_set_fec_ready(link, false); + dp_set_fec_ready(link, link_res, false); } #else dp_set_fec_enable(link, false); - dp_set_fec_ready(link, false); + dp_set_fec_ready(link, link_res, false); #endif } } else { @@ -2595,7 +2623,7 @@ static enum dc_status enable_link( * new link settings. */ if (link->link_status.link_active) { - disable_link(link, pipe_ctx->stream->signal); + disable_link(link, &pipe_ctx->link_res, pipe_ctx->stream->signal); } switch (pipe_ctx->stream->signal) { @@ -2715,8 +2743,23 @@ static bool dp_active_dongle_validate_timing( return false; } +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps > 0) { // DP to HDMI FRL converter + struct dc_crtc_timing outputTiming = *timing; + + if (timing->flags.DSC && !timing->dsc_cfg.is_frl) + /* DP input has DSC, HDMI FRL output doesn't have DSC, remove DSC from output timing */ + outputTiming.flags.DSC = 0; + if (dc_bandwidth_in_kbps_from_timing(&outputTiming) > dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps) + return false; + } else { // DP to HDMI TMDS converter + if (get_timing_pixel_clock_100hz(timing) > (dongle_caps->dp_hdmi_max_pixel_clk_in_khz * 10)) + return false; + } +#else if (get_timing_pixel_clock_100hz(timing) > (dongle_caps->dp_hdmi_max_pixel_clk_in_khz * 10)) return false; +#endif #if defined(CONFIG_DRM_AMD_DC_DCN) } @@ -2962,7 +3005,7 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active link->psr_settings.psr_power_opt = *power_opts; if (psr != NULL && link->psr_settings.psr_feature_enabled && psr->funcs->psr_set_power_opt) - psr->funcs->psr_set_power_opt(psr, link->psr_settings.psr_power_opt); + psr->funcs->psr_set_power_opt(psr, link->psr_settings.psr_power_opt, panel_inst); } /* Enable or Disable PSR */ @@ -3350,11 +3393,12 @@ static void dc_log_vcp_x_y(const struct dc_link *link, struct fixed31_32 avg_tim /* * Payload allocation/deallocation for SST introduced in DP2.0 */ -enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, bool allocate) +static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, + bool allocate) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc; + struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc; struct link_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp; @@ -3436,7 +3480,7 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) struct link_encoder *link_encoder = NULL; struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; #if defined(CONFIG_DRM_AMD_DC_DCN) - struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc; + struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc; #endif struct dp_mst_stream_allocation_table proposed_table = {0}; @@ -3766,7 +3810,7 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) struct link_encoder *link_encoder = NULL; struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; #if defined(CONFIG_DRM_AMD_DC_DCN) - struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc; + struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc; #endif struct dp_mst_stream_allocation_table proposed_table = {0}; @@ -3927,112 +3971,73 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) { struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp; -#if defined(CONFIG_DRM_AMD_DC_DCN) struct link_encoder *link_enc = NULL; - struct dc_state *state = pipe_ctx->stream->ctx->dc->current_state; - struct link_enc_assignment link_enc_assign; - int i; -#endif + struct cp_psp_stream_config config = {0}; + enum dp_panel_mode panel_mode = + dp_get_panel_mode(pipe_ctx->stream->link); - if (cp_psp && cp_psp->funcs.update_stream_config) { - struct cp_psp_stream_config config = {0}; - enum dp_panel_mode panel_mode = - dp_get_panel_mode(pipe_ctx->stream->link); + if (cp_psp == NULL || cp_psp->funcs.update_stream_config == NULL) + return; - config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst; - /*stream_enc_inst*/ - config.dig_fe = (uint8_t) pipe_ctx->stream_res.stream_enc->stream_enc_inst; - config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst; -#if defined(CONFIG_DRM_AMD_DC_DCN) - config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA; - - if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY || - pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - link_enc = pipe_ctx->stream->link->link_enc; - config.dio_output_type = pipe_ctx->stream->link->ep_type; - config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; - if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY) - link_enc = pipe_ctx->stream->link->link_enc; - else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) - if (pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) { - link_enc = link_enc_cfg_get_link_enc_used_by_stream( - pipe_ctx->stream->ctx->dc, - pipe_ctx->stream); - } - // Initialize PHY ID with ABCDE - 01234 mapping except when it is B0 - config.phy_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY) + link_enc = pipe_ctx->stream->link->link_enc; + else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) + link_enc = link_enc_cfg_get_link_enc_used_by_stream( + pipe_ctx->stream->ctx->dc, + pipe_ctx->stream); + ASSERT(link_enc); + if (link_enc == NULL) + return; - //look up the link_enc_assignment for the current pipe_ctx - for (i = 0; i < state->stream_count; i++) { - if (pipe_ctx->stream == state->streams[i]) { - link_enc_assign = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i]; - } - } - // Add flag to guard new A0 DIG mapping - if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true) { - config.dig_be = link_enc_assign.eng_id; - config.dio_output_type = pipe_ctx->stream->link->ep_type; - config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; - } else { - config.dio_output_type = 0; - config.dio_output_idx = 0; - } + /* otg instance */ + config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst; - // Add flag to guard B0 implementation - if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true && - link_enc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { - if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - link_enc = link_enc_assign.stream->link_enc; + /* dig front end */ + config.dig_fe = (uint8_t) pipe_ctx->stream_res.stream_enc->stream_enc_inst; - // enum ID 1-4 maps to DPIA PHY ID 0-3 - config.phy_idx = link_enc_assign.ep_id.link_id.enum_id - ENUM_ID_1; - } else { // for non DPIA mode over B0, ABCDE maps to 01564 + /* stream encoder index */ + config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA; +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (is_dp_128b_132b_signal(pipe_ctx)) + config.stream_enc_idx = + pipe_ctx->stream_res.hpo_dp_stream_enc->id - ENGINE_ID_HPO_DP_0; +#endif - switch (link_enc->transmitter) { - case TRANSMITTER_UNIPHY_A: - config.phy_idx = 0; - break; - case TRANSMITTER_UNIPHY_B: - config.phy_idx = 1; - break; - case TRANSMITTER_UNIPHY_C: - config.phy_idx = 5; - break; - case TRANSMITTER_UNIPHY_D: - config.phy_idx = 6; - break; - case TRANSMITTER_UNIPHY_E: - config.phy_idx = 4; - break; - default: - config.phy_idx = 0; - break; - } + /* dig back end */ + config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst; - } - } - } else if (pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) { - link_enc = link_enc_cfg_get_link_enc_used_by_stream( - pipe_ctx->stream->ctx->dc, - pipe_ctx->stream); - config.phy_idx = 0; /* Clear phy_idx for non-physical display endpoints. */ - } - ASSERT(link_enc); - if (link_enc) - config.link_enc_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; - if (is_dp_128b_132b_signal(pipe_ctx)) { - config.stream_enc_idx = pipe_ctx->stream_res.hpo_dp_stream_enc->id - ENGINE_ID_HPO_DP_0; - config.link_enc_idx = pipe_ctx->stream->link->hpo_dp_link_enc->inst; - config.dp2_enabled = 1; - } + /* link encoder index */ + config.link_enc_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (is_dp_128b_132b_signal(pipe_ctx)) + config.link_enc_idx = pipe_ctx->link_res.hpo_dp_link_enc->inst; #endif - config.dpms_off = dpms_off; - config.dm_stream_ctx = pipe_ctx->stream->dm_stream_context; - config.assr_enabled = (panel_mode == DP_PANEL_MODE_EDP); - config.mst_enabled = (pipe_ctx->stream->signal == - SIGNAL_TYPE_DISPLAY_PORT_MST); - cp_psp->funcs.update_stream_config(cp_psp->handle, &config); - } + /* dio output index */ + config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; + + /* phy index */ + config.phy_idx = resource_transmitter_to_phy_idx( + pipe_ctx->stream->link->dc, link_enc->transmitter); + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + /* USB4 DPIA doesn't use PHY in our soc, initialize it to 0 */ + config.phy_idx = 0; + + /* stream properties */ + config.assr_enabled = (panel_mode == DP_PANEL_MODE_EDP) ? 1 : 0; + config.mst_enabled = (pipe_ctx->stream->signal == + SIGNAL_TYPE_DISPLAY_PORT_MST) ? 1 : 0; +#if defined(CONFIG_DRM_AMD_DC_DCN) + config.dp2_enabled = is_dp_128b_132b_signal(pipe_ctx) ? 1 : 0; +#endif + config.usb4_enabled = (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? + 1 : 0; + config.dpms_off = dpms_off; + + /* dm stream context */ + config.dm_stream_ctx = pipe_ctx->stream->dm_stream_context; + + cp_psp->funcs.update_stream_config(cp_psp->handle, &config); } #endif @@ -4053,7 +4058,7 @@ static void fpga_dp_hpo_enable_link_and_stream(struct dc_state *state, struct pi stream->link->cur_link_settings = link_settings; /* Enable clock, Configure lane count, and Enable Link Encoder*/ - enable_dp_hpo_output(stream->link, &stream->link->cur_link_settings); + enable_dp_hpo_output(stream->link, &pipe_ctx->link_res, &stream->link->cur_link_settings); #ifdef DIAGS_BUILD /* Workaround for FPGA HPO capture DP link data: @@ -4103,12 +4108,12 @@ static void fpga_dp_hpo_enable_link_and_stream(struct dc_state *state, struct pi proposed_table.stream_allocations[0].hpo_dp_stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; } - stream->link->hpo_dp_link_enc->funcs->update_stream_allocation_table( - stream->link->hpo_dp_link_enc, + pipe_ctx->link_res.hpo_dp_link_enc->funcs->update_stream_allocation_table( + pipe_ctx->link_res.hpo_dp_link_enc, &proposed_table); - stream->link->hpo_dp_link_enc->funcs->set_throttled_vcp_size( - stream->link->hpo_dp_link_enc, + pipe_ctx->link_res.hpo_dp_link_enc->funcs->set_throttled_vcp_size( + pipe_ctx->link_res.hpo_dp_link_enc, pipe_ctx->stream_res.hpo_dp_stream_enc->inst, avg_time_slots_per_mtp); @@ -4258,7 +4263,8 @@ void core_link_enable_stream( /* eDP lit up by bios already, no need to enable again. */ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && apply_edp_fast_boot_optimization && - !pipe_ctx->stream->timing.flags.DSC) { + !pipe_ctx->stream->timing.flags.DSC && + !pipe_ctx->next_odm_pipe) { pipe_ctx->stream->dpms_off = false; #if defined(CONFIG_DRM_AMD_DC_HDCP) update_psp_stream_config(pipe_ctx, false); @@ -4296,7 +4302,8 @@ void core_link_enable_stream( if (status != DC_FAIL_DP_LINK_TRAINING || pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { if (false == stream->link->link_status.link_active) - disable_link(stream->link, pipe_ctx->stream->signal); + disable_link(stream->link, &pipe_ctx->link_res, + pipe_ctx->stream->signal); BREAK_TO_DEBUGGER(); return; } @@ -4445,14 +4452,14 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx) * state machine. * In DP2 or MST mode, our encoder will stay video active */ - disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); + disable_link(pipe_ctx->stream->link, &pipe_ctx->link_res, pipe_ctx->stream->signal); dc->hwss.disable_stream(pipe_ctx); } else { dc->hwss.disable_stream(pipe_ctx); - disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); + disable_link(pipe_ctx->stream->link, &pipe_ctx->link_res, pipe_ctx->stream->signal); } #else - disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); + disable_link(pipe_ctx->stream->link, &pipe_ctx->link_res, pipe_ctx->stream->signal); dc->hwss.disable_stream(pipe_ctx); #endif @@ -4535,16 +4542,22 @@ void dc_link_set_drive_settings(struct dc *dc, { int i; + struct pipe_ctx *pipe = NULL; + const struct link_resource *link_res; - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i] == link) - break; - } + link_res = dc_link_get_cur_link_res(link); - if (i >= dc->link_count) + for (i = 0; i < MAX_PIPES; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream && pipe->stream->link) { + if (pipe->stream->link == link) + break; + } + } + if (pipe && link_res) + dc_link_dp_set_drive_settings(pipe->stream->link, link_res, lt_settings); + else ASSERT_CRITICAL(false); - - dc_link_dp_set_drive_settings(dc->links[i], lt_settings); } void dc_link_set_preferred_link_settings(struct dc *dc, @@ -4605,11 +4618,9 @@ void dc_link_set_preferred_training_settings(struct dc *dc, if (link_setting != NULL) { link->preferred_link_setting = *link_setting; #if defined(CONFIG_DRM_AMD_DC_DCN) - if (dp_get_link_encoding_format(link_setting) == - DP_128b_132b_ENCODING && !link->hpo_dp_link_enc) { - if (!add_dp_hpo_link_encoder_to_link(link)) - memset(&link->preferred_link_setting, 0, sizeof(link->preferred_link_setting)); - } + if (dp_get_link_encoding_format(link_setting) == DP_128b_132b_ENCODING) + /* TODO: add dc update for acquiring link res */ + skip_immediate_retrain = true; #endif } else { link->preferred_link_setting.lane_count = LANE_COUNT_UNKNOWN; @@ -4736,6 +4747,9 @@ void dc_link_overwrite_extended_receiver_cap( bool dc_link_is_fec_supported(const struct dc_link *link) { + /* TODO - use asic cap instead of link_enc->features + * we no longer know which link enc to use for this link before commit + */ struct link_encoder *link_enc = NULL; /* Links supporting dynamically assigned link encoder will be assigned next @@ -4765,6 +4779,8 @@ bool dc_link_should_enable_fec(const struct dc_link *link) link->local_sink && link->local_sink->edid_caps.panel_patch.disable_fec) || (link->connector_signal == SIGNAL_TYPE_EDP + // enable FEC for EDP if DSC is supported + && link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT == false )) is_fec_disable = true; @@ -4828,3 +4844,125 @@ uint32_t dc_bandwidth_in_kbps_from_timing( return kbps; } + +const struct link_resource *dc_link_get_cur_link_res(const struct dc_link *link) +{ + int i; + struct pipe_ctx *pipe = NULL; + const struct link_resource *link_res = NULL; + + for (i = 0; i < MAX_PIPES; i++) { + pipe = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream && pipe->stream->link && pipe->top_pipe == NULL) { + if (pipe->stream->link == link) { + link_res = &pipe->link_res; + break; + } + } + } + + return link_res; +} + +/** + * dc_get_cur_link_res_map() - take a snapshot of current link resource allocation state + * @dc: pointer to dc of the dm calling this + * @map: a dc link resource snapshot defined internally to dc. + * + * DM needs to capture a snapshot of current link resource allocation mapping + * and store it in its persistent storage. + * + * Some of the link resource is using first come first serve policy. + * The allocation mapping depends on original hotplug order. This information + * is lost after driver is loaded next time. The snapshot is used in order to + * restore link resource to its previous state so user will get consistent + * link capability allocation across reboot. + * + * Return: none (void function) + * + */ +void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map) +{ +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct dc_link *link; + uint8_t i; + uint32_t hpo_dp_recycle_map = 0; + + *map = 0; + + if (dc->caps.dp_hpo) { + for (i = 0; i < dc->caps.max_links; i++) { + link = dc->links[i]; + if (link->link_status.link_active && + dp_get_link_encoding_format(&link->reported_link_cap) == DP_128b_132b_ENCODING && + dp_get_link_encoding_format(&link->cur_link_settings) != DP_128b_132b_ENCODING) + /* hpo dp link encoder is considered as recycled, when RX reports 128b/132b encoding capability + * but current link doesn't use it. + */ + hpo_dp_recycle_map |= (1 << i); + } + *map |= (hpo_dp_recycle_map << LINK_RES_HPO_DP_REC_MAP__SHIFT); + } +#endif +} + +/** + * dc_restore_link_res_map() - restore link resource allocation state from a snapshot + * @dc: pointer to dc of the dm calling this + * @map: a dc link resource snapshot defined internally to dc. + * + * DM needs to call this function after initial link detection on boot and + * before first commit streams to restore link resource allocation state + * from previous boot session. + * + * Some of the link resource is using first come first serve policy. + * The allocation mapping depends on original hotplug order. This information + * is lost after driver is loaded next time. The snapshot is used in order to + * restore link resource to its previous state so user will get consistent + * link capability allocation across reboot. + * + * Return: none (void function) + * + */ +void dc_restore_link_res_map(const struct dc *dc, uint32_t *map) +{ +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct dc_link *link; + uint8_t i; + unsigned int available_hpo_dp_count; + uint32_t hpo_dp_recycle_map = (*map & LINK_RES_HPO_DP_REC_MAP__MASK) + >> LINK_RES_HPO_DP_REC_MAP__SHIFT; + + if (dc->caps.dp_hpo) { + available_hpo_dp_count = dc->res_pool->hpo_dp_link_enc_count; + /* remove excess 128b/132b encoding support for not recycled links */ + for (i = 0; i < dc->caps.max_links; i++) { + if ((hpo_dp_recycle_map & (1 << i)) == 0) { + link = dc->links[i]; + if (link->type != dc_connection_none && + dp_get_link_encoding_format(&link->verified_link_cap) == DP_128b_132b_ENCODING) { + if (available_hpo_dp_count > 0) + available_hpo_dp_count--; + else + /* remove 128b/132b encoding capability by limiting verified link rate to HBR3 */ + link->verified_link_cap.link_rate = LINK_RATE_HIGH3; + } + } + } + /* remove excess 128b/132b encoding support for recycled links */ + for (i = 0; i < dc->caps.max_links; i++) { + if ((hpo_dp_recycle_map & (1 << i)) != 0) { + link = dc->links[i]; + if (link->type != dc_connection_none && + dp_get_link_encoding_format(&link->verified_link_cap) == DP_128b_132b_ENCODING) { + if (available_hpo_dp_count > 0) + available_hpo_dp_count--; + else + /* remove 128b/132b encoding capability by limiting verified link rate to HBR3 */ + link->verified_link_cap.link_rate = LINK_RATE_HIGH3; + } + } + } + } +#endif +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 60539b1f2a80..24dc662ec3e4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -626,7 +626,7 @@ bool dal_ddc_submit_aux_command(struct ddc_service *ddc, do { struct aux_payload current_payload; bool is_end_of_payload = (retrieved + DEFAULT_AUX_MAX_DATA_SIZE) >= - payload->length ? true : false; + payload->length; uint32_t payload_length = is_end_of_payload ? payload->length - retrieved : DEFAULT_AUX_MAX_DATA_SIZE; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 13bc69d6b679..05e216524370 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -100,6 +100,7 @@ static const struct dp_lt_fallback_entry dp_lt_fallbacks[] = { #endif static bool decide_fallback_link_setting( + struct dc_link *link, struct dc_link_settings initial_link_settings, struct dc_link_settings *current_link_setting, enum link_training_result training_result); @@ -398,6 +399,223 @@ static uint8_t get_dpcd_link_rate(const struct dc_link_settings *link_settings) } #endif +static void vendor_specific_lttpr_wa_one_start(struct dc_link *link) +{ + const uint8_t vendor_lttpr_write_data[4] = {0x1, 0x50, 0x63, 0xff}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; + + if (offset != 0xFF) + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* W/A for certain LTTPR to reset their lane settings, part one of two */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data[0], + sizeof(vendor_lttpr_write_data)); +} + +static void vendor_specific_lttpr_wa_one_end( + struct dc_link *link, + uint8_t retry_count) +{ + const uint8_t vendor_lttpr_write_data[4] = {0x1, 0x50, 0x63, 0x0}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; + + if (!retry_count) { + if (offset != 0xFF) + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* W/A for certain LTTPR to reset their lane settings, part two of two */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data[0], + sizeof(vendor_lttpr_write_data)); + } +} + +static void vendor_specific_lttpr_wa_one_two( + struct dc_link *link, + const uint8_t rate) +{ + if (link->apply_vendor_specific_lttpr_link_rate_wa) { + uint8_t toggle_rate = 0x0; + + if (rate == 0x6) + toggle_rate = 0xA; + else + toggle_rate = 0x6; + + if (link->vendor_specific_lttpr_link_rate_wa == rate) { + /* W/A for certain LTTPR to reset internal state for link training */ + core_link_write_dpcd( + link, + DP_LINK_BW_SET, + &toggle_rate, + 1); + } + + /* Store the last attempted link rate for this link */ + link->vendor_specific_lttpr_link_rate_wa = rate; + } +} + +static void vendor_specific_lttpr_wa_three( + struct dc_link *link, + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX]) +{ + const uint8_t vendor_lttpr_write_data_vs[3] = {0x0, 0x53, 0x63}; + const uint8_t vendor_lttpr_write_data_pe[3] = {0x0, 0x54, 0x63}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; + uint32_t vendor_lttpr_read_address = 0xF0053; + uint8_t dprx_vs = 0; + uint8_t dprx_pe = 0; + uint8_t lane; + + if (offset != 0xFF) { + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + vendor_lttpr_read_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + } + + /* W/A to read lane settings requested by DPRX */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_read_dpcd( + link, + vendor_lttpr_read_address, + &dprx_vs, + 1); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + core_link_read_dpcd( + link, + vendor_lttpr_read_address, + &dprx_pe, + 1); + + for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { + dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_LANE = (dprx_vs >> (2 * lane)) & 0x3; + dpcd_lane_adjust[lane].bits.PRE_EMPHASIS_LANE = (dprx_pe >> (2 * lane)) & 0x3; + } +} + +static void vendor_specific_lttpr_wa_three_dpcd( + struct dc_link *link, + union dpcd_training_lane dpcd_lane_adjust[LANE_COUNT_DP_MAX]) +{ + union lane_adjust lane_adjust[LANE_COUNT_DP_MAX]; + uint8_t lane = 0; + + vendor_specific_lttpr_wa_three(link, lane_adjust); + + for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { + dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_SET = lane_adjust[lane].bits.VOLTAGE_SWING_LANE; + dpcd_lane_adjust[lane].bits.PRE_EMPHASIS_SET = lane_adjust[lane].bits.PRE_EMPHASIS_LANE; + } +} + +static void vendor_specific_lttpr_wa_four( + struct dc_link *link, + bool apply_wa) +{ + const uint8_t vendor_lttpr_write_data_one[4] = {0x1, 0x55, 0x63, 0x8}; + const uint8_t vendor_lttpr_write_data_two[4] = {0x1, 0x55, 0x63, 0x0}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; +#if defined(CONFIG_DRM_AMD_DC_DP2_0) + uint8_t sink_status = 0; + uint8_t i; +#endif + + if (offset != 0xFF) + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* W/A to pass through DPCD write of TPS=0 to DPRX */ + if (apply_wa) { + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_one[0], + sizeof(vendor_lttpr_write_data_one)); + } + + /* clear training pattern set */ + dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + + if (apply_wa) { + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_two[0], + sizeof(vendor_lttpr_write_data_two)); + } + +#if defined(CONFIG_DRM_AMD_DC_DP2_0) + /* poll for intra-hop disable */ + for (i = 0; i < 10; i++) { + if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) && + (sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0) + break; + udelay(1000); + } +#endif +} + +static void vendor_specific_lttpr_wa_five( + struct dc_link *link, + const union dpcd_training_lane dpcd_lane_adjust[LANE_COUNT_DP_MAX], + uint8_t lane_count) +{ + const uint32_t vendor_lttpr_write_address = 0xF004F; + const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF}; + uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0}; + uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0}; + uint8_t lane = 0; + + for (lane = 0; lane < lane_count; lane++) { + vendor_lttpr_write_data_vs[3] |= + dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + dpcd_lane_adjust[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Force LTTPR to output desired VS and PE */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_reset[0], + sizeof(vendor_lttpr_write_data_reset)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); +} + enum dc_status dpcd_set_link_settings( struct dc_link *link, const struct link_training_settings *lt_settings) @@ -430,7 +648,7 @@ enum dc_status dpcd_set_link_settings( status = core_link_write_dpcd(link, DP_LANE_COUNT_SET, &lane_count_set.raw, 1); - if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 && + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13 && lt_settings->link_settings.use_link_rate_set == true) { rate = 0; /* WA for some MUX chips that will power down with eDP and lose supported @@ -452,6 +670,15 @@ enum dc_status dpcd_set_link_settings( #else rate = (uint8_t) (lt_settings->link_settings.link_rate); #endif + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) + vendor_specific_lttpr_wa_one_start(link); + + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)) + vendor_specific_lttpr_wa_one_two(link, rate); + status = core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1); } @@ -1024,6 +1251,7 @@ bool dp_is_max_vs_reached( static bool perform_post_lt_adj_req_sequence( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { enum dc_lane_count lane_count = @@ -1087,6 +1315,7 @@ static bool perform_post_lt_adj_req_sequence( lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); dc_link_dp_set_drive_settings(link, + link_res, lt_settings); break; } @@ -1161,6 +1390,7 @@ enum link_training_result dp_get_cr_failure(enum dc_lane_count ln_count, static enum link_training_result perform_channel_equalization_sequence( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, uint32_t offset) { @@ -1183,12 +1413,12 @@ static enum link_training_result perform_channel_equalization_sequence( tr_pattern = DP_TRAINING_PATTERN_SEQUENCE_4; #endif - dp_set_hw_training_pattern(link, tr_pattern, offset); + dp_set_hw_training_pattern(link, link_res, tr_pattern, offset); for (retries_ch_eq = 0; retries_ch_eq <= LINK_TRAINING_MAX_RETRY_COUNT; retries_ch_eq++) { - dp_set_hw_lane_settings(link, lt_settings, offset); + dp_set_hw_lane_settings(link, link_res, lt_settings, offset); /* 2. update DPCD*/ if (!retries_ch_eq) @@ -1211,6 +1441,12 @@ static enum link_training_result perform_channel_equalization_sequence( dp_translate_training_aux_read_interval( link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + wait_time_microsec = 16000; + } + dp_wait_for_training_aux_rd_interval( link, wait_time_microsec); @@ -1246,18 +1482,20 @@ static enum link_training_result perform_channel_equalization_sequence( } static void start_clock_recovery_pattern_early(struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, uint32_t offset) { DC_LOG_HW_LINK_TRAINING("%s\n GPU sends TPS1. Wait 400us.\n", __func__); - dp_set_hw_training_pattern(link, lt_settings->pattern_for_cr, offset); - dp_set_hw_lane_settings(link, lt_settings, offset); + dp_set_hw_training_pattern(link, link_res, lt_settings->pattern_for_cr, offset); + dp_set_hw_lane_settings(link, link_res, lt_settings, offset); udelay(400); } static enum link_training_result perform_clock_recovery_sequence( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, uint32_t offset) { @@ -1273,7 +1511,7 @@ static enum link_training_result perform_clock_recovery_sequence( retry_count = 0; if (!link->ctx->dc->work_arounds.lt_early_cr_pattern) - dp_set_hw_training_pattern(link, lt_settings->pattern_for_cr, offset); + dp_set_hw_training_pattern(link, link_res, lt_settings->pattern_for_cr, offset); /* najeeb - The synaptics MST hub can put the LT in * infinite loop by switching the VS @@ -1290,6 +1528,7 @@ static enum link_training_result perform_clock_recovery_sequence( /* 1. call HWSS to set lane settings*/ dp_set_hw_lane_settings( link, + link_res, lt_settings, offset); @@ -1311,8 +1550,10 @@ static enum link_training_result perform_clock_recovery_sequence( /* 3. wait receiver to lock-on*/ wait_time_microsec = lt_settings->cr_pattern_time; - if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) - wait_time_microsec = TRAINING_AUX_RD_INTERVAL; + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)) { + wait_time_microsec = 16000; + } dp_wait_for_training_aux_rd_interval( link, @@ -1329,6 +1570,13 @@ static enum link_training_result perform_clock_recovery_sequence( dpcd_lane_adjust, offset); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + vendor_specific_lttpr_wa_one_end(link, retry_count); + vendor_specific_lttpr_wa_three(link, dpcd_lane_adjust); + } + /* 5. check CR done*/ if (dp_is_cr_done(lane_count, dpcd_lane_status)) return LINK_TRAINING_SUCCESS; @@ -1379,13 +1627,14 @@ static enum link_training_result perform_clock_recovery_sequence( static inline enum link_training_result dp_transition_to_video_idle( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, enum link_training_result status) { union lane_count_set lane_count_set = {0}; /* 4. mainlink output idle pattern*/ - dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); + dp_set_hw_test_pattern(link, link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); /* * 5. post training adjust if required @@ -1409,7 +1658,7 @@ static inline enum link_training_result dp_transition_to_video_idle( } if (status == LINK_TRAINING_SUCCESS && - perform_post_lt_adj_req_sequence(link, lt_settings) == false) + perform_post_lt_adj_req_sequence(link, link_res, lt_settings) == false) status = LINK_TRAINING_LQA_FAIL; lane_count_set.bits.LANE_COUNT_SET = lt_settings->link_settings.lane_count; @@ -1852,10 +2101,11 @@ static void print_status_message( void dc_link_dp_set_drive_settings( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { /* program ASIC PHY settings*/ - dp_set_hw_lane_settings(link, lt_settings, DPRX); + dp_set_hw_lane_settings(link, link_res, lt_settings, DPRX); dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); @@ -1866,6 +2116,7 @@ void dc_link_dp_set_drive_settings( bool dc_link_dp_perform_link_training_skip_aux( struct dc_link *link, + const struct link_resource *link_res, const struct dc_link_settings *link_setting) { struct link_training_settings lt_settings = {0}; @@ -1882,10 +2133,10 @@ bool dc_link_dp_perform_link_training_skip_aux( /* 1. Perform_clock_recovery_sequence. */ /* transmit training pattern for clock recovery */ - dp_set_hw_training_pattern(link, lt_settings.pattern_for_cr, DPRX); + dp_set_hw_training_pattern(link, link_res, lt_settings.pattern_for_cr, DPRX); /* call HWSS to set lane settings*/ - dp_set_hw_lane_settings(link, <_settings, DPRX); + dp_set_hw_lane_settings(link, link_res, <_settings, DPRX); /* wait receiver to lock-on*/ dp_wait_for_training_aux_rd_interval(link, lt_settings.cr_pattern_time); @@ -1893,10 +2144,10 @@ bool dc_link_dp_perform_link_training_skip_aux( /* 2. Perform_channel_equalization_sequence. */ /* transmit training pattern for channel equalization. */ - dp_set_hw_training_pattern(link, lt_settings.pattern_for_eq, DPRX); + dp_set_hw_training_pattern(link, link_res, lt_settings.pattern_for_eq, DPRX); /* call HWSS to set lane settings*/ - dp_set_hw_lane_settings(link, <_settings, DPRX); + dp_set_hw_lane_settings(link, link_res, <_settings, DPRX); /* wait receiver to lock-on. */ dp_wait_for_training_aux_rd_interval(link, lt_settings.eq_pattern_time); @@ -1904,7 +2155,7 @@ bool dc_link_dp_perform_link_training_skip_aux( /* 3. Perform_link_training_int. */ /* Mainlink output idle pattern. */ - dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); + dp_set_hw_test_pattern(link, link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); print_status_message(link, <_settings, LINK_TRAINING_SUCCESS); @@ -1985,6 +2236,7 @@ static void dpcd_128b_132b_get_aux_rd_interval(struct dc_link *link, static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { uint8_t loop_count; @@ -1996,7 +2248,7 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; /* Transmit 128b/132b_TPS1 over Main-Link */ - dp_set_hw_training_pattern(link, lt_settings->pattern_for_cr, DPRX); + dp_set_hw_training_pattern(link, link_res, lt_settings->pattern_for_cr, DPRX); /* Set TRAINING_PATTERN_SET to 01h */ dpcd_set_training_pattern(link, lt_settings->pattern_for_cr); @@ -2006,8 +2258,8 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX); dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); - dp_set_hw_lane_settings(link, lt_settings, DPRX); - dp_set_hw_training_pattern(link, lt_settings->pattern_for_eq, DPRX); + dp_set_hw_lane_settings(link, link_res, lt_settings, DPRX); + dp_set_hw_training_pattern(link, link_res, lt_settings->pattern_for_eq, DPRX); /* Set loop counter to start from 1 */ loop_count = 1; @@ -2034,7 +2286,7 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( } else if (dpcd_lane_status_updated.bits.LT_FAILED_128b_132b) { status = DP_128b_132b_LT_FAILED; } else { - dp_set_hw_lane_settings(link, lt_settings, DPRX); + dp_set_hw_lane_settings(link, link_res, lt_settings, DPRX); dpcd_set_lane_settings(link, lt_settings, DPRX); } loop_count++; @@ -2063,6 +2315,7 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( static enum link_training_result dp_perform_128b_132b_cds_done_sequence( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { /* Assumption: assume hardware has transmitted eq pattern */ @@ -2099,6 +2352,7 @@ static enum link_training_result dp_perform_128b_132b_cds_done_sequence( static enum link_training_result dp_perform_8b_10b_link_training( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { enum link_training_result status = LINK_TRAINING_SUCCESS; @@ -2108,7 +2362,7 @@ static enum link_training_result dp_perform_8b_10b_link_training( uint8_t lane = 0; if (link->ctx->dc->work_arounds.lt_early_cr_pattern) - start_clock_recovery_pattern_early(link, lt_settings, DPRX); + start_clock_recovery_pattern_early(link, link_res, lt_settings, DPRX); /* 1. set link rate, lane count and spread. */ dpcd_set_link_settings(link, lt_settings); @@ -2122,12 +2376,13 @@ static enum link_training_result dp_perform_8b_10b_link_training( for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS); repeater_id--) { - status = perform_clock_recovery_sequence(link, lt_settings, repeater_id); + status = perform_clock_recovery_sequence(link, link_res, lt_settings, repeater_id); if (status != LINK_TRAINING_SUCCESS) break; status = perform_channel_equalization_sequence(link, + link_res, lt_settings, repeater_id); @@ -2142,9 +2397,10 @@ static enum link_training_result dp_perform_8b_10b_link_training( } if (status == LINK_TRAINING_SUCCESS) { - status = perform_clock_recovery_sequence(link, lt_settings, DPRX); + status = perform_clock_recovery_sequence(link, link_res, lt_settings, DPRX); if (status == LINK_TRAINING_SUCCESS) { status = perform_channel_equalization_sequence(link, + link_res, lt_settings, DPRX); } @@ -2156,6 +2412,7 @@ static enum link_training_result dp_perform_8b_10b_link_training( #if defined(CONFIG_DRM_AMD_DC_DCN) static enum link_training_result dp_perform_128b_132b_link_training( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { enum link_training_result result = LINK_TRAINING_SUCCESS; @@ -2167,23 +2424,358 @@ static enum link_training_result dp_perform_128b_132b_link_training( decide_8b_10b_training_settings(link, <_settings->link_settings, &legacy_settings); - return dp_perform_8b_10b_link_training(link, &legacy_settings); + return dp_perform_8b_10b_link_training(link, link_res, &legacy_settings); } dpcd_set_link_settings(link, lt_settings); if (result == LINK_TRAINING_SUCCESS) - result = dp_perform_128b_132b_channel_eq_done_sequence(link, lt_settings); + result = dp_perform_128b_132b_channel_eq_done_sequence(link, link_res, lt_settings); if (result == LINK_TRAINING_SUCCESS) - result = dp_perform_128b_132b_cds_done_sequence(link, lt_settings); + result = dp_perform_128b_132b_cds_done_sequence(link, link_res, lt_settings); return result; } #endif +static enum link_training_result dc_link_dp_perform_fixed_vs_pe_training_sequence( + struct dc_link *link, + const struct link_resource *link_res, + struct link_training_settings *lt_settings) +{ + const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + const uint8_t vendor_lttpr_write_data_intercept_en[4] = {0x1, 0x55, 0x63, 0x0}; + const uint8_t vendor_lttpr_write_data_intercept_dis[4] = {0x1, 0x55, 0x63, 0x68}; + uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0}; + uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0}; + uint32_t vendor_lttpr_write_address = 0xF004F; + enum link_training_result status = LINK_TRAINING_SUCCESS; + uint8_t lane = 0; + union down_spread_ctrl downspread = {0}; + union lane_count_set lane_count_set = {0}; + uint8_t toggle_rate; + uint8_t rate; + + /* Only 8b/10b is supported */ + ASSERT(dp_get_link_encoding_format(<_settings->link_settings) == + DP_8b_10b_ENCODING); + + if (offset != 0xFF) { + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + } + + /* Vendor specific: Reset lane settings */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_reset[0], + sizeof(vendor_lttpr_write_data_reset)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + /* Vendor specific: Enable intercept */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_en[0], + sizeof(vendor_lttpr_write_data_intercept_en)); + + /* 1. set link rate, lane count and spread. */ + + downspread.raw = (uint8_t)(lt_settings->link_settings.link_spread); + + lane_count_set.bits.LANE_COUNT_SET = + lt_settings->link_settings.lane_count; + + lane_count_set.bits.ENHANCED_FRAMING = lt_settings->enhanced_framing; + lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = 0; + + + if (lt_settings->pattern_for_eq < DP_TRAINING_PATTERN_SEQUENCE_4) { + lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = + link->dpcd_caps.max_ln_count.bits.POST_LT_ADJ_REQ_SUPPORTED; + } + + core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, + &downspread.raw, sizeof(downspread)); + + core_link_write_dpcd(link, DP_LANE_COUNT_SET, + &lane_count_set.raw, 1); + +#if defined(CONFIG_DRM_AMD_DC_DCN) + rate = get_dpcd_link_rate(<_settings->link_settings); +#else + rate = (uint8_t) (lt_settings->link_settings.link_rate); +#endif + + /* Vendor specific: Toggle link rate */ + toggle_rate = (rate == 0x6) ? 0xA : 0x6; + + if (link->vendor_specific_lttpr_link_rate_wa == rate) { + core_link_write_dpcd( + link, + DP_LINK_BW_SET, + &toggle_rate, + 1); + } + + link->vendor_specific_lttpr_link_rate_wa = rate; + + core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1); + + DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x framing = %x\n %x spread = %x\n", + __func__, + DP_LINK_BW_SET, + lt_settings->link_settings.link_rate, + DP_LANE_COUNT_SET, + lt_settings->link_settings.lane_count, + lt_settings->enhanced_framing, + DP_DOWNSPREAD_CTRL, + lt_settings->link_settings.link_spread); + + /* 2. Perform link training */ + + /* Perform Clock Recovery Sequence */ + if (status == LINK_TRAINING_SUCCESS) { + uint32_t retries_cr; + uint32_t retry_count; + uint32_t wait_time_microsec; + enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX]; + union lane_align_status_updated dpcd_lane_status_updated; + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; + + retries_cr = 0; + retry_count = 0; + + while ((retries_cr < LINK_TRAINING_MAX_RETRY_COUNT) && + (retry_count < LINK_TRAINING_MAX_CR_RETRY)) { + + memset(&dpcd_lane_status, '\0', sizeof(dpcd_lane_status)); + memset(&dpcd_lane_status_updated, '\0', + sizeof(dpcd_lane_status_updated)); + + /* 1. call HWSS to set lane settings */ + dp_set_hw_lane_settings( + link, + link_res, + lt_settings, + 0); + + /* 2. update DPCD of the receiver */ + if (!retry_count) { + /* EPR #361076 - write as a 5-byte burst, + * but only for the 1-st iteration. + */ + dpcd_set_lt_pattern_and_lane_settings( + link, + lt_settings, + lt_settings->pattern_for_cr, + 0); + /* Vendor specific: Disable intercept */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_dis[0], + sizeof(vendor_lttpr_write_data_intercept_dis)); + } else { + vendor_lttpr_write_data_vs[3] = 0; + vendor_lttpr_write_data_pe[3] = 0; + + for (lane = 0; lane < lane_count; lane++) { + vendor_lttpr_write_data_vs[3] |= + lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Vendor specific: Update VS and PE to DPRX requested value */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + dpcd_set_lane_settings( + link, + lt_settings, + 0); + } + + /* 3. wait receiver to lock-on*/ + wait_time_microsec = lt_settings->cr_pattern_time; + + dp_wait_for_training_aux_rd_interval( + link, + wait_time_microsec); + + /* 4. Read lane status and requested drive + * settings as set by the sink + */ + dp_get_lane_status_and_lane_adjust( + link, + lt_settings, + dpcd_lane_status, + &dpcd_lane_status_updated, + dpcd_lane_adjust, + 0); + + /* 5. check CR done*/ + if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + status = LINK_TRAINING_SUCCESS; + break; + } + + /* 6. max VS reached*/ + if (dp_is_max_vs_reached(lt_settings)) + break; + + /* 7. same lane settings */ + /* Note: settings are the same for all lanes, + * so comparing first lane is sufficient + */ + if (lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET == + dpcd_lane_adjust[0].bits.VOLTAGE_SWING_LANE) + retries_cr++; + else + retries_cr = 0; + + /* 8. update VS/PE/PC2 in lt_settings*/ + dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, + lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); + retry_count++; + } + + if (retry_count >= LINK_TRAINING_MAX_CR_RETRY) { + ASSERT(0); + DC_LOG_ERROR("%s: Link Training Error, could not get CR after %d tries. Possibly voltage swing issue", + __func__, + LINK_TRAINING_MAX_CR_RETRY); + + } + + status = dp_get_cr_failure(lane_count, dpcd_lane_status); + } + + /* Perform Channel EQ Sequence */ + if (status == LINK_TRAINING_SUCCESS) { + enum dc_dp_training_pattern tr_pattern; + uint32_t retries_ch_eq; + uint32_t wait_time_microsec; + enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; + union lane_align_status_updated dpcd_lane_status_updated = {0}; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; + + /* Note: also check that TPS4 is a supported feature*/ + tr_pattern = lt_settings->pattern_for_eq; + + dp_set_hw_training_pattern(link, link_res, tr_pattern, 0); + + status = LINK_TRAINING_EQ_FAIL_EQ; + + for (retries_ch_eq = 0; retries_ch_eq <= LINK_TRAINING_MAX_RETRY_COUNT; + retries_ch_eq++) { + + dp_set_hw_lane_settings(link, link_res, lt_settings, 0); + + vendor_lttpr_write_data_vs[3] = 0; + vendor_lttpr_write_data_pe[3] = 0; + + for (lane = 0; lane < lane_count; lane++) { + vendor_lttpr_write_data_vs[3] |= + lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Vendor specific: Update VS and PE to DPRX requested value */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + /* 2. update DPCD*/ + if (!retries_ch_eq) + /* EPR #361076 - write as a 5-byte burst, + * but only for the 1-st iteration + */ + + dpcd_set_lt_pattern_and_lane_settings( + link, + lt_settings, + tr_pattern, 0); + else + dpcd_set_lane_settings(link, lt_settings, 0); + + /* 3. wait for receiver to lock-on*/ + wait_time_microsec = lt_settings->eq_pattern_time; + + dp_wait_for_training_aux_rd_interval( + link, + wait_time_microsec); + + /* 4. Read lane status and requested + * drive settings as set by the sink + */ + dp_get_lane_status_and_lane_adjust( + link, + lt_settings, + dpcd_lane_status, + &dpcd_lane_status_updated, + dpcd_lane_adjust, + 0); + + /* 5. check CR done*/ + if (!dp_is_cr_done(lane_count, dpcd_lane_status)) { + status = LINK_TRAINING_EQ_FAIL_CR; + break; + } + + /* 6. check CHEQ done*/ + if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) && + dp_is_symbol_locked(lane_count, dpcd_lane_status) && + dp_is_interlane_aligned(dpcd_lane_status_updated)) { + status = LINK_TRAINING_SUCCESS; + break; + } + + /* 7. update VS/PE/PC2 in lt_settings*/ + dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, + lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); + } + } + + return status; +} + + enum link_training_result dc_link_dp_perform_link_training( struct dc_link *link, + const struct link_resource *link_res, const struct dc_link_settings *link_settings, bool skip_video_pattern) { @@ -2203,30 +2795,51 @@ enum link_training_result dc_link_dp_perform_link_training( <_settings); /* reset previous training states */ - dpcd_exit_training_mode(link); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + link->apply_vendor_specific_lttpr_link_rate_wa = true; + vendor_specific_lttpr_wa_four(link, true); + } else { + dpcd_exit_training_mode(link); + } /* configure link prior to entering training mode */ dpcd_configure_lttpr_mode(link, <_settings); - dp_set_fec_ready(link, lt_settings.should_set_fec_ready); + dp_set_fec_ready(link, link_res, lt_settings.should_set_fec_ready); dpcd_configure_channel_coding(link, <_settings); /* enter training mode: * Per DP specs starting from here, DPTX device shall not issue * Non-LT AUX transactions inside training mode. */ - if (encoding == DP_8b_10b_ENCODING) - status = dp_perform_8b_10b_link_training(link, <_settings); + if (!link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) + status = dc_link_dp_perform_fixed_vs_pe_training_sequence(link, link_res, <_settings); + else if (encoding == DP_8b_10b_ENCODING) + status = dp_perform_8b_10b_link_training(link, link_res, <_settings); #if defined(CONFIG_DRM_AMD_DC_DCN) else if (encoding == DP_128b_132b_ENCODING) - status = dp_perform_128b_132b_link_training(link, <_settings); + status = dp_perform_128b_132b_link_training(link, link_res, <_settings); #endif else ASSERT(0); - /* exit training mode and switch to video idle */ - dpcd_exit_training_mode(link); + /* exit training mode */ + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + link->apply_vendor_specific_lttpr_link_rate_wa = false; + vendor_specific_lttpr_wa_four(link, (status != LINK_TRAINING_SUCCESS)); + } else { + dpcd_exit_training_mode(link); + } + + /* switch to video idle */ if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) status = dp_transition_to_video_idle(link, + link_res, <_settings, status); @@ -2278,6 +2891,7 @@ bool perform_link_training_with_retries( dp_enable_link_phy( link, + &pipe_ctx->link_res, signal, pipe_ctx->clock_source->id, ¤t_setting); @@ -2305,23 +2919,24 @@ bool perform_link_training_with_retries( dp_set_panel_mode(link, panel_mode); if (link->aux_access_disabled) { - dc_link_dp_perform_link_training_skip_aux(link, ¤t_setting); + dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, ¤t_setting); return true; } else { /** @todo Consolidate USB4 DP and DPx.x training. */ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { status = dc_link_dpia_perform_link_training(link, - ¤t_setting, - skip_video_pattern); + &pipe_ctx->link_res, + ¤t_setting, + skip_video_pattern); /* Transmit idle pattern once training successful. */ if (status == LINK_TRAINING_SUCCESS) - dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE, - NULL, 0); + dp_set_hw_test_pattern(link, &pipe_ctx->link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); } else { status = dc_link_dp_perform_link_training(link, - ¤t_setting, - skip_video_pattern); + &pipe_ctx->link_res, + ¤t_setting, + skip_video_pattern); } if (status == LINK_TRAINING_SUCCESS) @@ -2336,7 +2951,7 @@ bool perform_link_training_with_retries( DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n", __func__, (unsigned int)j + 1, attempts); - dp_disable_link_phy(link, signal); + dp_disable_link_phy(link, &pipe_ctx->link_res, signal); /* Abort link training if failure due to sink being unplugged. */ if (status == LINK_TRAINING_ABORT) { @@ -2349,7 +2964,7 @@ bool perform_link_training_with_retries( uint32_t req_bw; uint32_t link_bw; - decide_fallback_link_setting(*link_setting, ¤t_setting, status); + decide_fallback_link_setting(link, *link_setting, ¤t_setting, status); /* Fail link training if reduced link bandwidth no longer meets * stream requirements. */ @@ -2385,12 +3000,13 @@ static enum clock_source_id get_clock_source_id(struct dc_link *link) return dp_cs_id; } -static void set_dp_mst_mode(struct dc_link *link, bool mst_enable) +static void set_dp_mst_mode(struct dc_link *link, const struct link_resource *link_res, + bool mst_enable) { if (mst_enable == false && link->type == dc_connection_mst_branch) { /* Disable MST on link. Use only local sink. */ - dp_disable_link_phy_mst(link, link->connector_signal); + dp_disable_link_phy_mst(link, link_res, link->connector_signal); link->type = dc_connection_single; link->local_sink = link->remote_sinks[0]; @@ -2401,7 +3017,7 @@ static void set_dp_mst_mode(struct dc_link *link, bool mst_enable) link->type == dc_connection_single && link->remote_sinks[0] != NULL) { /* Re-enable MST on link. */ - dp_disable_link_phy(link, link->connector_signal); + dp_disable_link_phy(link, link_res, link->connector_signal); dp_enable_mst_on_sink(link, true); link->type = dc_connection_mst_branch; @@ -2427,6 +3043,7 @@ bool dc_link_dp_sync_lt_begin(struct dc_link *link) enum link_training_result dc_link_dp_sync_lt_attempt( struct dc_link *link, + const struct link_resource *link_res, struct dc_link_settings *link_settings, struct dc_link_training_overrides *lt_overrides) { @@ -2446,14 +3063,14 @@ enum link_training_result dc_link_dp_sync_lt_attempt( <_settings); /* Setup MST Mode */ if (lt_overrides->mst_enable) - set_dp_mst_mode(link, *lt_overrides->mst_enable); + set_dp_mst_mode(link, link_res, *lt_overrides->mst_enable); /* Disable link */ - dp_disable_link_phy(link, link->connector_signal); + dp_disable_link_phy(link, link_res, link->connector_signal); /* Enable link */ dp_cs_id = get_clock_source_id(link); - dp_enable_link_phy(link, link->connector_signal, + dp_enable_link_phy(link, link_res, link->connector_signal, dp_cs_id, link_settings); /* Set FEC enable */ @@ -2461,7 +3078,7 @@ enum link_training_result dc_link_dp_sync_lt_attempt( if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING) { #endif fec_enable = lt_overrides->fec_enable && *lt_overrides->fec_enable; - dp_set_fec_ready(link, fec_enable); + dp_set_fec_ready(link, NULL, fec_enable); #if defined(CONFIG_DRM_AMD_DC_DCN) } #endif @@ -2478,7 +3095,7 @@ enum link_training_result dc_link_dp_sync_lt_attempt( /* Attempt to train with given link training settings */ if (link->ctx->dc->work_arounds.lt_early_cr_pattern) - start_clock_recovery_pattern_early(link, <_settings, DPRX); + start_clock_recovery_pattern_early(link, link_res, <_settings, DPRX); /* Set link rate, lane count and spread. */ dpcd_set_link_settings(link, <_settings); @@ -2486,9 +3103,10 @@ enum link_training_result dc_link_dp_sync_lt_attempt( /* 2. perform link training (set link training done * to false is done as well) */ - lt_status = perform_clock_recovery_sequence(link, <_settings, DPRX); + lt_status = perform_clock_recovery_sequence(link, link_res, <_settings, DPRX); if (lt_status == LINK_TRAINING_SUCCESS) { lt_status = perform_channel_equalization_sequence(link, + link_res, <_settings, DPRX); } @@ -2509,11 +3127,11 @@ bool dc_link_dp_sync_lt_end(struct dc_link *link, bool link_down) #if defined(CONFIG_DRM_AMD_DC_DCN) struct dc_link_settings link_settings = link->cur_link_settings; #endif - dp_disable_link_phy(link, link->connector_signal); + dp_disable_link_phy(link, NULL, link->connector_signal); #if defined(CONFIG_DRM_AMD_DC_DCN) if (dp_get_link_encoding_format(&link_settings) == DP_8b_10b_ENCODING) #endif - dp_set_fec_ready(link, false); + dp_set_fec_ready(link, NULL, false); } link->sync_lt_in_progress = false; @@ -2568,7 +3186,8 @@ bool dc_link_dp_get_max_link_enc_cap(const struct dc_link *link, struct dc_link_ return false; } -static struct dc_link_settings get_max_link_cap(struct dc_link *link) +static struct dc_link_settings get_max_link_cap(struct dc_link *link, + const struct link_resource *link_res) { struct dc_link_settings max_link_cap = {0}; #if defined(CONFIG_DRM_AMD_DC_DCN) @@ -2592,9 +3211,11 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link) if (link_enc) link_enc->funcs->get_max_link_cap(link_enc, &max_link_cap); #if defined(CONFIG_DRM_AMD_DC_DCN) - if (max_link_cap.link_rate >= LINK_RATE_UHBR10 && - !link->hpo_dp_link_enc) - max_link_cap.link_rate = LINK_RATE_HIGH3; + if (max_link_cap.link_rate >= LINK_RATE_UHBR10) { + if (!link_res->hpo_dp_link_enc || + link->dc->debug.disable_uhbr) + max_link_cap.link_rate = LINK_RATE_HIGH3; + } #endif /* Lower link settings based on sink's link cap */ @@ -2612,7 +3233,7 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link) * account for lttpr repeaters cap * notes: repeaters do not snoop in the DPRX Capabilities addresses (3.6.3). */ - if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) { + if (link->lttpr_mode != LTTPR_MODE_NON_LTTPR) { if (link->dpcd_caps.lttpr_caps.max_lane_count < max_link_cap.lane_count) max_link_cap.lane_count = link->dpcd_caps.lttpr_caps.max_lane_count; @@ -2751,6 +3372,7 @@ bool hpd_rx_irq_check_link_loss_status( bool dp_verify_link_cap( struct dc_link *link, + const struct link_resource *link_res, struct dc_link_settings *known_limit_link_setting, int *fail_count) { @@ -2768,7 +3390,7 @@ bool dp_verify_link_cap( /* link training starts with the maximum common settings * supported by both sink and ASIC. */ - max_link_cap = get_max_link_cap(link); + max_link_cap = get_max_link_cap(link, link_res); initial_link_settings = get_common_supported_link_settings( *known_limit_link_setting, max_link_cap); @@ -2808,7 +3430,7 @@ bool dp_verify_link_cap( * find the physical link capability */ /* disable PHY done possible by BIOS, will be done by driver itself */ - dp_disable_link_phy(link, link->connector_signal); + dp_disable_link_phy(link, link_res, link->connector_signal); dp_cs_id = get_clock_source_id(link); @@ -2820,8 +3442,8 @@ bool dp_verify_link_cap( */ if (link->link_enc && link->link_enc->features.flags.bits.DP_IS_USB_C && link->dc->debug.usbc_combo_phy_reset_wa) { - dp_enable_link_phy(link, link->connector_signal, dp_cs_id, cur); - dp_disable_link_phy(link, link->connector_signal); + dp_enable_link_phy(link, link_res, link->connector_signal, dp_cs_id, cur); + dp_disable_link_phy(link, link_res, link->connector_signal); } do { @@ -2832,6 +3454,7 @@ bool dp_verify_link_cap( dp_enable_link_phy( link, + link_res, link->connector_signal, dp_cs_id, cur); @@ -2842,6 +3465,7 @@ bool dp_verify_link_cap( else { status = dc_link_dp_perform_link_training( link, + link_res, cur, skip_video_pattern); if (status == LINK_TRAINING_SUCCESS) @@ -2863,8 +3487,8 @@ bool dp_verify_link_cap( * setting or before returning we'll enable it later * based on the actual mode we're driving */ - dp_disable_link_phy(link, link->connector_signal); - } while (!success && decide_fallback_link_setting( + dp_disable_link_phy(link, link_res, link->connector_signal); + } while (!success && decide_fallback_link_setting(link, initial_link_settings, cur, status)); /* Link Training failed for all Link Settings @@ -2887,6 +3511,7 @@ bool dp_verify_link_cap( bool dp_verify_link_cap_with_retries( struct dc_link *link, + const struct link_resource *link_res, struct dc_link_settings *known_limit_link_setting, int attempts) { @@ -2904,7 +3529,7 @@ bool dp_verify_link_cap_with_retries( link->verified_link_cap.link_rate = LINK_RATE_LOW; link->verified_link_cap.link_spread = LINK_SPREAD_DISABLED; break; - } else if (dp_verify_link_cap(link, + } else if (dp_verify_link_cap(link, link_res, known_limit_link_setting, &fail_count) && fail_count == 0) { success = true; @@ -2916,13 +3541,13 @@ bool dp_verify_link_cap_with_retries( } bool dp_verify_mst_link_cap( - struct dc_link *link) + struct dc_link *link, const struct link_resource *link_res) { struct dc_link_settings max_link_cap = {0}; if (dp_get_link_encoding_format(&link->reported_link_cap) == DP_8b_10b_ENCODING) { - max_link_cap = get_max_link_cap(link); + max_link_cap = get_max_link_cap(link, link_res); link->verified_link_cap = get_common_supported_link_settings( link->reported_link_cap, max_link_cap); @@ -2931,6 +3556,7 @@ bool dp_verify_mst_link_cap( else if (dp_get_link_encoding_format(&link->reported_link_cap) == DP_128b_132b_ENCODING) { dp_verify_link_cap_with_retries(link, + link_res, &link->reported_link_cap, LINK_TRAINING_MAX_VERIFY_RETRY); } @@ -3116,6 +3742,7 @@ static bool decide_fallback_link_setting_max_bw_policy( * and no further fallback could be done */ static bool decide_fallback_link_setting( + struct dc_link *link, struct dc_link_settings initial_link_settings, struct dc_link_settings *current_link_setting, enum link_training_result training_result) @@ -3123,7 +3750,8 @@ static bool decide_fallback_link_setting( if (!current_link_setting) return false; #if defined(CONFIG_DRM_AMD_DC_DCN) - if (dp_get_link_encoding_format(&initial_link_settings) == DP_128b_132b_ENCODING) + if (dp_get_link_encoding_format(&initial_link_settings) == DP_128b_132b_ENCODING || + link->dc->debug.force_dp2_lt_fallback_method) return decide_fallback_link_setting_max_bw_policy(&initial_link_settings, current_link_setting); #endif @@ -3346,6 +3974,148 @@ bool decide_edp_link_settings(struct dc_link *link, struct dc_link_settings *lin return false; } +static bool decide_edp_link_settings_with_dsc(struct dc_link *link, + struct dc_link_settings *link_setting, + uint32_t req_bw, + enum dc_link_rate max_link_rate) +{ + struct dc_link_settings initial_link_setting; + struct dc_link_settings current_link_setting; + uint32_t link_bw; + + unsigned int policy = 0; + + policy = link->ctx->dc->debug.force_dsc_edp_policy; + if (max_link_rate == LINK_RATE_UNKNOWN) + max_link_rate = link->verified_link_cap.link_rate; + /* + * edp_supported_link_rates_count is only valid for eDP v1.4 or higher. + * Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h" + */ + if ((link->dpcd_caps.dpcd_rev.raw < DPCD_REV_13 || + link->dpcd_caps.edp_supported_link_rates_count == 0)) { + /* for DSC enabled case, we search for minimum lane count */ + memset(&initial_link_setting, 0, sizeof(initial_link_setting)); + initial_link_setting.lane_count = LANE_COUNT_ONE; + initial_link_setting.link_rate = LINK_RATE_LOW; + initial_link_setting.link_spread = LINK_SPREAD_DISABLED; + initial_link_setting.use_link_rate_set = false; + initial_link_setting.link_rate_set = 0; + current_link_setting = initial_link_setting; + if (req_bw > dc_link_bandwidth_kbps(link, &link->verified_link_cap)) + return false; + + /* search for the minimum link setting that: + * 1. is supported according to the link training result + * 2. could support the b/w requested by the timing + */ + while (current_link_setting.link_rate <= + max_link_rate) { + link_bw = dc_link_bandwidth_kbps( + link, + ¤t_link_setting); + if (req_bw <= link_bw) { + *link_setting = current_link_setting; + return true; + } + if (policy) { + /* minimize lane */ + if (current_link_setting.link_rate < max_link_rate) { + current_link_setting.link_rate = + increase_link_rate( + current_link_setting.link_rate); + } else { + if (current_link_setting.lane_count < + link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + current_link_setting.link_rate = initial_link_setting.link_rate; + } else + break; + } + } else { + /* minimize link rate */ + if (current_link_setting.lane_count < + link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + } else { + current_link_setting.link_rate = + increase_link_rate( + current_link_setting.link_rate); + current_link_setting.lane_count = + initial_link_setting.lane_count; + } + } + } + return false; + } + + /* if optimize edp link is supported */ + memset(&initial_link_setting, 0, sizeof(initial_link_setting)); + initial_link_setting.lane_count = LANE_COUNT_ONE; + initial_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[0]; + initial_link_setting.link_spread = LINK_SPREAD_DISABLED; + initial_link_setting.use_link_rate_set = true; + initial_link_setting.link_rate_set = 0; + current_link_setting = initial_link_setting; + + /* search for the minimum link setting that: + * 1. is supported according to the link training result + * 2. could support the b/w requested by the timing + */ + while (current_link_setting.link_rate <= + max_link_rate) { + link_bw = dc_link_bandwidth_kbps( + link, + ¤t_link_setting); + if (req_bw <= link_bw) { + *link_setting = current_link_setting; + return true; + } + if (policy) { + /* minimize lane */ + if (current_link_setting.link_rate_set < + link->dpcd_caps.edp_supported_link_rates_count + && current_link_setting.link_rate < max_link_rate) { + current_link_setting.link_rate_set++; + current_link_setting.link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + } else { + if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + current_link_setting.link_rate_set = initial_link_setting.link_rate_set; + current_link_setting.link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + } else + break; + } + } else { + /* minimize link rate */ + if (current_link_setting.lane_count < + link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + } else { + if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting.link_rate_set++; + current_link_setting.link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + current_link_setting.lane_count = + initial_link_setting.lane_count; + } else + break; + } + } + } + return false; +} + static bool decide_mst_link_settings(const struct dc_link *link, struct dc_link_settings *link_setting) { *link_setting = link->verified_link_cap; @@ -3380,7 +4150,25 @@ void decide_link_settings(struct dc_stream_state *stream, if (decide_mst_link_settings(link, link_setting)) return; } else if (link->connector_signal == SIGNAL_TYPE_EDP) { - if (decide_edp_link_settings(link, link_setting, req_bw)) + /* enable edp link optimization for DSC eDP case */ + if (stream->timing.flags.DSC) { + enum dc_link_rate max_link_rate = LINK_RATE_UNKNOWN; + + if (link->ctx->dc->debug.force_dsc_edp_policy) { + /* calculate link max link rate cap*/ + struct dc_link_settings tmp_link_setting; + struct dc_crtc_timing tmp_timing = stream->timing; + uint32_t orig_req_bw; + + tmp_link_setting.link_rate = LINK_RATE_UNKNOWN; + tmp_timing.flags.DSC = 0; + orig_req_bw = dc_bandwidth_in_kbps_from_timing(&tmp_timing); + decide_edp_link_settings(link, &tmp_link_setting, orig_req_bw); + max_link_rate = tmp_link_setting.link_rate; + } + if (decide_edp_link_settings_with_dsc(link, link_setting, req_bw, max_link_rate)) + return; + } else if (decide_edp_link_settings(link, link_setting, req_bw)) return; } else if (decide_dp_link_settings(link, link_setting, req_bw)) return; @@ -3421,7 +4209,6 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) &psr_configuration.raw, sizeof(psr_configuration.raw)); - if (psr_configuration.bits.ENABLE) { unsigned char dpcdbuf[3] = {0}; union psr_error_status psr_error_status; @@ -3453,10 +4240,12 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) sizeof(psr_error_status.raw)); /* PSR error, disable and re-enable PSR */ - allow_active = false; - dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); - allow_active = true; - dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); + if (link->psr_settings.psr_allow_active) { + allow_active = false; + dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); + allow_active = true; + dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); + } return true; } else if (psr_sink_psr_status.bits.SINK_SELF_REFRESH_STATUS == @@ -3534,6 +4323,13 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link) &dpcd_lane_adjustment[0].raw, sizeof(dpcd_lane_adjustment)); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) + vendor_specific_lttpr_wa_three_dpcd( + link, + link_training_settings.dpcd_lane_settings); + /*get post cursor 2 parameters * For DP 1.1a or eariler, this DPCD register's value is 0 * For DP 1.2 or later: @@ -4153,6 +4949,56 @@ static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc) return -1; } +#if defined(CONFIG_DRM_AMD_DC_DCN) +uint32_t dc_link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw) +{ + switch (bw) { + case 0b001: + return 9000000; + case 0b010: + return 18000000; + case 0b011: + return 24000000; + case 0b100: + return 32000000; + case 0b101: + return 40000000; + case 0b110: + return 48000000; + } + + return 0; +} + +/** + * Return PCON's post FRL link training supported BW if its non-zero, otherwise return max_supported_frl_bw. + */ +static uint32_t intersect_frl_link_bw_support( + const uint32_t max_supported_frl_bw_in_kbps, + const union hdmi_encoded_link_bw hdmi_encoded_link_bw) +{ + uint32_t supported_bw_in_kbps = max_supported_frl_bw_in_kbps; + + // HDMI_ENCODED_LINK_BW bits are only valid if HDMI Link Configuration bit is 1 (FRL mode) + if (hdmi_encoded_link_bw.bits.FRL_MODE) { + if (hdmi_encoded_link_bw.bits.BW_48Gbps) + supported_bw_in_kbps = 48000000; + else if (hdmi_encoded_link_bw.bits.BW_40Gbps) + supported_bw_in_kbps = 40000000; + else if (hdmi_encoded_link_bw.bits.BW_32Gbps) + supported_bw_in_kbps = 32000000; + else if (hdmi_encoded_link_bw.bits.BW_24Gbps) + supported_bw_in_kbps = 24000000; + else if (hdmi_encoded_link_bw.bits.BW_18Gbps) + supported_bw_in_kbps = 18000000; + else if (hdmi_encoded_link_bw.bits.BW_9Gbps) + supported_bw_in_kbps = 9000000; + } + + return supported_bw_in_kbps; +} +#endif + static void read_dp_device_vendor_id(struct dc_link *link) { struct dp_device_vendor_id dp_id; @@ -4264,6 +5110,27 @@ static void get_active_converter_info( translate_dpcd_max_bpc( hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (link->dc->caps.hdmi_frl_pcon_support) { + union hdmi_encoded_link_bw hdmi_encoded_link_bw; + + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = + dc_link_bw_kbps_from_raw_frl_link_rate_data( + hdmi_color_caps.bits.MAX_ENCODED_LINK_BW_SUPPORT); + + // Intersect reported max link bw support with the supported link rate post FRL link training + if (core_link_read_dpcd(link, DP_PCON_HDMI_POST_FRL_STATUS, + &hdmi_encoded_link_bw.raw, sizeof(hdmi_encoded_link_bw)) == DC_OK) { + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, + hdmi_encoded_link_bw); + } + + if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0) + link->dpcd_caps.dongle_caps.extendedCapValid = true; + } +#endif + if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz != 0) link->dpcd_caps.dongle_caps.extendedCapValid = true; } @@ -4454,7 +5321,7 @@ bool dp_retrieve_lttpr_cap(struct dc_link *link) lttpr_dpcd_data, sizeof(lttpr_dpcd_data)); if (status != DC_OK) { - dm_error("%s: Read LTTPR caps data failed.\n", __func__); + DC_LOG_DP2("%s: Read LTTPR caps data failed.\n", __func__); return false; } @@ -5218,7 +6085,7 @@ bool dc_link_dp_set_test_pattern( DP_TEST_PATTERN_VIDEO_MODE) { /* Set CRTC Test Pattern */ set_crtc_test_pattern(link, pipe_ctx, test_pattern, test_pattern_color_space); - dp_set_hw_test_pattern(link, test_pattern, + dp_set_hw_test_pattern(link, &pipe_ctx->link_res, test_pattern, (uint8_t *)p_custom_pattern, (uint32_t)cust_pattern_size); @@ -5240,8 +6107,18 @@ bool dc_link_dp_set_test_pattern( if (is_dp_phy_pattern(test_pattern)) { /* Set DPCD Lane Settings before running test pattern */ if (p_link_settings != NULL) { - dp_set_hw_lane_settings(link, p_link_settings, DPRX); - dpcd_set_lane_settings(link, p_link_settings, DPRX); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + dpcd_set_lane_settings(link, p_link_settings, DPRX); + vendor_specific_lttpr_wa_five( + link, + p_link_settings->dpcd_lane_settings, + p_link_settings->link_settings.lane_count); + } else { + dp_set_hw_lane_settings(link, &pipe_ctx->link_res, p_link_settings, DPRX); + dpcd_set_lane_settings(link, p_link_settings, DPRX); + } } /* Blank stream if running test pattern */ @@ -5254,7 +6131,7 @@ bool dc_link_dp_set_test_pattern( pipes->stream_res.stream_enc->funcs->dp_blank(link, pipe_ctx->stream_res.stream_enc); } - dp_set_hw_test_pattern(link, test_pattern, + dp_set_hw_test_pattern(link, &pipe_ctx->link_res, test_pattern, (uint8_t *)p_custom_pattern, (uint32_t)cust_pattern_size); @@ -5574,7 +6451,7 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link) return DP_PANEL_MODE_DEFAULT; } -enum dc_status dp_set_fec_ready(struct dc_link *link, bool ready) +enum dc_status dp_set_fec_ready(struct dc_link *link, const struct link_resource *link_res, bool ready) { /* FEC has to be "set ready" before the link training. * The policy is to always train with FEC @@ -5665,6 +6542,23 @@ void dp_set_fec_enable(struct dc_link *link, bool enable) } } +struct link_encoder *dp_get_link_enc(struct dc_link *link) +{ + struct link_encoder *link_enc; + + link_enc = link->link_enc; + if (link->is_dig_mapping_flexible && + link->dc->res_pool->funcs->link_encs_assign) { + link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, + link); + if (!link->link_enc) + link_enc = link_enc_cfg_get_next_avail_link_enc( + link->ctx->dc); + } + + return link_enc; +} + void dpcd_set_source_specific_data(struct dc_link *link) { if (!link->dc->vendor_signature.is_valid) { @@ -5885,7 +6779,10 @@ bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timin req_bw = dc_bandwidth_in_kbps_from_timing(crtc_timing); - decide_edp_link_settings(link, &link_setting, req_bw); + if (!crtc_timing->flags.DSC) + decide_edp_link_settings(link, &link_setting, req_bw); + else + decide_edp_link_settings_with_dsc(link, &link_setting, req_bw, LINK_RATE_UNKNOWN); if (link->dpcd_caps.edp_supported_link_rates[link_rate_set] != link_setting.link_rate || lane_count_set.bits.LANE_COUNT_SET != link_setting.lane_count) { @@ -6121,8 +7018,21 @@ struct fixed31_32 calculate_sst_avg_time_slots_per_mtp( bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx) { + /* If this assert is hit then we have a link encoder dynamic management issue */ + ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->stream->link->hpo_dp_link_enc && + pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal)); } #endif + +void edp_panel_backlight_power_on(struct dc_link *link) +{ + if (link->connector_signal != SIGNAL_TYPE_EDP) + return; + + link->dc->hwss.edp_power_control(link, true); + link->dc->hwss.edp_wait_for_hpd_ready(link, true); + if (link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control(link, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c index b1c9f77d6bf4..0e95bc5df4e7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c @@ -77,7 +77,9 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link) * @param[in] link_setting Lane count, link rate and downspread control. * @param[out] lt_settings Link settings and drive settings (voltage swing and pre-emphasis). */ -static enum link_training_result dpia_configure_link(struct dc_link *link, +static enum link_training_result dpia_configure_link( + struct dc_link *link, + const struct link_resource *link_res, const struct dc_link_settings *link_setting, struct link_training_settings *lt_settings) { @@ -94,25 +96,25 @@ static enum link_training_result dpia_configure_link(struct dc_link *link, lt_settings); status = dpcd_configure_channel_coding(link, lt_settings); - if (status != DC_OK && !link->hpd_status) + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; /* Configure lttpr mode */ status = dpcd_configure_lttpr_mode(link, lt_settings); - if (status != DC_OK && !link->hpd_status) + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; /* Set link rate, lane count and spread. */ status = dpcd_set_link_settings(link, lt_settings); - if (status != DC_OK && !link->hpd_status) + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; if (link->preferred_training_settings.fec_enable) fec_enable = *link->preferred_training_settings.fec_enable; else fec_enable = true; - status = dp_set_fec_ready(link, fec_enable); - if (status != DC_OK && !link->hpd_status) + status = dp_set_fec_ready(link, link_res, fec_enable); + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; return LINK_TRAINING_SUCCESS; @@ -252,7 +254,9 @@ static enum dc_status dpcd_set_lt_pattern(struct dc_link *link, * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis). * @param hop The Hop in display path. DPRX = 0. */ -static enum link_training_result dpia_training_cr_non_transparent(struct dc_link *link, +static enum link_training_result dpia_training_cr_non_transparent( + struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, uint32_t hop) { @@ -388,7 +392,7 @@ static enum link_training_result dpia_training_cr_non_transparent(struct dc_link } /* Abort link training if clock recovery failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n" @@ -411,7 +415,9 @@ static enum link_training_result dpia_training_cr_non_transparent(struct dc_link * @param link DPIA link being trained. * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis). */ -static enum link_training_result dpia_training_cr_transparent(struct dc_link *link, +static enum link_training_result dpia_training_cr_transparent( + struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { enum link_training_result result = LINK_TRAINING_CR_FAIL_LANE0; @@ -490,7 +496,7 @@ static enum link_training_result dpia_training_cr_transparent(struct dc_link *li } /* Abort link training if clock recovery failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n" @@ -511,16 +517,18 @@ static enum link_training_result dpia_training_cr_transparent(struct dc_link *li * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis). * @param hop The Hop in display path. DPRX = 0. */ -static enum link_training_result dpia_training_cr_phase(struct dc_link *link, +static enum link_training_result dpia_training_cr_phase( + struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, uint32_t hop) { enum link_training_result result = LINK_TRAINING_CR_FAIL_LANE0; if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) - result = dpia_training_cr_non_transparent(link, lt_settings, hop); + result = dpia_training_cr_non_transparent(link, link_res, lt_settings, hop); else - result = dpia_training_cr_transparent(link, lt_settings); + result = dpia_training_cr_transparent(link, link_res, lt_settings); return result; } @@ -561,7 +569,9 @@ static uint32_t dpia_get_eq_aux_rd_interval(const struct dc_link *link, * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis). * @param hop The Hop in display path. DPRX = 0. */ -static enum link_training_result dpia_training_eq_non_transparent(struct dc_link *link, +static enum link_training_result dpia_training_eq_non_transparent( + struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, uint32_t hop) { @@ -675,7 +685,7 @@ static enum link_training_result dpia_training_eq_non_transparent(struct dc_link } /* Abort link training if equalization failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n" @@ -700,7 +710,9 @@ static enum link_training_result dpia_training_eq_non_transparent(struct dc_link * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis). * @param hop The Hop in display path. DPRX = 0. */ -static enum link_training_result dpia_training_eq_transparent(struct dc_link *link, +static enum link_training_result dpia_training_eq_transparent( + struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings) { enum link_training_result result = LINK_TRAINING_EQ_FAIL_EQ; @@ -758,7 +770,7 @@ static enum link_training_result dpia_training_eq_transparent(struct dc_link *li } /* Abort link training if equalization failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n" @@ -779,16 +791,18 @@ static enum link_training_result dpia_training_eq_transparent(struct dc_link *li * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis). * @param hop The Hop in display path. DPRX = 0. */ -static enum link_training_result dpia_training_eq_phase(struct dc_link *link, +static enum link_training_result dpia_training_eq_phase( + struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings, uint32_t hop) { enum link_training_result result; if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) - result = dpia_training_eq_non_transparent(link, lt_settings, hop); + result = dpia_training_eq_non_transparent(link, link_res, lt_settings, hop); else - result = dpia_training_eq_transparent(link, lt_settings); + result = dpia_training_eq_transparent(link, link_res, lt_settings); return result; } @@ -892,10 +906,10 @@ static void dpia_training_abort(struct dc_link *link, uint32_t hop) __func__, link->link_id.enum_id - ENUM_ID_1, link->lttpr_mode, - link->hpd_status); + link->is_hpd_pending); /* Abandon clean-up if sink unplugged. */ - if (!link->hpd_status) + if (link->is_hpd_pending) return; if (hop != DPRX) @@ -908,7 +922,9 @@ static void dpia_training_abort(struct dc_link *link, uint32_t hop) core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data); } -enum link_training_result dc_link_dpia_perform_link_training(struct dc_link *link, +enum link_training_result dc_link_dpia_perform_link_training( + struct dc_link *link, + const struct link_resource *link_res, const struct dc_link_settings *link_setting, bool skip_video_pattern) { @@ -918,7 +934,7 @@ enum link_training_result dc_link_dpia_perform_link_training(struct dc_link *lin int8_t repeater_id; /* Current hop. */ /* Configure link as prescribed in link_setting and set LTTPR mode. */ - result = dpia_configure_link(link, link_setting, <_settings); + result = dpia_configure_link(link, link_res, link_setting, <_settings); if (result != LINK_TRAINING_SUCCESS) return result; @@ -930,12 +946,12 @@ enum link_training_result dc_link_dpia_perform_link_training(struct dc_link *lin */ for (repeater_id = repeater_cnt; repeater_id >= 0; repeater_id--) { /* Clock recovery. */ - result = dpia_training_cr_phase(link, <_settings, repeater_id); + result = dpia_training_cr_phase(link, link_res, <_settings, repeater_id); if (result != LINK_TRAINING_SUCCESS) break; /* Equalization. */ - result = dpia_training_eq_phase(link, <_settings, repeater_id); + result = dpia_training_eq_phase(link, link_res, <_settings, repeater_id); if (result != LINK_TRAINING_SUCCESS) break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 25e48a8cbb78..a55944da8d53 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -118,7 +118,10 @@ static void remove_link_enc_assignment( */ if (get_stream_using_link_enc(state, eng_id) == NULL) state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_idx] = eng_id; + stream->link_enc = NULL; + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN; + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL; break; } } @@ -148,6 +151,7 @@ static void add_link_enc_assignment( .ep_type = stream->link->ep_type}, .eng_id = eng_id, .stream = stream}; + dc_stream_retain(stream); state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_idx] = ENGINE_ID_UNKNOWN; stream->link_enc = stream->ctx->dc->res_pool->link_encoders[eng_idx]; break; @@ -227,7 +231,7 @@ static struct link_encoder *get_link_enc_used_by_link( .link_id = link->link_id, .ep_type = link->ep_type}; - for (i = 0; i < state->stream_count; i++) { + for (i = 0; i < MAX_PIPES; i++) { struct link_enc_assignment assignment = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i]; if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id)) @@ -237,28 +241,18 @@ static struct link_encoder *get_link_enc_used_by_link( return link_enc; } /* Clear all link encoder assignments. */ -static void clear_enc_assignments(struct dc_state *state) +static void clear_enc_assignments(const struct dc *dc, struct dc_state *state) { int i; - enum engine_id eng_id; - struct dc_stream_state *stream; for (i = 0; i < MAX_PIPES; i++) { state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].valid = false; - eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id; - stream = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream; - if (eng_id != ENGINE_ID_UNKNOWN) - state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_id - ENGINE_ID_DIGA] = eng_id; - if (stream) - stream->link_enc = NULL; + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN; + if (state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream != NULL) { + dc_stream_release(state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream); + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL; + } } -} - -void link_enc_cfg_init( - struct dc *dc, - struct dc_state *state) -{ - int i; for (i = 0; i < dc->res_pool->res_cap->num_dig_link_enc; i++) { if (dc->res_pool->link_encoders[i]) @@ -266,8 +260,13 @@ void link_enc_cfg_init( else state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i] = ENGINE_ID_UNKNOWN; } +} - clear_enc_assignments(state); +void link_enc_cfg_init( + const struct dc *dc, + struct dc_state *state) +{ + clear_enc_assignments(dc, state); state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY; } @@ -284,12 +283,9 @@ void link_enc_cfg_link_encs_assign( ASSERT(state->stream_count == stream_count); - if (stream_count == 0) - clear_enc_assignments(state); - /* Release DIG link encoder resources before running assignment algorithm. */ - for (i = 0; i < stream_count; i++) - dc->res_pool->funcs->link_enc_unassign(state, streams[i]); + for (i = 0; i < dc->current_state->stream_count; i++) + dc->res_pool->funcs->link_enc_unassign(state, dc->current_state->streams[i]); for (i = 0; i < MAX_PIPES; i++) ASSERT(state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].valid == false); @@ -544,6 +540,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) uint8_t dig_stream_count = 0; int matching_stream_ptrs = 0; int eng_ids_per_ep_id[MAX_PIPES] = {0}; + int valid_bitmap = 0; /* (1) No. valid entries same as stream count. */ for (i = 0; i < MAX_PIPES; i++) { @@ -625,5 +622,15 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) is_valid = valid_entries && valid_stream_ptrs && valid_uniqueness && valid_avail && valid_streams; ASSERT(is_valid); + if (is_valid == false) { + valid_bitmap = + (valid_entries & 0x1) | + ((valid_stream_ptrs & 0x1) << 1) | + ((valid_uniqueness & 0x1) << 2) | + ((valid_avail & 0x1) << 3) | + ((valid_streams & 0x1) << 4); + dm_error("Invalid link encoder assignments: 0x%x\n", valid_bitmap); + } + return is_valid; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 368e834c6809..45d03d3a95c3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -71,6 +71,7 @@ void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode) void dp_enable_link_phy( struct dc_link *link, + const struct link_resource *link_res, enum signal_type signal, enum clock_source_id clock_source, const struct dc_link_settings *link_settings) @@ -135,7 +136,7 @@ void dp_enable_link_phy( #if defined(CONFIG_DRM_AMD_DC_DCN) if (dp_get_link_encoding_format(link_settings) == DP_128b_132b_ENCODING) { - enable_dp_hpo_output(link, link_settings); + enable_dp_hpo_output(link, link_res, link_settings); } else if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING) { if (dc_is_dp_sst_signal(signal)) { link_enc->funcs->enable_dp_output( @@ -236,12 +237,13 @@ bool edp_receiver_ready_T7(struct dc_link *link) return result; } -void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) +void dp_disable_link_phy(struct dc_link *link, const struct link_resource *link_res, + enum signal_type signal) { struct dc *dc = link->ctx->dc; struct dmcu *dmcu = dc->res_pool->dmcu; #if defined(CONFIG_DRM_AMD_DC_DCN) - struct hpo_dp_link_encoder *hpo_link_enc = link->hpo_dp_link_enc; + struct hpo_dp_link_encoder *hpo_link_enc = link_res->hpo_dp_link_enc; #endif struct link_encoder *link_enc; @@ -260,7 +262,7 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) link->dc->hwss.edp_backlight_control(link, false); #if defined(CONFIG_DRM_AMD_DC_DCN) if (dp_get_link_encoding_format(&link->cur_link_settings) == DP_128b_132b_ENCODING) - disable_dp_hpo_output(link, signal); + disable_dp_hpo_output(link, link_res, signal); else link_enc->funcs->disable_output(link_enc, signal); #else @@ -274,7 +276,7 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) #if defined(CONFIG_DRM_AMD_DC_DCN) if (dp_get_link_encoding_format(&link->cur_link_settings) == DP_128b_132b_ENCODING && hpo_link_enc) - disable_dp_hpo_output(link, signal); + disable_dp_hpo_output(link, link_res, signal); else link_enc->funcs->disable_output(link_enc, signal); #else @@ -294,13 +296,14 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link); } -void dp_disable_link_phy_mst(struct dc_link *link, enum signal_type signal) +void dp_disable_link_phy_mst(struct dc_link *link, const struct link_resource *link_res, + enum signal_type signal) { /* MST disable link only when no stream use the link */ if (link->mst_stream_alloc_table.stream_count > 0) return; - dp_disable_link_phy(link, signal); + dp_disable_link_phy(link, link_res, signal); /* set the sink to SST mode after disabling the link */ dp_enable_mst_on_sink(link, false); @@ -308,6 +311,7 @@ void dp_disable_link_phy_mst(struct dc_link *link, enum signal_type signal) bool dp_set_hw_training_pattern( struct dc_link *link, + const struct link_resource *link_res, enum dc_dp_training_pattern pattern, uint32_t offset) { @@ -338,7 +342,7 @@ bool dp_set_hw_training_pattern( break; } - dp_set_hw_test_pattern(link, test_pattern, NULL, 0); + dp_set_hw_test_pattern(link, link_res, test_pattern, NULL, 0); return true; } @@ -349,6 +353,7 @@ bool dp_set_hw_training_pattern( #endif void dp_set_hw_lane_settings( struct dc_link *link, + const struct link_resource *link_res, const struct link_training_settings *link_settings, uint32_t offset) { @@ -361,8 +366,8 @@ void dp_set_hw_lane_settings( #if defined(CONFIG_DRM_AMD_DC_DCN) if (dp_get_link_encoding_format(&link_settings->link_settings) == DP_128b_132b_ENCODING) { - link->hpo_dp_link_enc->funcs->set_ffe( - link->hpo_dp_link_enc, + link_res->hpo_dp_link_enc->funcs->set_ffe( + link_res->hpo_dp_link_enc, &link_settings->link_settings, link_settings->lane_settings[0].FFE_PRESET.raw); } else if (dp_get_link_encoding_format(&link_settings->link_settings) @@ -379,6 +384,7 @@ void dp_set_hw_lane_settings( void dp_set_hw_test_pattern( struct dc_link *link, + const struct link_resource *link_res, enum dp_test_pattern test_pattern, uint8_t *custom_pattern, uint32_t custom_pattern_size) @@ -406,8 +412,8 @@ void dp_set_hw_test_pattern( #if defined(CONFIG_DRM_AMD_DC_DCN) switch (link_encoding_format) { case DP_128b_132b_ENCODING: - link->hpo_dp_link_enc->funcs->set_link_test_pattern( - link->hpo_dp_link_enc, &pattern_param); + link_res->hpo_dp_link_enc->funcs->set_link_test_pattern( + link_res->hpo_dp_link_enc, &pattern_param); break; case DP_8b_10b_ENCODING: ASSERT(encoder); @@ -446,7 +452,7 @@ void dp_retrain_link_dp_test(struct dc_link *link, pipes[i].stream_res.stream_enc); /* disable any test pattern that might be active */ - dp_set_hw_test_pattern(link, + dp_set_hw_test_pattern(link, &pipes[i].link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); dp_receiver_power_ctrl(link, false); @@ -763,7 +769,9 @@ static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link) } } -void enable_dp_hpo_output(struct dc_link *link, const struct dc_link_settings *link_settings) +void enable_dp_hpo_output(struct dc_link *link, + const struct link_resource *link_res, + const struct dc_link_settings *link_settings) { const struct dc *dc = link->dc; enum phyd32clk_clock_source phyd32clk; @@ -789,10 +797,11 @@ void enable_dp_hpo_output(struct dc_link *link, const struct dc_link_settings *l } } else { /* DP2.0 HW: call transmitter control to enable PHY */ - link->hpo_dp_link_enc->funcs->enable_link_phy( - link->hpo_dp_link_enc, + link_res->hpo_dp_link_enc->funcs->enable_link_phy( + link_res->hpo_dp_link_enc, link_settings, - link->link_enc->transmitter); + link->link_enc->transmitter, + link->link_enc->hpd_source); } /* DCCG muxing and DTBCLK DTO */ @@ -806,24 +815,26 @@ void enable_dp_hpo_output(struct dc_link *link, const struct dc_link_settings *l phyd32clk = get_phyd32clk_src(link); dc->res_pool->dccg->funcs->enable_symclk32_le( dc->res_pool->dccg, - link->hpo_dp_link_enc->inst, + link_res->hpo_dp_link_enc->inst, phyd32clk); - link->hpo_dp_link_enc->funcs->link_enable( - link->hpo_dp_link_enc, - link_settings->lane_count); + link_res->hpo_dp_link_enc->funcs->link_enable( + link_res->hpo_dp_link_enc, + link_settings->lane_count); } } -void disable_dp_hpo_output(struct dc_link *link, enum signal_type signal) +void disable_dp_hpo_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal) { const struct dc *dc = link->dc; - link->hpo_dp_link_enc->funcs->link_disable(link->hpo_dp_link_enc); + link_res->hpo_dp_link_enc->funcs->link_disable(link_res->hpo_dp_link_enc); if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { dc->res_pool->dccg->funcs->disable_symclk32_le( dc->res_pool->dccg, - link->hpo_dp_link_enc->inst); + link_res->hpo_dp_link_enc->inst); dc->res_pool->dccg->funcs->set_physymclk( dc->res_pool->dccg, @@ -834,8 +845,8 @@ void disable_dp_hpo_output(struct dc_link *link, enum signal_type signal) dm_set_phyd32clk(dc->ctx, 0); } else { /* DP2.0 HW: call transmitter control to disable PHY */ - link->hpo_dp_link_enc->funcs->disable_link_phy( - link->hpo_dp_link_enc, + link_res->hpo_dp_link_enc->funcs->disable_link_phy( + link_res->hpo_dp_link_enc, signal); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index e2d9a46d0e1a..d4ff6cc6b8d9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1724,6 +1724,94 @@ static void update_hpo_dp_stream_engine_usage( res_ctx->is_hpo_dp_stream_enc_acquired[i] = acquired; } } + +static inline int find_acquired_hpo_dp_link_enc_for_link( + const struct resource_context *res_ctx, + const struct dc_link *link) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_to_link_idx); i++) + if (res_ctx->hpo_dp_link_enc_ref_cnts[i] > 0 && + res_ctx->hpo_dp_link_enc_to_link_idx[i] == link->link_index) + return i; + + return -1; +} + +static inline int find_free_hpo_dp_link_enc(const struct resource_context *res_ctx, + const struct resource_pool *pool) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_ref_cnts); i++) + if (res_ctx->hpo_dp_link_enc_ref_cnts[i] == 0) + break; + + return (i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_ref_cnts) && + i < pool->hpo_dp_link_enc_count) ? i : -1; +} + +static inline void acquire_hpo_dp_link_enc( + struct resource_context *res_ctx, + unsigned int link_index, + int enc_index) +{ + res_ctx->hpo_dp_link_enc_to_link_idx[enc_index] = link_index; + res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] = 1; +} + +static inline void retain_hpo_dp_link_enc( + struct resource_context *res_ctx, + int enc_index) +{ + res_ctx->hpo_dp_link_enc_ref_cnts[enc_index]++; +} + +static inline void release_hpo_dp_link_enc( + struct resource_context *res_ctx, + int enc_index) +{ + ASSERT(res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] > 0); + res_ctx->hpo_dp_link_enc_ref_cnts[enc_index]--; +} + +static bool add_hpo_dp_link_enc_to_ctx(struct resource_context *res_ctx, + const struct resource_pool *pool, + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream) +{ + int enc_index; + + enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link); + + if (enc_index >= 0) { + retain_hpo_dp_link_enc(res_ctx, enc_index); + } else { + enc_index = find_free_hpo_dp_link_enc(res_ctx, pool); + if (enc_index >= 0) + acquire_hpo_dp_link_enc(res_ctx, stream->link->link_index, enc_index); + } + + if (enc_index >= 0) + pipe_ctx->link_res.hpo_dp_link_enc = pool->hpo_dp_link_enc[enc_index]; + + return pipe_ctx->link_res.hpo_dp_link_enc != NULL; +} + +static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx, + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream) +{ + int enc_index; + + enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link); + + if (enc_index >= 0) { + release_hpo_dp_link_enc(res_ctx, enc_index); + pipe_ctx->link_res.hpo_dp_link_enc = NULL; + } +} #endif /* TODO: release audio object */ @@ -1886,6 +1974,7 @@ enum dc_status dc_remove_stream_from_ctx( &new_ctx->res_ctx, dc->res_pool, del_pipe->stream_res.hpo_dp_stream_enc, false); + remove_hpo_dp_link_enc_from_ctx(&new_ctx->res_ctx, del_pipe, del_pipe->stream); } #endif @@ -2082,7 +2171,6 @@ static void mark_seamless_boot_stream( { struct dc_bios *dcb = dc->ctx->dc_bios; - /* TODO: Check Linux */ if (dc->config.allow_seamless_boot_optimization && !dcb->funcs->is_accelerated_mode(dcb)) { if (dc_validate_seamless_boot_timing(dc, stream->sink, &stream->timing)) @@ -2162,6 +2250,8 @@ enum dc_status resource_map_pool_resources( &context->res_ctx, pool, pipe_ctx->stream_res.hpo_dp_stream_enc, true); + if (!add_hpo_dp_link_enc_to_ctx(&context->res_ctx, pool, pipe_ctx, stream)) + return DC_NO_LINK_ENC_RESOURCE; } } #endif @@ -2228,6 +2318,9 @@ void dc_resource_state_construct( struct dc_state *dst_ctx) { dst_ctx->clk_mgr = dc->clk_mgr; + + /* Initialise DIG link encoder resource tracking variables. */ + link_enc_cfg_init(dc, dst_ctx); } @@ -2510,17 +2603,7 @@ static void set_avi_info_frame( /* TODO : We should handle YCC quantization */ /* but we do not have matrix calculation */ - if (stream->qy_bit == 1) { - if (color_space == COLOR_SPACE_SRGB || - color_space == COLOR_SPACE_2020_RGB_FULLRANGE) - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - else if (color_space == COLOR_SPACE_SRGB_LIMITED || - color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - else - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - } else - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; ///VIC format = stream->timing.timing_3d_format; @@ -2844,6 +2927,8 @@ bool pipe_need_reprogram( #if defined(CONFIG_DRM_AMD_DC_DCN) if (pipe_ctx_old->stream_res.hpo_dp_stream_enc != pipe_ctx->stream_res.hpo_dp_stream_enc) return true; + if (pipe_ctx_old->link_res.hpo_dp_link_enc != pipe_ctx->link_res.hpo_dp_link_enc) + return true; #endif /* DIG link encoder resource assignment for stream changed. */ @@ -3112,21 +3197,55 @@ void get_audio_check(struct audio_info *aud_modes, } #if defined(CONFIG_DRM_AMD_DC_DCN) -struct hpo_dp_link_encoder *resource_get_unused_hpo_dp_link_encoder( - const struct resource_pool *pool) +struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( + const struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct dc_link *link) { - uint8_t i; - struct hpo_dp_link_encoder *enc = NULL; + struct hpo_dp_link_encoder *hpo_dp_link_enc = NULL; + int enc_index; + + enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, link); + + if (enc_index < 0) + enc_index = find_free_hpo_dp_link_enc(res_ctx, pool); - ASSERT(pool->hpo_dp_link_enc_count <= MAX_HPO_DP2_LINK_ENCODERS); + if (enc_index >= 0) + hpo_dp_link_enc = pool->hpo_dp_link_enc[enc_index]; - for (i = 0; i < pool->hpo_dp_link_enc_count; i++) { - if (pool->hpo_dp_link_enc[i]->transmitter == TRANSMITTER_UNKNOWN) { - enc = pool->hpo_dp_link_enc[i]; + return hpo_dp_link_enc; +} +#endif + +uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter) +{ + /* TODO - get transmitter to phy idx mapping from DMUB */ + uint8_t phy_idx = transmitter - TRANSMITTER_UNIPHY_A; + +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (dc->ctx->dce_version == DCN_VERSION_3_1 && + dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + phy_idx = 0; + break; + case TRANSMITTER_UNIPHY_B: + phy_idx = 1; + break; + case TRANSMITTER_UNIPHY_C: + phy_idx = 5; + break; + case TRANSMITTER_UNIPHY_D: + phy_idx = 6; + break; + case TRANSMITTER_UNIPHY_E: + phy_idx = 4; + break; + default: + phy_idx = 0; break; } } - - return enc; -} #endif + return phy_idx; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c index a249a0e5edd0..4b5e4d8e7735 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c @@ -33,14 +33,6 @@ * Private functions ******************************************************************************/ -static void dc_sink_destruct(struct dc_sink *sink) -{ - if (sink->dc_container_id) { - kfree(sink->dc_container_id); - sink->dc_container_id = NULL; - } -} - static bool dc_sink_construct(struct dc_sink *sink, const struct dc_sink_init_data *init_params) { @@ -75,7 +67,7 @@ void dc_sink_retain(struct dc_sink *sink) static void dc_sink_free(struct kref *kref) { struct dc_sink *sink = container_of(kref, struct dc_sink, refcount); - dc_sink_destruct(sink); + kfree(sink->dc_container_id); kfree(sink); } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 618e7989176f..da2c78ce14d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.160" +#define DC_VER "3.2.167" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -75,6 +75,16 @@ enum dc_plane_type { DC_PLANE_TYPE_DCN_UNIVERSAL, }; +// Sizes defined as multiples of 64KB +enum det_size { + DET_SIZE_DEFAULT = 0, + DET_SIZE_192KB = 3, + DET_SIZE_256KB = 4, + DET_SIZE_320KB = 5, + DET_SIZE_384KB = 6 +}; + + struct dc_plane_cap { enum dc_plane_type type; uint32_t blends_with_above : 1; @@ -187,7 +197,9 @@ struct dc_caps { struct dc_color_caps color; #if defined(CONFIG_DRM_AMD_DC_DCN) bool dp_hpo; + bool hdmi_frl_pcon_support; #endif + bool edp_dsc_support; bool vbios_lttpr_aware; bool vbios_lttpr_enable; }; @@ -509,7 +521,8 @@ union dpia_debug_options { uint32_t force_non_lttpr:1; uint32_t extend_aux_rd_interval:1; uint32_t disable_mst_dsc_work_around:1; - uint32_t reserved:28; + uint32_t hpd_delay_in_ms:12; + uint32_t reserved:16; } bits; uint32_t raw; }; @@ -574,6 +587,8 @@ struct dc_debug_options { bool native422_support; bool disable_dsc; enum visual_confirm visual_confirm; + int visual_confirm_rect_height; + bool sanity_checks; bool max_disp_clk; bool surface_trace; @@ -668,11 +683,15 @@ struct dc_debug_options { bool validate_dml_output; bool enable_dmcub_surface_flip; bool usbc_combo_phy_reset_wa; + bool disable_dsc_edp; + unsigned int force_dsc_edp_policy; bool enable_dram_clock_change_one_display_vactive; #if defined(CONFIG_DRM_AMD_DC_DCN) /* TODO - remove once tested */ bool legacy_dp2_lt; bool set_mst_en_for_sst; + bool disable_uhbr; + bool force_dp2_lt_fallback_method; #endif union mem_low_power_enable_options enable_mem_low_power; union root_clock_optimization_options root_clock_optimization; @@ -685,11 +704,14 @@ struct dc_debug_options { /* FEC/PSR1 sequence enable delay in 100us */ uint8_t fec_enable_delay_in100us; bool enable_driver_sequence_debug; + enum det_size crb_alloc_policy; + int crb_alloc_policy_min_disp_count; #if defined(CONFIG_DRM_AMD_DC_DCN) bool disable_z10; bool enable_sw_cntl_psr; union dpia_debug_options dpia_debug; #endif + bool apply_vendor_specific_lttpr_wa; }; struct gpu_info_soc_bounding_box_v1_0; @@ -1290,6 +1312,11 @@ struct dc_sink_dsc_caps { // 'true' if these are virtual DPCD's DSC caps (immediately upstream of sink in MST topology), // 'false' if they are sink's DSC caps bool is_virtual_dpcd_dsc; +#if defined(CONFIG_DRM_AMD_DC_DCN) + // 'true' if MST topology supports DSC passthrough for sink + // 'false' if MST topology does not support DSC passthrough + bool is_dsc_passthrough_supported; +#endif struct dsc_dec_dpcd_caps dsc_dec_caps; }; @@ -1405,6 +1432,9 @@ void dc_unlock_memory_clock_frequency(struct dc *dc); */ void dc_lock_memory_clock_frequency(struct dc *dc); +/* set soft max for memclk, to be used for AC/DC switching clock limitations */ +void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable); + /* cleanup on driver unload */ void dc_hardware_release(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 360f3199ea6f..541376fabbef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -115,13 +115,44 @@ void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) } } +void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv) +{ + struct dmub_srv *dmub = dmub_srv->dmub; + struct dc_context *dc_ctx = dmub_srv->ctx; + enum dmub_status status = DMUB_STATUS_OK; + + status = dmub_srv_clear_inbox0_ack(dmub); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error clearing INBOX0 ack: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dmub_srv); + } +} + +void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv) +{ + struct dmub_srv *dmub = dmub_srv->dmub; + struct dc_context *dc_ctx = dmub_srv->ctx; + enum dmub_status status = DMUB_STATUS_OK; + + status = dmub_srv_wait_for_inbox0_ack(dmub, 100000); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error waiting for INBOX0 HW Lock Ack\n"); + dc_dmub_srv_log_diagnostic_data(dmub_srv); + } +} + void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data) { struct dmub_srv *dmub = dmub_srv->dmub; - if (dmub->hw_funcs.send_inbox0_cmd) - dmub->hw_funcs.send_inbox0_cmd(dmub, data); - // TODO: Add wait command -- poll register for ACK + struct dc_context *dc_ctx = dmub_srv->ctx; + enum dmub_status status = DMUB_STATUS_OK; + + status = dmub_srv_send_inbox0_cmd(dmub, data); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error sending INBOX0 cmd\n"); + dc_dmub_srv_log_diagnostic_data(dmub_srv); + } } bool dc_dmub_srv_cmd_with_reply_data(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 3e35eee7188c..7e4e2ec5915d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -68,6 +68,8 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, struct dmcub_trace_bu void dc_dmub_trace_event_control(struct dc *dc, bool enable); +void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv); +void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv); void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data); bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *dmub_oca); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index e68e9a86a4d9..353dac420f34 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -378,7 +378,14 @@ enum dpcd_downstream_port_detailed_type { union dwnstream_port_caps_byte2 { struct { uint8_t MAX_BITS_PER_COLOR_COMPONENT:2; +#if defined(CONFIG_DRM_AMD_DC_DCN) + uint8_t MAX_ENCODED_LINK_BW_SUPPORT:3; + uint8_t SOURCE_CONTROL_MODE_SUPPORT:1; + uint8_t CONCURRENT_LINK_BRING_UP_SEQ_SUPPORT:1; + uint8_t RESERVED:1; +#else uint8_t RESERVED:6; +#endif } bits; uint8_t raw; }; @@ -416,6 +423,30 @@ union dwnstream_port_caps_byte3_hdmi { uint8_t raw; }; +#if defined(CONFIG_DRM_AMD_DC_DCN) +union hdmi_sink_encoded_link_bw_support { + struct { + uint8_t HDMI_SINK_ENCODED_LINK_BW_SUPPORT:3; + uint8_t RESERVED:5; + } bits; + uint8_t raw; +}; + +union hdmi_encoded_link_bw { + struct { + uint8_t FRL_MODE:1; // Bit 0 + uint8_t BW_9Gbps:1; + uint8_t BW_18Gbps:1; + uint8_t BW_24Gbps:1; + uint8_t BW_32Gbps:1; + uint8_t BW_40Gbps:1; + uint8_t BW_48Gbps:1; + uint8_t RESERVED:1; // Bit 7 + } bits; + uint8_t raw; +}; +#endif + /*4-byte structure for detailed capabilities of a down-stream port (DP-to-TMDS converter).*/ union dwnstream_portxcaps { @@ -852,6 +883,15 @@ struct psr_caps { unsigned char psr_version; unsigned int psr_rfb_setup_time; bool psr_exit_link_training_required; + unsigned char edp_revision; + unsigned char support_ver; + bool su_granularity_required; + bool y_coordinate_required; + uint8_t su_y_granularity; + bool alpm_cap; + bool standby_support; + uint8_t rate_control_caps; + unsigned int psr_power_opt_flag; }; /* Length of router topology ID read from DPCD in bytes. */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 52355fe6994c..eac34f591a3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -741,6 +741,9 @@ struct dc_dsc_config { uint32_t version_minor; /* DSC minor version. Full version is formed as 1.version_minor. */ bool ycbcr422_simple; /* Tell DSC engine to convert YCbCr 4:2:2 to 'YCbCr 4:2:2 simple'. */ int32_t rc_buffer_size; /* DSC RC buffer block size in bytes */ +#if defined(CONFIG_DRM_AMD_DC_DCN) + bool is_frl; /* indicate if DSC is applied based on HDMI FRL sink's capability */ +#endif bool is_dp; /* indicate if DSC is applied based on DP's capability */ }; struct dc_crtc_timing { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index fad3d883ed89..c0e37ad0e26c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -30,6 +30,8 @@ #include "dc_types.h" #include "grph_object_defs.h" +struct link_resource; + enum dc_link_fec_state { dc_link_fec_not_ready, dc_link_fec_ready, @@ -113,6 +115,7 @@ struct dc_link { * DIG encoder. */ bool is_dig_mapping_flexible; bool hpd_status; /* HPD status of link without physical HPD pin. */ + bool is_hpd_pending; /* Indicates a new received hpd */ bool edp_sink_present; @@ -159,9 +162,6 @@ struct dc_link { struct panel_cntl *panel_cntl; struct link_encoder *link_enc; -#if defined(CONFIG_DRM_AMD_DC_DCN) - struct hpo_dp_link_encoder *hpo_dp_link_enc; -#endif struct graphics_object_id link_id; /* Endpoint type distinguishes display endpoints which do not have entries * in the BIOS connector table from those that do. Helps when tracking link @@ -185,6 +185,10 @@ struct dc_link { /* Drive settings read from integrated info table */ struct dc_lane_settings bios_forced_drive_settings; + /* Vendor specific LTTPR workaround variables */ + uint8_t vendor_specific_lttpr_link_rate_wa; + bool apply_vendor_specific_lttpr_link_rate_wa; + /* MST record stream using this link */ struct link_flags { bool dp_keep_receiver_powered; @@ -291,6 +295,10 @@ bool dc_link_setup_psr(struct dc_link *dc_link, void dc_link_get_psr_residency(const struct dc_link *link, uint32_t *residency); +void dc_link_blank_all_dp_displays(struct dc *dc); + +void dc_link_blank_dp_stream(struct dc_link *link, bool hw_init); + /* Request DC to detect if there is a Panel connected. * boot - If this call is during initial boot. * Return false for any type of detection failure or MST detection @@ -302,7 +310,7 @@ enum dc_detect_reason { DETECT_REASON_HPD, DETECT_REASON_HPDRX, DETECT_REASON_FALLBACK, - DETECT_REASON_RETRAIN + DETECT_REASON_RETRAIN, }; bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); @@ -350,14 +358,17 @@ void dc_link_remove_remote_sink( void dc_link_dp_set_drive_settings( struct dc_link *link, + const struct link_resource *link_res, struct link_training_settings *lt_settings); bool dc_link_dp_perform_link_training_skip_aux( struct dc_link *link, + const struct link_resource *link_res, const struct dc_link_settings *link_setting); enum link_training_result dc_link_dp_perform_link_training( struct dc_link *link, + const struct link_resource *link_res, const struct dc_link_settings *link_settings, bool skip_video_pattern); @@ -365,6 +376,7 @@ bool dc_link_dp_sync_lt_begin(struct dc_link *link); enum link_training_result dc_link_dp_sync_lt_attempt( struct dc_link *link, + const struct link_resource *link_res, struct dc_link_settings *link_setting, struct dc_link_training_overrides *lt_settings); @@ -442,6 +454,13 @@ bool dc_link_is_fec_supported(const struct dc_link *link); bool dc_link_should_enable_fec(const struct dc_link *link); #if defined(CONFIG_DRM_AMD_DC_DCN) +uint32_t dc_link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw); enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(const struct dc_link *link); #endif + +const struct link_resource *dc_link_get_cur_link_res(const struct dc_link *link); +/* take a snapshot of current link resource allocation state */ +void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map); +/* restore link resource allocation state from a snapshot */ +void dc_restore_link_res_map(const struct dc *dc, uint32_t *map); #endif /* DC_LINK_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 388457ffc0a8..0285a4b38d05 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -430,6 +430,7 @@ struct dc_dongle_caps { uint32_t dp_hdmi_max_bpc; uint32_t dp_hdmi_max_pixel_clk_in_khz; #if defined(CONFIG_DRM_AMD_DC_DCN) + uint32_t dp_hdmi_frl_max_link_bw_in_kbps; struct dc_dongle_dfp_cap_ext dfp_cap_ext; #endif }; @@ -950,6 +951,7 @@ enum dc_gpu_mem_alloc_type { enum dc_psr_version { DC_PSR_VERSION_1 = 0, + DC_PSR_VERSION_SU_1 = 1, DC_PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 27218ede150a..70eaac017624 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -67,9 +67,6 @@ static void write_indirect_azalia_reg(struct audio *audio, /* AZALIA_F0_CODEC_ENDPOINT_DATA endpoint data */ REG_SET(AZALIA_F0_CODEC_ENDPOINT_DATA, 0, AZALIA_ENDPOINT_REG_DATA, reg_data); - - DC_LOG_HW_AUDIO("AUDIO:write_indirect_azalia_reg: index: %u data: %u\n", - reg_index, reg_data); } static uint32_t read_indirect_azalia_reg(struct audio *audio, uint32_t reg_index) @@ -85,9 +82,6 @@ static uint32_t read_indirect_azalia_reg(struct audio *audio, uint32_t reg_index /* AZALIA_F0_CODEC_ENDPOINT_DATA endpoint data */ value = REG_READ(AZALIA_F0_CODEC_ENDPOINT_DATA); - DC_LOG_HW_AUDIO("AUDIO:read_indirect_azalia_reg: index: %u data: %u\n", - reg_index, value); - return value; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h index 5622d5e32d81..dbd2cfed0603 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h @@ -113,6 +113,7 @@ struct dce_audio_shift { uint8_t DCCG_AUDIO_DTO2_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO0_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO1_USE_512FBR_DTO; + uint32_t CLOCK_GATING_DISABLE; }; struct dce_audio_mask { @@ -132,6 +133,7 @@ struct dce_audio_mask { uint32_t DCCG_AUDIO_DTO2_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO0_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO1_USE_512FBR_DTO; + uint32_t CLOCK_GATING_DISABLE; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 1e77ffee71b3..f1c61d5aee6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -788,8 +788,9 @@ static bool dce110_link_encoder_validate_hdmi_output( crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) return false; - if (!enc110->base.features.flags.bits.HDMI_6GB_EN && - adjusted_pix_clk_khz >= 300000) + if ((!enc110->base.features.flags.bits.HDMI_6GB_EN || + enc110->base.ctx->dc->debug.hdmi20_disable) && + adjusted_pix_clk_khz >= 300000) return false; if (enc110->base.ctx->dc->debug.hdmi20_disable && crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 9baf8ca0a920..b1b2e3c6f379 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -56,8 +56,11 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_cmd_lock_hw hw_lock_cmd) { union dmub_inbox0_data_register data = { 0 }; + data.inbox0_cmd_lock_hw = hw_lock_cmd; + dc_dmub_srv_clear_inbox0_ack(dmub_srv); dc_dmub_srv_send_inbox0_cmd(dmub_srv, data); + dc_dmub_srv_wait_for_inbox0_ack(dmub_srv); } bool should_use_dmub_lock(struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 90eb8eedacf2..87ed48d5530d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -230,7 +230,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_ /** * Set PSR power optimization flags. */ -static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt) +static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt, uint8_t panel_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; @@ -239,7 +239,9 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt cmd.psr_set_power_opt.header.type = DMUB_CMD__PSR; cmd.psr_set_power_opt.header.sub_type = DMUB_CMD__SET_PSR_POWER_OPT; cmd.psr_set_power_opt.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_power_opt_data); + cmd.psr_set_power_opt.psr_set_power_opt_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt; + cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); @@ -327,6 +329,16 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->fec_enable_delay_in100us = link->dc->debug.fec_enable_delay_in100us; copy_settings_data->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; copy_settings_data->panel_inst = panel_inst; + copy_settings_data->dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1); + + if (link->fec_state == dc_link_fec_enabled && + (!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1, + sizeof(link->dpcd_caps.sink_dev_id_str)) || + !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_2, + sizeof(link->dpcd_caps.sink_dev_id_str)))) + copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 1; + else + copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 0; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h index 5dbd479660f1..01acc01cc191 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h @@ -46,7 +46,7 @@ struct dmub_psr_funcs { void (*psr_force_static)(struct dmub_psr *dmub, uint8_t panel_inst); void (*psr_get_residency)(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst); - void (*psr_set_power_opt)(struct dmub_psr *dmub, unsigned int power_opt); + void (*psr_set_power_opt)(struct dmub_psr *dmub, unsigned int power_opt, uint8_t panel_inst); }; struct dmub_psr *dmub_psr_create(struct dc_context *ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 24e47df526f6..78192ecba102 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -69,6 +69,8 @@ #include "dcn10/dcn10_hw_sequencer.h" +#include "dce110_hw_sequencer.h" + #define GAMMA_HW_POINTS_NUM 256 /* @@ -1602,6 +1604,11 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); + if (dc_is_dp_signal(pipe_ctx->stream->signal) && + pipe_ctx->stream_res.stream_enc->funcs->reset_fifo) + pipe_ctx->stream_res.stream_enc->funcs->reset_fifo( + pipe_ctx->stream_res.stream_enc); + if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG); @@ -1655,30 +1662,12 @@ static enum dc_status apply_single_controller_ctx_to_hw( static void power_down_encoders(struct dc *dc) { - int i, j; + int i; for (i = 0; i < dc->link_count; i++) { enum signal_type signal = dc->links[i]->connector_signal; - if ((signal == SIGNAL_TYPE_EDP) || - (signal == SIGNAL_TYPE_DISPLAY_PORT)) { - if (dc->links[i]->link_enc->funcs->get_dig_frontend && - dc->links[i]->link_enc->funcs->is_dig_enabled(dc->links[i]->link_enc)) { - unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend( - dc->links[i]->link_enc); - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - - if (!dc->links[i]->wa_flags.dp_keep_receiver_powered) - dp_receiver_power_ctrl(dc->links[i], false); - } + dc_link_blank_dp_stream(dc->links[i], false); if (signal != SIGNAL_TYPE_EDP) signal = SIGNAL_TYPE_NONE; @@ -1805,7 +1794,6 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) struct dc_stream_state *edp_streams[MAX_NUM_EDP]; struct dc_link *edp_link_with_sink = NULL; struct dc_link *edp_link = NULL; - struct dc_stream_state *edp_stream = NULL; struct dce_hwseq *hws = dc->hwseq; int edp_with_sink_num; int edp_num; @@ -1826,27 +1814,29 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) get_edp_streams(context, edp_streams, &edp_stream_num); // Check fastboot support, disable on DCE8 because of blank screens - if (edp_num && dc->ctx->dce_version != DCE_VERSION_8_0 && + if (edp_num && edp_stream_num && dc->ctx->dce_version != DCE_VERSION_8_0 && dc->ctx->dce_version != DCE_VERSION_8_1 && dc->ctx->dce_version != DCE_VERSION_8_3) { for (i = 0; i < edp_num; i++) { edp_link = edp_links[i]; + if (edp_link != edp_streams[0]->link) + continue; // enable fastboot if backend is enabled on eDP - if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) { - /* Set optimization flag on eDP stream*/ - if (edp_stream_num && edp_link->link_status.link_active) { - edp_stream = edp_streams[0]; - can_apply_edp_fast_boot = !is_edp_ilr_optimization_required(edp_stream->link, &edp_stream->timing); - edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot; - if (can_apply_edp_fast_boot) - DC_LOG_EVENT_LINK_TRAINING("eDP fast boot disabled to optimize link rate\n"); - - break; - } + if (edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + edp_link->link_status.link_active) { + struct dc_stream_state *edp_stream = edp_streams[0]; + + can_apply_edp_fast_boot = !is_edp_ilr_optimization_required(edp_stream->link, &edp_stream->timing); + edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot; + if (can_apply_edp_fast_boot) + DC_LOG_EVENT_LINK_TRAINING("eDP fast boot disabled to optimize link rate\n"); + + break; } } // We are trying to enable eDP, don't power down VDD - if (edp_stream_num) + if (can_apply_edp_fast_boot) keep_edp_vdd_on = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 91fdfcd8a14e..db7ca4b0cdb9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -119,14 +119,6 @@ void dpp_read_state(struct dpp *dpp_base, } } -/* Program gamut remap in bypass mode */ -void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp) -{ - REG_SET(CM_GAMUT_REMAP_CONTROL, 0, - CM_GAMUT_REMAP_MODE, 0); - /* Gamut remap in bypass */ -} - #define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) bool dpp1_get_optimal_number_of_taps( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 44293d66b46b..f607a0e28f14 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -39,6 +39,10 @@ #define BLACK_OFFSET_RGB_Y 0x0 #define BLACK_OFFSET_CBCR 0x8000 +#define VISUAL_CONFIRM_RECT_HEIGHT_DEFAULT 3 +#define VISUAL_CONFIRM_RECT_HEIGHT_MIN 1 +#define VISUAL_CONFIRM_RECT_HEIGHT_MAX 10 + #define REG(reg)\ dpp->tf_regs->reg @@ -85,51 +89,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -static void dpp1_dscl_set_overscan( - struct dcn10_dpp *dpp, - const struct scaler_data *data) -{ - uint32_t left = data->recout.x; - uint32_t top = data->recout.y; - - int right = data->h_active - data->recout.x - data->recout.width; - int bottom = data->v_active - data->recout.y - data->recout.height; - - if (right < 0) { - BREAK_TO_DEBUGGER(); - right = 0; - } - if (bottom < 0) { - BREAK_TO_DEBUGGER(); - bottom = 0; - } - - REG_SET_2(DSCL_EXT_OVERSCAN_LEFT_RIGHT, 0, - EXT_OVERSCAN_LEFT, left, - EXT_OVERSCAN_RIGHT, right); - - REG_SET_2(DSCL_EXT_OVERSCAN_TOP_BOTTOM, 0, - EXT_OVERSCAN_BOTTOM, bottom, - EXT_OVERSCAN_TOP, top); -} - -static void dpp1_dscl_set_otg_blank( - struct dcn10_dpp *dpp, const struct scaler_data *data) -{ - uint32_t h_blank_start = data->h_active; - uint32_t h_blank_end = 0; - uint32_t v_blank_start = data->v_active; - uint32_t v_blank_end = 0; - - REG_SET_2(OTG_H_BLANK, 0, - OTG_H_BLANK_START, h_blank_start, - OTG_H_BLANK_END, h_blank_end); - - REG_SET_2(OTG_V_BLANK, 0, - OTG_V_BLANK_START, v_blank_start, - OTG_V_BLANK_END, v_blank_end); -} - static int dpp1_dscl_get_pixel_depth_val(enum lb_pixel_depth depth) { if (depth == LB_PIXEL_DEPTH_30BPP) @@ -551,58 +510,6 @@ static enum lb_memory_config dpp1_dscl_find_lb_memory_config(struct dcn10_dpp *d return LB_MEMORY_CONFIG_0; } -void dpp1_dscl_set_scaler_auto_scale( - struct dpp *dpp_base, - const struct scaler_data *scl_data) -{ - enum lb_memory_config lb_config; - struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode( - dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); - bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN - && scl_data->format <= PIXEL_FORMAT_VIDEO_END; - - dpp1_dscl_set_overscan(dpp, scl_data); - - dpp1_dscl_set_otg_blank(dpp, scl_data); - - REG_UPDATE(SCL_MODE, DSCL_MODE, dscl_mode); - - if (dscl_mode == DSCL_MODE_DSCL_BYPASS) - return; - - lb_config = dpp1_dscl_find_lb_memory_config(dpp, scl_data); - dpp1_dscl_set_lb(dpp, &scl_data->lb_params, lb_config); - - if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) - return; - - /* TODO: v_min */ - REG_SET_3(DSCL_AUTOCAL, 0, - AUTOCAL_MODE, AUTOCAL_MODE_AUTOSCALE, - AUTOCAL_NUM_PIPE, 0, - AUTOCAL_PIPE_ID, 0); - - /* Black offsets */ - if (ycbcr) - REG_SET_2(SCL_BLACK_OFFSET, 0, - SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, - SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_CBCR); - else - - REG_SET_2(SCL_BLACK_OFFSET, 0, - SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, - SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_RGB_Y); - - REG_SET_4(SCL_TAP_CONTROL, 0, - SCL_V_NUM_TAPS, scl_data->taps.v_taps - 1, - SCL_H_NUM_TAPS, scl_data->taps.h_taps - 1, - SCL_V_NUM_TAPS_C, scl_data->taps.v_taps_c - 1, - SCL_H_NUM_TAPS_C, scl_data->taps.h_taps_c - 1); - - dpp1_dscl_set_scl_filter(dpp, scl_data, ycbcr); -} - static void dpp1_dscl_set_manual_ratio_init( struct dcn10_dpp *dpp, const struct scaler_data *data) @@ -685,9 +592,17 @@ static void dpp1_dscl_set_recout(struct dcn10_dpp *dpp, const struct rect *recout) { int visual_confirm_on = 0; + unsigned short visual_confirm_rect_height = VISUAL_CONFIRM_RECT_HEIGHT_DEFAULT; + if (dpp->base.ctx->dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) visual_confirm_on = 1; + /* Check bounds to ensure the VC bar height was set to a sane value */ + if ((dpp->base.ctx->dc->debug.visual_confirm_rect_height >= VISUAL_CONFIRM_RECT_HEIGHT_MIN) && + (dpp->base.ctx->dc->debug.visual_confirm_rect_height <= VISUAL_CONFIRM_RECT_HEIGHT_MAX)) { + visual_confirm_rect_height = dpp->base.ctx->dc->debug.visual_confirm_rect_height; + } + REG_SET_2(RECOUT_START, 0, /* First pixel of RECOUT in the active OTG area */ RECOUT_START_X, recout->x, @@ -699,7 +614,7 @@ static void dpp1_dscl_set_recout(struct dcn10_dpp *dpp, RECOUT_WIDTH, recout->width, /* Number of RECOUT vertical lines */ RECOUT_HEIGHT, recout->height - - visual_confirm_on * 2 * (dpp->base.inst + 1)); + - visual_confirm_on * 2 * (dpp->base.inst + visual_confirm_rect_height)); } /** diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 04d7bddc915b..530a72e3eefe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -77,9 +77,9 @@ #define PGFSM_POWER_ON 0 #define PGFSM_POWER_OFF 2 -void print_microsec(struct dc_context *dc_ctx, - struct dc_log_buffer_ctx *log_ctx, - uint32_t ref_cycle) +static void print_microsec(struct dc_context *dc_ctx, + struct dc_log_buffer_ctx *log_ctx, + uint32_t ref_cycle) { const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; static const unsigned int frac = 1000; @@ -132,7 +132,8 @@ static void log_mpc_crc(struct dc *dc, REG_READ(DPP_TOP0_DPP_CRC_VAL_B_A), REG_READ(DPP_TOP0_DPP_CRC_VAL_R_G)); } -void dcn10_log_hubbub_state(struct dc *dc, struct dc_log_buffer_ctx *log_ctx) +static void dcn10_log_hubbub_state(struct dc *dc, + struct dc_log_buffer_ctx *log_ctx) { struct dc_context *dc_ctx = dc->ctx; struct dcn_hubbub_wm wm; @@ -467,8 +468,6 @@ void dcn10_log_hw_state(struct dc *dc, log_mpc_crc(dc, log_ctx); { - int hpo_dp_link_enc_count = 0; - if (pool->hpo_dp_stream_enc_count > 0) { DTN_INFO("DP HPO S_ENC: Enabled OTG Format Depth Vid SDP Compressed Link\n"); for (i = 0; i < pool->hpo_dp_stream_enc_count; i++) { @@ -499,18 +498,14 @@ void dcn10_log_hw_state(struct dc *dc, } /* log DP HPO L_ENC section if any hpo_dp_link_enc exists */ - for (i = 0; i < dc->link_count; i++) - if (dc->links[i]->hpo_dp_link_enc) - hpo_dp_link_enc_count++; - - if (hpo_dp_link_enc_count) { + if (pool->hpo_dp_link_enc_count) { DTN_INFO("DP HPO L_ENC: Enabled Mode Lanes Stream Slots VC Rate X VC Rate Y\n"); - for (i = 0; i < dc->link_count; i++) { - struct hpo_dp_link_encoder *hpo_dp_link_enc = dc->links[i]->hpo_dp_link_enc; + for (i = 0; i < pool->hpo_dp_link_enc_count; i++) { + struct hpo_dp_link_encoder *hpo_dp_link_enc = pool->hpo_dp_link_enc[i]; struct hpo_dp_link_enc_state hpo_dp_le_state = {0}; - if (hpo_dp_link_enc && hpo_dp_link_enc->funcs->read_state) { + if (hpo_dp_link_enc->funcs->read_state) { hpo_dp_link_enc->funcs->read_state(hpo_dp_link_enc, &hpo_dp_le_state); DTN_INFO("[%d]: %d %6s %d %d %d %d %d\n", hpo_dp_link_enc->inst, @@ -1362,11 +1357,53 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) tg->funcs->tg_init(tg); } + + /* Power gate DSCs */ + if (hws->funcs.dsc_pg_control != NULL) { + uint32_t num_opps = 0; + uint32_t opp_id_src0 = OPP_ID_INVALID; + uint32_t opp_id_src1 = OPP_ID_INVALID; + + // Step 1: To find out which OPTC is running & OPTC DSC is ON + // We can't use res_pool->res_cap->num_timing_generator to check + // Because it records display pipes default setting built in driver, + // not display pipes of the current chip. + // Some ASICs would be fused display pipes less than the default setting. + // In dcnxx_resource_construct function, driver would obatin real information. + for (i = 0; i < dc->res_pool->timing_generator_count; i++) { + uint32_t optc_dsc_state = 0; + struct timing_generator *tg = dc->res_pool->timing_generators[i]; + + if (tg->funcs->is_tg_enabled(tg)) { + if (tg->funcs->get_dsc_status) + tg->funcs->get_dsc_status(tg, &optc_dsc_state); + // Only one OPTC with DSC is ON, so if we got one result, we would exit this block. + // non-zero value is DSC enabled + if (optc_dsc_state != 0) { + tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1); + break; + } + } + } + + // Step 2: To power down DSC but skip DSC of running OPTC + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { + struct dcn_dsc_state s = {0}; + + dc->res_pool->dscs[i]->funcs->dsc_read_state(dc->res_pool->dscs[i], &s); + + if ((s.dsc_opp_source == opp_id_src0 || s.dsc_opp_source == opp_id_src1) && + s.dsc_clock_en && s.dsc_fw_en) + continue; + + hws->funcs.dsc_pg_control(hws, dc->res_pool->dscs[i]->inst, false); + } + } } void dcn10_init_hw(struct dc *dc) { - int i, j; + int i; struct abm *abm = dc->res_pool->abm; struct dmcu *dmcu = dc->res_pool->dmcu; struct dce_hwseq *hws = dc->hwseq; @@ -1468,43 +1505,8 @@ void dcn10_init_hw(struct dc *dc) dmub_enable_outbox_notification(dc); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) { - uint8_t dpcd_power_state = '\0'; - enum dc_status status = DC_ERROR_UNEXPECTED; - - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) - continue; - - /* DP 2.0 requires that LTTPR Caps be read first */ - dp_retrieve_lttpr_cap(dc->links[i]); - - /* - * If any of the displays are lit up turn them off. - * The reason is that some MST hubs cannot be turned off - * completely until we tell them to do so. - * If not turned off, then displays connected to MST hub - * won't light up. - */ - status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { - /* blank dp stream before power off receiver*/ - if (dc->links[i]->link_enc->funcs->get_dig_frontend) { - unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend(dc->links[i]->link_enc); - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - dp_receiver_power_ctrl(dc->links[i], false); - } - } - } + if (dc->config.power_down_display_on_boot) + dc_link_blank_all_dp_displays(dc); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -1970,10 +1972,9 @@ static bool wait_for_reset_trigger_to_occur( return rc; } -uint64_t reduceSizeAndFraction( - uint64_t *numerator, - uint64_t *denominator, - bool checkUint32Bounary) +static uint64_t reduceSizeAndFraction(uint64_t *numerator, + uint64_t *denominator, + bool checkUint32Bounary) { int i; bool ret = checkUint32Bounary == false; @@ -2021,7 +2022,7 @@ uint64_t reduceSizeAndFraction( return ret; } -bool is_low_refresh_rate(struct pipe_ctx *pipe) +static bool is_low_refresh_rate(struct pipe_ctx *pipe) { uint32_t master_pipe_refresh_rate = pipe->stream->timing.pix_clk_100hz * 100 / @@ -2030,7 +2031,8 @@ bool is_low_refresh_rate(struct pipe_ctx *pipe) return master_pipe_refresh_rate <= 30; } -uint8_t get_clock_divider(struct pipe_ctx *pipe, bool account_low_refresh_rate) +static uint8_t get_clock_divider(struct pipe_ctx *pipe, + bool account_low_refresh_rate) { uint32_t clock_divider = 1; uint32_t numpipes = 1; @@ -2050,10 +2052,8 @@ uint8_t get_clock_divider(struct pipe_ctx *pipe, bool account_low_refresh_rate) return clock_divider; } -int dcn10_align_pixel_clocks( - struct dc *dc, - int group_size, - struct pipe_ctx *grouped_pipes[]) +static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, + struct pipe_ctx *grouped_pipes[]) { struct dc_context *dc_ctx = dc->ctx; int i, master = -1, embedded = -1; @@ -2342,7 +2342,7 @@ static void mmhub_read_vm_context0_settings(struct dcn10_hubp *hubp1, } -void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp) +static void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); struct vm_system_aperture_param apt = {0}; @@ -2624,7 +2624,7 @@ static void dcn10_update_dchubp_dpp( /* new calculated dispclk, dppclk are stored in * context->bw_ctx.bw.dcn.clk.dispclk_khz / dppclk_khz. current * dispclk, dppclk are from dc->clk_mgr->clks.dispclk_khz. - * dcn_validate_bandwidth compute new dispclk, dppclk. + * dcn10_validate_bandwidth compute new dispclk, dppclk. * dispclk will put in use after optimize_bandwidth when * ramp_up_dispclk_with_dpp is called. * there are two places for dppclk be put in use. One location @@ -2638,7 +2638,7 @@ static void dcn10_update_dchubp_dpp( * for example, eDP + external dp, change resolution of DP from * 1920x1080x144hz to 1280x960x60hz. * before change: dispclk = 337889 dppclk = 337889 - * change mode, dcn_validate_bandwidth calculate + * change mode, dcn10_validate_bandwidth calculate * dispclk = 143122 dppclk = 143122 * update_dchubp_dpp be executed before dispclk be updated, * dispclk = 337889, but dppclk use new value dispclk /2 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index 34001a30d449..10e613ec7d24 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -78,6 +78,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index 2dc4b4e4ba02..f4b34c110eae 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -646,8 +646,9 @@ static bool dcn10_link_encoder_validate_hdmi_output( crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) return false; - if (!enc10->base.features.flags.bits.HDMI_6GB_EN && - adjusted_pix_clk_100hz >= 3000000) + if ((!enc10->base.features.flags.bits.HDMI_6GB_EN || + enc10->base.ctx->dc->debug.hdmi20_disable) && + adjusted_pix_clk_100hz >= 3000000) return false; if (enc10->base.ctx->dc->debug.hdmi20_disable && crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c index d54d731415d7..2c409356f512 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c @@ -348,36 +348,6 @@ void opp1_program_stereo( */ } -void opp1_program_oppbuf( - struct output_pixel_processor *opp, - struct oppbuf_params *oppbuf) -{ - struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp); - - /* Program the oppbuf active width to be the frame width from mpc */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, oppbuf->active_width); - - /* Specifies the number of segments in multi-segment mode (DP-MSO operation) - * description "In 1/2/4 segment mode, specifies the horizontal active width in pixels of the display panel. - * In 4 segment split left/right mode, specifies the horizontal 1/2 active width in pixels of the display panel. - * Used to determine segment boundaries in multi-segment mode. Used to determine the width of the vertical active space in 3D frame packed modes. - * OPPBUF_ACTIVE_WIDTH must be integer divisible by the total number of segments." - */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_DISPLAY_SEGMENTATION, oppbuf->mso_segmentation); - - /* description "Specifies the number of overlap pixels (1-8 overlapping pixels supported), used in multi-segment mode (DP-MSO operation)" */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_OVERLAP_PIXEL_NUM, oppbuf->mso_overlap_pixel_num); - - /* description "Specifies the number of times a pixel is replicated (0-15 pixel replications supported). - * A value of 0 disables replication. The total number of times a pixel is output is OPPBUF_PIXEL_REPETITION + 1." - */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, oppbuf->pixel_repetition); - - /* Controls the number of padded pixels at the end of a segment */ - if (REG(OPPBUF_CONTROL1)) - REG_UPDATE(OPPBUF_CONTROL1, OPPBUF_NUM_SEGMENT_PADDED_PIXELS, oppbuf->num_segment_padded_pixels); -} - void opp1_pipe_clock_control(struct output_pixel_processor *opp, bool enable) { struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 3d2a2848857a..b1671b00ce40 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -132,22 +132,6 @@ void optc1_setup_vertical_interrupt2( } /** - * Vupdate keepout can be set to a window to block the update lock for that pipe from changing. - * Start offset begins with vstartup and goes for x number of clocks, - * end offset starts from end of vupdate to x number of clocks. - */ -void optc1_set_vupdate_keepout(struct timing_generator *optc, - struct vupdate_keepout_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_3(OTG_VUPDATE_KEEPOUT, 0, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, params->start_offset, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, params->end_offset, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, params->enable); -} - -/** * program_timing_generator used by mode timing set * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition. * Including SYNC. Call BIOS command table to program Timings. @@ -876,7 +860,7 @@ void optc1_set_static_screen_control( OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); } -void optc1_setup_manual_trigger(struct timing_generator *optc) +static void optc1_setup_manual_trigger(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -894,7 +878,7 @@ void optc1_setup_manual_trigger(struct timing_generator *optc) OTG_TRIGA_CLEAR, 1); } -void optc1_program_manual_trigger(struct timing_generator *optc) +static void optc1_program_manual_trigger(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index f37551e00023..858b72149897 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -686,9 +686,8 @@ static struct output_pixel_processor *dcn10_opp_create( return &opp->base; } -struct dce_aux *dcn10_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_aux *dcn10_aux_engine_create(struct dc_context *ctx, + uint32_t inst) { struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); @@ -724,9 +723,8 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) }; -struct dce_i2c_hw *dcn10_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn10_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); @@ -805,7 +803,7 @@ static const struct encoder_feature_support link_enc_feature = { .flags.bits.IS_TPS4_CAPABLE = true }; -struct link_encoder *dcn10_link_encoder_create( +static struct link_encoder *dcn10_link_encoder_create( const struct encoder_init_data *enc_init_data) { struct dcn10_link_encoder *enc10 = @@ -847,7 +845,7 @@ static struct panel_cntl *dcn10_panel_cntl_create(const struct panel_cntl_init_d return &panel_cntl->base; } -struct clock_source *dcn10_clock_source_create( +static struct clock_source *dcn10_clock_source_create( struct dc_context *ctx, struct dc_bios *bios, enum clock_source_id id, @@ -945,7 +943,7 @@ static const struct resource_create_funcs res_create_maximus_funcs = { .create_hwseq = dcn10_hwseq_create, }; -void dcn10_clock_source_destroy(struct clock_source **clk_src) +static void dcn10_clock_source_destroy(struct clock_source **clk_src) { kfree(TO_DCE110_CLK_SRC(*clk_src)); *clk_src = NULL; @@ -978,10 +976,8 @@ static void dcn10_resource_destruct(struct dcn10_resource_pool *pool) pool->base.mpc = NULL; } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.opps[i] != NULL) @@ -1011,14 +1007,10 @@ static void dcn10_resource_destruct(struct dcn10_resource_pool *pool) for (i = 0; i < pool->base.res_cap->num_ddc; i++) { if (pool->base.engines[i] != NULL) dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; } for (i = 0; i < pool->base.audio_count; i++) { @@ -1128,7 +1120,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -enum dc_status dcn10_add_stream_to_ctx( +static enum dc_status dcn10_add_stream_to_ctx( struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) @@ -1320,7 +1312,7 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .destroy = dcn10_destroy_resource_pool, .link_enc_create = dcn10_link_encoder_create, .panel_cntl_create = dcn10_panel_cntl_create, - .validate_bandwidth = dcn_validate_bandwidth, + .validate_bandwidth = dcn10_validate_bandwidth, .acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer, .validate_plane = dcn10_validate_plane, .validate_global = dcn10_validate_global, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index b0c08ee6bc2c..bf4436d7aaab 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -902,6 +902,19 @@ void enc1_stream_encoder_stop_dp_info_packets( } +void enc1_stream_encoder_reset_fifo( + struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* set DIG_START to 0x1 to reset FIFO */ + REG_UPDATE(DIG_FE_CNTL, DIG_START, 1); + udelay(100); + + /* write 0 to take the FIFO out of reset */ + REG_UPDATE(DIG_FE_CNTL, DIG_START, 0); +} + void enc1_stream_encoder_dp_blank( struct dc_link *link, struct stream_encoder *enc) @@ -1587,6 +1600,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = { enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, + .reset_fifo = + enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index 687d7e4bf7ca..a146a41f68e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -626,6 +626,9 @@ void enc1_stream_encoder_send_immediate_sdp_message( void enc1_stream_encoder_stop_dp_info_packets( struct stream_encoder *enc); +void enc1_stream_encoder_reset_fifo( + struct stream_encoder *enc); + void enc1_stream_encoder_dp_blank( struct dc_link *link, struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index a9e420c7d75a..970b65efeac1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -251,20 +251,6 @@ static void dpp2_cnv_setup ( } -void dpp2_cnv_set_bias_scale( - struct dpp *dpp_base, - struct dc_bias_and_scale *bias_and_scale) -{ - struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); - - REG_UPDATE(FCNV_FP_BIAS_R, FCNV_FP_BIAS_R, bias_and_scale->bias_red); - REG_UPDATE(FCNV_FP_BIAS_G, FCNV_FP_BIAS_G, bias_and_scale->bias_green); - REG_UPDATE(FCNV_FP_BIAS_B, FCNV_FP_BIAS_B, bias_and_scale->bias_blue); - REG_UPDATE(FCNV_FP_SCALE_R, FCNV_FP_SCALE_R, bias_and_scale->scale_red); - REG_UPDATE(FCNV_FP_SCALE_G, FCNV_FP_SCALE_G, bias_and_scale->scale_green); - REG_UPDATE(FCNV_FP_SCALE_B, FCNV_FP_SCALE_B, bias_and_scale->scale_blue); -} - /*compute the maximum number of lines that we can fit in the line buffer*/ void dscl2_calc_lb_num_partitions( const struct scaler_data *scl_data, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index 79b640e202eb..ef5c4c0f4d6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -162,6 +162,8 @@ static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_ds REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width); REG_GET(DSCC_PPS_CONFIG2, PIC_HEIGHT, &s->dsc_pic_height); REG_GET(DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, &s->dsc_slice_bpg_offset); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &s->dsc_fw_en, + DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c index 880954ac0b02..994fb732a7cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c @@ -527,7 +527,7 @@ static const uint16_t filter_12tap_16p_183[108] = { 0, 84, 16328, 16032, 416, 1944, 1944, 416, 16032, 16328, 84, 0, }; -const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio) +static const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio) { if (ratio.value < dc_fixpt_one.value) return filter_3tap_16p_upscale; @@ -539,7 +539,7 @@ const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio) return filter_3tap_16p_183; } -const uint16_t *wbscl_get_filter_4tap_16p(struct fixed31_32 ratio) +static const uint16_t *wbscl_get_filter_4tap_16p(struct fixed31_32 ratio) { if (ratio.value < dc_fixpt_one.value) return filter_4tap_16p_upscale; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 5adf42a7cc27..dc1752e9f461 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -192,9 +192,8 @@ void hubp2_vready_at_or_After_vsync(struct hubp *hubp, REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value); } -void hubp2_program_requestor( - struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs) +static void hubp2_program_requestor(struct hubp *hubp, + struct _vcs_dpi_display_rq_regs_st *rq_regs) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -930,6 +929,16 @@ bool hubp2_is_flip_pending(struct hubp *hubp) void hubp2_set_blank(struct hubp *hubp, bool blank) { + hubp2_set_blank_regs(hubp, blank); + + if (blank) { + hubp->mpcc_id = 0xf; + hubp->opp_id = OPP_ID_INVALID; + } +} + +void hubp2_set_blank_regs(struct hubp *hubp, bool blank) +{ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); uint32_t blank_en = blank ? 1 : 0; @@ -951,9 +960,6 @@ void hubp2_set_blank(struct hubp *hubp, bool blank) HUBP_NO_OUTSTANDING_REQ, 1, 1, 200); } - - hubp->mpcc_id = 0xf; - hubp->opp_id = OPP_ID_INVALID; } } @@ -1285,7 +1291,7 @@ void hubp2_read_state(struct hubp *hubp) } -void hubp2_validate_dml_output(struct hubp *hubp, +static void hubp2_validate_dml_output(struct hubp *hubp, struct dc_context *ctx, struct _vcs_dpi_display_rq_regs_st *dml_rq_regs, struct _vcs_dpi_display_dlg_regs_st *dml_dlg_attr, @@ -1603,6 +1609,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .hubp_setup_interdependent = hubp2_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp2_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, + .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp2_dcc_control, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h index eea2254b15e4..9204c3ef323b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h @@ -330,6 +330,7 @@ void hubp2_program_surface_config( bool hubp2_is_flip_pending(struct hubp *hubp); void hubp2_set_blank(struct hubp *hubp, bool blank); +void hubp2_set_blank_regs(struct hubp *hubp, bool blank); void hubp2_cursor_set_position( struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index e6af99ae3d9f..4991e93e5308 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -615,6 +615,11 @@ void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->pipe_idx); } +void dcn20_disable_pixel_data(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank) +{ + dcn20_blank_pixel_data(dc, pipe_ctx, blank); +} + static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, int opp_cnt) { @@ -1080,10 +1085,8 @@ static void dcn20_power_on_plane( } } -void dcn20_enable_plane( - struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct dc_state *context) +static void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct dc_state *context) { //if (dc->debug.sanity_checks) { // dcn10_verify_allow_pstate_change_high(dc); @@ -1842,6 +1845,11 @@ void dcn20_optimize_bandwidth( dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); + if (dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && + context->bw_ctx.bw.dcn.clk.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->dc_mode_softmax_memclk); + dc->clk_mgr->funcs->update_clocks( dc->clk_mgr, context, @@ -2406,7 +2414,7 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->map_stream_to_link( pipe_ctx->stream_res.hpo_dp_stream_enc, pipe_ctx->stream_res.hpo_dp_stream_enc->inst, - link->hpo_dp_link_enc->inst); + pipe_ctx->link_res.hpo_dp_link_enc->inst); } if (!is_dp_128b_132b_signal(pipe_ctx) && link_enc) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h index 6bba191cd33e..33a36c02b2f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h @@ -53,6 +53,10 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx); void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn20_disable_pixel_data( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool blank); void dcn20_blank_pixel_data( struct dc *dc, struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index 5cfd4b0afea5..91e4885b743e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -27,6 +27,8 @@ #include "dcn10/dcn10_hw_sequencer.h" #include "dcn20_hwseq.h" +#include "dcn20_init.h" + static const struct hw_sequencer_funcs dcn20_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 947eb0df3f12..15734db0cdea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -400,10 +400,9 @@ static void mpc20_program_ogam_pwl( } -void apply_DEDCN20_305_wa( - struct mpc *mpc, - int mpcc_id, enum dc_lut_mode current_mode, - enum dc_lut_mode next_mode) +static void apply_DEDCN20_305_wa(struct mpc *mpc, int mpcc_id, + enum dc_lut_mode current_mode, + enum dc_lut_mode next_mode) { struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); @@ -525,7 +524,7 @@ static void mpc2_init_mpcc(struct mpcc *mpcc, int mpcc_inst) mpcc->sm_cfg.enable = false; } -struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) +static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) { struct mpcc *tmp_mpcc = tree->opp_list; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index c90b8516dcc1..0340fdd3f5fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -73,21 +73,6 @@ bool optc2_enable_crtc(struct timing_generator *optc) } /** - * DRR double buffering control to select buffer point - * for V_TOTAL, H_TOTAL, VTOTAL_MIN, VTOTAL_MAX, VTOTAL_MIN_SEL and VTOTAL_MAX_SEL registers - * Options: anytime, start of frame, dp start of frame (range timing) - */ -void optc2_set_timing_db_mode(struct timing_generator *optc, bool enable) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - uint32_t blank_data_double_buffer_enable = enable ? 1 : 0; - - REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, - OTG_RANGE_TIMING_DBUF_UPDATE_MODE, blank_data_double_buffer_enable); -} - -/** *For the below, I'm not sure how your GSL parameters are stored in your env, * so I will assume a gsl_params struct for now */ @@ -110,30 +95,6 @@ void optc2_set_gsl(struct timing_generator *optc, } -/* Use the gsl allow flip as the master update lock */ -void optc2_use_gsl_as_master_update_lock(struct timing_generator *optc, - const struct gsl_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_GSL_CONTROL, - OTG_MASTER_UPDATE_LOCK_GSL_EN, params->master_update_lock_gsl_en); -} - -/* You can control the GSL timing by limiting GSL to a window (X,Y) */ -void optc2_set_gsl_window(struct timing_generator *optc, - const struct gsl_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_2(OTG_GSL_WINDOW_X, 0, - OTG_GSL_WINDOW_START_X, params->gsl_window_start_x, - OTG_GSL_WINDOW_END_X, params->gsl_window_end_x); - REG_SET_2(OTG_GSL_WINDOW_Y, 0, - OTG_GSL_WINDOW_START_Y, params->gsl_window_start_y, - OTG_GSL_WINDOW_END_Y, params->gsl_window_end_y); -} - void optc2_set_gsl_source_select( struct timing_generator *optc, int group_idx, @@ -156,18 +117,6 @@ void optc2_set_gsl_source_select( } } -/* DSC encoder frame start controls: x = h position, line_num = # of lines from vstartup */ -void optc2_set_dsc_encoder_frame_start(struct timing_generator *optc, - int x_position, - int line_num) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_2(OTG_DSC_START_POSITION, 0, - OTG_DSC_START_POSITION_X, x_position, - OTG_DSC_START_POSITION_LINE_NUM, line_num); -} - /* Set DSC-related configuration. * dsc_mode: 0 disables DSC, other values enable DSC in specified format * sc_bytes_per_pixel: Bytes per pixel in u3.28 format @@ -190,6 +139,19 @@ void optc2_set_dsc_config(struct timing_generator *optc, OPTC_DSC_SLICE_WIDTH, dsc_slice_width); } +/* Get DSC-related configuration. + * dsc_mode: 0 disables DSC, other values enable DSC in specified format + */ +void optc2_get_dsc_status(struct timing_generator *optc, + uint32_t *dsc_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OPTC_DATA_FORMAT_CONTROL, + OPTC_DSC_MODE, dsc_mode); +} + + /*TEMP: Need to figure out inheritance model here.*/ bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) { @@ -280,8 +242,8 @@ void optc2_get_optc_source(struct timing_generator *optc, *num_of_src_opp = 1; } -void optc2_set_dwb_source(struct timing_generator *optc, - uint32_t dwb_pipe_inst) +static void optc2_set_dwb_source(struct timing_generator *optc, + uint32_t dwb_pipe_inst) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -293,7 +255,7 @@ void optc2_set_dwb_source(struct timing_generator *optc, OPTC_DWB1_SOURCE_SELECT, optc->inst); } -void optc2_align_vblanks( +static void optc2_align_vblanks( struct timing_generator *optc_master, struct timing_generator *optc_slave, uint32_t master_pixel_clock_100Hz, @@ -579,6 +541,7 @@ static struct timing_generator_funcs dcn20_tg_funcs = { .get_crc = optc1_get_crc, .configure_crc = optc2_configure_crc, .set_dsc_config = optc2_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = optc2_set_dwb_source, .set_odm_bypass = optc2_set_odm_bypass, .set_odm_combine = optc2_set_odm_combine, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h index be19a6885fbf..f7968b9ca16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h @@ -98,6 +98,9 @@ void optc2_set_dsc_config(struct timing_generator *optc, uint32_t dsc_bytes_per_pixel, uint32_t dsc_slice_width); +void optc2_get_dsc_status(struct timing_generator *optc, + uint32_t *dsc_mode); + void optc2_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 3883f918b3bb..2bc93df023ad 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1069,7 +1069,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -3093,8 +3093,7 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { struct dc_link *link = context->streams[0]->sink->link; - if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled) - || context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + if (link->link_index == 0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0) return DCN_ZSTATE_SUPPORT_ALLOW; else return DCN_ZSTATE_SUPPORT_DISALLOW; @@ -3796,6 +3795,8 @@ static bool dcn20_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.hdmi_frl_pcon_support = true; + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) { dc->debug = debug_defaults_drv; } else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c index aab25ca8343a..8a70f92795c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c @@ -593,6 +593,8 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = { enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, + .reset_fifo = + enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c index f5bf04f7da25..9a3402148fde 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c @@ -44,7 +44,8 @@ #define DC_LOGGER \ dccg->ctx->logger -void dccg201_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk) +static void dccg201_update_dpp_dto(struct dccg *dccg, int dpp_inst, + int req_dppclk) { /* vbios handles it */ } diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c index 6b6f74d4afd1..35dd4bac242a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c @@ -55,7 +55,7 @@ static void hubp201_program_surface_config( hubp1_program_pixel_format(hubp, format); } -void hubp201_program_deadline( +static void hubp201_program_deadline( struct hubp *hubp, struct _vcs_dpi_display_dlg_regs_st *dlg_attr, struct _vcs_dpi_display_ttu_regs_st *ttu_attr) @@ -63,9 +63,8 @@ void hubp201_program_deadline( hubp1_program_deadline(hubp, dlg_attr, ttu_attr); } -void hubp201_program_requestor( - struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs) +static void hubp201_program_requestor(struct hubp *hubp, + struct _vcs_dpi_display_rq_regs_st *rq_regs) { struct dcn201_hubp *hubp201 = TO_DCN201_HUBP(hubp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c index cfd09b3f705e..fe22530242d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c @@ -134,11 +134,12 @@ void dcn201_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx) PHYSICAL_ADDRESS_LOC addr; struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct dce_hwseq *hws = dc->hwseq; - struct dc_plane_address uma = plane_state->address; + struct dc_plane_address uma; if (plane_state == NULL) return; + uma = plane_state->address; addr_patched = patch_address_for_sbs_tb_stereo(pipe_ctx, &addr); plane_address_in_gpu_space_to_uma(hws, &uma); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c index a65e8f7801db..7f9ec59ef443 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c @@ -50,8 +50,8 @@ #define IND_REG(index) \ (enc10->link_regs->index) -void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc, - struct dc_link_settings *link_settings) +static void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc, + struct dc_link_settings *link_settings) { uint32_t value1, value2; struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); @@ -66,7 +66,7 @@ void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc, } } -bool dcn201_link_encoder_is_in_alt_mode(struct link_encoder *enc) +static bool dcn201_link_encoder_is_in_alt_mode(struct link_encoder *enc) { uint32_t value; struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0fa381088d1d..0bb7d3dd53fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -603,7 +603,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -672,9 +672,8 @@ static struct output_pixel_processor *dcn201_opp_create( return &opp->base; } -struct dce_aux *dcn201_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_aux *dcn201_aux_engine_create(struct dc_context *ctx, + uint32_t inst) { struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); @@ -706,9 +705,8 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) }; -struct dce_i2c_hw *dcn201_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn201_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); @@ -789,7 +787,7 @@ static const struct encoder_feature_support link_enc_feature = { .flags.bits.IS_TPS4_CAPABLE = true }; -struct link_encoder *dcn201_link_encoder_create( +static struct link_encoder *dcn201_link_encoder_create( const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -811,7 +809,7 @@ struct link_encoder *dcn201_link_encoder_create( return &enc10->base; } -struct clock_source *dcn201_clock_source_create( +static struct clock_source *dcn201_clock_source_create( struct dc_context *ctx, struct dc_bios *bios, enum clock_source_id id, @@ -906,7 +904,7 @@ static const struct resource_create_funcs res_create_maximus_funcs = { .create_hwseq = dcn201_hwseq_create, }; -void dcn201_clock_source_destroy(struct clock_source **clk_src) +static void dcn201_clock_source_destroy(struct clock_source **clk_src) { kfree(TO_DCE110_CLK_SRC(*clk_src)); *clk_src = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index 36044cb8ec83..c5e200d09038 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -680,7 +680,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage); } -void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub) +static void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); uint32_t prog_wm_value; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 3de1bcf9b3d8..58e459c7e7d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -183,7 +183,7 @@ static void hubp21_setup( } -void hubp21_set_viewport( +static void hubp21_set_viewport( struct hubp *hubp, const struct rect *viewport, const struct rect *viewport_c) @@ -225,8 +225,8 @@ void hubp21_set_viewport( SEC_VIEWPORT_Y_START_C, viewport_c->y); } -void hubp21_set_vm_system_aperture_settings(struct hubp *hubp, - struct vm_system_aperture_param *apt) +static void hubp21_set_vm_system_aperture_settings(struct hubp *hubp, + struct vm_system_aperture_param *apt) { struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); @@ -248,7 +248,7 @@ void hubp21_set_vm_system_aperture_settings(struct hubp *hubp, SYSTEM_ACCESS_MODE, 0x3); } -void hubp21_validate_dml_output(struct hubp *hubp, +static void hubp21_validate_dml_output(struct hubp *hubp, struct dc_context *ctx, struct _vcs_dpi_display_rq_regs_st *dml_rq_regs, struct _vcs_dpi_display_dlg_regs_st *dml_dlg_attr, @@ -664,7 +664,8 @@ static void program_surface_flip_and_addr(struct hubp *hubp, struct surface_flip flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS); } -void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_regs) +static void dmcub_PLAT_54186_wa(struct hubp *hubp, + struct surface_flip_registers *flip_regs) { struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv; struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); @@ -697,7 +698,7 @@ void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_ PERF_TRACE(); // TODO: remove after performance is stable. } -bool hubp21_program_surface_flip_and_addr( +static bool hubp21_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, bool flip_immediate) @@ -805,7 +806,7 @@ bool hubp21_program_surface_flip_and_addr( return true; } -void hubp21_init(struct hubp *hubp) +static void hubp21_init(struct hubp *hubp) { // DEDCN21-133: Inconsistent row starting line for flip between DPTE and Meta // This is a chicken bit to enable the ECO fix. diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 54c11ba550ae..b270f0b194dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -28,6 +28,8 @@ #include "dcn20/dcn20_hwseq.h" #include "dcn21_hwseq.h" +#include "dcn21_init.h" + static const struct hw_sequencer_funcs dcn21_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c index aa46c35b05a2..0a1ba6e7081c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c @@ -203,7 +203,7 @@ static bool update_cfg_data( return true; } -bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc) +static bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); int value; @@ -277,7 +277,7 @@ void dcn21_link_encoder_enable_dp_output( } -void dcn21_link_encoder_enable_dp_mst_output( +static void dcn21_link_encoder_enable_dp_mst_output( struct link_encoder *enc, const struct dc_link_settings *link_settings, enum clock_source_id clock_source) @@ -288,9 +288,8 @@ void dcn21_link_encoder_enable_dp_mst_output( dcn10_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source); } -void dcn21_link_encoder_disable_output( - struct link_encoder *enc, - enum signal_type signal) +static void dcn21_link_encoder_disable_output(struct link_encoder *enc, + enum signal_type signal) { dcn10_link_encoder_disable_output(enc, signal); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d452a0d1777e..e5cc6bf45743 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -784,9 +784,8 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) }; -struct dce_i2c_hw *dcn21_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn21_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); @@ -874,7 +873,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -1093,7 +1092,7 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s } } -void dcn21_calculate_wm( +static void dcn21_calculate_wm( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *out_pipe_cnt, @@ -1390,7 +1389,7 @@ validate_out: * with DC_FP_START()/DC_FP_END(). Use the same approach as for * dcn20_validate_bandwidth in dcn20_resource.c. */ -bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, +static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { bool voltage_supported; @@ -1480,8 +1479,8 @@ static struct hubbub *dcn21_hubbub_create(struct dc_context *ctx) return &hubbub->base; } -struct output_pixel_processor *dcn21_opp_create( - struct dc_context *ctx, uint32_t inst) +static struct output_pixel_processor *dcn21_opp_create(struct dc_context *ctx, + uint32_t inst) { struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); @@ -1496,9 +1495,8 @@ struct output_pixel_processor *dcn21_opp_create( return &opp->base; } -struct timing_generator *dcn21_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) +static struct timing_generator *dcn21_timing_generator_create(struct dc_context *ctx, + uint32_t instance) { struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL); @@ -1518,7 +1516,7 @@ struct timing_generator *dcn21_timing_generator_create( return &tgn10->base; } -struct mpc *dcn21_mpc_create(struct dc_context *ctx) +static struct mpc *dcn21_mpc_create(struct dc_context *ctx) { struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), GFP_KERNEL); @@ -1545,8 +1543,8 @@ static void read_dce_straps( } -struct display_stream_compressor *dcn21_dsc_create( - struct dc_context *ctx, uint32_t inst) +static struct display_stream_compressor *dcn21_dsc_create(struct dc_context *ctx, + uint32_t inst) { struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); @@ -1683,9 +1681,8 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -struct stream_encoder *dcn21_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) +static struct stream_encoder *dcn21_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) { struct dcn10_stream_encoder *enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); @@ -1917,7 +1914,7 @@ static int dcn21_populate_dml_pipes_from_context( return pipe_cnt; } -enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_state) +static enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_state) { enum dc_status result = DC_OK; @@ -2028,6 +2025,8 @@ static bool dcn21_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.hdmi_frl_pcon_support = true; + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c index ebd9c35c914f..8daa12730bc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c @@ -50,22 +50,6 @@ enc1->base.ctx -void convert_dc_info_packet_to_128( - const struct dc_info_packet *info_packet, - struct dc_info_packet_128 *info_packet_128) -{ - unsigned int i; - - info_packet_128->hb0 = info_packet->hb0; - info_packet_128->hb1 = info_packet->hb1; - info_packet_128->hb2 = info_packet->hb2; - info_packet_128->hb3 = info_packet->hb3; - - for (i = 0; i < 32; i++) { - info_packet_128->sb[i] = info_packet->sb[i]; - } - -} static void enc3_update_hdmi_info_packet( struct dcn10_stream_encoder *enc1, uint32_t packet_index, @@ -489,7 +473,7 @@ static void enc3_dp_set_odm_combine( } /* setup stream encoder in dvi mode */ -void enc3_stream_encoder_dvi_set_stream_attribute( +static void enc3_stream_encoder_dvi_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, bool is_dual_link) @@ -805,6 +789,8 @@ static const struct stream_encoder_funcs dcn30_str_enc_funcs = { enc3_stream_encoder_update_dp_info_packets, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, + .reset_fifo = + enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index c1d967ed6551..ab3918c0a15b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -41,8 +41,7 @@ dpp->tf_shift->field_name, dpp->tf_mask->field_name -void dpp30_read_state(struct dpp *dpp_base, - struct dcn_dpp_state *s) +static void dpp30_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s) { struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); @@ -373,7 +372,7 @@ void dpp3_set_cursor_attributes( } -bool dpp3_get_optimal_number_of_taps( +static bool dpp3_get_optimal_number_of_taps( struct dpp *dpp, struct scaler_data *scl_data, const struct scaling_taps *in_taps) @@ -474,22 +473,7 @@ bool dpp3_get_optimal_number_of_taps( return true; } -void dpp3_cnv_set_bias_scale( - struct dpp *dpp_base, - struct dc_bias_and_scale *bias_and_scale) -{ - struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); - - REG_UPDATE(FCNV_FP_BIAS_R, FCNV_FP_BIAS_R, bias_and_scale->bias_red); - REG_UPDATE(FCNV_FP_BIAS_G, FCNV_FP_BIAS_G, bias_and_scale->bias_green); - REG_UPDATE(FCNV_FP_BIAS_B, FCNV_FP_BIAS_B, bias_and_scale->bias_blue); - REG_UPDATE(FCNV_FP_SCALE_R, FCNV_FP_SCALE_R, bias_and_scale->scale_red); - REG_UPDATE(FCNV_FP_SCALE_G, FCNV_FP_SCALE_G, bias_and_scale->scale_green); - REG_UPDATE(FCNV_FP_SCALE_B, FCNV_FP_SCALE_B, bias_and_scale->scale_blue); -} - -void dpp3_deferred_update( - struct dpp *dpp_base) +static void dpp3_deferred_update(struct dpp *dpp_base) { int bypass_state; struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); @@ -751,8 +735,8 @@ static enum dc_lut_mode dpp3_get_blndgam_current(struct dpp *dpp_base) return mode; } -bool dpp3_program_blnd_lut( - struct dpp *dpp_base, const struct pwl_params *params) +static bool dpp3_program_blnd_lut(struct dpp *dpp_base, + const struct pwl_params *params) { enum dc_lut_mode current_mode; enum dc_lut_mode next_mode; @@ -1164,9 +1148,8 @@ static void dpp3_program_shaper_lutb_settings( } -bool dpp3_program_shaper( - struct dpp *dpp_base, - const struct pwl_params *params) +static bool dpp3_program_shaper(struct dpp *dpp_base, + const struct pwl_params *params) { enum dc_lut_mode current_mode; enum dc_lut_mode next_mode; @@ -1355,9 +1338,8 @@ static void dpp3_select_3dlut_ram_mask( REG_SET(CM_3DLUT_INDEX, 0, CM_3DLUT_INDEX, 0); } -bool dpp3_program_3dlut( - struct dpp *dpp_base, - struct tetrahedral_params *params) +static bool dpp3_program_3dlut(struct dpp *dpp_base, + struct tetrahedral_params *params) { enum dc_lut_mode mode; bool is_17x17x17; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index eac08926b574..6a4dcafb9bba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -490,6 +490,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_setup_interdependent = hubp2_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, + .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index df2717116604..1db1ca19411d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -344,6 +344,17 @@ void dcn30_enable_writeback( dwb->funcs->enable(dwb, &wb_info->dwb_params); } +void dcn30_prepare_bandwidth(struct dc *dc, + struct dc_state *context) +{ + if (dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && + context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); + + dcn20_prepare_bandwidth(dc, context); +} + void dcn30_disable_writeback( struct dc *dc, unsigned int dwb_pipe_inst) @@ -437,7 +448,7 @@ void dcn30_init_hw(struct dc *dc) struct dce_hwseq *hws = dc->hwseq; struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; - int i, j; + int i; int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; @@ -534,41 +545,8 @@ void dcn30_init_hw(struct dc *dc) hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) { - uint8_t dpcd_power_state = '\0'; - enum dc_status status = DC_ERROR_UNEXPECTED; - - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) - continue; - /* DP 2.0 states that LTTPR regs must be read first */ - dp_retrieve_lttpr_cap(dc->links[i]); - - /* if any of the displays are lit up turn them off */ - status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { - /* blank dp stream before power off receiver*/ - if (dc->links[i]->link_enc->funcs->get_dig_frontend) { - unsigned int fe; - - fe = dc->links[i]->link_enc->funcs->get_dig_frontend( - dc->links[i]->link_enc); - if (fe == ENGINE_ID_UNKNOWN) - continue; - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - dp_receiver_power_ctrl(dc->links[i], false); - } - } - } + if (dc->config.power_down_display_on_boot) + dc_link_blank_all_dp_displays(dc); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h index e9a0005288d3..73e7b690e82c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h @@ -27,7 +27,7 @@ #define __DC_HWSS_DCN30_H__ #include "hw_sequencer_private.h" - +#include "dcn20/dcn20_hwseq.h" struct dc; void dcn30_init_hw(struct dc *dc); @@ -47,6 +47,9 @@ void dcn30_disable_writeback( struct dc *dc, unsigned int dwb_pipe_inst); +void dcn30_prepare_bandwidth(struct dc *dc, + struct dc_state *context); + bool dcn30_mmhubbub_warmup( struct dc *dc, unsigned int num_dwb, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c index 93f32a312fee..bb347319de83 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c @@ -29,6 +29,8 @@ #include "dcn21/dcn21_hwseq.h" #include "dcn30_hwseq.h" +#include "dcn30_init.h" + static const struct hw_sequencer_funcs dcn30_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn30_init_hw, @@ -53,6 +55,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .enable_audio_stream = dce110_enable_audio_stream, .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, .pipe_control_lock = dcn20_pipe_control_lock, .interdependent_update_lock = dcn10_lock_all_pipes, .cursor_lock = dcn10_cursor_lock, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c index 1c4b171c68ad..7a93eff183d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c @@ -100,7 +100,7 @@ static void mmhubbub3_warmup_mcif(struct mcif_wb *mcif_wb, REG_UPDATE(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB_WARMUP_EN, false); } -void mmhubbub3_config_mcif_buf(struct mcif_wb *mcif_wb, +static void mmhubbub3_config_mcif_buf(struct mcif_wb *mcif_wb, struct mcif_buf_params *params, unsigned int dest_height) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index 95149734378b..0ce0d6165f43 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -1362,7 +1362,7 @@ uint32_t mpcc3_acquire_rmu(struct mpc *mpc, int mpcc_id, int rmu_idx) return -1; } -int mpcc3_release_rmu(struct mpc *mpc, int mpcc_id) +static int mpcc3_release_rmu(struct mpc *mpc, int mpcc_id) { struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); int rmu_idx; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 5d9e6413d67a..f5e8916601d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -332,6 +332,7 @@ static struct timing_generator_funcs dcn30_tg_funcs = { .get_crc = optc1_get_crc, .configure_crc = optc2_configure_crc, .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, .set_odm_bypass = optc3_set_odm_bypass, .set_odm_combine = optc3_set_odm_combine, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 79a66e0c4303..602ec9a08549 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -816,7 +816,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, @@ -840,7 +840,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -875,7 +875,7 @@ static const struct dc_debug_options debug_defaults_diags = { .use_max_lb = true }; -void dcn30_dpp_destroy(struct dpp **dpp) +static void dcn30_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN20_DPP(*dpp)); *dpp = NULL; @@ -992,7 +992,7 @@ static struct mpc *dcn30_mpc_create( return &mpc30->base; } -struct hubbub *dcn30_hubbub_create(struct dc_context *ctx) +static struct hubbub *dcn30_hubbub_create(struct dc_context *ctx) { int i; @@ -1143,9 +1143,8 @@ static struct afmt *dcn30_afmt_create( return &afmt3->base; } -struct stream_encoder *dcn30_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) +static struct stream_encoder *dcn30_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) { struct dcn10_stream_encoder *enc1; struct vpg *vpg; @@ -1179,8 +1178,7 @@ struct stream_encoder *dcn30_stream_encoder_create( return &enc1->base; } -struct dce_hwseq *dcn30_hwseq_create( - struct dc_context *ctx) +static struct dce_hwseq *dcn30_hwseq_create(struct dc_context *ctx) { struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); @@ -2639,6 +2637,8 @@ static bool dcn30_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.hdmi_frl_pcon_support = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c index e85b695f2351..3d42a1a337ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c @@ -30,6 +30,8 @@ #include "dcn30/dcn30_hwseq.h" #include "dcn301_hwseq.h" +#include "dcn301_init.h" + static const struct hw_sequencer_funcs dcn301_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c index 736bda30abc3..ad0df1a72a90 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c @@ -93,7 +93,7 @@ static unsigned int dcn301_get_16_bit_backlight_from_pwm(struct panel_cntl *pane return (uint32_t)(current_backlight); } -uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl) +static uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); uint32_t value; @@ -147,7 +147,7 @@ uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl) return current_backlight; } -void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl) +static void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(*panel_cntl); @@ -155,7 +155,7 @@ void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl) *panel_cntl = NULL; } -bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl) +static bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); uint32_t value; @@ -165,7 +165,7 @@ bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl) return value; } -bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl) +static bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); uint32_t pwr_seq_state, dig_on, dig_on_ovrd; @@ -177,7 +177,7 @@ bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl) return (pwr_seq_state == 1) || (dig_on == 1 && dig_on_ovrd == 1); } -void dcn301_store_backlight_level(struct panel_cntl *panel_cntl) +static void dcn301_store_backlight_level(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index fbaa03f26d8b..c1c6e602b06c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -656,7 +656,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -717,15 +717,13 @@ static const struct dc_debug_options debug_defaults_diags = { .use_max_lb = false, }; -void dcn301_dpp_destroy(struct dpp **dpp) +static void dcn301_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN20_DPP(*dpp)); *dpp = NULL; } -struct dpp *dcn301_dpp_create( - struct dc_context *ctx, - uint32_t inst) +static struct dpp *dcn301_dpp_create(struct dc_context *ctx, uint32_t inst) { struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); @@ -741,8 +739,8 @@ struct dpp *dcn301_dpp_create( kfree(dpp); return NULL; } -struct output_pixel_processor *dcn301_opp_create( - struct dc_context *ctx, uint32_t inst) +static struct output_pixel_processor *dcn301_opp_create(struct dc_context *ctx, + uint32_t inst) { struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); @@ -757,9 +755,7 @@ struct output_pixel_processor *dcn301_opp_create( return &opp->base; } -struct dce_aux *dcn301_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_aux *dcn301_aux_engine_create(struct dc_context *ctx, uint32_t inst) { struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); @@ -793,9 +789,7 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) }; -struct dce_i2c_hw *dcn301_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn301_i2c_hw_create(struct dc_context *ctx, uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); @@ -829,7 +823,7 @@ static struct mpc *dcn301_mpc_create( return &mpc30->base; } -struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) +static struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) { int i; @@ -860,9 +854,8 @@ struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) return &hubbub3->base; } -struct timing_generator *dcn301_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) +static struct timing_generator *dcn301_timing_generator_create( + struct dc_context *ctx, uint32_t instance) { struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL); @@ -894,7 +887,7 @@ static const struct encoder_feature_support link_enc_feature = { .flags.bits.IS_TPS4_CAPABLE = true }; -struct link_encoder *dcn301_link_encoder_create( +static struct link_encoder *dcn301_link_encoder_create( const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -915,7 +908,7 @@ struct link_encoder *dcn301_link_encoder_create( return &enc20->enc10.base; } -struct panel_cntl *dcn301_panel_cntl_create(const struct panel_cntl_init_data *init_data) +static struct panel_cntl *dcn301_panel_cntl_create(const struct panel_cntl_init_data *init_data) { struct dcn301_panel_cntl *panel_cntl = kzalloc(sizeof(struct dcn301_panel_cntl), GFP_KERNEL); @@ -997,9 +990,8 @@ static struct afmt *dcn301_afmt_create( return &afmt3->base; } -struct stream_encoder *dcn301_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) +static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) { struct dcn10_stream_encoder *enc1; struct vpg *vpg; @@ -1033,8 +1025,7 @@ struct stream_encoder *dcn301_stream_encoder_create( return &enc1->base; } -struct dce_hwseq *dcn301_hwseq_create( - struct dc_context *ctx) +static struct dce_hwseq *dcn301_hwseq_create(struct dc_context *ctx) { struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); @@ -1182,9 +1173,7 @@ static void dcn301_destruct(struct dcn301_resource_pool *pool) dcn_dccg_destroy(&pool->base.dccg); } -struct hubp *dcn301_hubp_create( - struct dc_context *ctx, - uint32_t inst) +static struct hubp *dcn301_hubp_create(struct dc_context *ctx, uint32_t inst) { struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); @@ -1201,7 +1190,7 @@ struct hubp *dcn301_hubp_create( return NULL; } -bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +static bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) { int i; uint32_t pipe_count = pool->res_cap->num_dwb; @@ -1226,7 +1215,7 @@ bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) return true; } -bool dcn301_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +static bool dcn301_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) { int i; uint32_t pipe_count = pool->res_cap->num_dwb; @@ -1449,9 +1438,7 @@ static bool dcn301_resource_construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.extended_aux_timeout_support = true; -#ifdef CONFIG_DRM_AMD_DC_DMUB dc->caps.dmcub_support = true; -#endif /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; @@ -1487,6 +1474,23 @@ static bool dcn301_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c index d88b9011c502..eb375f30f5bc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c @@ -29,6 +29,8 @@ #include "dc.h" +#include "dcn302_init.h" + void dcn302_hw_sequencer_construct(struct dc *dc) { dcn30_hw_sequencer_construct(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index fcf96cf08c76..2e9cbfa7663b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -211,7 +211,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -276,7 +276,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, .max_upscale_factor = { @@ -1557,6 +1557,24 @@ static bool dcn302_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, + &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h index a79c54bbc899..294bd757bcb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h @@ -15,7 +15,11 @@ SR(DPPCLK_DTO_CTRL),\ DCCG_SRII(DTO_PARAM, DPPCLK, 0),\ DCCG_SRII(DTO_PARAM, DPPCLK, 1),\ - SR(REFCLK_CNTL) + SR(REFCLK_CNTL),\ + SR(DISPCLK_FREQ_CHANGE_CNTL),\ + DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\ + DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1) + #define DCCG_MASK_SH_LIST_DCN3_03(mask_sh) \ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\ @@ -25,6 +29,18 @@ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\ DCCG_SF(REFCLK_CNTL, REFCLK_CLOCK_EN, mask_sh),\ - DCCG_SF(REFCLK_CNTL, REFCLK_SRC_SEL, mask_sh) + DCCG_SF(REFCLK_CNTL, REFCLK_SRC_SEL, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_DELAY, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_SIZE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_FREQ_RAMP_DONE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_MAX_ERRDET_CYCLES, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_RESET, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_STATE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_OVR_EN, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_CHG_FWD_CORR_DISABLE, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 0, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 1, mask_sh) #endif //__DCN303_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c index aa5dbbade2bd..f499f8ab5e47 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c @@ -9,6 +9,8 @@ #include "dcn30/dcn30_init.h" #include "dc.h" +#include "dcn303_init.h" + void dcn303_hw_sequencer_construct(struct dc *dc) { dcn30_hw_sequencer_construct(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 4a9b64023675..2de687f64cf6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -254,7 +254,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, .max_upscale_factor = { @@ -1500,6 +1500,23 @@ static bool dcn303_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 815481a3ef54..ea4f8e06b07c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -462,7 +462,7 @@ void dccg31_set_physymclk( } /* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */ -void dccg31_set_dtbclk_dto( +static void dccg31_set_dtbclk_dto( struct dccg *dccg, int dtbclk_inst, int req_dtbclk_khz, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index ee6f13bef377..8b9b1a5309ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -67,6 +67,68 @@ #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #endif +static uint8_t phy_id_from_transmitter(enum transmitter t) +{ + uint8_t phy_id; + + switch (t) { + case TRANSMITTER_UNIPHY_A: + phy_id = 0; + break; + case TRANSMITTER_UNIPHY_B: + phy_id = 1; + break; + case TRANSMITTER_UNIPHY_C: + phy_id = 2; + break; + case TRANSMITTER_UNIPHY_D: + phy_id = 3; + break; + case TRANSMITTER_UNIPHY_E: + phy_id = 4; + break; + case TRANSMITTER_UNIPHY_F: + phy_id = 5; + break; + case TRANSMITTER_UNIPHY_G: + phy_id = 6; + break; + default: + phy_id = 0; + break; + } + return phy_id; +} + +static bool has_query_dp_alt(struct link_encoder *enc) +{ + struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv; + + /* Supports development firmware and firmware >= 4.0.11 */ + return dc_dmub_srv && + !(dc_dmub_srv->dmub->fw_version >= DMUB_FW_VERSION(4, 0, 0) && + dc_dmub_srv->dmub->fw_version <= DMUB_FW_VERSION(4, 0, 10)); +} + +static bool query_dp_alt_from_dmub(struct link_encoder *enc, + union dmub_rb_cmd *cmd) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv; + + memset(cmd, 0, sizeof(*cmd)); + cmd->query_dp_alt.header.type = DMUB_CMD__VBIOS; + cmd->query_dp_alt.header.sub_type = + DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT; + cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data); + cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter); + + if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, cmd)) + return false; + + return true; +} + void dcn31_link_encoder_set_dio_phy_mux( struct link_encoder *enc, enum encoder_type_select sel, @@ -141,7 +203,7 @@ void dcn31_link_encoder_set_dio_phy_mux( } } -void enc31_hw_init(struct link_encoder *enc) +static void enc31_hw_init(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); @@ -536,57 +598,90 @@ void dcn31_link_encoder_disable_output( bool dcn31_link_encoder_is_in_alt_mode(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + union dmub_rb_cmd cmd; uint32_t dp_alt_mode_disable; - bool is_usb_c_alt_mode = false; - if (enc->features.flags.bits.DP_IS_USB_C) { - if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { - // [Note] no need to check hw_internal_rev once phy mux selection is ready - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); - } else { + /* Only applicable to USB-C PHY. */ + if (!enc->features.flags.bits.DP_IS_USB_C) + return false; + + /* + * Use the new interface from DMCUB if available. + * Avoids hanging the RDCPSPIPE if DMCUB wasn't already running. + */ + if (has_query_dp_alt(enc)) { + if (!query_dp_alt_from_dmub(enc, &cmd)) + return false; + + return (cmd.query_dp_alt.data.is_dp_alt_disable == 0); + } + + /* Legacy path, avoid if possible. */ + if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, + &dp_alt_mode_disable); + } else { /* * B0 phys use a new set of registers to check whether alt mode is disabled. * if value == 1 alt mode is disabled, otherwise it is enabled. */ - if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); - } else { - // [Note] need to change TRANSMITTER_UNIPHY_C/D to F/G once phy mux selection is ready - REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); - } + if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) || + (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) || + (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, + &dp_alt_mode_disable); + } else { + REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, + &dp_alt_mode_disable); } - - is_usb_c_alt_mode = (dp_alt_mode_disable == 0); } - return is_usb_c_alt_mode; + return (dp_alt_mode_disable == 0); } -void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, - struct dc_link_settings *link_settings) +void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_link_settings *link_settings) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + union dmub_rb_cmd cmd; uint32_t is_in_usb_c_dp4_mode = 0; dcn10_link_encoder_get_max_link_cap(enc, link_settings); - /* in usb c dp2 mode, max lane count is 2 */ - if (enc->funcs->is_in_alt_mode && enc->funcs->is_in_alt_mode(enc)) { - if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { - // [Note] no need to check hw_internal_rev once phy mux selection is ready - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); + /* Take the link cap directly if not USB */ + if (!enc->features.flags.bits.DP_IS_USB_C) + return; + + /* + * Use the new interface from DMCUB if available. + * Avoids hanging the RDCPSPIPE if DMCUB wasn't already running. + */ + if (has_query_dp_alt(enc)) { + if (!query_dp_alt_from_dmub(enc, &cmd)) + return; + + if (cmd.query_dp_alt.data.is_usb && + cmd.query_dp_alt.data.is_dp4 == 0) + link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count); + + return; + } + + /* Legacy path, avoid if possible. */ + if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, + &is_in_usb_c_dp4_mode); + } else { + if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) || + (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) || + (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { + REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, + &is_in_usb_c_dp4_mode); } else { - if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); - } else { - REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); - } + REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, + &is_in_usb_c_dp4_mode); } - if (!is_in_usb_c_dp4_mode) - link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count); } + + if (!is_in_usb_c_dp4_mode) + link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c index 6c08e21bb708..80dfaa4d4d81 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c @@ -499,7 +499,8 @@ static enum bp_result link_transmitter_control( void dcn31_hpo_dp_link_enc_enable_dp_output( struct hpo_dp_link_encoder *enc, const struct dc_link_settings *link_settings, - enum transmitter transmitter) + enum transmitter transmitter, + enum hpd_source_id hpd_source) { struct dcn31_hpo_dp_link_encoder *enc3 = DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(enc); struct bp_transmitter_control cntl = { 0 }; @@ -508,6 +509,9 @@ void dcn31_hpo_dp_link_enc_enable_dp_output( /* Set the transmitter */ enc3->base.transmitter = transmitter; + /* Set the hpd source */ + enc3->base.hpd_source = hpd_source; + /* Enable the PHY */ cntl.action = TRANSMITTER_CONTROL_ENABLE; cntl.engine_id = ENGINE_ID_UNKNOWN; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h index 0706ccaf6fec..e324e9b83136 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h @@ -184,7 +184,8 @@ void hpo_dp_link_encoder31_construct(struct dcn31_hpo_dp_link_encoder *enc31, void dcn31_hpo_dp_link_enc_enable_dp_output( struct hpo_dp_link_encoder *enc, const struct dc_link_settings *link_settings, - enum transmitter transmitter); + enum transmitter transmitter, + enum hpd_source_id hpd_source); void dcn31_hpo_dp_link_enc_disable_output( struct hpo_dp_link_encoder *enc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c index 565f12dd179a..5065904c7833 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c @@ -358,8 +358,8 @@ static void dcn31_hpo_dp_stream_enc_set_stream_attribute( h_width = hw_crtc_timing.h_border_left + hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right; v_height = hw_crtc_timing.v_border_top + hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom; - hsp = hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? 0x80 : 0; - vsp = hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 0x80 : 0; + hsp = hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? 0 : 0x80; + vsp = hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 0 : 0x80; v_freq = hw_crtc_timing.pix_clk_100hz * 100; /* MSA Packet Mapping to 32-bit Link Symbols - DP2 spec, section 2.7.4.1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 4d4ac4ceb1e8..4206ce5bf9a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -112,7 +112,7 @@ void dcn31_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; - int i, j; + int i; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -192,50 +192,13 @@ void dcn31_init_hw(struct dc *dc) link->link_status.link_active = true; } - /* Power gate DSCs */ - for (i = 0; i < res_pool->res_cap->num_dsc; i++) - if (hws->funcs.dsc_pg_control != NULL) - hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); - /* Enables outbox notifications for usb4 dpia */ if (dc->res_pool->usb4_dpia_count) dmub_enable_outbox_notification(dc); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) { - uint8_t dpcd_power_state = '\0'; - enum dc_status status = DC_ERROR_UNEXPECTED; - - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) - continue; - - /* if any of the displays are lit up turn them off */ - status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { - /* blank dp stream before power off receiver*/ - if (dc->links[i]->ep_type == DISPLAY_ENDPOINT_PHY && - dc->links[i]->link_enc->funcs->get_dig_frontend) { - unsigned int fe; - - fe = dc->links[i]->link_enc->funcs->get_dig_frontend( - dc->links[i]->link_enc); - if (fe == ENGINE_ID_UNKNOWN) - continue; - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - dp_receiver_power_ctrl(dc->links[i], false); - } - } - } + if (dc->config.power_down_display_on_boot) + dc_link_blank_all_dp_displays(dc); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 05335a8c3c2d..d7559e5a99ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -31,6 +31,8 @@ #include "dcn301/dcn301_hwseq.h" #include "dcn31/dcn31_hwseq.h" +#include "dcn31_init.h" + static const struct hw_sequencer_funcs dcn31_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn31_init_hw, @@ -101,6 +103,8 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; @@ -149,4 +153,9 @@ void dcn31_hw_sequencer_construct(struct dc *dc) dc->hwss.init_hw = dcn20_fpga_init_hw; dc->hwseq->funcs.init_pipes = NULL; } + if (dc->debug.disable_z10) { + /*hw not support z10 or sw disable it*/ + dc->hwss.z10_restore = NULL; + dc->hwss.z10_save_init = NULL; + } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index a4b1d98f0007..e8562fa11366 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -256,6 +256,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = { .get_crc = optc1_get_crc, .configure_crc = optc2_configure_crc, .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, .set_odm_bypass = optc3_set_odm_bypass, .set_odm_combine = optc31_set_odm_combine, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 3b3721386571..83ece02380a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -65,7 +65,7 @@ static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cnt return cmd.panel_cntl.data.current_backlight; } -uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) +static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) { struct dcn31_panel_cntl *dcn31_panel_cntl = TO_DCN31_PANEL_CNTL(panel_cntl); struct dc_dmub_srv *dc_dmub_srv = panel_cntl->ctx->dmub_srv; @@ -96,7 +96,7 @@ uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) return cmd.panel_cntl.data.current_backlight; } -void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl) +static void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl) { struct dcn31_panel_cntl *dcn31_panel_cntl = TO_DCN31_PANEL_CNTL(*panel_cntl); @@ -104,7 +104,7 @@ void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl) *panel_cntl = NULL; } -bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl) +static bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl) { union dmub_rb_cmd cmd; @@ -114,7 +114,7 @@ bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl) return cmd.panel_cntl.data.is_backlight_on; } -bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl) +static bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl) { union dmub_rb_cmd cmd; @@ -124,7 +124,7 @@ bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl) return cmd.panel_cntl.data.is_powered_on; } -void dcn31_store_backlight_level(struct panel_cntl *panel_cntl) +static void dcn31_store_backlight_level(struct panel_cntl *panel_cntl) { union dmub_rb_cmd cmd; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 18896294ae12..42ed47e8133d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -355,6 +355,14 @@ static const struct dce110_clk_src_regs clk_src_regs[] = { clk_src_regs(3, D), clk_src_regs(4, E) }; +/*pll_id being rempped in dmub, in driver it is logical instance*/ +static const struct dce110_clk_src_regs clk_src_regs_b0[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, F), + clk_src_regs(3, G), + clk_src_regs(4, E) +}; static const struct dce110_clk_src_shift cs_shift = { CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) @@ -485,7 +493,8 @@ static const struct dcn31_apg_mask apg_mask = { SE_DCN3_REG_LIST(id)\ } -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { +/* Some encoders won't be initialized here - but they're logical, not physical. */ +static const struct dcn10_stream_enc_registers stream_enc_regs[ENGINE_ID_COUNT] = { stream_enc_regs(0), stream_enc_regs(1), stream_enc_regs(2), @@ -968,7 +977,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, @@ -994,7 +1003,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -1023,6 +1032,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .optimize_edp_link_rate = true, .enable_sw_cntl_psr = true, + .apply_vendor_specific_lttpr_wa = true, }; static const struct dc_debug_options debug_defaults_diags = { @@ -1270,7 +1280,7 @@ static struct link_encoder *dcn31_link_enc_create_minimal( return &enc20->enc10.base; } -struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) +static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) { struct dcn31_panel_cntl *panel_cntl = kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); @@ -1774,6 +1784,7 @@ static int dcn31_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; + bool upscaled = false; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -1785,6 +1796,11 @@ static int dcn31_populate_dml_pipes_from_context( pipe = &res_ctx->pipe_ctx[i]; timing = &pipe->stream->timing; + if (pipe->plane_state && + (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || + pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) + upscaled = true; + /* * Immediate flip can be set dynamically after enabling the plane. * We need to require support for immediate flip or underflow can be @@ -1829,6 +1845,11 @@ static int dcn31_populate_dml_pipes_from_context( context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; pipes[0].pipe.src.unbounded_req_mode = true; } + } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count + && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; + } else if (context->stream_count >= 3 && upscaled) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; } return pipe_cnt; @@ -1963,7 +1984,7 @@ static void dcn31_calculate_wm_and_dlg_fp( pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (dc->config.forced_clocks) { + if (dc->config.forced_clocks || dc->debug.max_disp_clk) { pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; } @@ -2199,6 +2220,8 @@ static bool dcn31_resource_construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.dp_hpo = true; + dc->caps.hdmi_frl_pcon_support = true; + dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.is_apu = true; @@ -2276,14 +2299,27 @@ static bool dcn31_resource_construct( dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + /*move phypllx_pixclk_resync to dmub next*/ + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs_b0[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs_b0[3], false); + } else { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL3, &clk_src_regs[3], false); + } + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL4, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 416fe7a721d8..a513363b3326 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -49,4 +49,35 @@ struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); +/*temp: B0 specific before switch to dcn313 headers*/ +#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL +#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e +#define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1 +#define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f +#define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1 + +//PHYPLLF_PIXCLK_RESYNC_CNTL +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L + +//PHYPLLG_PIXCLK_RESYNC_CNTL +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L +#endif #endif /* _DCN31_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h index 511f9e1159c7..4229369c57f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h +++ b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h @@ -34,12 +34,12 @@ struct cp_psp_stream_config { uint8_t dig_fe; uint8_t link_enc_idx; uint8_t stream_enc_idx; - uint8_t phy_idx; uint8_t dio_output_idx; - uint8_t dio_output_type; + uint8_t phy_idx; uint8_t assr_enabled; uint8_t mst_enabled; uint8_t dp2_enabled; + uint8_t usb4_enabled; void *dm_stream_ctx; bool dpms_off; }; diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 0fe66b080a03..7f94e3f70d7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -59,7 +59,7 @@ void dm_helpers_free_gpu_mem( void *pvMem); enum dc_edid_status dm_helpers_parse_edid_caps( - struct dc_context *ctx, + struct dc_link *link, const struct dc_edid *edid, struct dc_edid_caps *edid_caps); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 46c433c0bcb0..8bc27de4c104 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -1711,14 +1711,6 @@ void dml21_rq_dlg_get_dlg_reg( dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); } -void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param) -{ - memset(arb_param, 0, sizeof(*arb_param)); - arb_param->max_req_outstanding = 256; - arb_param->min_req_outstanding = 68; - arb_param->sat_level_us = 60; -} - static void calculate_ttu_cursor( struct display_mode_lib *mode_lib, double *refcyc_per_req_delivery_pre_cur, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 7e937bdcea00..6feb23432f8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -422,62 +422,8 @@ static void CalculateUrgentBurstFactor( static void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, - int MaxInterDCNTileRepeaters, int MaxPrefetchMode, - double FinalDRAMClockChangeLatency, - double SREnterPlusExitTime, - int ReturnBusWidth, - int RoundTripPingLatencyCycles, - int ReorderingBytes, - int PixelChunkSizeInKByte, - int MetaChunkSize, - bool GPUVMEnable, - int GPUVMMaxPageTableLevels, - bool HostVMEnable, - int NumberOfActivePlanes, - double HostVMMinPageSize, - int HostVMMaxNonCachedPageTableLevels, - bool DynamicMetadataVMEnabled, - enum immediate_flip_requirement ImmediateFlipRequirement, - bool ProgressiveToInterlaceUnitInOPP, - double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, - double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, - int VTotal[], - int VActive[], - int DynamicMetadataTransmittedBytes[], - int DynamicMetadataLinesBeforeActiveRequired[], - bool Interlace[], - double RequiredDPPCLK[][2][DC__NUM_DPP__MAX], - double RequiredDISPCLK[][2], - double UrgLatency[], - unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], - double ProjectedDCFCLKDeepSleep[][2], - double MaximumVStartup[][2][DC__NUM_DPP__MAX], - double TotalVActivePixelBandwidth[][2], - double TotalVActiveCursorBandwidth[][2], - double TotalMetaRowBandwidth[][2], - double TotalDPTERowBandwidth[][2], - unsigned int TotalNumberOfActiveDPP[][2], - unsigned int TotalNumberOfDCCActiveDPP[][2], - int dpte_group_bytes[], - double PrefetchLinesY[][2][DC__NUM_DPP__MAX], - double PrefetchLinesC[][2][DC__NUM_DPP__MAX], - int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], - int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], - int BytePerPixelY[], - int BytePerPixelC[], - int HTotal[], - double PixelClock[], - double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], - double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], - double MetaRowBytes[][2][DC__NUM_DPP__MAX], - bool DynamicMetadataEnable[], - double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX], - double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX], - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - double DCFCLKPerState[], - double DCFCLKState[][2]); + int ReorderingBytes); static void CalculatePixelDeliveryTimes( unsigned int NumberOfActivePlanes, @@ -3949,6 +3895,102 @@ static double TruncToValidBPP( return BPP_INVALID; } +static noinline void CalculatePrefetchSchedulePerPlane( + struct display_mode_lib *mode_lib, + double HostVMInefficiencyFactor, + int i, + unsigned j, + unsigned k) +{ + struct vba_vars_st *v = &mode_lib->vba; + Pipe myPipe; + + myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; + myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; + myPipe.PixelClock = v->PixelClock[k]; + myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; + myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; + myPipe.ScalerEnabled = v->ScalerEnabled[k]; + myPipe.SourceScan = v->SourceScan[k]; + myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; + myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; + myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; + myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; + myPipe.InterlaceEnable = v->Interlace[k]; + myPipe.NumberOfCursors = v->NumberOfCursors[k]; + myPipe.VBlank = v->VTotal[k] - v->VActive[k]; + myPipe.HTotal = v->HTotal[k]; + myPipe.DCCEnable = v->DCCEnable[k]; + myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 + || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; + myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; + myPipe.BytePerPixelY = v->BytePerPixelY[k]; + myPipe.BytePerPixelC = v->BytePerPixelC[k]; + myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; + v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( + mode_lib, + HostVMInefficiencyFactor, + &myPipe, + v->DSCDelayPerState[i][k], + v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, + v->DPPCLKDelaySCL, + v->DPPCLKDelaySCLLBOnly, + v->DPPCLKDelayCNVCCursor, + v->DISPCLKDelaySubtotal, + v->SwathWidthYThisState[k] / v->HRatio[k], + v->OutputFormat[k], + v->MaxInterDCNTileRepeaters, + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), + v->MaximumVStartup[i][j][k], + v->GPUVMMaxPageTableLevels, + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->HostVMMinPageSize, + v->DynamicMetadataEnable[k], + v->DynamicMetadataVMEnabled, + v->DynamicMetadataLinesBeforeActiveRequired[k], + v->DynamicMetadataTransmittedBytes[k], + v->UrgLatency[i], + v->ExtraLatency, + v->TimeCalc, + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], + v->DPTEBytesPerRow[i][j][k], + v->PrefetchLinesY[i][j][k], + v->SwathWidthYThisState[k], + v->PrefillY[k], + v->MaxNumSwY[k], + v->PrefetchLinesC[i][j][k], + v->SwathWidthCThisState[k], + v->PrefillC[k], + v->MaxNumSwC[k], + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], + v->TWait, + &v->DSTXAfterScaler[k], + &v->DSTYAfterScaler[k], + &v->LineTimesForPrefetch[k], + &v->PrefetchBW[k], + &v->LinesForMetaPTE[k], + &v->LinesForMetaAndDPTERow[k], + &v->VRatioPreY[i][j][k], + &v->VRatioPreC[i][j][k], + &v->RequiredPrefetchPixelDataBWLuma[i][j][k], + &v->RequiredPrefetchPixelDataBWChroma[i][j][k], + &v->NoTimeForDynamicMetadata[i][j][k], + &v->Tno_bw[k], + &v->prefetch_vmrow_bw[k], + &v->dummy7[k], + &v->dummy8[k], + &v->dummy13[k], + &v->VUpdateOffsetPix[k], + &v->VUpdateWidthPix[k], + &v->VReadyOffsetPix[k]); +} + void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; @@ -5079,66 +5121,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - if (v->UseMinimumRequiredDCFCLK == true) { - UseMinimumDCFCLK( - mode_lib, - v->MaxInterDCNTileRepeaters, - MaxPrefetchMode, - v->DRAMClockChangeLatency, - v->SREnterPlusExitTime, - v->ReturnBusWidth, - v->RoundTripPingLatencyCycles, - ReorderingBytes, - v->PixelChunkSizeInKByte, - v->MetaChunkSize, - v->GPUVMEnable, - v->GPUVMMaxPageTableLevels, - v->HostVMEnable, - v->NumberOfActivePlanes, - v->HostVMMinPageSize, - v->HostVMMaxNonCachedPageTableLevels, - v->DynamicMetadataVMEnabled, - v->ImmediateFlipRequirement[0], - v->ProgressiveToInterlaceUnitInOPP, - v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, - v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, - v->VTotal, - v->VActive, - v->DynamicMetadataTransmittedBytes, - v->DynamicMetadataLinesBeforeActiveRequired, - v->Interlace, - v->RequiredDPPCLK, - v->RequiredDISPCLK, - v->UrgLatency, - v->NoOfDPP, - v->ProjectedDCFCLKDeepSleep, - v->MaximumVStartup, - v->TotalVActivePixelBandwidth, - v->TotalVActiveCursorBandwidth, - v->TotalMetaRowBandwidth, - v->TotalDPTERowBandwidth, - v->TotalNumberOfActiveDPP, - v->TotalNumberOfDCCActiveDPP, - v->dpte_group_bytes, - v->PrefetchLinesY, - v->PrefetchLinesC, - v->swath_width_luma_ub_all_states, - v->swath_width_chroma_ub_all_states, - v->BytePerPixelY, - v->BytePerPixelC, - v->HTotal, - v->PixelClock, - v->PDEAndMetaPTEBytesPerFrame, - v->DPTEBytesPerRow, - v->MetaRowBytes, - v->DynamicMetadataEnable, - v->VActivePixelBandwidth, - v->VActiveCursorBandwidth, - v->ReadBandwidthLuma, - v->ReadBandwidthChroma, - v->DCFCLKPerState, - v->DCFCLKState); - } + if (v->UseMinimumRequiredDCFCLK == true) + UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { @@ -5276,92 +5260,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SREnterPlusExitTime); for (k = 0; k < v->NumberOfActivePlanes; k++) { - Pipe myPipe; - - myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; - myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; - myPipe.PixelClock = v->PixelClock[k]; - myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; - myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; - myPipe.ScalerEnabled = v->ScalerEnabled[k]; - myPipe.SourceScan = v->SourceScan[k]; - myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; - myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; - myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; - myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; - myPipe.InterlaceEnable = v->Interlace[k]; - myPipe.NumberOfCursors = v->NumberOfCursors[k]; - myPipe.VBlank = v->VTotal[k] - v->VActive[k]; - myPipe.HTotal = v->HTotal[k]; - myPipe.DCCEnable = v->DCCEnable[k]; - myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 - || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; - myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; - myPipe.BytePerPixelY = v->BytePerPixelY[k]; - myPipe.BytePerPixelC = v->BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; - v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( - mode_lib, - HostVMInefficiencyFactor, - &myPipe, - v->DSCDelayPerState[i][k], - v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, - v->DPPCLKDelaySCL, - v->DPPCLKDelaySCLLBOnly, - v->DPPCLKDelayCNVCCursor, - v->DISPCLKDelaySubtotal, - v->SwathWidthYThisState[k] / v->HRatio[k], - v->OutputFormat[k], - v->MaxInterDCNTileRepeaters, - dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), - v->MaximumVStartup[i][j][k], - v->GPUVMMaxPageTableLevels, - v->GPUVMEnable, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->HostVMMinPageSize, - v->DynamicMetadataEnable[k], - v->DynamicMetadataVMEnabled, - v->DynamicMetadataLinesBeforeActiveRequired[k], - v->DynamicMetadataTransmittedBytes[k], - v->UrgLatency[i], - v->ExtraLatency, - v->TimeCalc, - v->PDEAndMetaPTEBytesPerFrame[i][j][k], - v->MetaRowBytes[i][j][k], - v->DPTEBytesPerRow[i][j][k], - v->PrefetchLinesY[i][j][k], - v->SwathWidthYThisState[k], - v->PrefillY[k], - v->MaxNumSwY[k], - v->PrefetchLinesC[i][j][k], - v->SwathWidthCThisState[k], - v->PrefillC[k], - v->MaxNumSwC[k], - v->swath_width_luma_ub_this_state[k], - v->swath_width_chroma_ub_this_state[k], - v->SwathHeightYThisState[k], - v->SwathHeightCThisState[k], - v->TWait, - &v->DSTXAfterScaler[k], - &v->DSTYAfterScaler[k], - &v->LineTimesForPrefetch[k], - &v->PrefetchBW[k], - &v->LinesForMetaPTE[k], - &v->LinesForMetaAndDPTERow[k], - &v->VRatioPreY[i][j][k], - &v->VRatioPreC[i][j][k], - &v->RequiredPrefetchPixelDataBWLuma[i][j][k], - &v->RequiredPrefetchPixelDataBWChroma[i][j][k], - &v->NoTimeForDynamicMetadata[i][j][k], - &v->Tno_bw[k], - &v->prefetch_vmrow_bw[k], - &v->dummy7[k], - &v->dummy8[k], - &v->dummy13[k], - &v->VUpdateOffsetPix[k], - &v->VUpdateWidthPix[k], - &v->VReadyOffsetPix[k]); + CalculatePrefetchSchedulePerPlane(mode_lib, + HostVMInefficiencyFactor, + i, j, k); } for (k = 0; k < v->NumberOfActivePlanes; k++) { @@ -7249,69 +7150,15 @@ static double CalculateUrgentLatency( static void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, - int MaxInterDCNTileRepeaters, int MaxPrefetchMode, - double FinalDRAMClockChangeLatency, - double SREnterPlusExitTime, - int ReturnBusWidth, - int RoundTripPingLatencyCycles, - int ReorderingBytes, - int PixelChunkSizeInKByte, - int MetaChunkSize, - bool GPUVMEnable, - int GPUVMMaxPageTableLevels, - bool HostVMEnable, - int NumberOfActivePlanes, - double HostVMMinPageSize, - int HostVMMaxNonCachedPageTableLevels, - bool DynamicMetadataVMEnabled, - enum immediate_flip_requirement ImmediateFlipRequirement, - bool ProgressiveToInterlaceUnitInOPP, - double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, - double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, - int VTotal[], - int VActive[], - int DynamicMetadataTransmittedBytes[], - int DynamicMetadataLinesBeforeActiveRequired[], - bool Interlace[], - double RequiredDPPCLK[][2][DC__NUM_DPP__MAX], - double RequiredDISPCLK[][2], - double UrgLatency[], - unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], - double ProjectedDCFCLKDeepSleep[][2], - double MaximumVStartup[][2][DC__NUM_DPP__MAX], - double TotalVActivePixelBandwidth[][2], - double TotalVActiveCursorBandwidth[][2], - double TotalMetaRowBandwidth[][2], - double TotalDPTERowBandwidth[][2], - unsigned int TotalNumberOfActiveDPP[][2], - unsigned int TotalNumberOfDCCActiveDPP[][2], - int dpte_group_bytes[], - double PrefetchLinesY[][2][DC__NUM_DPP__MAX], - double PrefetchLinesC[][2][DC__NUM_DPP__MAX], - int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], - int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], - int BytePerPixelY[], - int BytePerPixelC[], - int HTotal[], - double PixelClock[], - double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], - double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], - double MetaRowBytes[][2][DC__NUM_DPP__MAX], - bool DynamicMetadataEnable[], - double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX], - double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX], - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - double DCFCLKPerState[], - double DCFCLKState[][2]) + int ReorderingBytes) { struct vba_vars_st *v = &mode_lib->vba; int dummy1, i, j, k; double NormalEfficiency, dummy2, dummy3; double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; - NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; + NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; @@ -7329,61 +7176,61 @@ static void UseMinimumDCFCLK( double MinimumTvmPlus2Tr0; TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] - + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]); + + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); } - for (k = 0; k <= NumberOfActivePlanes - 1; ++k) { - NoOfDPPState[k] = NoOfDPP[i][j][k]; + for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { + NoOfDPPState[k] = v->NoOfDPP[i][j][k]; } - MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime); - NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j]; - DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ? - TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j]; + MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); + NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; + DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? + TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; DCFCLKRequiredForAverageBandwidth = dml_max3( - ProjectedDCFCLKDeepSleep[i][j], - (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth - / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), - (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth); + v->ProjectedDCFCLKDeepSleep[i][j], + (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth + / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), + (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); ExtraLatencyBytes = CalculateExtraLatencyBytes( ReorderingBytes, - TotalNumberOfActiveDPP[i][j], - PixelChunkSizeInKByte, - TotalNumberOfDCCActiveDPP[i][j], - MetaChunkSize, - GPUVMEnable, - HostVMEnable, - NumberOfActivePlanes, + v->TotalNumberOfActiveDPP[i][j], + v->PixelChunkSizeInKByte, + v->TotalNumberOfDCCActiveDPP[i][j], + v->MetaChunkSize, + v->GPUVMEnable, + v->HostVMEnable, + v->NumberOfActivePlanes, NoOfDPPState, - dpte_group_bytes, + v->dpte_group_bytes, 1, - HostVMMinPageSize, - HostVMMaxNonCachedPageTableLevels); - ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; - for (k = 0; k < NumberOfActivePlanes; ++k) { + v->HostVMMinPageSize, + v->HostVMMaxNonCachedPageTableLevels); + ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { double DCFCLKCyclesRequiredInPrefetch; double ExpectedPrefetchBWAcceleration; double PrefetchTime; - PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] - + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth; + PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] + + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] - + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) - + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth - + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; - PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k]; - ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) - / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth + + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; + PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; + ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) + / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); DynamicMetadataVMExtraLatency[k] = - (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? - UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; - PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - - UrgLatency[i] - * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2) - * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) + (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? + v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; + PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait + - v->UrgLatency[i] + * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) + * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k]; if (PrefetchTime > 0) { @@ -7392,14 +7239,14 @@ static void UseMinimumDCFCLK( / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; - if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) { + if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] - + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth; + + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; } } else { - DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i]; + DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; } - if (DynamicMetadataEnable[k] == true) { + if (v->DynamicMetadataEnable[k] == true) { double TSetupPipe; double TdmbfPipe; double TdmsksPipe; @@ -7407,17 +7254,17 @@ static void UseMinimumDCFCLK( double AllowedTimeForUrgentExtraLatency; CalculateVupdateAndDynamicMetadataParameters( - MaxInterDCNTileRepeaters, - RequiredDPPCLK[i][j][k], - RequiredDISPCLK[i][j], - ProjectedDCFCLKDeepSleep[i][j], - PixelClock[k], - HTotal[k], - VTotal[k] - VActive[k], - DynamicMetadataTransmittedBytes[k], - DynamicMetadataLinesBeforeActiveRequired[k], - Interlace[k], - ProgressiveToInterlaceUnitInOPP, + v->MaxInterDCNTileRepeaters, + v->RequiredDPPCLK[i][j][k], + v->RequiredDISPCLK[i][j], + v->ProjectedDCFCLKDeepSleep[i][j], + v->PixelClock[k], + v->HTotal[k], + v->VTotal[k] - v->VActive[k], + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], + v->Interlace[k], + v->ProgressiveToInterlaceUnitInOPP, &TSetupPipe, &TdmbfPipe, &TdmecPipe, @@ -7425,31 +7272,31 @@ static void UseMinimumDCFCLK( &dummy1, &dummy2, &dummy3); - AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe + AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; if (AllowedTimeForUrgentExtraLatency > 0) { DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max( DCFCLKRequiredForPeakBandwidthPerPlane[k], ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); } else { - DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i]; + DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; } } } DCFCLKRequiredForPeakBandwidth = 0; - for (k = 0; k <= NumberOfActivePlanes - 1; ++k) { + for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; } - MinimumTvmPlus2Tr0 = UrgLatency[i] - * (GPUVMEnable == true ? - (HostVMEnable == true ? - (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : + MinimumTvmPlus2Tr0 = v->UrgLatency[i] + * (v->GPUVMEnable == true ? + (v->HostVMEnable == true ? + (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0); - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { double MaximumTvmPlus2Tr0PlusTsw; - MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; + MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { - DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; + DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; } else { DCFCLKRequiredForPeakBandwidth = dml_max3( DCFCLKRequiredForPeakBandwidth, @@ -7457,7 +7304,7 @@ static void UseMinimumDCFCLK( (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); } } - DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); + v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); } } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 6905ef1e75a6..d76251fd1566 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -73,6 +73,7 @@ struct display_mode_lib { struct vba_vars_st vba; struct dal_logger *logger; struct dml_funcs funcs; + struct _vcs_dpi_display_e2e_pipe_params_st dml_pipe_state[6]; }; void dml_init_instance(struct display_mode_lib *lib, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c new file mode 100644 index 000000000000..789f7562cdc7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c @@ -0,0 +1,1889 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dml_wrapper.h" +#include "resource.h" +#include "core_types.h" +#include "dsc.h" +#include "clk_mgr.h" + +#ifndef DC_LOGGER_INIT +#define DC_LOGGER_INIT +#undef DC_LOG_WARNING +#define DC_LOG_WARNING +#endif + +#define DML_WRAPPER_TRANSLATION_ +#include "dml_wrapper_translation.c" +#undef DML_WRAPPER_TRANSLATION_ + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +static void build_clamping_params(struct dc_stream_state *stream) +{ + stream->clamping.clamping_level = CLAMPING_FULL_RANGE; + stream->clamping.c_depth = stream->timing.display_color_depth; + stream->clamping.pixel_encoding = stream->timing.pixel_encoding; +} + +static void get_pixel_clock_parameters( + const struct pipe_ctx *pipe_ctx, + struct pixel_clk_params *pixel_clk_params) +{ + const struct dc_stream_state *stream = pipe_ctx->stream; + + /*TODO: is this halved for YCbCr 420? in that case we might want to move + * the pixel clock normalization for hdmi up to here instead of doing it + * in pll_adjust_pix_clk + */ + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + pixel_clk_params->encoder_object_id = stream->link->link_enc->id; + pixel_clk_params->signal_type = pipe_ctx->stream->signal; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; + /* TODO: un-hardcode*/ + pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * + LINK_RATE_REF_FREQ_IN_KHZ; + pixel_clk_params->flags.ENABLE_SS = 0; + pixel_clk_params->color_depth = + stream->timing.display_color_depth; + pixel_clk_params->flags.DISPLAY_BLANKED = 1; + pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding == + PIXEL_ENCODING_YCBCR420); + pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { + pixel_clk_params->color_depth = COLOR_DEPTH_888; + } + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; + } + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + pixel_clk_params->requested_pix_clk_100hz *= 2; + +} + +static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +{ + get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); + + if (pipe_ctx->clock_source) + pipe_ctx->clock_source->funcs->get_pix_clk_dividers( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); + + pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; + + resource_build_bit_depth_reduction_params(pipe_ctx->stream, + &pipe_ctx->stream->bit_depth_params); + build_clamping_params(pipe_ctx->stream); + + return DC_OK; +} + +static void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, + struct bit_depth_reduction_params *fmt_bit_depth) +{ + enum dc_dither_option option = stream->dither_option; + enum dc_pixel_encoding pixel_encoding = + stream->timing.pixel_encoding; + + memset(fmt_bit_depth, 0, sizeof(*fmt_bit_depth)); + + if (option == DITHER_OPTION_DEFAULT) { + switch (stream->timing.display_color_depth) { + case COLOR_DEPTH_666: + option = DITHER_OPTION_SPATIAL6; + break; + case COLOR_DEPTH_888: + option = DITHER_OPTION_SPATIAL8; + break; + case COLOR_DEPTH_101010: + option = DITHER_OPTION_SPATIAL10; + break; + default: + option = DITHER_OPTION_DISABLE; + } + } + + if (option == DITHER_OPTION_DISABLE) + return; + + if (option == DITHER_OPTION_TRUN6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 0; + } else if (option == DITHER_OPTION_TRUN8 || + option == DITHER_OPTION_TRUN8_SPATIAL6 || + option == DITHER_OPTION_TRUN8_FM6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 1; + } else if (option == DITHER_OPTION_TRUN10 || + option == DITHER_OPTION_TRUN10_SPATIAL6 || + option == DITHER_OPTION_TRUN10_SPATIAL8 || + option == DITHER_OPTION_TRUN10_FM8 || + option == DITHER_OPTION_TRUN10_FM6 || + option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; + } + + /* special case - Formatter can only reduce by 4 bits at most. + * When reducing from 12 to 6 bits, + * HW recommends we use trunc with round mode + * (if we did nothing, trunc to 10 bits would be used) + * note that any 12->10 bit reduction is ignored prior to DCE8, + * as the input was 10 bits. + */ + if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL6 || + option == DITHER_OPTION_FM6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; + fmt_bit_depth->flags.TRUNCATE_MODE = 1; + } + + /* spatial dither + * note that spatial modes 1-3 are never used + */ + if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL6 || + option == DITHER_OPTION_TRUN10_SPATIAL6 || + option == DITHER_OPTION_TRUN8_SPATIAL6) { + fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; + fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 0; + fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; + fmt_bit_depth->flags.RGB_RANDOM = + (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; + } else if (option == DITHER_OPTION_SPATIAL8_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL8 || + option == DITHER_OPTION_SPATIAL8_FM6 || + option == DITHER_OPTION_TRUN10_SPATIAL8 || + option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { + fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; + fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 1; + fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; + fmt_bit_depth->flags.RGB_RANDOM = + (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; + } else if (option == DITHER_OPTION_SPATIAL10_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL10 || + option == DITHER_OPTION_SPATIAL10_FM8 || + option == DITHER_OPTION_SPATIAL10_FM6) { + fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; + fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 2; + fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; + fmt_bit_depth->flags.RGB_RANDOM = + (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; + } + + if (option == DITHER_OPTION_SPATIAL6 || + option == DITHER_OPTION_SPATIAL8 || + option == DITHER_OPTION_SPATIAL10) { + fmt_bit_depth->flags.FRAME_RANDOM = 0; + } else { + fmt_bit_depth->flags.FRAME_RANDOM = 1; + } + + ////////////////////// + //// temporal dither + ////////////////////// + if (option == DITHER_OPTION_FM6 || + option == DITHER_OPTION_SPATIAL8_FM6 || + option == DITHER_OPTION_SPATIAL10_FM6 || + option == DITHER_OPTION_TRUN10_FM6 || + option == DITHER_OPTION_TRUN8_FM6 || + option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { + fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; + fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 0; + } else if (option == DITHER_OPTION_FM8 || + option == DITHER_OPTION_SPATIAL10_FM8 || + option == DITHER_OPTION_TRUN10_FM8) { + fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; + fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 1; + } else if (option == DITHER_OPTION_FM10) { + fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; + fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 2; + } + + fmt_bit_depth->pixel_encoding = pixel_encoding; +} + +bool dml_validate_dsc(struct dc *dc, struct dc_state *new_ctx) +{ + int i; + + /* Validate DSC config, dsc count validation is already done */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dsc_config dsc_cfg; + struct pipe_ctx *odm_pipe; + int opp_cnt = 1; + + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + opp_cnt++; + + /* Only need to validate top pipe */ + if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC) + continue; + + dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + + stream->timing.h_border_right) / opp_cnt; + dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + + stream->timing.v_border_bottom; + dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; + dsc_cfg.color_depth = stream->timing.display_color_depth; + dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false; + dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; + dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + + if (pipe_ctx->stream_res.dsc && !pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) + return false; + } + return true; +} + +enum dc_status dml_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream) +{ + enum dc_status status = DC_OK; + struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + + status = build_pipe_hw_param(pipe_ctx); + + return status; +} + +void dml_acquire_dsc(const struct dc *dc, + struct resource_context *res_ctx, + struct display_stream_compressor **dsc, + int pipe_idx) +{ + int i; + const struct resource_pool *pool = dc->res_pool; + struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc; + + ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */ + *dsc = NULL; + + /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */ + if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { + *dsc = pool->dscs[pipe_idx]; + res_ctx->is_dsc_acquired[pipe_idx] = true; + return; + } + + /* Return old DSC to avoid the need for redo it */ + if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) { + *dsc = dsc_old; + res_ctx->is_dsc_acquired[dsc_old->inst] = true; + return ; + } + + /* Find first free DSC */ + for (i = 0; i < pool->res_cap->num_dsc; i++) + if (!res_ctx->is_dsc_acquired[i]) { + *dsc = pool->dscs[i]; + res_ctx->is_dsc_acquired[i] = true; + break; + } +} + +static bool dml_split_stream_for_mpc_or_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *pri_pipe, + struct pipe_ctx *sec_pipe, + bool odm) +{ + int pipe_idx = sec_pipe->pipe_idx; + const struct resource_pool *pool = dc->res_pool; + + *sec_pipe = *pri_pipe; + + sec_pipe->pipe_idx = pipe_idx; + sec_pipe->plane_res.mi = pool->mis[pipe_idx]; + sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; + sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; + sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; + sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; + sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; + sec_pipe->stream_res.dsc = NULL; + if (odm) { + if (pri_pipe->next_odm_pipe) { + ASSERT(pri_pipe->next_odm_pipe != sec_pipe); + sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; + sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; + } + if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { + pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; + } + if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { + pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; + } + pri_pipe->next_odm_pipe = sec_pipe; + sec_pipe->prev_odm_pipe = pri_pipe; + ASSERT(sec_pipe->top_pipe == NULL); + + if (!sec_pipe->top_pipe) + sec_pipe->stream_res.opp = pool->opps[pipe_idx]; + else + sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; + if (sec_pipe->stream->timing.flags.DSC == 1) { + dml_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + ASSERT(sec_pipe->stream_res.dsc); + if (sec_pipe->stream_res.dsc == NULL) + return false; + } + } else { + if (pri_pipe->bottom_pipe) { + ASSERT(pri_pipe->bottom_pipe != sec_pipe); + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; + sec_pipe->bottom_pipe->top_pipe = sec_pipe; + } + pri_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe; + + ASSERT(pri_pipe->plane_state); + } + + return true; +} + +static struct pipe_ctx *dml_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + +static void dml_release_dsc(struct resource_context *res_ctx, + const struct resource_pool *pool, + struct display_stream_compressor **dsc) +{ + int i; + + for (i = 0; i < pool->res_cap->num_dsc; i++) + if (pool->dscs[i] == *dsc) { + res_ctx->is_dsc_acquired[i] = false; + *dsc = NULL; + break; + } +} + +static int dml_get_num_mpc_splits(struct pipe_ctx *pipe) +{ + int mpc_split_count = 0; + struct pipe_ctx *other_pipe = pipe->bottom_pipe; + + while (other_pipe && other_pipe->plane_state == pipe->plane_state) { + mpc_split_count++; + other_pipe = other_pipe->bottom_pipe; + } + other_pipe = pipe->top_pipe; + while (other_pipe && other_pipe->plane_state == pipe->plane_state) { + mpc_split_count++; + other_pipe = other_pipe->top_pipe; + } + + return mpc_split_count; +} + +static bool dml_enough_pipes_for_subvp(struct dc *dc, + struct dc_state *context) +{ + int i = 0; + int num_pipes = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->plane_state) + num_pipes++; + } + + // Sub-VP only possible if the number of "real" pipes is + // less than or equal to half the number of available pipes + if (num_pipes * 2 > dc->res_pool->pipe_count) + return false; + + return true; +} + +static int dml_validate_apply_pipe_split_flags( + struct dc *dc, + struct dc_state *context, + int vlevel, + int *split, + bool *merge) +{ + int i, pipe_idx, vlevel_split; + int plane_count = 0; + bool force_split = false; + bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID; + struct vba_vars_st *v = &context->bw_ctx.dml.vba; + int max_mpc_comb = v->maxMpcComb; + + if (context->stream_count > 1) { + if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP) + avoid_split = true; + } else if (dc->debug.force_single_disp_pipe_split) + force_split = true; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /** + * Workaround for avoiding pipe-split in cases where we'd split + * planes that are too small, resulting in splits that aren't + * valid for the scaler. + */ + if (pipe->plane_state && + (pipe->plane_state->dst_rect.width <= 16 || + pipe->plane_state->dst_rect.height <= 16 || + pipe->plane_state->src_rect.width <= 16 || + pipe->plane_state->src_rect.height <= 16)) + avoid_split = true; + + /* TODO: fix dc bugs and remove this split threshold thing */ + if (pipe->stream && !pipe->prev_odm_pipe && + (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) + ++plane_count; + } + if (plane_count > dc->res_pool->pipe_count / 2) + avoid_split = true; + + /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct dc_crtc_timing timing; + + if (!pipe->stream) + continue; + else { + timing = pipe->stream->timing; + if (timing.h_border_left + timing.h_border_right + + timing.v_border_top + timing.v_border_bottom > 0) { + avoid_split = true; + break; + } + } + } + + /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ + if (avoid_split) { + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) + if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 && + v->ModeSupport[vlevel][0]) + break; + /* Impossible to not split this pipe */ + if (vlevel > context->bw_ctx.dml.soc.num_states) + vlevel = vlevel_split; + else + max_mpc_comb = 0; + pipe_idx++; + } + v->maxMpcComb = max_mpc_comb; + } + + /* Split loop sets which pipe should be split based on dml outputs and dc flags */ + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + int pipe_plane = v->pipe_plane[pipe_idx]; + bool split4mpc = context->stream_count == 1 && plane_count == 1 + && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4; + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) + split[i] = 4; + else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) + split[i] = 2; + + if ((pipe->stream->view_format == + VIEW_3D_FORMAT_SIDE_BY_SIDE || + pipe->stream->view_format == + VIEW_3D_FORMAT_TOP_AND_BOTTOM) && + (pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_TOP_AND_BOTTOM || + pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_SIDE_BY_SIDE)) + split[i] = 2; + if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { + split[i] = 2; + v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; + } + if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) { + split[i] = 4; + v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1; + } + /*420 format workaround*/ + if (pipe->stream->timing.h_addressable > 7680 && + pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + split[i] = 4; + } + + v->ODMCombineEnabled[pipe_plane] = + v->ODMCombineEnablePerState[vlevel][pipe_plane]; + + if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { + if (dml_get_num_mpc_splits(pipe) == 1) { + /*If need split for mpc but 2 way split already*/ + if (split[i] == 4) + split[i] = 2; /* 2 -> 4 MPC */ + else if (split[i] == 2) + split[i] = 0; /* 2 -> 2 MPC */ + else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) + merge[i] = true; /* 2 -> 1 MPC */ + } else if (dml_get_num_mpc_splits(pipe) == 3) { + /*If need split for mpc but 4 way split already*/ + if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe) + || !pipe->bottom_pipe)) { + merge[i] = true; /* 4 -> 2 MPC */ + } else if (split[i] == 0 && pipe->top_pipe && + pipe->top_pipe->plane_state == pipe->plane_state) + merge[i] = true; /* 4 -> 1 MPC */ + split[i] = 0; + } else if (dml_get_num_mpc_splits(pipe)) { + /* ODM -> MPC transition */ + if (pipe->prev_odm_pipe) { + split[i] = 0; + merge[i] = true; + } + } + } else { + if (dml_get_num_mpc_splits(pipe) == 1) { + /*If need split for odm but 2 way split already*/ + if (split[i] == 4) + split[i] = 2; /* 2 -> 4 ODM */ + else if (split[i] == 2) + split[i] = 0; /* 2 -> 2 ODM */ + else if (pipe->prev_odm_pipe) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* exit ODM */ + } + } else if (dml_get_num_mpc_splits(pipe) == 3) { + /*If need split for odm but 4 way split already*/ + if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) + || !pipe->next_odm_pipe)) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* 4 -> 2 ODM */ + } else if (split[i] == 0 && pipe->prev_odm_pipe) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* exit ODM */ + } + split[i] = 0; + } else if (dml_get_num_mpc_splits(pipe)) { + /* MPC -> ODM transition */ + ASSERT(0); /* NOT expected yet */ + if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + split[i] = 0; + merge[i] = true; + } + } + } + + /* Adjust dppclk when split is forced, do not bother with dispclk */ + if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1) + v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; + pipe_idx++; + } + + return vlevel; +} + +static void dml_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream) +{ + // phantom_vactive = blackout (latency + margin) + fw_processing_delays + pstate allow width + uint32_t phantom_vactive_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us + 60 + + dc->caps.subvp_fw_processing_delay_us + + dc->caps.subvp_pstate_allow_width_us; + uint32_t phantom_vactive = ((double)phantom_vactive_us/1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + uint32_t phantom_bp = ref_pipe->pipe_dlg_param.vstartup_start; + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + phantom_stream->timing.v_front_porch = 1; + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; +} + +static struct dc_stream_state *dml_enable_phantom_stream(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe) +{ + struct dc_stream_state *phantom_stream = NULL; + + phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); + phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; + phantom_stream->dpms_off = true; + phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; + phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; + ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; + ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; + + /* stream has limited viewport and small timing */ + memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + dml_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream); + + dc_add_stream_to_ctx(dc, context, phantom_stream); + dc->hwss.apply_ctx_to_hw(dc, context); + return phantom_stream; +} + +static void dml_enable_phantom_plane(struct dc *dc, + struct dc_state *context, + struct dc_stream_state *phantom_stream, + struct pipe_ctx *main_pipe) +{ + struct dc_plane_state *phantom_plane = NULL; + struct dc_plane_state *prev_phantom_plane = NULL; + struct pipe_ctx *curr_pipe = main_pipe; + + while (curr_pipe) { + if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) + phantom_plane = prev_phantom_plane; + else + phantom_plane = dc_create_plane_state(dc); + + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, + sizeof(phantom_plane->tiling_info)); + memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); + /* Currently compat_level is undefined in dc_state + * phantom_plane->compat_level = curr_pipe->plane_state->compat_level; + */ + phantom_plane->format = curr_pipe->plane_state->format; + phantom_plane->rotation = curr_pipe->plane_state->rotation; + phantom_plane->visible = curr_pipe->plane_state->visible; + + /* Shadow pipe has small viewport. */ + phantom_plane->clip_rect.y = 0; + phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; + + dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); + + curr_pipe = curr_pipe->bottom_pipe; + prev_phantom_plane = phantom_plane; + } +} + +static void dml_add_phantom_pipes(struct dc *dc, struct dc_state *context) +{ + int i = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct dc_stream_state *ref_stream = pipe->stream; + // Only construct phantom stream for top pipes that have plane enabled + if (!pipe->top_pipe && pipe->plane_state && pipe->stream && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + struct dc_stream_state *phantom_stream = NULL; + + phantom_stream = dml_enable_phantom_stream(dc, context, pipe); + dml_enable_phantom_plane(dc, context, phantom_stream, pipe); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && pipe->stream && + pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipe->stream->use_dynamic_meta = false; + pipe->plane_state->flip_immediate = false; + if (!resource_build_scaling_params(pipe)) { + // Log / remove phantom pipes since failed to build scaling params + } + } + } +} + +static void dml_remove_phantom_pipes(struct dc *dc, struct dc_state *context) +{ + int i; + bool removed_pipe = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + // build scaling params for phantom pipes + if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + dc_rem_all_planes_for_stream(dc, pipe->stream, context); + dc_remove_stream_from_ctx(dc, context, pipe->stream); + removed_pipe = true; + } + + // Clear all phantom stream info + if (pipe->stream) { + pipe->stream->mall_stream_config.type = SUBVP_NONE; + pipe->stream->mall_stream_config.paired_stream = NULL; + } + } + if (removed_pipe) + dc->hwss.apply_ctx_to_hw(dc, context); +} + +/* + * If the input state contains no upstream planes for a particular pipe (i.e. only timing) + * we need to populate some "conservative" plane information as DML cannot handle "no planes" + */ +static void populate_default_plane_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_params_st *pipe) +{ + pipe->src.is_hsplit = pipe->dest.odm_combine != dm_odm_combine_mode_disabled; + pipe->src.source_scan = dm_horz; + pipe->src.sw_mode = dm_sw_4kb_s; + pipe->src.macro_tile_size = dm_64k_tile; + pipe->src.viewport_width = timing->h_addressable; + if (pipe->src.viewport_width > 1920) + pipe->src.viewport_width = 1920; + pipe->src.viewport_height = timing->v_addressable; + if (pipe->src.viewport_height > 1080) + pipe->src.viewport_height = 1080; + pipe->src.surface_height_y = pipe->src.viewport_height; + pipe->src.surface_width_y = pipe->src.viewport_width; + pipe->src.surface_height_c = pipe->src.viewport_height; + pipe->src.surface_width_c = pipe->src.viewport_width; + pipe->src.data_pitch = ((pipe->src.viewport_width + 255) / 256) * 256; + pipe->src.source_format = dm_444_32; + pipe->dest.recout_width = pipe->src.viewport_width; + pipe->dest.recout_height = pipe->src.viewport_height; + pipe->dest.full_recout_width = pipe->dest.recout_width; + pipe->dest.full_recout_height = pipe->dest.recout_height; + pipe->scale_ratio_depth.lb_depth = dm_lb_16; + pipe->scale_ratio_depth.hscl_ratio = 1.0; + pipe->scale_ratio_depth.vscl_ratio = 1.0; + pipe->scale_ratio_depth.scl_enable = 0; + pipe->scale_taps.htaps = 1; + pipe->scale_taps.vtaps = 1; + pipe->dest.vtotal_min = timing->v_total; + pipe->dest.vtotal_max = timing->v_total; + + if (pipe->dest.odm_combine == dm_odm_combine_mode_2to1) { + pipe->src.viewport_width /= 2; + pipe->dest.recout_width /= 2; + } else if (pipe->dest.odm_combine == dm_odm_combine_mode_4to1) { + pipe->src.viewport_width /= 4; + pipe->dest.recout_width /= 4; + } + + pipe->src.dcc = false; + pipe->src.dcc_rate = 1; +} + +/* + * If the pipe is not blending (i.e. pipe_ctx->top pipe == null) then its + * hsplit group is equal to its own pipe ID + * Otherwise, all pipes part of the same blending tree have the same hsplit group + * ID as the top most pipe + * + * If the pipe ctx is ODM combined, then similar logic follows + */ +static void populate_hsplit_group_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) +{ + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; + + if (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state + == dc_pipe_ctx->plane_state) { + struct pipe_ctx *first_pipe = dc_pipe_ctx->top_pipe; + int split_idx = 0; + + while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state + == dc_pipe_ctx->plane_state) { + first_pipe = first_pipe->top_pipe; + split_idx++; + } + + /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */ + if (split_idx == 0) + e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; + else if (split_idx == 1) + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; + else if (split_idx == 2) + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->top_pipe->pipe_idx; + + } else if (dc_pipe_ctx->prev_odm_pipe) { + struct pipe_ctx *first_pipe = dc_pipe_ctx->prev_odm_pipe; + + while (first_pipe->prev_odm_pipe) + first_pipe = first_pipe->prev_odm_pipe; + e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; + } +} + +static void populate_dml_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe, int always_scale) +{ + const struct dc_plane_state *pln = dc_pipe_ctx->plane_state; + const struct scaler_data *scl = &dc_pipe_ctx->plane_res.scl_data; + + e2e_pipe->pipe.src.immediate_flip = pln->flip_immediate; + e2e_pipe->pipe.src.is_hsplit = (dc_pipe_ctx->bottom_pipe && dc_pipe_ctx->bottom_pipe->plane_state == pln) + || (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state == pln) + || e2e_pipe->pipe.dest.odm_combine != dm_odm_combine_mode_disabled; + + /* stereo is not split */ + if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE || + pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) { + e2e_pipe->pipe.src.is_hsplit = false; + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; + } + + e2e_pipe->pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90 + || pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz; + e2e_pipe->pipe.src.viewport_y_y = scl->viewport.y; + e2e_pipe->pipe.src.viewport_y_c = scl->viewport_c.y; + e2e_pipe->pipe.src.viewport_width = scl->viewport.width; + e2e_pipe->pipe.src.viewport_width_c = scl->viewport_c.width; + e2e_pipe->pipe.src.viewport_height = scl->viewport.height; + e2e_pipe->pipe.src.viewport_height_c = scl->viewport_c.height; + e2e_pipe->pipe.src.viewport_width_max = pln->src_rect.width; + e2e_pipe->pipe.src.viewport_height_max = pln->src_rect.height; + e2e_pipe->pipe.src.surface_width_y = pln->plane_size.surface_size.width; + e2e_pipe->pipe.src.surface_height_y = pln->plane_size.surface_size.height; + e2e_pipe->pipe.src.surface_width_c = pln->plane_size.chroma_size.width; + e2e_pipe->pipe.src.surface_height_c = pln->plane_size.chroma_size.height; + + if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA + || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; + e2e_pipe->pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; + e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; + e2e_pipe->pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c; + } else { + e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; + e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; + } + e2e_pipe->pipe.src.dcc = pln->dcc.enable; + e2e_pipe->pipe.src.dcc_rate = 1; + e2e_pipe->pipe.dest.recout_width = scl->recout.width; + e2e_pipe->pipe.dest.recout_height = scl->recout.height; + e2e_pipe->pipe.dest.full_recout_height = scl->recout.height; + e2e_pipe->pipe.dest.full_recout_width = scl->recout.width; + if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_2to1) + e2e_pipe->pipe.dest.full_recout_width *= 2; + else if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_4to1) + e2e_pipe->pipe.dest.full_recout_width *= 4; + else { + struct pipe_ctx *split_pipe = dc_pipe_ctx->bottom_pipe; + + while (split_pipe && split_pipe->plane_state == pln) { + e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; + split_pipe = split_pipe->bottom_pipe; + } + split_pipe = dc_pipe_ctx->top_pipe; + while (split_pipe && split_pipe->plane_state == pln) { + e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; + split_pipe = split_pipe->top_pipe; + } + } + + e2e_pipe->pipe.scale_ratio_depth.lb_depth = dm_lb_16; + e2e_pipe->pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.scl_enable = + scl->ratios.vert.value != dc_fixpt_one.value + || scl->ratios.horz.value != dc_fixpt_one.value + || scl->ratios.vert_c.value != dc_fixpt_one.value + || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/ + || always_scale; /*support always scale*/ + e2e_pipe->pipe.scale_taps.htaps = scl->taps.h_taps; + e2e_pipe->pipe.scale_taps.htaps_c = scl->taps.h_taps_c; + e2e_pipe->pipe.scale_taps.vtaps = scl->taps.v_taps; + e2e_pipe->pipe.scale_taps.vtaps_c = scl->taps.v_taps_c; + + /* Currently compat_level is not defined. Commenting it until further resolution + * if (pln->compat_level == DC_LEGACY_TILING_ADDR_GEN_TWO) { + swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle, + &e2e_pipe->pipe.src.sw_mode); + e2e_pipe->pipe.src.macro_tile_size = + swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle); + } else { + gfx10array_mode_to_dml_params(pln->tiling_info.gfx10compatible.array_mode, + pln->compat_level, + &e2e_pipe->pipe.src.sw_mode); + e2e_pipe->pipe.src.macro_tile_size = dm_4k_tile; + }*/ + + e2e_pipe->pipe.src.source_format = dc_source_format_to_dml_source_format(pln->format); +} + +static void populate_dml_cursor_parameters_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) +{ + /* + * For graphic plane, cursor number is 1, nv12 is 0 + * bw calculations due to cursor on/off + */ + if (dc_pipe_ctx->plane_state && + (dc_pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || + dc_pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)) + e2e_pipe->pipe.src.num_cursors = 0; + else + e2e_pipe->pipe.src.num_cursors = 1; + + e2e_pipe->pipe.src.cur0_src_width = 256; + e2e_pipe->pipe.src.cur0_bpp = dm_cur_32bit; +} + +static int populate_dml_pipes_from_context_base( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int pipe_cnt, i; + bool synchronized_vblank = true; + struct resource_context *res_ctx = &context->res_ctx; + + for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + + if (pipe_cnt < 0) { + pipe_cnt = i; + continue; + } + + if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream) + continue; + + if (dc->debug.disable_timing_sync || + (!resource_are_streams_timing_synchronizable( + res_ctx->pipe_ctx[pipe_cnt].stream, + res_ctx->pipe_ctx[i].stream) && + !resource_are_vblanks_synchronizable( + res_ctx->pipe_ctx[pipe_cnt].stream, + res_ctx->pipe_ctx[i].stream))) { + synchronized_vblank = false; + break; + } + } + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; + + struct audio_check aud_check = {0}; + if (!res_ctx->pipe_ctx[i].stream) + continue; + + /* todo: + pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; + pipes[pipe_cnt].pipe.src.dcc = 0; + pipes[pipe_cnt].pipe.src.vm = 0;*/ + + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + + pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; + /* todo: rotation?*/ + pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; + if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) { + pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true; + /* 1/2 vblank */ + pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active = + (timing->v_total - timing->v_addressable + - timing->v_border_top - timing->v_border_bottom) / 2; + /* 36 bytes dp, 32 hdmi */ + pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes = + dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 36 : 32; + } + pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; + + dc_timing_to_dml_timing(timing, &pipes[pipe_cnt].pipe.dest); + pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; + pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; + + pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst; + + pipes[pipe_cnt].pipe.dest.odm_combine = get_dml_odm_combine(&res_ctx->pipe_ctx[i]); + + populate_hsplit_group_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); + + pipes[pipe_cnt].dout.dp_lanes = 4; + pipes[pipe_cnt].dout.is_virtual = 0; + pipes[pipe_cnt].dout.output_type = get_dml_output_type(res_ctx->pipe_ctx[i].stream->signal); + if (pipes[pipe_cnt].dout.output_type < 0) { + pipes[pipe_cnt].dout.output_type = dm_dp; + pipes[pipe_cnt].dout.is_virtual = 1; + } + + populate_color_depth_and_encoding_from_timing(&res_ctx->pipe_ctx[i].stream->timing, &pipes[pipe_cnt].dout); + + if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC) + pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0; + + /* todo: default max for now, until there is logic reflecting this in dc*/ + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + /*fill up the audio sample rate (unit in kHz)*/ + get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check); + pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000; + + populate_dml_cursor_parameters_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); + + if (!res_ctx->pipe_ctx[i].plane_state) { + populate_default_plane_from_timing(timing, &pipes[pipe_cnt].pipe); + } else { + populate_dml_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt], dc->debug.always_scale); + } + + pipe_cnt++; + } + + /* populate writeback information */ + if (dc->res_pool) + dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); + + return pipe_cnt; +} + +static int dml_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe; + + populate_dml_pipes_from_context_base(dc, context, pipes, fast_validate); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + pipes[pipe_cnt].pipe.src.gpuvm = true; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + pipe_cnt++; + } + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { + dc->config.enable_4to1MPC = true; + } else if (!is_dual_plane(pipe->plane_state->format)) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } + + return pipe_cnt; +} + +static void dml_full_validate_bw_helper(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *vlevel, + int *split, + bool *merge, + int *pipe_cnt) +{ + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh_and_mclk_switch; + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + /* This may adjust vlevel and maxMpcComb */ + if (*vlevel < context->bw_ctx.dml.soc.num_states) + *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + + /* Conditions for setting up phantom pipes for SubVP: + * 1. Not force disable SubVP + * 2. Full update (i.e. !fast_validate) + * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) + * 4. Display configuration passes validation + * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) + */ + if (!dc->debug.force_disable_subvp && + dml_enough_pipes_for_subvp(dc, context) && + *vlevel < context->bw_ctx.dml.soc.num_states && + (vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || + dc->debug.force_subvp_mclk_switch)) { + + dml_add_phantom_pipes(dc, context); + + /* Create input to DML based on new context which includes phantom pipes + * TODO: Input to DML should mark which pipes are phantom + */ + *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + if (*vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, MAX_PIPES * sizeof(*split)); + memset(merge, 0, MAX_PIPES * sizeof(*merge)); + *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + } + + // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) + // remove phantom pipes and repopulate dml pipes + if (*vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + dml_remove_phantom_pipes(dc, context); + *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); + } + } +} + +static void dcn20_adjust_adaptive_sync_v_startup( + const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start) +{ + struct dc_crtc_timing patched_crtc_timing; + uint32_t asic_blank_end = 0; + uint32_t asic_blank_start = 0; + uint32_t newVstartup = 0; + + patched_crtc_timing = *dc_crtc_timing; + + if (patched_crtc_timing.flags.INTERLACE == 1) { + if (patched_crtc_timing.v_front_porch < 2) + patched_crtc_timing.v_front_porch = 2; + } else { + if (patched_crtc_timing.v_front_porch < 1) + patched_crtc_timing.v_front_porch = 1; + } + + /* blank_start = frame end - front porch */ + asic_blank_start = patched_crtc_timing.v_total - + patched_crtc_timing.v_front_porch; + + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; + + newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); + + *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); +} + +static bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx) +{ + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal)); +} + +static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; +#if defined (CONFIG_DRM_AMD_DC_DP2_0) + if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) + return true; +#endif + } + return false; +} + +static void dml_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + } +} + +static bool dml_internal_validate( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + bool repopulate_pipes = false; + int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; + bool newly_split[MAX_PIPES] = { false }; + int pipe_cnt, i, pipe_idx, vlevel; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + ASSERT(pipes); + if (!pipes) + return false; + + // For each full update, remove all existing phantom pipes first + dml_remove_phantom_pipes(dc, context); + + dml_update_soc_for_wm_a(dc, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + // On initial pass through DML, we intend to use MALL for SS on all + // (non-PSR) surfaces with none using MALL for P-State + // 'mall_plane_config': is not a member of 'dc_plane_state' - commenting it out till mall_plane_config gets supported in dc_plant_state + //if (pipe->stream && pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) + // pipe->plane_state->mall_plane_config.use_mall_for_ss = true; + } + } + pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (!fast_validate) { + dml_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + } + + if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* + * If mode is unsupported or there's still no p-state support then + * fall back to favoring voltage. + * + * We don't actually support prefetch mode 2, so require that we + * at least support prefetch mode 1. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dml_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + } + + dml_log_mode_support_params(&context->bw_ctx.dml); + + if (vlevel == context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + + if (!pipe->stream) + continue; + + /* We only support full screen mpo with ODM */ + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_res.scl_data.recout, + &pipe->plane_res.scl_data.recout, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + + /* merge pipes if necessary */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /*skip pipes that don't need merging*/ + if (!merge[i]) + continue; + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + dml_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; + struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else + ASSERT(0); /* Should never try to merge master pipe */ + + } + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = NULL; + bool odm; + int old_index = -1; + + if (!pipe->stream || newly_split[i]) + continue; + + pipe_idx++; + odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; + + if (!pipe->plane_state && !odm) + continue; + + if (split[i]) { + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dml_find_split_pipe(dc, context, old_index); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) + goto validate_fail; + + if (!dml_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe, odm)) + goto validate_fail; + + newly_split[hsplit_pipe->pipe_idx] = true; + repopulate_pipes = true; + } + if (split[i] == 4) { + struct pipe_ctx *pipe_4to1; + + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dml_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dml_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + + if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe + && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dml_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dml_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + hsplit_pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + } + if (odm) + dml_build_mapped_resource(dc, context, pipe->stream); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!resource_build_scaling_params(pipe)) + goto validate_fail; + } + } + + /* Actual dsc count per stream dsc validation*/ + if (!dml_validate_dsc(dc, context)) { + vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + if (repopulate_pipes) + pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + *vlevel_out = vlevel; + *pipe_cnt_out = pipe_cnt; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + +static void dml_calculate_dlg_params( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx; + int plane_count; + + /* Writeback MCIF_WB arbitration parameters */ + if (dc->res_pool) + dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); + + context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; + context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; + context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; + context->bw_ctx.bw.dcn.clk.p_state_change_support = + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] + != dm_dram_clock_change_unsupported; + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + /* 'z9_support': is not a member of 'dc_clocks' - Commenting out till we have this support in dc_clocks + * context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? + DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; + */ + plane_count = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + /* Commented out as per above error for now. + if (plane_count == 0) + context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; + */ + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); + /* TODO : Uncomment the below line and make changes + * as per DML nomenclature once it is available. + * context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = context->bw_ctx.dml.vba.fclk_pstate_support; + */ + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[i].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes; + context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; + } + + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = + pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; + pipe_idx++; + } + /*save a original dppclock copy*/ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000; + context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes + - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2; + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].dlg_regs, + &context->res_ctx.pipe_ctx[i].ttu_regs, + pipes, + pipe_cnt, + pipe_idx, + cstate_en, + context->bw_ctx.bw.dcn.clk.p_state_change_support, + false, false, true); + + context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].rq_regs, + &pipes[pipe_idx].pipe); + pipe_idx++; + } +} + +static void dml_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx, vlevel_temp = 0; + + double dcfclk = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; + double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != + dm_dram_clock_change_unsupported; + + /* Set B: + * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, + * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark + * calculations to cover bootup clocks. + * DCFCLK: soc.clock_limits[2] when available + * UCLK: soc.clock_limits[2] when available + */ + if (context->bw_ctx.dml.soc.num_states > 2) { + vlevel_temp = 2; + dcfclk = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 4; + context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8; + + /* Set D: + * All clocks min. + * DCFCLK: Min, as reported by PM FW when available + * UCLK : Min, as reported by PM FW when available + * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) + */ + + if (context->bw_ctx.dml.soc.num_states > 2) { + vlevel_temp = 0; + dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 4; + context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8; + + /* Set C, for Dummy P-State: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK : Min, as reported by PM FW, when available + * pstate latency as per UCLK state dummy pstate latency + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + unsigned int min_dram_speed_mts_margin = 160; + + if ((!pstate_en)) + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + + /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ + for (i = 3; i > 0; i--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) + break; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 4; + context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8; + + if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { + /* The only difference between A and C is p-state latency, if p-state is not supported + * with full p-state latency we want to calculate DLG based on dummy p-state latency, + * Set A p-state watermark set to 0 previously, when p-state unsupported, for now keep as previous implementation. + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + } else { + /* Set A: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK: Min, as reported by PM FW, when available + */ + dml_update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + dml_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; +} + +bool dml_validate(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = context->bw_ctx.dml.dml_pipe_state; + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + out = dml_internal_validate(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + + if (pipe_cnt == 0) + goto validate_out; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + + dml_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + BW_VAL_TRACE_FINISH(); + + return out; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c new file mode 100644 index 000000000000..4ec5310a2962 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c @@ -0,0 +1,284 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifdef DML_WRAPPER_TRANSLATION_ + +static void gfx10array_mode_to_dml_params( + enum array_mode_values array_mode, + enum legacy_tiling_compat_level compat_level, + unsigned int *sw_mode) +{ + switch (array_mode) { + case DC_ARRAY_LINEAR_ALLIGNED: + case DC_ARRAY_LINEAR_GENERAL: + *sw_mode = dm_sw_linear; + break; + case DC_ARRAY_2D_TILED_THIN1: +// DC_LEGACY_TILING_ADDR_GEN_ZERO - undefined as per current code hence removed +#if 0 + if (compat_level == DC_LEGACY_TILING_ADDR_GEN_ZERO) + *sw_mode = dm_sw_gfx7_2d_thin_l_vp; + else + *sw_mode = dm_sw_gfx7_2d_thin_gl; +#endif + break; + default: + ASSERT(0); /* Not supported */ + break; + } +} + +static void swizzle_to_dml_params( + enum swizzle_mode_values swizzle, + unsigned int *sw_mode) +{ + switch (swizzle) { + case DC_SW_LINEAR: + *sw_mode = dm_sw_linear; + break; + case DC_SW_4KB_S: + *sw_mode = dm_sw_4kb_s; + break; + case DC_SW_4KB_S_X: + *sw_mode = dm_sw_4kb_s_x; + break; + case DC_SW_4KB_D: + *sw_mode = dm_sw_4kb_d; + break; + case DC_SW_4KB_D_X: + *sw_mode = dm_sw_4kb_d_x; + break; + case DC_SW_64KB_S: + *sw_mode = dm_sw_64kb_s; + break; + case DC_SW_64KB_S_X: + *sw_mode = dm_sw_64kb_s_x; + break; + case DC_SW_64KB_S_T: + *sw_mode = dm_sw_64kb_s_t; + break; + case DC_SW_64KB_D: + *sw_mode = dm_sw_64kb_d; + break; + case DC_SW_64KB_D_X: + *sw_mode = dm_sw_64kb_d_x; + break; + case DC_SW_64KB_D_T: + *sw_mode = dm_sw_64kb_d_t; + break; + case DC_SW_64KB_R_X: + *sw_mode = dm_sw_64kb_r_x; + break; + case DC_SW_VAR_S: + *sw_mode = dm_sw_var_s; + break; + case DC_SW_VAR_S_X: + *sw_mode = dm_sw_var_s_x; + break; + case DC_SW_VAR_D: + *sw_mode = dm_sw_var_d; + break; + case DC_SW_VAR_D_X: + *sw_mode = dm_sw_var_d_x; + break; + + default: + ASSERT(0); /* Not supported */ + break; + } +} + +static void dc_timing_to_dml_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_dest_params_st *dest) +{ + dest->hblank_start = timing->h_total - timing->h_front_porch; + dest->hblank_end = dest->hblank_start + - timing->h_addressable + - timing->h_border_left + - timing->h_border_right; + dest->vblank_start = timing->v_total - timing->v_front_porch; + dest->vblank_end = dest->vblank_start + - timing->v_addressable + - timing->v_border_top + - timing->v_border_bottom; + dest->htotal = timing->h_total; + dest->vtotal = timing->v_total; + dest->hactive = timing->h_addressable; + dest->vactive = timing->v_addressable; + dest->interlaced = timing->flags.INTERLACE; + dest->pixel_rate_mhz = timing->pix_clk_100hz/10000.0; + if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + dest->pixel_rate_mhz *= 2; +} + +static enum odm_combine_mode get_dml_odm_combine(const struct pipe_ctx *pipe) +{ + int odm_split_count = 0; + enum odm_combine_mode combine_mode = dm_odm_combine_mode_disabled; + struct pipe_ctx *next_pipe = pipe->next_odm_pipe; + + // Traverse pipe tree to determine odm split count + while (next_pipe) { + odm_split_count++; + next_pipe = next_pipe->next_odm_pipe; + } + pipe = pipe->prev_odm_pipe; + while (pipe) { + odm_split_count++; + pipe = pipe->prev_odm_pipe; + } + + // Translate split to DML odm combine factor + switch (odm_split_count) { + case 1: + combine_mode = dm_odm_combine_mode_2to1; + break; + case 3: + combine_mode = dm_odm_combine_mode_4to1; + break; + default: + combine_mode = dm_odm_combine_mode_disabled; + } + + return combine_mode; +} + +static int get_dml_output_type(enum signal_type dc_signal) +{ + int dml_output_type = -1; + + switch (dc_signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + dml_output_type = dm_dp; + break; + case SIGNAL_TYPE_EDP: + dml_output_type = dm_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + dml_output_type = dm_hdmi; + break; + default: + break; + } + + return dml_output_type; +} + +static void populate_color_depth_and_encoding_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_output_params_st *dout) +{ + int output_bpc = 0; + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + output_bpc = 6; + break; + case COLOR_DEPTH_888: + output_bpc = 8; + break; + case COLOR_DEPTH_101010: + output_bpc = 10; + break; + case COLOR_DEPTH_121212: + output_bpc = 12; + break; + case COLOR_DEPTH_141414: + output_bpc = 14; + break; + case COLOR_DEPTH_161616: + output_bpc = 16; + break; + case COLOR_DEPTH_999: + output_bpc = 9; + break; + case COLOR_DEPTH_111111: + output_bpc = 11; + break; + default: + output_bpc = 8; + break; + } + + switch (timing->pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + dout->output_format = dm_444; + dout->output_bpp = output_bpc * 3; + break; + case PIXEL_ENCODING_YCBCR420: + dout->output_format = dm_420; + dout->output_bpp = (output_bpc * 3.0) / 2; + break; + case PIXEL_ENCODING_YCBCR422: + if (timing->flags.DSC && !timing->dsc_cfg.ycbcr422_simple) + dout->output_format = dm_n422; + else + dout->output_format = dm_s422; + dout->output_bpp = output_bpc * 2; + break; + default: + dout->output_format = dm_444; + dout->output_bpp = output_bpc * 3; + } +} + +static enum source_format_class dc_source_format_to_dml_source_format(enum surface_pixel_format dc_format) +{ + enum source_format_class dml_format = dm_444_32; + + switch (dc_format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + dml_format = dm_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + dml_format = dm_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + dml_format = dm_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + dml_format = dm_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + dml_format = dm_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + dml_format = dm_rgbe_alpha; + break; + default: + dml_format = dm_444_32; + break; + } + + return dml_format; +} + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c index 3ee858f311d1..ec636d06e18c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c @@ -61,16 +61,6 @@ static double dsc_roundf(double num) return (int)(num); } -static double dsc_ceil(double num) -{ - double retval = (int)num; - - if (retval != num && num > 0) - retval = num + 1; - - return (int)retval; -} - static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp) { @@ -103,7 +93,7 @@ static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, TABLE_CASE(420, 12, min); } - if (table == 0) + if (!table) return; index = (bpp - table[0].bpp) * 2; @@ -268,24 +258,3 @@ void _do_calc_rc_params(struct rc_params *rc, rc->rc_buf_thresh[13] = 8064; } -u32 _do_bytes_per_pixel_calc(int slice_width, - u16 drm_bpp, - bool is_navite_422_or_420) -{ - float bpp; - u32 bytes_per_pixel; - double d_bytes_per_pixel; - - dc_assert_fp_enabled(); - - bpp = ((float)drm_bpp / 16.0); - d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; - // TODO: Make sure the formula for calculating this is precise (ceiling - // vs. floor, and at what point they should be applied) - if (is_navite_422_or_420) - d_bytes_per_pixel /= 2; - - bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000); - - return bytes_per_pixel; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h index b93b95409fbe..cad244c023cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h @@ -78,10 +78,6 @@ struct qp_entry { typedef struct qp_entry qp_table[]; -u32 _do_bytes_per_pixel_calc(int slice_width, - u16 drm_bpp, - bool is_navite_422_or_420); - void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 0321b4446e05..9c74564cbd8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -455,6 +455,7 @@ static bool intersect_dsc_caps( if (pixel_encoding == PIXEL_ENCODING_YCBCR422 || pixel_encoding == PIXEL_ENCODING_YCBCR420) dsc_common_caps->bpp_increment_div = min(dsc_common_caps->bpp_increment_div, (uint32_t)8); + dsc_common_caps->edp_sink_max_bits_per_pixel = dsc_sink_caps->edp_max_bits_per_pixel; dsc_common_caps->is_dp = dsc_sink_caps->is_dp; return true; } @@ -513,6 +514,13 @@ static bool decide_dsc_bandwidth_range( range->min_target_bpp_x16 = preferred_bpp_x16; } } + /* TODO - make this value generic to all signal types */ + else if (dsc_caps->edp_sink_max_bits_per_pixel) { + /* apply max bpp limitation from edp sink */ + range->max_target_bpp_x16 = MIN(dsc_caps->edp_sink_max_bits_per_pixel, + max_bpp_x16); + range->min_target_bpp_x16 = min_bpp_x16; + } else { range->max_target_bpp_x16 = max_bpp_x16; range->min_target_bpp_x16 = min_bpp_x16; @@ -574,7 +582,7 @@ static bool decide_dsc_target_bpp_x16( return *target_bpp_x16 != 0; } -#define MIN_AVAILABLE_SLICES_SIZE 4 +#define MIN_AVAILABLE_SLICES_SIZE 6 static int get_available_dsc_slices(union dsc_enc_slice_caps slice_caps, int *available_slices) { @@ -860,6 +868,10 @@ static bool setup_dsc_config( min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? is_dsc_possible = (min_slices_h <= max_slices_h); + + if (min_slices_h == 0 && max_slices_h == 0) + is_dsc_possible = false; + if (!is_dsc_possible) goto done; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index b19d3aeb5962..e97cf09be9d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -60,31 +60,3 @@ void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps) pps->dsc_version_minor); DC_FP_END(); } - -/** - * calc_dsc_bytes_per_pixel - calculate bytes per pixel - * @pps: DRM struct with all required DSC values - * - * Based on the information inside drm_dsc_config, this function calculates the - * total of bytes per pixel. - * - * @note This calculation requires float point operation, most of it executes - * under kernel_fpu_{begin,end}. - * - * Return: - * Return the number of bytes per pixel - */ -u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) - -{ - u32 ret; - u16 drm_bpp = pps->bits_per_pixel; - int slice_width = pps->slice_width; - bool is_navite_422_or_420 = pps->native_422 || pps->native_420; - - DC_FP_START(); - ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp, - is_navite_422_or_420); - DC_FP_END(); - return ret; -} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h index c2340e001b57..80921c1c0d53 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h @@ -30,7 +30,6 @@ #include "dml/dsc/rc_calc_fpu.h" void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps); -u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c index 1e19dd674e5a..7e306aa3e2b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c @@ -100,8 +100,7 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_par int ret; struct rc_params rc; struct drm_dsc_config dsc_cfg; - - dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps); + unsigned long long tmp; calc_rc_params(&rc, pps); dsc_params->pps = *pps; @@ -113,6 +112,9 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_par dsc_cfg.mux_word_size = dsc_params->pps.bits_per_component <= 10 ? 48 : 64; ret = drm_dsc_compute_rc_parameters(&dsc_cfg); + tmp = (unsigned long long)dsc_cfg.slice_chunk_size * 0x10000000 + (dsc_cfg.slice_width - 1); + do_div(tmp, (uint32_t)dsc_cfg.slice_width); //ROUND-UP + dsc_params->bytes_per_pixel = (uint32_t)tmp; copy_pps_fields(&dsc_params->pps, &dsc_cfg); dsc_params->rc_buffer_model_size = dsc_cfg.rc_bits; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index d34b0b0eea65..444182a97e6e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -53,6 +53,8 @@ enum dc_status { DC_NOT_SUPPORTED = 24, DC_UNSUPPORTED_VALUE = 25, + DC_NO_LINK_ENC_RESOURCE = 26, + DC_ERROR_UNEXPECTED = -1 }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 6fc6488c54c0..890280026e69 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -334,6 +334,20 @@ struct plane_resource { struct dcn_fe_bandwidth bw; }; +#if defined(CONFIG_DRM_AMD_DC_DCN) +#define LINK_RES_HPO_DP_REC_MAP__MASK 0xFFFF +#define LINK_RES_HPO_DP_REC_MAP__SHIFT 0 +#endif + +/* all mappable hardware resources used to enable a link */ +struct link_resource { +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct hpo_dp_link_encoder *hpo_dp_link_enc; +#else + void *dummy; +#endif +}; + union pipe_update_flags { struct { uint32_t enable : 1; @@ -361,6 +375,7 @@ struct pipe_ctx { struct plane_resource plane_res; struct stream_resource stream_res; + struct link_resource link_res; struct clock_source *clock_source; @@ -411,6 +426,8 @@ struct resource_context { struct link_enc_cfg_context link_enc_cfg_ctx; #if defined(CONFIG_DRM_AMD_DC_DCN) bool is_hpo_dp_stream_enc_acquired[MAX_HPO_DP2_ENCODERS]; + unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; + int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; #endif #if defined(CONFIG_DRM_AMD_DC_DCN) bool is_mpc_3dlut_acquired[MAX_PIPES]; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index a6d3d859754a..cd52813a8432 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -56,16 +56,19 @@ enum { bool dp_verify_link_cap( struct dc_link *link, + const struct link_resource *link_res, struct dc_link_settings *known_limit_link_setting, int *fail_count); bool dp_verify_link_cap_with_retries( struct dc_link *link, + const struct link_resource *link_res, struct dc_link_settings *known_limit_link_setting, int attempts); bool dp_verify_mst_link_cap( - struct dc_link *link); + struct dc_link *link, + const struct link_resource *link_res); bool dp_validate_mode_timing( struct dc_link *link, @@ -168,8 +171,9 @@ uint8_t dc_dp_initialize_scrambling_data_symbols( struct dc_link *link, enum dc_dp_training_pattern pattern); -enum dc_status dp_set_fec_ready(struct dc_link *link, bool ready); +enum dc_status dp_set_fec_ready(struct dc_link *link, const struct link_resource *link_res, bool ready); void dp_set_fec_enable(struct dc_link *link, bool enable); +struct link_encoder *dp_get_link_enc(struct dc_link *link); bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable); bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable, bool immediate_update); void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable); @@ -210,11 +214,16 @@ bool dpcd_poll_for_allocation_change_trigger(struct dc_link *link); struct fixed31_32 calculate_sst_avg_time_slots_per_mtp( const struct dc_stream_state *stream, const struct dc_link *link); -void enable_dp_hpo_output(struct dc_link *link, const struct dc_link_settings *link_settings); -void disable_dp_hpo_output(struct dc_link *link, enum signal_type signal); +void enable_dp_hpo_output(struct dc_link *link, + const struct link_resource *link_res, + const struct dc_link_settings *link_settings); +void disable_dp_hpo_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal); void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable); bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx); void reset_dp_hpo_stream_encoders_for_link(struct dc_link *link); bool dp_retrieve_lttpr_cap(struct dc_link *link); +void edp_panel_backlight_power_on(struct dc_link *link); #endif /* __DC_LINK_DP_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h index 974d703e3771..74dafd0f9d3d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h @@ -91,8 +91,9 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link); * DPIA equivalent of dc_link_dp_perfrorm_link_training. * Aborts link training upon detection of sink unplug. */ -enum link_training_result -dc_link_dpia_perform_link_training(struct dc_link *link, +enum link_training_result dc_link_dpia_perform_link_training( + struct dc_link *link, + const struct link_resource *link_res, const struct dc_link_settings *link_setting, bool skip_video_pattern); diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index 806f3041db14..337c0161e72d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -619,7 +619,7 @@ struct dcn_ip_params { }; extern const struct dcn_ip_params dcn10_ip_defaults; -bool dcn_validate_bandwidth( +bool dcn10_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h b/drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h new file mode 100644 index 000000000000..5dcfbd8e2697 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h @@ -0,0 +1,34 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef DML_WRAPPER_H_ +#define DML_WRAPPER_H_ + +#include "dc.h" +#include "dml/display_mode_vba.h" + +bool dml_validate(struct dc *dc, struct dc_state *context, bool fast_validate); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index a17e5de3b100..c920c4b6077d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -211,6 +211,8 @@ struct dummy_pstate_entry { struct clk_bw_params { unsigned int vram_type; unsigned int num_channels; + unsigned int dispclk_vco_khz; + unsigned int dc_mode_softmax_memclk; struct clk_limit_table clk_table; struct wm_table wm_table; struct dummy_pstate_entry dummy_pstate_table[4]; @@ -261,6 +263,10 @@ struct clk_mgr_funcs { /* Send message to PMFW to set hard max memclk frequency to highest DPM */ void (*set_hard_max_memclk)(struct clk_mgr *clk_mgr); + /* Custom set a memclk freq range*/ + void (*set_max_memclk)(struct clk_mgr *clk_mgr, unsigned int memclk_mhz); + void (*set_min_memclk)(struct clk_mgr *clk_mgr, unsigned int memclk_mhz); + /* Get current memclk states from PMFW, update relevant structures */ void (*get_memclk_states_from_smu)(struct clk_mgr *clk_mgr); @@ -274,6 +280,7 @@ struct clk_mgr { struct dc_clocks clks; bool psr_allow_active_cache; bool force_smu_not_present; + bool dc_mode_softmax_enabled; int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes int dentist_vco_freq_khz; struct clk_state_registers_and_bypass boot_snapshot; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index f94135c6e3c2..346f0ba73e86 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -61,6 +61,8 @@ struct dcn_dsc_state { uint32_t dsc_pic_height; uint32_t dsc_slice_bpg_offset; uint32_t dsc_chunk_size; + uint32_t dsc_fw_en; + uint32_t dsc_opp_source; }; @@ -88,6 +90,7 @@ struct dsc_enc_caps { int32_t max_total_throughput_mps; /* Maximum total throughput with all the slices combined */ int32_t max_slice_width; uint32_t bpp_increment_div; /* bpp increment divisor, e.g. if 16, it's 1/16th of a bit */ + uint32_t edp_sink_max_bits_per_pixel; bool is_dp; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 80e1a32bc63d..2c031586f4e6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -139,6 +139,7 @@ struct hubp_funcs { bool (*hubp_is_flip_pending)(struct hubp *hubp); void (*set_blank)(struct hubp *hubp, bool blank); + void (*set_blank_regs)(struct hubp *hubp, bool blank); void (*set_hubp_blank_en)(struct hubp *hubp, bool blank); void (*set_cursor_attributes)( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index bb0e91756ddd..2ce15cd10d80 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -268,7 +268,8 @@ struct hpo_dp_link_encoder_funcs { void (*enable_link_phy)(struct hpo_dp_link_encoder *enc, const struct dc_link_settings *link_settings, - enum transmitter transmitter); + enum transmitter transmitter, + enum hpd_source_id hpd_source); void (*disable_link_phy)(struct hpo_dp_link_encoder *link_enc, enum signal_type signal); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index c88e113b94d1..073f8b667eff 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -164,6 +164,10 @@ struct stream_encoder_funcs { void (*stop_dp_info_packets)( struct stream_encoder *enc); + void (*reset_fifo)( + struct stream_encoder *enc + ); + void (*dp_blank)( struct dc_link *link, struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 7390baf916b5..c29320b3855d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -290,6 +290,8 @@ struct timing_generator_funcs { enum optc_dsc_mode dsc_mode, uint32_t dsc_bytes_per_pixel, uint32_t dsc_slice_width); + void (*get_dsc_status)(struct timing_generator *optc, + uint32_t *dsc_mode); void (*set_odm_bypass)(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing); void (*set_odm_combine)(struct timing_generator *optc, int *opp_id, int opp_cnt, struct dc_crtc_timing *timing); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index d50f4bd06b5d..05053f3b4ab7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -64,6 +64,7 @@ struct hw_sequencer_funcs { enum dc_status (*apply_ctx_to_hw)(struct dc *dc, struct dc_state *context); void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank); void (*apply_ctx_for_surface)(struct dc *dc, const struct dc_stream_state *stream, int num_planes, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h index 10dcf6a5e9b1..a4e43b4826e0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h @@ -36,7 +36,7 @@ * Initialise link encoder resource tracking. */ void link_enc_cfg_init( - struct dc *dc, + const struct dc *dc, struct dc_state *state); /* diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h index ba664bc49595..69d63763a10e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h @@ -32,6 +32,7 @@ struct gpio *get_hpd_gpio(struct dc_bios *dcb, void dp_enable_link_phy( struct dc_link *link, + const struct link_resource *link_res, enum signal_type signal, enum clock_source_id clock_source, const struct dc_link_settings *link_settings); @@ -42,22 +43,27 @@ void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); bool edp_receiver_ready_T7(struct dc_link *link); -void dp_disable_link_phy(struct dc_link *link, enum signal_type signal); +void dp_disable_link_phy(struct dc_link *link, const struct link_resource *link_res, + enum signal_type signal); -void dp_disable_link_phy_mst(struct dc_link *link, enum signal_type signal); +void dp_disable_link_phy_mst(struct dc_link *link, const struct link_resource *link_res, + enum signal_type signal); bool dp_set_hw_training_pattern( struct dc_link *link, + const struct link_resource *link_res, enum dc_dp_training_pattern pattern, uint32_t offset); void dp_set_hw_lane_settings( struct dc_link *link, + const struct link_resource *link_res, const struct link_training_settings *link_settings, uint32_t offset); void dp_set_hw_test_pattern( struct dc_link *link, + const struct link_resource *link_res, enum dp_test_pattern test_pattern, uint8_t *custom_pattern, uint32_t custom_pattern_size); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 372c0898facd..4249bf306e09 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -202,8 +202,12 @@ int get_num_mpc_splits(struct pipe_ctx *pipe); int get_num_odm_splits(struct pipe_ctx *pipe); #if defined(CONFIG_DRM_AMD_DC_DCN) -struct hpo_dp_link_encoder *resource_get_unused_hpo_dp_link_encoder( - const struct resource_pool *pool); +struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( + const struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct dc_link *link); #endif +uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter); + #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c index 378cc11aa047..6b5fedd9ace0 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c @@ -185,16 +185,18 @@ bool dal_irq_service_dummy_set(struct irq_service *irq_service, const struct irq_source_info *info, bool enable) { - DC_LOG_ERROR("%s: called for non-implemented irq source\n", - __func__); + DC_LOG_ERROR("%s: called for non-implemented irq source, src_id=%u, ext_id=%u\n", + __func__, info->src_id, info->ext_id); + return false; } bool dal_irq_service_dummy_ack(struct irq_service *irq_service, const struct irq_source_info *info) { - DC_LOG_ERROR("%s: called for non-implemented irq source\n", - __func__); + DC_LOG_ERROR("%s: called for non-implemented irq source, src_id=%u, ext_id=%u\n", + __func__, info->src_id, info->ext_id); + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c index 34f43cb650f8..cf072e2347d3 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c @@ -40,10 +40,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn10( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn10(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c index a47f68634fc3..aa708b61142f 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c @@ -39,10 +39,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn201( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn201(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c index 78940cb20e10..235294534c43 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c @@ -40,10 +40,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn21( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c index 38e0ade60c7b..1b88e4e627fd 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c @@ -36,10 +36,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn31( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn31(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index cd204eef073b..83855b8a32e9 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -360,6 +360,8 @@ struct dmub_srv_hw_funcs { uint32_t (*get_gpint_dataout)(struct dmub_srv *dmub); + void (*clear_inbox0_ack_register)(struct dmub_srv *dmub); + uint32_t (*read_inbox0_ack_register)(struct dmub_srv *dmub); void (*send_inbox0_cmd)(struct dmub_srv *dmub, union dmub_inbox0_data_register data); uint32_t (*get_current_time)(struct dmub_srv *dmub); @@ -409,6 +411,7 @@ struct dmub_srv { struct dmub_srv_base_funcs funcs; struct dmub_srv_hw_funcs hw_funcs; struct dmub_rb inbox1_rb; + uint32_t inbox1_last_wptr; /** * outbox1_rb is accessed without locks (dal & dc) * and to be used only in dmub_srv_stat_get_notification() @@ -735,6 +738,45 @@ bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_ bool dmub_srv_should_detect(struct dmub_srv *dmub); +/** + * dmub_srv_send_inbox0_cmd() - Send command to DMUB using INBOX0 + * @dmub: the dmub service + * @data: the data to be sent in the INBOX0 command + * + * Send command by writing directly to INBOX0 WPTR + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - hw_init false or hw function does not exist + */ +enum dmub_status dmub_srv_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data); + +/** + * dmub_srv_wait_for_inbox0_ack() - wait for DMUB to ACK INBOX0 command + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * + * Wait for DMUB to ACK the INBOX0 message + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - hw_init false or hw function does not exist + * DMUB_STATUS_TIMEOUT - wait for ack timed out + */ +enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t timeout_us); + +/** + * dmub_srv_wait_for_inbox0_ack() - clear ACK register for INBOX0 + * @dmub: the dmub service + * + * Clear ACK register for INBOX0 + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - hw_init false or hw function does not exist + */ +enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); + #if defined(__cplusplus) } #endif diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c29a67ccef17..873ecd04e01d 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -46,10 +46,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x1d82d23e +#define DMUB_FW_VERSION_GIT_HASH 0xbaf06b95 #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 91 +#define DMUB_FW_VERSION_REVISION 98 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 @@ -173,13 +173,6 @@ extern "C" { #endif /** - * Number of nanoseconds per DMUB tick. - * DMCUB_TIMER_CURRENT increments in DMUB ticks, which are 10ns by default. - * If DMCUB_TIMER_WINDOW is non-zero this will no longer be true. - */ -#define NS_PER_DMUB_TICK 10 - -/** * union dmub_addr - DMUB physical/virtual 64-bit address. */ union dmub_addr { @@ -208,10 +201,9 @@ union dmub_psr_debug_flags { uint32_t use_hw_lock_mgr : 1; /** - * Unused. - * TODO: Remove. + * Use TPS3 signal when restore main link. */ - uint32_t log_line_nums : 1; + uint32_t force_wakeup_by_tps3 : 1; } bitfields; /** @@ -416,7 +408,14 @@ enum dmub_cmd_vbios_type { * Enables or disables power gating. */ DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING = 3, + /** + * Controls embedded panels. + */ DMUB_CMD__VBIOS_LVTMA_CONTROL = 15, + /** + * Query DP alt status on a transmitter. + */ + DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT = 26, }; //============================================================================== @@ -1550,10 +1549,14 @@ struct dmub_cmd_psr_copy_settings_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; + /* + * DSC enable status in driver + */ + uint8_t dsc_enable_status; /** - * Explicit padding to 4 byte boundary. + * Explicit padding to 3 byte boundary. */ - uint8_t pad3[4]; + uint8_t pad3[3]; }; /** @@ -2398,6 +2401,24 @@ struct dmub_rb_cmd_lvtma_control { }; /** + * Data passed in/out in a DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT command. + */ +struct dmub_rb_cmd_transmitter_query_dp_alt_data { + uint8_t phy_id; /**< 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4=UNIPHYE, 5=UNIPHYF */ + uint8_t is_usb; /**< is phy is usb */ + uint8_t is_dp_alt_disable; /**< is dp alt disable */ + uint8_t is_dp4; /**< is dp in 4 lane */ +}; + +/** + * Definition of a DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT command. + */ +struct dmub_rb_cmd_transmitter_query_dp_alt { + struct dmub_cmd_header header; /**< header */ + struct dmub_rb_cmd_transmitter_query_dp_alt_data data; /**< payload */ +}; + +/** * Maximum number of bytes a chunk sent to DMUB for parsing */ #define DMUB_EDID_CEA_DATA_CHUNK_BYTES 8 @@ -2408,7 +2429,7 @@ struct dmub_rb_cmd_lvtma_control { struct dmub_cmd_send_edid_cea { uint16_t offset; /**< offset into the CEA block */ uint8_t length; /**< number of bytes in payload to copy as part of CEA block */ - uint16_t total_length; /**< total length of the CEA block */ + uint16_t cea_total_length; /**< total length of the CEA block */ uint8_t payload[DMUB_EDID_CEA_DATA_CHUNK_BYTES]; /**< data chunk of the CEA block */ uint8_t pad[3]; /**< padding and for future expansion */ }; @@ -2605,6 +2626,10 @@ union dmub_rb_cmd { */ struct dmub_rb_cmd_lvtma_control lvtma_control; /** + * Definition of a DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT command. + */ + struct dmub_rb_cmd_transmitter_query_dp_alt query_dp_alt; + /** * Definition of a DMUB_CMD__DPIA_DIG1_CONTROL command. */ struct dmub_rb_cmd_dig1_dpia_control dig1_dpia_control; @@ -2722,7 +2747,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const union dmub_rb_cmd *cmd) { - uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); + uint64_t volatile *dst = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->wrpt); const uint64_t *src = (const uint64_t *)cmd; uint8_t i; @@ -2840,7 +2865,7 @@ static inline bool dmub_rb_peek_offset(struct dmub_rb *rb, static inline bool dmub_rb_out_front(struct dmub_rb *rb, union dmub_rb_out_cmd *cmd) { - const uint64_t volatile *src = (const uint64_t volatile *)(rb->base_address) + rb->rptr / sizeof(uint64_t); + const uint64_t volatile *src = (const uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->rptr); uint64_t *dst = (uint64_t *)cmd; uint8_t i; @@ -2888,7 +2913,7 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint32_t wptr = rb->wrpt; while (rptr != wptr) { - uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t); + uint64_t volatile *data = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rptr); //uint64_t volatile *p = (uint64_t volatile *)data; uint64_t temp; uint8_t i; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 56d400ffa7ac..9280f2abd973 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -100,24 +100,9 @@ void dmub_flush_buffer_mem(const struct dmub_fb *fb) } static const struct dmub_fw_meta_info * -dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) +dmub_get_fw_meta_info_from_blob(const uint8_t *blob, uint32_t blob_size, uint32_t meta_offset) { const union dmub_fw_meta *meta; - const uint8_t *blob = NULL; - uint32_t blob_size = 0; - uint32_t meta_offset = 0; - - if (params->fw_bss_data && params->bss_data_size) { - /* Legacy metadata region. */ - blob = params->fw_bss_data; - blob_size = params->bss_data_size; - meta_offset = DMUB_FW_META_OFFSET; - } else if (params->fw_inst_const && params->inst_const_size) { - /* Combined metadata region. */ - blob = params->fw_inst_const; - blob_size = params->inst_const_size; - meta_offset = 0; - } if (!blob || !blob_size) return NULL; @@ -134,6 +119,32 @@ dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) return &meta->info; } +static const struct dmub_fw_meta_info * +dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) +{ + const struct dmub_fw_meta_info *info = NULL; + + if (params->fw_bss_data && params->bss_data_size) { + /* Legacy metadata region. */ + info = dmub_get_fw_meta_info_from_blob(params->fw_bss_data, + params->bss_data_size, + DMUB_FW_META_OFFSET); + } else if (params->fw_inst_const && params->inst_const_size) { + /* Combined metadata region - can be aligned to 16-bytes. */ + uint32_t i; + + for (i = 0; i < 16; ++i) { + info = dmub_get_fw_meta_info_from_blob( + params->fw_inst_const, params->inst_const_size, i); + + if (info) + break; + } + } + + return info; +} + static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) { struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; @@ -598,6 +609,8 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) { + struct dmub_rb flush_rb; + if (!dmub->hw_init) return DMUB_STATUS_INVALID; @@ -606,9 +619,14 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) * been flushed to framebuffer memory. Otherwise DMCUB might * read back stale, fully invalid or partially invalid data. */ - dmub_rb_flush_pending(&dmub->inbox1_rb); + flush_rb = dmub->inbox1_rb; + flush_rb.rptr = dmub->inbox1_last_wptr; + dmub_rb_flush_pending(&flush_rb); + + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); + + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); return DMUB_STATUS_OK; } @@ -831,3 +849,38 @@ bool dmub_srv_should_detect(struct dmub_srv *dmub) return dmub->hw_funcs.should_detect(dmub); } + +enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub) +{ + if (!dmub->hw_init || !dmub->hw_funcs.clear_inbox0_ack_register) + return DMUB_STATUS_INVALID; + + dmub->hw_funcs.clear_inbox0_ack_register(dmub); + return DMUB_STATUS_OK; +} + +enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t timeout_us) +{ + uint32_t i = 0; + uint32_t ack = 0; + + if (!dmub->hw_init || !dmub->hw_funcs.read_inbox0_ack_register) + return DMUB_STATUS_INVALID; + + for (i = 0; i <= timeout_us; i++) { + ack = dmub->hw_funcs.read_inbox0_ack_register(dmub); + if (ack) + return DMUB_STATUS_OK; + } + return DMUB_STATUS_TIMEOUT; +} + +enum dmub_status dmub_srv_send_inbox0_cmd(struct dmub_srv *dmub, + union dmub_inbox0_data_register data) +{ + if (!dmub->hw_init || !dmub->hw_funcs.send_inbox0_cmd) + return DMUB_STATUS_INVALID; + + dmub->hw_funcs.send_inbox0_cmd(dmub, data); + return DMUB_STATUS_OK; +} diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h b/drivers/gpu/drm/amd/display/include/ddc_service_types.h index 4de59b66bb1a..a2b80514d83e 100644 --- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h +++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h @@ -35,6 +35,7 @@ #define DP_BRANCH_DEVICE_ID_00E04C 0x00E04C #define DP_BRANCH_DEVICE_ID_006037 0x006037 +#define DP_DEVICE_ID_38EC11 0x38EC11 enum ddc_result { DDC_RESULT_UNKNOWN = 0, DDC_RESULT_SUCESSFULL, @@ -117,4 +118,7 @@ struct av_sync_data { uint8_t aud_del_ins3;/* DPCD 0002Dh */ }; +static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3, 0}; +static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5, 0}; + #endif /* __DAL_DDC_SERVICE_TYPES_H__ */ diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 370fad883e33..f093b49c5e6e 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -72,9 +72,7 @@ #define DC_LOG_DSC(...) DRM_DEBUG_KMS(__VA_ARGS__) #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) #define DC_LOG_DWB(...) DRM_DEBUG_KMS(__VA_ARGS__) -#if defined(CONFIG_DRM_AMD_DC_DCN) #define DC_LOG_DP2(...) DRM_DEBUG_KMS(__VA_ARGS__) -#endif struct dal_logger; @@ -126,9 +124,7 @@ enum dc_log_type { LOG_MAX_HW_POINTS, LOG_ALL_TF_CHANNELS, LOG_SAMPLE_1DLUT, -#if defined(CONFIG_DRM_AMD_DC_DCN) LOG_DP2, -#endif LOG_SECTION_TOTAL_COUNT }; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index 6d648c889866..f7420c3f5672 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -104,6 +104,7 @@ struct mod_hdcp_displayport { uint8_t rev; uint8_t assr_enabled; uint8_t mst_enabled; + uint8_t usb4_enabled; }; struct mod_hdcp_hdmi { @@ -249,7 +250,6 @@ struct mod_hdcp_link { uint8_t ddc_line; uint8_t link_enc_idx; uint8_t phy_idx; - uint8_t dio_output_type; uint8_t dio_output_id; uint8_t hdcp_supported_informational; union { diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 4b9e68a79f06..f57a1478f0fe 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -231,6 +231,8 @@ enum DC_FEATURE_MASK { DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1 DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default + DC_DISABLE_LTTPR_DP1_4A = (1 << 5), //0x20, disabled by default + DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default }; enum DC_DEBUG_MASK { diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h index 6d0052ce6bed..da6d380c948b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h @@ -354,5 +354,12 @@ #define mmMP1_SMN_EXT_SCRATCH7 0x03c7 #define mmMP1_SMN_EXT_SCRATCH7_BASE_IDX 0 +/* + * addressBlock: mp_SmuMp1Pub_MmuDec + * base address: 0x0 + */ +#define smnMP1_PMI_3_START 0x3030204 +#define smnMP1_PMI_3_FIFO 0x3030208 +#define smnMP1_PMI_3 0x3030600 #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h index 136fb5de6a4c..a5ae2a801254 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h @@ -959,5 +959,17 @@ #define MP1_SMN_EXT_SCRATCH7__DATA__SHIFT 0x0 #define MP1_SMN_EXT_SCRATCH7__DATA_MASK 0xFFFFFFFFL +// MP1_PMI_3_START +#define MP1_PMI_3_START__ENABLE_MASK 0x80000000L +// MP1_PMI_3_FIFO +#define MP1_PMI_3_FIFO__DEPTH_MASK 0x00000fffL + +// MP1_PMI_3_START +#define MP1_PMI_3_START__ENABLE__SHIFT 0x0000001f +// MP1_PMI_3_FIFO +#define MP1_PMI_3_FIFO__DEPTH__SHIFT 0x00000000 + + + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_offset.h index 79eae0256dbd..8072b0a6376d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_offset.h @@ -20753,8 +20753,6 @@ // addressBlock: nbio_nbif0_gdc_GDCDEC // base address: 0xd0000000 -#define regGDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL 0x2ffc0eda -#define regGDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL_BASE_IDX 5 #define regGDC1_NGDC_SDP_PORT_CTRL 0x2ffc0ee2 #define regGDC1_NGDC_SDP_PORT_CTRL_BASE_IDX 5 #define regGDC1_SHUB_REGS_IF_CTL 0x2ffc0ee3 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_sh_mask.h index e27fdc0c643c..54b0e4623971 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_sh_mask.h @@ -1,4 +1,3 @@ - /* * Copyright (C) 2020 Advanced Micro Devices, Inc. * @@ -108541,17 +108540,6 @@ // addressBlock: nbio_nbif0_gdc_GDCDEC -//GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN0_FAST_WRITE_RESPONSE_EN__SHIFT 0x0 -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN1_FAST_WRITE_RESPONSE_EN__SHIFT 0x1 -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN2_FAST_WRITE_RESPONSE_EN__SHIFT 0x2 -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN3_FAST_WRITE_RESPONSE_EN__SHIFT 0x3 -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__FWR_NORMAL_ARB_MODE__SHIFT 0x10 -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN0_FAST_WRITE_RESPONSE_EN_MASK 0x00000001L -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN1_FAST_WRITE_RESPONSE_EN_MASK 0x00000002L -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN2_FAST_WRITE_RESPONSE_EN_MASK 0x00000004L -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__LOGAN3_FAST_WRITE_RESPONSE_EN_MASK 0x00000008L -#define GDC1_LOGAN_FAST_WRITE_RESPONSE_CNTL__FWR_NORMAL_ARB_MODE_MASK 0x00010000L //GDC1_NGDC_SDP_PORT_CTRL #define GDC1_NGDC_SDP_PORT_CTRL__SDP_DISCON_HYSTERESIS__SHIFT 0x0 #define GDC1_NGDC_SDP_PORT_CTRL__NGDC_OBFF_HW_URGENT_EARLY_WAKEUP_EN__SHIFT 0xf diff --git a/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h index 9cb5f3631c60..ce79e5de8ce3 100644 --- a/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h +++ b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h @@ -25,15 +25,15 @@ #define MAX_SEGMENT 5 -struct IP_BASE_INSTANCE +struct IP_BASE_INSTANCE { unsigned int segment[MAX_SEGMENT]; -}; - -struct IP_BASE +} __maybe_unused; + +struct IP_BASE { struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; -}; +} __maybe_unused; static const struct IP_BASE ATHUB_BASE ={ { { { 0x00000C00, 0, 0, 0, 0 } }, diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 7ec4331e67f2..a486769b66c6 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -143,6 +143,55 @@ struct gc_info_v1_0 { uint32_t gc_num_gl2a; }; +struct gc_info_v1_1 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; +}; + +struct gc_info_v2_0 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_packer_per_sc; +}; + typedef struct harvest_info_header { uint32_t signature; /* Table Signature */ uint32_t version; /* Table Version */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index c84bd7b2cf59..ac941f62cbed 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -33,12 +33,11 @@ #include <linux/dma-fence.h> struct pci_dev; +struct amdgpu_device; #define KGD_MAX_QUEUES 128 struct kfd_dev; -struct kgd_dev; - struct kgd_mem; enum kfd_preempt_type { @@ -228,61 +227,61 @@ struct tile_config { */ struct kfd2kgd_calls { /* Register access functions */ - void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid, + void (*program_sh_mem_settings)(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); - int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, u32 pasid, + int (*set_pasid_vmid_mapping)(struct amdgpu_device *adev, u32 pasid, unsigned int vmid); - int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); + int (*init_interrupts)(struct amdgpu_device *adev, uint32_t pipe_id); - int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + int (*hqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); - int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd, + int (*hiq_mqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off); - int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, + int (*hqd_sdma_load)(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm); - int (*hqd_dump)(struct kgd_dev *kgd, + int (*hqd_dump)(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); - int (*hqd_sdma_dump)(struct kgd_dev *kgd, + int (*hqd_sdma_dump)(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); - bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); - - int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, - unsigned int timeout, uint32_t pipe_id, + bool (*hqd_is_occupied)(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); - bool (*hqd_sdma_is_occupied)(struct kgd_dev *kgd, void *mqd); + int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd, + uint32_t reset_type, unsigned int timeout, + uint32_t pipe_id, uint32_t queue_id); + + bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd); - int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd, + int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd, unsigned int timeout); - int (*address_watch_disable)(struct kgd_dev *kgd); - int (*address_watch_execute)(struct kgd_dev *kgd, + int (*address_watch_disable)(struct amdgpu_device *adev); + int (*address_watch_execute)(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, uint32_t addr_lo); - int (*wave_control_execute)(struct kgd_dev *kgd, + int (*wave_control_execute)(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); - uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, + uint32_t (*address_watch_get_offset)(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset); - bool (*get_atc_vmid_pasid_mapping_info)( - struct kgd_dev *kgd, + bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); @@ -290,16 +289,16 @@ struct kfd2kgd_calls { * passed to the shader by the CP. It's the user mode driver's * responsibility. */ - void (*set_scratch_backing_va)(struct kgd_dev *kgd, + void (*set_scratch_backing_va)(struct amdgpu_device *adev, uint64_t va, uint32_t vmid); - void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, + void (*set_vm_context_page_table_base)(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); - uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); + uint32_t (*read_vmid_from_vmfault_reg)(struct amdgpu_device *adev); - void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt, - int *max_waves_per_cu); - void (*program_trap_handler_settings)(struct kgd_dev *kgd, + void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, + int *wave_cnt, int *max_waves_per_cu); + void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); }; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index bac15c466733..5c0867ebcfce 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -153,6 +153,10 @@ enum PP_SMC_POWER_PROFILE { PP_SMC_POWER_PROFILE_COUNT, }; +extern const char * const amdgpu_pp_profile_name[PP_SMC_POWER_PROFILE_COUNT]; + + + enum { PP_GROUP_UNKNOWN = 0, PP_GROUP_GFX = 1, diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h b/drivers/gpu/drm/amd/include/yellow_carp_offset.h index 76b9eb3f441d..28a56b56bcaf 100644 --- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h +++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h @@ -9,12 +9,12 @@ struct IP_BASE_INSTANCE { unsigned int segment[MAX_SEGMENT]; -}; +} __maybe_unused; struct IP_BASE { struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; -}; +} __maybe_unused; static const struct IP_BASE ACP_BASE = { { { { 0x02403800, 0x00480000, 0, 0, 0, 0 } }, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 41472ed99253..e2cae97f4ff1 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -82,6 +82,16 @@ static const struct hwmon_temp_label { {PP_TEMP_MEM, "mem"}, }; +const char * const amdgpu_pp_profile_name[] = { + "BOOTUP_DEFAULT", + "3D_FULL_SCREEN", + "POWER_SAVING", + "VIDEO", + "VR", + "COMPUTE", + "CUSTOM" +}; + /** * DOC: power_dpm_state * @@ -2080,7 +2090,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(unique_id)) { if (asic_type != CHIP_VEGA10 && asic_type != CHIP_VEGA20 && - asic_type != CHIP_ARCTURUS) + asic_type != CHIP_ARCTURUS && + asic_type != CHIP_ALDEBARAN) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_features)) { if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10) @@ -2123,6 +2134,12 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } } + /* setting should not be allowed from VF */ + if (amdgpu_sriov_vf(adev)) { + dev_attr->attr.mode &= ~S_IWUGO; + dev_attr->store = NULL; + } + #undef DEVICE_ATTR_IS return 0; @@ -3759,5 +3776,7 @@ void amdgpu_debugfs_pm_init(struct amdgpu_device *adev) adev, &amdgpu_debugfs_pm_prv_buffer_fops, adev->pm.smu_prv_buffer_size); + + amdgpu_smu_stb_debug_fs_init(adev); #endif } diff --git a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h index 35fa0d8e92dd..ab66a4b9e438 100644 --- a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h @@ -102,7 +102,9 @@ #define PPSMC_MSG_GfxDriverResetRecovery 0x42 #define PPSMC_MSG_BoardPowerCalibration 0x43 -#define PPSMC_Message_Count 0x44 +#define PPSMC_MSG_HeavySBR 0x45 +#define PPSMC_Message_Count 0x46 + //PPSMC Reset Types #define PPSMC_RESET_TYPE_WARM_RESET 0x00 diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 16e3f72d31b9..c464a045000d 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -423,6 +423,9 @@ enum ip_power_state { POWER_STATE_OFF, }; +/* Used to mask smu debug modes */ +#define SMU_DEBUG_HALT_ON_ERROR 0x1 + struct amdgpu_pm { struct mutex mutex; u32 current_sclk; @@ -460,6 +463,11 @@ struct amdgpu_pm { struct list_head pm_attr_list; atomic_t pwr_state[AMD_IP_BLOCK_TYPE_NUM]; + + /* + * 0 = disabled (default), otherwise enable corresponding debug mode + */ + uint32_t smu_debug_mask; }; #define R600_SSTU_DFLT 0 diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 3557f4e7fc30..ba7565bc8104 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -324,6 +324,7 @@ enum smu_table_id SMU_TABLE_OVERDRIVE, SMU_TABLE_I2C_COMMANDS, SMU_TABLE_PACE, + SMU_TABLE_ECCINFO, SMU_TABLE_COUNT, }; @@ -340,6 +341,7 @@ struct smu_table_context void *max_sustainable_clocks; struct smu_bios_boot_up_values boot_values; void *driver_pptable; + void *ecc_table; struct smu_table tables[SMU_TABLE_COUNT]; /* * The driver table is just a staging buffer for @@ -472,7 +474,14 @@ struct cmn2asic_mapping { int map_to; }; +struct stb_context { + uint32_t stb_buf_size; + bool enabled; + spinlock_t lock; +}; + #define WORKLOAD_POLICY_MAX 7 + struct smu_context { struct amdgpu_device *adev; @@ -559,6 +568,8 @@ struct smu_context uint16_t cpu_core_num; struct smu_user_dpm_profile user_dpm_profile; + + struct stb_context stb_context; }; struct i2c_adapter; @@ -1246,9 +1257,9 @@ struct pptable_funcs { int (*set_fine_grain_gfx_freq_parameters)(struct smu_context *smu); /** - * @set_light_sbr: Set light sbr mode for the SMU. + * @smu_handle_passthrough_sbr: Send message to SMU about special handling for SBR. */ - int (*set_light_sbr)(struct smu_context *smu, bool enable); + int (*smu_handle_passthrough_sbr)(struct smu_context *smu, bool enable); /** * @wait_for_event: Wait for events from SMU. @@ -1261,6 +1272,17 @@ struct pptable_funcs { * of SMUBUS table. */ int (*send_hbm_bad_pages_num)(struct smu_context *smu, uint32_t size); + + /** + * @get_ecc_table: message SMU to get ECC INFO table. + */ + ssize_t (*get_ecc_info)(struct smu_context *smu, void *table); + + + /** + * @stb_collect_info: Collects Smart Trace Buffers data. + */ + int (*stb_collect_info)(struct smu_context *smu, void *buf, uint32_t size); }; typedef enum { @@ -1393,10 +1415,13 @@ int smu_allow_xgmi_power_down(struct smu_context *smu, bool en); int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value); -int smu_set_light_sbr(struct smu_context *smu, bool enable); +int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable); int smu_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, uint64_t event_arg); +int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc); +int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size); +void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h index a017983ff1fa..0f67c56c2863 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h +++ b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h @@ -140,6 +140,8 @@ #define MAX_SW_I2C_COMMANDS 24 +#define ALDEBARAN_UMC_CHANNEL_NUM 32 + typedef enum { I2C_CONTROLLER_PORT_0, //CKSVII2C0 I2C_CONTROLLER_PORT_1, //CKSVII2C1 @@ -507,6 +509,19 @@ typedef struct { uint32_t MmHubPadding[8]; // SMU internal use } AvfsDebugTable_t; +typedef struct { + uint64_t mca_umc_status; + uint64_t mca_umc_addr; + uint16_t ce_count_lo_chip; + uint16_t ce_count_hi_chip; + + uint32_t eccPadding; +} EccInfo_t; + +typedef struct { + EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM]; +} EccInfoTable_t; + // These defines are used with the following messages: // SMC_MSG_TransferTableDram2Smu // SMC_MSG_TransferTableSmu2Dram @@ -517,6 +532,7 @@ typedef struct { #define TABLE_SMU_METRICS 4 #define TABLE_DRIVER_SMU_CONFIG 5 #define TABLE_I2C_COMMANDS 6 -#define TABLE_COUNT 7 +#define TABLE_ECCINFO 7 +#define TABLE_COUNT 8 #endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 18b862a90fbe..ff8a0bcbd290 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -229,7 +229,8 @@ __SMU_DUMMY_MAP(BoardPowerCalibration), \ __SMU_DUMMY_MAP(RequestGfxclk), \ __SMU_DUMMY_MAP(ForceGfxVid), \ - __SMU_DUMMY_MAP(UnforceGfxVid), + __SMU_DUMMY_MAP(UnforceGfxVid), \ + __SMU_DUMMY_MAP(HeavySBR), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 2d422e6a9feb..acb3be292096 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -312,7 +312,7 @@ int smu_v11_0_deep_sleep_control(struct smu_context *smu, void smu_v11_0_interrupt_work(struct smu_context *smu); -int smu_v11_0_set_light_sbr(struct smu_context *smu, bool enable); +int smu_v11_0_handle_passthrough_sbr(struct smu_context *smu, bool enable); int smu_v11_0_restore_user_od_settings(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index e5d3b0d1a032..44af23ae059e 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -27,7 +27,9 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 -#define SMU13_DRIVER_IF_VERSION_ALDE 0x07 +#define SMU13_DRIVER_IF_VERSION_ALDE 0x08 + +#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms /* MP Apertures */ #define MP0_Public 0x03800000 @@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) int smu_v13_0_baco_enter(struct smu_context *smu); int smu_v13_0_baco_exit(struct smu_context *smu); -int smu_v13_0_mode1_reset(struct smu_context *smu); int smu_v13_0_mode2_reset(struct smu_context *smu); int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 8d796ed3b7d1..3ab67b232cd4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1328,7 +1328,12 @@ static int pp_set_powergating_by_smu(void *handle, pp_dpm_powergate_vce(handle, gate); break; case AMD_IP_BLOCK_TYPE_GMC: - pp_dpm_powergate_mmhub(handle); + /* + * For now, this is only used on PICASSO. + * And only "gate" operation is supported. + */ + if (gate) + pp_dpm_powergate_mmhub(handle); break; case AMD_IP_BLOCK_TYPE_GFX: ret = pp_dpm_powergate_gfx(handle, gate); @@ -1551,7 +1556,7 @@ static int pp_set_ppfeature_status(void *handle, uint64_t ppfeature_masks) static int pp_asic_reset_mode_2(void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; + int ret = 0; if (!hwmgr || !hwmgr->pm_en) return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 1f406f21b452..9ddd8491ff00 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -1439,13 +1439,6 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) {70, 90, 0, 0,}, {30, 60, 0, 6,}, }; - static const char *profile_name[6] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE"}; static const char *title[6] = {"NUM", "MODE_NAME", "BUSY_SET_POINT", @@ -1463,7 +1456,7 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) for (i = 0; i <= PP_SMC_POWER_PROFILE_COMPUTE; i++) size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", - i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + i, amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", profile_mode_setting[i][0], profile_mode_setting[i][1], profile_mode_setting[i][2], profile_mode_setting[i][3]); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 611969bf4520..cd99db0dc2be 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5498,14 +5498,6 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) uint32_t i, size = 0; uint32_t len; - static const char *profile_name[7] = {"BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; - static const char *title[8] = {"NUM", "MODE_NAME", "SCLK_UP_HYST", @@ -5529,7 +5521,7 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) for (i = 0; i < len; i++) { if (i == hwmgr->power_profile_mode) { size += sysfs_emit_at(buf, size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n", - i, profile_name[i], "*", + i, amdgpu_pp_profile_name[i], "*", data->current_profile_setting.sclk_up_hyst, data->current_profile_setting.sclk_down_hyst, data->current_profile_setting.sclk_activity, @@ -5540,12 +5532,12 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) } if (smu7_profiling[i].bupdate_sclk) size += sysfs_emit_at(buf, size, "%3d %16s: %8d %16d %16d ", - i, profile_name[i], smu7_profiling[i].sclk_up_hyst, + i, amdgpu_pp_profile_name[i], smu7_profiling[i].sclk_up_hyst, smu7_profiling[i].sclk_down_hyst, smu7_profiling[i].sclk_activity); else size += sysfs_emit_at(buf, size, "%3d %16s: %8s %16s %16s ", - i, profile_name[i], "-", "-", "-"); + i, amdgpu_pp_profile_name[i], "-", "-", "-"); if (smu7_profiling[i].bupdate_mclk) size += sysfs_emit_at(buf, size, "%16d %16d %16d\n", diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index e6336654c565..3f040be0d158 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -5097,13 +5097,6 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) {70, 90, 0, 0,}, {30, 60, 0, 6,}, }; - static const char *profile_name[7] = {"BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[6] = {"NUM", "MODE_NAME", "BUSY_SET_POINT", @@ -5121,11 +5114,12 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) for (i = 0; i < PP_SMC_POWER_PROFILE_CUSTOM; i++) size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", - i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + i, amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", profile_mode_setting[i][0], profile_mode_setting[i][1], profile_mode_setting[i][2], profile_mode_setting[i][3]); + size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", i, - profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", data->custom_profile_mode[0], data->custom_profile_mode[1], data->custom_profile_mode[2], data->custom_profile_mode[3]); return size; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c index 85d55ab4e369..97b3ad369046 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c @@ -3980,14 +3980,6 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) DpmActivityMonitorCoeffInt_t activity_monitor; uint32_t i, size = 0; uint16_t workload_type = 0; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -4021,7 +4013,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) return result); size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", - i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " "); size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8a3244585d80..d93d28c1af95 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -277,8 +277,12 @@ static int smu_dpm_set_power_gate(void *handle, struct smu_context *smu = handle; int ret = 0; - if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) { + dev_WARN(smu->adev->dev, + "SMU uninitialized but power %s requested for %u!\n", + gate ? "gate" : "ungate", block_type); return -EOPNOTSUPP; + } switch (block_type) { /* @@ -1153,6 +1157,8 @@ static int smu_smc_hw_setup(struct smu_context *smu) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 0, 12): ret = smu_system_features_control(smu, true); + if (ret) + dev_err(adev->dev, "Failed system features control!\n"); break; default: break; @@ -1277,8 +1283,10 @@ static int smu_smc_hw_setup(struct smu_context *smu) } ret = smu_notify_display_change(smu); - if (ret) + if (ret) { + dev_err(adev->dev, "Failed to notify display change!\n"); return ret; + } /* * Set min deep sleep dce fclk with bootup value from vbios via @@ -1286,8 +1294,6 @@ static int smu_smc_hw_setup(struct smu_context *smu) */ ret = smu_set_min_dcef_deep_sleep(smu, smu->smu_table.boot_values.dcefclk / 100); - if (ret) - return ret; return ret; } @@ -1344,7 +1350,6 @@ static int smu_hw_init(void *handle) } if (smu->is_apu) { - smu_powergate_sdma(&adev->smu, false); smu_dpm_set_vcn_enable(smu, true); smu_dpm_set_jpeg_enable(smu, true); smu_set_gfx_cgpg(&adev->smu, true); @@ -1400,8 +1405,14 @@ static int smu_disable_dpms(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; int ret = 0; + /* + * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair + * the workaround which always reset the asic in suspend. + * It's likely that workaround will be dropped in the future. + * Then the change here should be dropped together. + */ bool use_baco = !smu->is_apu && - ((amdgpu_in_reset(adev) && + (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev))); @@ -1506,10 +1517,6 @@ static int smu_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if (smu->is_apu) { - smu_powergate_sdma(&adev->smu, true); - } - smu_dpm_set_vcn_enable(smu, false); smu_dpm_set_jpeg_enable(smu, false); @@ -1568,9 +1575,7 @@ static int smu_suspend(void *handle) smu->watermarks_bitmap &= ~(WATERMARKS_LOADED); - /* skip CGPG when in S0ix */ - if (smu->is_apu && !adev->in_s0ix) - smu_set_gfx_cgpg(&adev->smu, false); + smu_set_gfx_cgpg(&adev->smu, false); return 0; } @@ -1601,8 +1606,7 @@ static int smu_resume(void *handle) return ret; } - if (smu->is_apu) - smu_set_gfx_cgpg(&adev->smu, true); + smu_set_gfx_cgpg(&adev->smu, true); smu->disable_uclk_switch = 0; @@ -3060,16 +3064,30 @@ static int smu_gfx_state_change_set(void *handle, return ret; } -int smu_set_light_sbr(struct smu_context *smu, bool enable) +int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable) { int ret = 0; mutex_lock(&smu->mutex); - if (smu->ppt_funcs->set_light_sbr) - ret = smu->ppt_funcs->set_light_sbr(smu, enable); + if (smu->ppt_funcs->smu_handle_passthrough_sbr) + ret = smu->ppt_funcs->smu_handle_passthrough_sbr(smu, enable); + mutex_unlock(&smu->mutex); + + return ret; +} + +int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc) +{ + int ret = -EOPNOTSUPP; + + mutex_lock(&smu->mutex); + if (smu->ppt_funcs && + smu->ppt_funcs->get_ecc_info) + ret = smu->ppt_funcs->get_ecc_info(smu, umc_ecc); mutex_unlock(&smu->mutex); return ret; + } static int smu_get_prv_buffer_details(void *handle, void **addr, size_t *size) @@ -3161,3 +3179,107 @@ int smu_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, return ret; } + +int smu_stb_collect_info(struct smu_context *smu, void *buf, uint32_t size) +{ + + if (!smu->ppt_funcs->stb_collect_info || !smu->stb_context.enabled) + return -EOPNOTSUPP; + + /* Confirm the buffer allocated is of correct size */ + if (size != smu->stb_context.stb_buf_size) + return -EINVAL; + + /* + * No need to lock smu mutex as we access STB directly through MMIO + * and not going through SMU messaging route (for now at least). + * For registers access rely on implementation internal locking. + */ + return smu->ppt_funcs->stb_collect_info(smu, buf, size); +} + +#if defined(CONFIG_DEBUG_FS) + +static int smu_stb_debugfs_open(struct inode *inode, struct file *filp) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct smu_context *smu = &adev->smu; + unsigned char *buf; + int r; + + buf = kvmalloc_array(smu->stb_context.stb_buf_size, sizeof(*buf), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + r = smu_stb_collect_info(smu, buf, smu->stb_context.stb_buf_size); + if (r) + goto out; + + filp->private_data = buf; + + return 0; + +out: + kvfree(buf); + return r; +} + +static ssize_t smu_stb_debugfs_read(struct file *filp, char __user *buf, size_t size, + loff_t *pos) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct smu_context *smu = &adev->smu; + + + if (!filp->private_data) + return -EINVAL; + + return simple_read_from_buffer(buf, + size, + pos, filp->private_data, + smu->stb_context.stb_buf_size); +} + +static int smu_stb_debugfs_release(struct inode *inode, struct file *filp) +{ + kvfree(filp->private_data); + filp->private_data = NULL; + + return 0; +} + +/* + * We have to define not only read method but also + * open and release because .read takes up to PAGE_SIZE + * data each time so and so is invoked multiple times. + * We allocate the STB buffer in .open and release it + * in .release + */ +static const struct file_operations smu_stb_debugfs_fops = { + .owner = THIS_MODULE, + .open = smu_stb_debugfs_open, + .read = smu_stb_debugfs_read, + .release = smu_stb_debugfs_release, + .llseek = default_llseek, +}; + +#endif + +void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + + struct smu_context *smu = &adev->smu; + + if (!smu->stb_context.stb_buf_size) + return; + + debugfs_create_file_size("amdgpu_smu_stb_dump", + S_IRUSR, + adev_to_drm(adev)->primary->debugfs_root, + adev, + &smu_stb_debugfs_fops, + smu->stb_context.stb_buf_size); +#endif + +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index fd1d30a93db5..505d2fb94fd9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -295,16 +295,6 @@ static int arcturus_allocate_dpm_context(struct smu_context *smu) return -ENOMEM; smu_dpm->dpm_context_size = sizeof(struct smu_11_0_dpm_context); - smu_dpm->dpm_current_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_current_power_state) - return -ENOMEM; - - smu_dpm->dpm_request_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_request_power_state) - return -ENOMEM; - return 0; } @@ -1389,14 +1379,6 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, char *buf) { DpmActivityMonitorCoeffInt_t activity_monitor; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -1453,7 +1435,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, } size += sysfs_emit_at(buf, size, "%2d %14s%s\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); if (smu_version >= 0x360d00) { size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", @@ -2490,7 +2472,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .deep_sleep_control = smu_v11_0_deep_sleep_control, .get_fan_parameters = arcturus_get_fan_parameters, .interrupt_work = smu_v11_0_interrupt_work, - .set_light_sbr = smu_v11_0_set_light_sbr, + .smu_handle_passthrough_sbr = smu_v11_0_handle_passthrough_sbr, .set_mp1_state = smu_cmn_set_mp1_state, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 60a557068ea4..2bb7816b245a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1713,14 +1713,6 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) DpmActivityMonitorCoeffInt_t activity_monitor; uint32_t i, size = 0; int16_t workload_type = 0; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -1759,7 +1751,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) } size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a4108025fe29..777f717c37ae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -80,6 +80,9 @@ (*member) = (smu->smu_table.driver_pptable + offsetof(PPTable_t, field));\ } while(0) +/* STB FIFO depth is in 64bit units */ +#define SIENNA_CICHLID_STB_DEPTH_UNIT_BYTES 8 + static int get_table_size(struct smu_context *smu) { if (smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 13)) @@ -650,6 +653,8 @@ static int sienna_cichlid_allocate_dpm_context(struct smu_context *smu) return 0; } +static void sienna_cichlid_stb_init(struct smu_context *smu); + static int sienna_cichlid_init_smc_tables(struct smu_context *smu) { int ret = 0; @@ -662,6 +667,8 @@ static int sienna_cichlid_init_smc_tables(struct smu_context *smu) if (ret) return ret; + sienna_cichlid_stb_init(smu); + return smu_v11_0_init_smc_tables(smu); } @@ -1171,7 +1178,7 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t mask) { struct amdgpu_device *adev = smu->adev; - int ret = 0, size = 0; + int ret = 0; uint32_t soft_min_level = 0, soft_max_level = 0, min_freq = 0, max_freq = 0; soft_min_level = mask ? (ffs(mask) - 1) : 0; @@ -1216,7 +1223,7 @@ forec_level_out: if ((clk_type == SMU_GFXCLK) || (clk_type == SMU_SCLK)) amdgpu_gfx_off_ctrl(adev, true); - return size; + return 0; } static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu) @@ -1342,14 +1349,6 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * &(activity_monitor_external.DpmActivityMonitorCoeffInt); uint32_t i, size = 0; int16_t workload_type = 0; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -1388,7 +1387,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * } size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", @@ -2135,7 +2134,13 @@ static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu, static int sienna_cichlid_run_btc(struct smu_context *smu) { - return smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); + int res; + + res = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); + if (res) + dev_err(smu->adev->dev, "RunDcBtc failed!\n"); + + return res; } static int sienna_cichlid_baco_enter(struct smu_context *smu) @@ -3619,6 +3624,16 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, gpu_metrics->energy_accumulator = use_metrics_v2 ? metrics_v2->EnergyAccumulator : metrics->EnergyAccumulator; + if (metrics->CurrGfxVoltageOffset) + gpu_metrics->voltage_gfx = + (155000 - 625 * metrics->CurrGfxVoltageOffset) / 100; + if (metrics->CurrMemVidOffset) + gpu_metrics->voltage_mem = + (155000 - 625 * metrics->CurrMemVidOffset) / 100; + if (metrics->CurrSocVoltageOffset) + gpu_metrics->voltage_soc = + (155000 - 625 * metrics->CurrSocVoltageOffset) / 100; + average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity; if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = @@ -3793,6 +3808,53 @@ static int sienna_cichlid_set_mp1_state(struct smu_context *smu, return ret; } +static void sienna_cichlid_stb_init(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t reg; + + reg = RREG32_PCIE(MP1_Public | smnMP1_PMI_3_START); + smu->stb_context.enabled = REG_GET_FIELD(reg, MP1_PMI_3_START, ENABLE); + + /* STB is disabled */ + if (!smu->stb_context.enabled) + return; + + spin_lock_init(&smu->stb_context.lock); + + /* STB buffer size in bytes as function of FIFO depth */ + reg = RREG32_PCIE(MP1_Public | smnMP1_PMI_3_FIFO); + smu->stb_context.stb_buf_size = 1 << REG_GET_FIELD(reg, MP1_PMI_3_FIFO, DEPTH); + smu->stb_context.stb_buf_size *= SIENNA_CICHLID_STB_DEPTH_UNIT_BYTES; + + dev_info(smu->adev->dev, "STB initialized to %d entries", + smu->stb_context.stb_buf_size / SIENNA_CICHLID_STB_DEPTH_UNIT_BYTES); + +} + +int sienna_cichlid_stb_get_data_direct(struct smu_context *smu, + void *buf, + uint32_t size) +{ + uint32_t *p = buf; + struct amdgpu_device *adev = smu->adev; + + /* No need to disable interrupts for now as we don't lock it yet from ISR */ + spin_lock(&smu->stb_context.lock); + + /* + * Read the STB FIFO in units of 32bit since this is the accessor window + * (register width) we have. + */ + buf = ((char *) buf) + size; + while ((void *)p < buf) + *p++ = cpu_to_le32(RREG32_PCIE(MP1_Public | smnMP1_PMI_3)); + + spin_unlock(&smu->stb_context.lock); + + return 0; +} + static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_allowed_feature_mask = sienna_cichlid_get_allowed_feature_mask, .set_default_dpm_table = sienna_cichlid_set_default_dpm_table, @@ -3882,6 +3944,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .interrupt_work = smu_v11_0_interrupt_work, .gpo_control = sienna_cichlid_gpo_control, .set_mp1_state = sienna_cichlid_set_mp1_state, + .stb_collect_info = sienna_cichlid_stb_get_data_direct, }; void sienna_cichlid_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 28b7c0562b99..4e9e2cf39859 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1724,7 +1724,7 @@ int smu_v11_0_mode1_reset(struct smu_context *smu) return ret; } -int smu_v11_0_set_light_sbr(struct smu_context *smu, bool enable) +int smu_v11_0_handle_passthrough_sbr(struct smu_context *smu, bool enable) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index c02ed65ffa38..5cb07ed227fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1039,14 +1039,6 @@ failed: static int vangogh_get_power_profile_mode(struct smu_context *smu, char *buf) { - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; uint32_t i, size = 0; int16_t workload_type = 0; @@ -1066,7 +1058,7 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu, continue; size += sysfs_emit_at(buf, size, "%2d %14s%s\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } return size; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 145f13b8c977..25c4b135f830 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1095,14 +1095,6 @@ static int renoir_set_watermarks_table( static int renoir_get_power_profile_mode(struct smu_context *smu, char *buf) { - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; uint32_t i, size = 0; int16_t workload_type = 0; @@ -1121,7 +1113,7 @@ static int renoir_get_power_profile_mode(struct smu_context *smu, continue; size += sysfs_emit_at(buf, size, "%2d %14s%s\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } return size; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index d60b8c5e8715..9c91e79c955f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -120,7 +120,8 @@ int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate) int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) { - if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + /* Until now the SMU12 only implemented for Renoir series so here neen't do APU check. */ + if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) || smu->adev->in_s0ix) return 0; return smu_cmn_send_smc_msg_with_param(smu, @@ -191,6 +192,9 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 59a7d276541d..4885c4ae78b7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -78,6 +78,12 @@ #define smnPCIE_ESM_CTRL 0x111003D0 +/* + * SMU support ECCTABLE since version 68.42.0, + * use this to check ECCTALE feature whether support + */ +#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00 + static const struct smu_temperature_range smu13_thermal_policy[] = { {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, @@ -135,6 +141,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT MSG_MAP(SetUclkDpmMode, PPSMC_MSG_SetUclkDpmMode, 0), MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0), + MSG_MAP(HeavySBR, PPSMC_MSG_HeavySBR, 0), }; static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = { @@ -190,6 +197,7 @@ static const struct cmn2asic_mapping aldebaran_table_map[SMU_TABLE_COUNT] = { TAB_MAP(SMU_METRICS), TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(I2C_COMMANDS), + TAB_MAP(ECCINFO), }; static const uint8_t aldebaran_throttler_map[] = { @@ -223,6 +231,9 @@ static int aldebaran_tables_init(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; @@ -235,6 +246,10 @@ static int aldebaran_tables_init(struct smu_context *smu) return -ENOMEM; } + smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); + if (!smu_table->ecc_table) + return -ENOMEM; + return 0; } @@ -248,16 +263,6 @@ static int aldebaran_allocate_dpm_context(struct smu_context *smu) return -ENOMEM; smu_dpm->dpm_context_size = sizeof(struct smu_13_0_dpm_context); - smu_dpm->dpm_current_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_current_power_state) - return -ENOMEM; - - smu_dpm->dpm_request_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_request_power_state) - return -ENOMEM; - return 0; } @@ -1601,7 +1606,8 @@ out_unlock: mutex_unlock(&smu->metrics_lock); adev->unique_id = ((uint64_t)upper32 << 32) | lower32; - sprintf(adev->serial, "%016llx", adev->unique_id); + if (adev->serial[0] == '\0') + sprintf(adev->serial, "%016llx", adev->unique_id); } static bool aldebaran_is_baco_supported(struct smu_context *smu) @@ -1619,10 +1625,18 @@ static int aldebaran_set_df_cstate(struct smu_context *smu, static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en) { - return smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_GmiPwrDnControl, - en ? 1 : 0, - NULL); + struct amdgpu_device *adev = smu->adev; + + /* The message only works on master die and NACK will be sent + back for other dies, only send it on master die */ + if (!adev->smuio.funcs->get_socket_id(adev) && + !adev->smuio.funcs->get_die_id(adev)) + return smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GmiPwrDnControl, + en ? 0 : 1, + NULL); + else + return 0; } static const struct throttling_logging_label { @@ -1765,6 +1779,98 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static int aldebaran_check_ecc_table_support(struct smu_context *smu) +{ + uint32_t if_version = 0xff, smu_version = 0xff; + int ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); + if (ret) { + /* return not support if failed get smu_version */ + ret = -EOPNOTSUPP; + } + + if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION) + ret = -EOPNOTSUPP; + + return ret; +} + +static ssize_t aldebaran_get_ecc_info(struct smu_context *smu, + void *table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + EccInfoTable_t *ecc_table = NULL; + struct ecc_info_per_ch *ecc_info_per_channel = NULL; + int i, ret = 0; + struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table; + + ret = aldebaran_check_ecc_table_support(smu); + if (ret) + return ret; + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ECCINFO, + 0, + smu_table->ecc_table, + false); + if (ret) { + dev_info(smu->adev->dev, "Failed to export SMU ecc table!\n"); + return ret; + } + + ecc_table = (EccInfoTable_t *)smu_table->ecc_table; + + for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) { + ecc_info_per_channel = &(eccinfo->ecc[i]); + ecc_info_per_channel->ce_count_lo_chip = + ecc_table->EccInfo[i].ce_count_lo_chip; + ecc_info_per_channel->ce_count_hi_chip = + ecc_table->EccInfo[i].ce_count_hi_chip; + ecc_info_per_channel->mca_umc_status = + ecc_table->EccInfo[i].mca_umc_status; + ecc_info_per_channel->mca_umc_addr = + ecc_table->EccInfo[i].mca_umc_addr; + } + + return ret; +} + +static int aldebaran_mode1_reset(struct smu_context *smu) +{ + u32 smu_version, fatal_err, param; + int ret = 0; + struct amdgpu_device *adev = smu->adev; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + fatal_err = 0; + param = SMU_RESET_MODE_1; + + /* + * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07 + */ + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (smu_version < 0x00440700) { + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + } + else { + /* fatal error triggered by ras, PMFW supports the flag + from 68.44.0 */ + if ((smu_version >= 0x00442c00) && ras && + atomic_read(&ras->in_recovery)) + fatal_err = 1; + + param |= (fatal_err << 16); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GfxDeviceDriverReset, param, NULL); + } + + if (!ret) + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); + + return ret; +} + static int aldebaran_mode2_reset(struct smu_context *smu) { u32 smu_version; @@ -1816,6 +1922,14 @@ out: return ret; } +static int aldebaran_smu_handle_passthrough_sbr(struct smu_context *smu, bool enable) +{ + int ret = 0; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_HeavySBR, enable ? 1 : 0, NULL); + + return ret; +} + static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu) { #if 0 @@ -1925,13 +2039,15 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .get_gpu_metrics = aldebaran_get_gpu_metrics, .mode1_reset_is_support = aldebaran_is_mode1_reset_supported, .mode2_reset_is_support = aldebaran_is_mode2_reset_supported, - .mode1_reset = smu_v13_0_mode1_reset, + .smu_handle_passthrough_sbr = aldebaran_smu_handle_passthrough_sbr, + .mode1_reset = aldebaran_mode1_reset, .set_mp1_state = aldebaran_set_mp1_state, .mode2_reset = aldebaran_mode2_reset, .wait_for_event = smu_v13_0_wait_for_event, .i2c_init = aldebaran_i2c_control_init, .i2c_fini = aldebaran_i2c_control_fini, .send_hbm_bad_pages_num = aldebaran_smu_send_hbm_bad_page_num, + .get_ecc_info = aldebaran_get_ecc_info, }; void aldebaran_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 35145db6eedf..b54790d3483e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); #define SMU13_VOLTAGE_SCALE 4 -#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms - #define LINK_WIDTH_MAX 6 #define LINK_SPEED_MAX 3 @@ -198,6 +196,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) int smu_v13_0_check_fw_version(struct smu_context *smu) { + struct amdgpu_device *adev = smu->adev; uint32_t if_version = 0xff, smu_version = 0xff; uint16_t smu_major; uint8_t smu_minor, smu_debug; @@ -210,8 +209,10 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu_major = (smu_version >> 16) & 0xffff; smu_minor = (smu_version >> 8) & 0xff; smu_debug = (smu_version >> 0) & 0xff; + if (smu->is_apu) + adev->pm.fw_version = smu_version; - switch (smu->adev->ip_versions[MP1_HWIP][0]) { + switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_ALDE; break; @@ -220,13 +221,15 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP; break; default: - dev_err(smu->adev->dev, "smu unsupported IP version: 0x%x.\n", - smu->adev->ip_versions[MP1_HWIP][0]); + dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n", + adev->ip_versions[MP1_HWIP][0]); smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_INV; break; } - dev_info(smu->adev->dev, "smu fw reported version = 0x%08x (%d.%d.%d)\n", + /* only for dGPU w/ SMU13*/ + if (adev->pm.fw) + dev_dbg(adev->dev, "smu fw reported version = 0x%08x (%d.%d.%d)\n", smu_version, smu_major, smu_minor, smu_debug); /* @@ -238,11 +241,11 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) * of halt driver loading. */ if (if_version != smu->smc_driver_if_version) { - dev_info(smu->adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " + dev_info(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " "smu fw version = 0x%08x (%d.%d.%d)\n", smu->smc_driver_if_version, if_version, smu_version, smu_major, smu_minor, smu_debug); - dev_warn(smu->adev->dev, "SMU driver if version not matched\n"); + dev_warn(adev->dev, "SMU driver if version not matched\n"); } return ret; @@ -430,8 +433,10 @@ int smu_v13_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->hardcode_pptable); smu_table->hardcode_pptable = NULL; + kfree(smu_table->ecc_table); kfree(smu_table->metrics_table); kfree(smu_table->watermarks_table); + smu_table->ecc_table = NULL; smu_table->metrics_table = NULL; smu_table->watermarks_table = NULL; smu_table->metrics_time = 0; @@ -1424,25 +1429,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu) return ret; } -int smu_v13_0_mode1_reset(struct smu_context *smu) -{ - u32 smu_version; - int ret = 0; - /* - * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07 - */ - smu_cmn_get_smc_version(smu, NULL, &smu_version); - if (smu_version < 0x00440700) - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); - else - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL); - - if (!ret) - msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); - - return ret; -} - static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu, uint64_t event_arg) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index ea6f50c08c5f..ee1a312fd497 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -94,10 +94,10 @@ static void smu_cmn_read_arg(struct smu_context *smu, /** * __smu_cmn_poll_stat -- poll for a status from the SMU - * smu: a pointer to SMU context + * @smu: a pointer to SMU context * * Returns the status of the SMU, which could be, - * 0, the SMU is busy with your previous command; + * 0, the SMU is busy with your command; * 1, execution status: success, execution result: success; * 0xFF, execution status: success, execution result: failure; * 0xFE, unknown command; @@ -257,10 +257,11 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu, uint16_t msg_index, uint32_t param) { + struct amdgpu_device *adev = smu->adev; u32 reg; int res; - if (smu->adev->no_hw_access) + if (adev->no_hw_access) return 0; reg = __smu_cmn_poll_stat(smu); @@ -272,6 +273,12 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu, __smu_cmn_send_msg(smu, msg_index, param); res = 0; Out: + if (unlikely(adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) && + res && (res != -ETIME)) { + amdgpu_device_halt(adev); + WARN_ON(1); + } + return res; } @@ -288,9 +295,18 @@ Out: int smu_cmn_wait_for_response(struct smu_context *smu) { u32 reg; + int res; reg = __smu_cmn_poll_stat(smu); - return __smu_cmn_reg2errno(smu, reg); + res = __smu_cmn_reg2errno(smu, reg); + + if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) && + res && (res != -ETIME)) { + amdgpu_device_halt(smu->adev); + WARN_ON(1); + } + + return res; } /** @@ -328,10 +344,11 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, uint32_t param, uint32_t *read_arg) { + struct amdgpu_device *adev = smu->adev; int res, index; u32 reg; - if (smu->adev->no_hw_access) + if (adev->no_hw_access) return 0; index = smu_cmn_to_asic_specific_index(smu, @@ -352,11 +369,16 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, __smu_cmn_send_msg(smu, (uint16_t) index, param); reg = __smu_cmn_poll_stat(smu); res = __smu_cmn_reg2errno(smu, reg); - if (res == -EREMOTEIO) + if (res != 0) __smu_cmn_reg_print_error(smu, reg, index, param, msg); if (read_arg) smu_cmn_read_arg(smu, read_arg); Out: + if (unlikely(adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) && res) { + amdgpu_device_halt(adev); + WARN_ON(1); + } + mutex_unlock(&smu->message_lock); return res; } |