Diffstat (limited to 'drivers/gpu')
156 files changed, 2817 insertions, 1301 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index dd8bc53411bd..415a7fa395c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -136,6 +136,7 @@ amdgpu-y += \ gfx_v9_0.o \ gfx_v9_4.o \ gfx_v9_4_2.o \ + gfx_v9_4_3.o \ gfx_v10_0.o \ imu_v11_0.o \ gfx_v11_0.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8cf2cc50b3de..02b827785e39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -185,7 +185,6 @@ extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; extern uint amdgpu_force_long_training; -extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; @@ -471,7 +470,7 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); /* * Writeback */ -#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo *wb_obj; @@ -1222,7 +1221,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); ((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r))) #define amdgpu_asic_invalidate_hdp(adev, r) \ ((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \ - ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : 0)) + ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : (void)0)) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 60b1857f469e..aeeec211861c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) */ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && + adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */ + return false; + + if ((adev->flags & AMD_IS_APU) && + amdgpu_acpi_is_s3_active(adev)) return false; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f99d4873bf22..0385f7f69278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -96,7 +96,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, size_t *start_offset) { /* - * The first num_doorbells are used by amdgpu. + * The first num_kernel_doorbells are used by amdgpu. * amdkfd takes whatever's left in the aperture. 
*/ if (adev->enable_mes) { @@ -109,11 +109,11 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, *aperture_base = adev->doorbell.base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.size > adev->doorbell.num_doorbells * + } else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells * sizeof(u32)) { *aperture_base = adev->doorbell.base; *aperture_size = adev->doorbell.size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); + *start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32); } else { *aperture_base = 0; *aperture_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c87515210c4f..83a83ced2439 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -82,6 +82,25 @@ static bool kfd_mem_is_attached(struct amdgpu_vm *avm, return false; } +/** + * reuse_dmamap() - Check whether adev can share the original + * userptr BO + * + * If both adev and bo_adev are in direct mapping or + * in the same iommu group, they can share the original BO. + * + * @adev: Device to which can or cannot share the original BO + * @bo_adev: Device to which allocated BO belongs to + * + * Return: returns true if adev can share original userptr BO, + * false otherwise. + */ +static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) +{ + return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) || + (adev->dev->iommu_group == bo_adev->dev->iommu_group); +} + /* Set memory usage limits. Current, limits are * System (TTM + userptr) memory - 15/16th System RAM * TTM memory - 3/8th System RAM @@ -253,15 +272,19 @@ create_dmamap_sg_bo(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo **bo_out) { struct drm_gem_object *gem_obj; - int ret, align; + int ret; + uint64_t flags = 0; ret = amdgpu_bo_reserve(mem->bo, false); if (ret) return ret; - align = 1; - ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align, - AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE, + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) + flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_UNCACHED); + + ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1, + AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags, ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); amdgpu_bo_unreserve(mem->bo); @@ -481,9 +504,6 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem, if (unlikely(ret)) goto release_sg; - drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address, - ttm->num_pages); - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) @@ -805,11 +825,11 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, va + bo_size, vm); if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || - (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || - same_hive) { + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the - * local hive, or userptr mapping IOMMU direct map mode - * share the original BO + * local hive, or userptr mapping can reuse dma map + * address space share the original BO */ attachment[i]->type = KFD_MEM_ATT_SHARED; bo[i] = mem->bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 08eced097bd8..2eb2c66843a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1276,7 +1276,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, r = drm_sched_job_add_dependency(&leader->base, fence); if (r) { dma_fence_put(fence); - goto error_cleanup; + return r; } } @@ -1303,7 +1303,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } if (r) { r = -EAGAIN; - goto error_unlock; + mutex_unlock(&p->adev->notifier_lock); + return r; } p->fence = dma_fence_get(&leader->base.s_fence->finished); @@ -1350,14 +1351,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, mutex_unlock(&p->adev->notifier_lock); mutex_unlock(&p->bo_list->bo_list_mutex); return 0; - -error_unlock: - mutex_unlock(&p->adev->notifier_lock); - -error_cleanup: - for (i = 0; i < p->gang_size; ++i) - drm_sched_job_cleanup(&p->jobs[i]->base); - return r; } /* Cleanup the parser structure */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fac9312b1695..981a9cfb63b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -602,7 +602,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { return readl(adev->doorbell.ptr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); @@ -625,7 +625,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { writel(v, adev->doorbell.ptr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); @@ -646,7 +646,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); @@ -669,7 +669,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); @@ -1060,7 +1060,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.base = 0; adev->doorbell.size = 0; - adev->doorbell.num_doorbells = 0; + adev->doorbell.num_kernel_doorbells = 0; adev->doorbell.ptr = NULL; return 0; } @@ -1075,27 +1075,27 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) adev->doorbell.size = pci_resource_len(adev->pdev, 2); if (adev->enable_mes) { - adev->doorbell.num_doorbells = + adev->doorbell.num_kernel_doorbells = adev->doorbell.size / sizeof(u32); } else { - adev->doorbell.num_doorbells = + adev->doorbell.num_kernel_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), adev->doorbell_index.max_assignment+1); - if (adev->doorbell.num_doorbells == 0) + if (adev->doorbell.num_kernel_doorbells == 0) return -EINVAL; /* For Vega, reserve and map two 
pages on doorbell BAR since SDMA * paging queue doorbell use the second page. The * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the * doorbells are in the first page. So with paging queue enabled, - * the max num_doorbells should + 1 page (0x400 in dword) + * the max num_kernel_doorbells should + 1 page (0x400 in dword) */ if (adev->asic_type >= CHIP_VEGA10) - adev->doorbell.num_doorbells += 0x400; + adev->doorbell.num_kernel_doorbells += 0x400; } adev->doorbell.ptr = ioremap(adev->doorbell.base, - adev->doorbell.num_doorbells * + adev->doorbell.num_kernel_doorbells * sizeof(u32)); if (adev->doorbell.ptr == NULL) return -ENOMEM; @@ -2184,7 +2184,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } - amdgpu_amdkfd_device_probe(adev); adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) @@ -2240,6 +2239,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; + amdgpu_amdkfd_device_probe(adev); adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; @@ -2365,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, - ring->num_hw_submission, amdgpu_job_hang_limit, + ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, adev->dev); @@ -2539,8 +2539,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_fru_get_product_info(adev); init_failed: - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_release_full_gpu(adev, true); return r; } @@ -3305,9 +3303,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - return r; + if (!adev->in_s0ix) { + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + } r = amdgpu_device_ip_resume_phase1(adev); if (r) @@ -3578,6 +3578,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, int r, i; bool px = false; u32 max_MBps; + int tmp; adev->shutdown = false; adev->flags = flags; @@ -3799,7 +3800,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; @@ -3857,18 +3864,6 @@ fence_driver_init: r = amdgpu_device_ip_init(adev); if (r) { - /* failed in exclusive mode due to timeout */ - if (amdgpu_sriov_vf(adev) && - !amdgpu_sriov_runtime(adev) && - amdgpu_virt_mmio_blocked(adev) && - !amdgpu_virt_wait_reset(adev)) { - dev_err(adev->dev, "VF exclusive mode timeout\n"); - /* Don't send request since VF is inactive. 
*/ - adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; - adev->virt.ops = NULL; - r = -EAGAIN; - goto release_ras_con; - } dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); goto release_ras_con; @@ -3937,8 +3932,10 @@ fence_driver_init: msecs_to_jiffies(AMDGPU_RESUME_MS)); } - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_release_full_gpu(adev, true); flush_delayed_work(&adev->delayed_init_work); + } r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); if (r) @@ -3978,6 +3975,20 @@ fence_driver_init: return 0; release_ras_con: + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_release_full_gpu(adev, true); + + /* failed in exclusive mode due to timeout */ + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + amdgpu_virt_mmio_blocked(adev) && + !amdgpu_virt_wait_reset(adev)) { + dev_err(adev->dev, "VF exclusive mode timeout\n"); + /* Don't send request since VF is inactive. */ + adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; + adev->virt.ops = NULL; + r = -EAGAIN; + } amdgpu_release_ras_context(adev); failed: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 5139334925ea..0ecce0b92b82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1502,6 +1502,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(9, 4, 3): amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); break; case IP_VERSION(10, 1, 10): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 7199b6b0be81..8fd11497faba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -21,6 +21,9 @@ * */ +#ifndef AMDGPU_DOORBELL_H +#define AMDGPU_DOORBELL_H + /* * GPU doorbell structures, functions & helpers */ @@ -29,7 +32,9 @@ struct amdgpu_doorbell { resource_size_t base; resource_size_t size; u32 __iomem *ptr; - u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ + + /* Number of doorbells reserved for amdgpu kernel driver */ + u32 num_kernel_doorbells; }; /* Reserved doorbells for amdgpu (including multimedia). @@ -306,3 +311,4 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) #define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b4189d669b54..b1ca1ab6d6ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -157,7 +157,6 @@ char *amdgpu_virtual_display; */ uint amdgpu_pp_feature_mask = 0xfff7bfff; uint amdgpu_force_long_training; -int amdgpu_job_hang_limit; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ @@ -521,13 +520,6 @@ MODULE_PARM_DESC(virtual_display, module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); /** - * DOC: job_hang_limit (int) - * Set how much time allow a job hang and not drop it. The default is 0. 
- */ -MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)"); -module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); - -/** * DOC: lbpw (int) * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled). */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c50d59855011..9d3a0542c996 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -305,6 +305,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->ring_obj = NULL; ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.kiq; + ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_gfx_kiq_acquire(adev, ring); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index de9e7a00bb15..bfabea76d166 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -42,6 +42,8 @@ #define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L +#define AMDGPU_MAX_GC_INSTANCES 8 + #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -53,6 +55,15 @@ enum amdgpu_gfx_pipe_priority { #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 +enum amdgpu_gfx_partition { + AMDGPU_SPX_PARTITION_MODE = 0, + AMDGPU_DPX_PARTITION_MODE = 1, + AMDGPU_TPX_PARTITION_MODE = 2, + AMDGPU_QPX_PARTITION_MODE = 3, + AMDGPU_CPX_PARTITION_MODE = 4, + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, +}; + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -323,7 +334,7 @@ struct amdgpu_gfx { bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsigned num_gfx_rings; - struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; + struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES]; unsigned num_compute_rings; struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; @@ -364,6 +375,10 @@ struct amdgpu_gfx { struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; struct amdgpu_ring_mux muxer; + + enum amdgpu_gfx_partition partition_mode; + uint32_t num_xcd; + uint32_t num_xcc_per_xcp; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 88bc7f5f46e6..4e2531758866 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -395,8 +395,21 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, while (fault->timestamp >= stamp) { uint64_t tmp; - if (atomic64_read(&fault->key) == key) - return true; + if (atomic64_read(&fault->key) == key) { + /* + * if we get a fault which is already present in + * the fault_ring and the timestamp of + * the fault is after the expired timestamp, + * then this is a new fault that needs to be added + * into the fault ring. 
+ */ + if (fault->timestamp_expiry != 0 && + amdgpu_ih_ts_after(fault->timestamp_expiry, + timestamp)) + break; + else + return true; + } tmp = fault->timestamp; fault = &gmc->fault_ring[fault->next]; @@ -432,15 +445,32 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, { struct amdgpu_gmc *gmc = &adev->gmc; uint64_t key = amdgpu_gmc_fault_key(addr, pasid); + struct amdgpu_ih_ring *ih; struct amdgpu_gmc_fault *fault; + uint32_t last_wptr; + uint64_t last_ts; uint32_t hash; uint64_t tmp; + ih = adev->irq.retry_cam_enabled ? &adev->irq.ih_soft : &adev->irq.ih1; + /* Get the WPTR of the last entry in IH ring */ + last_wptr = amdgpu_ih_get_wptr(adev, ih); + /* Order wptr with ring data. */ + rmb(); + /* Get the timetamp of the last entry in IH ring */ + last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1); + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER); fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; do { - if (atomic64_cmpxchg(&fault->key, key, 0) == key) + if (atomic64_read(&fault->key) == key) { + /* + * Update the timestamp when this fault + * expired. + */ + fault->timestamp_expiry = last_ts; break; + } tmp = fault->timestamp; fault = &gmc->fault_ring[fault->next]; @@ -524,7 +554,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; - vmhub = ring->funcs->vmhub; + vmhub = ring->vm_hub; if (ring == &adev->mes.ring) continue; @@ -540,7 +570,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", - ring->name, ring->vm_inv_eng, ring->funcs->vmhub); + ring->name, ring->vm_inv_eng, ring->vm_hub); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 232523e3e270..6d105d7fb98b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -70,6 +70,7 @@ struct amdgpu_gmc_fault { uint64_t timestamp:48; uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; atomic64_t key; + uint64_t timestamp_expiry:48; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index df7eb0b7c4b9..4ff348e10e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -267,7 +267,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) - amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid); + amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid); amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3f07b1a2ce47..c991ca0b7a1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -202,7 +202,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct dma_fence **fences; unsigned i; @@ -277,7 +277,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t 
fence_context = adev->fence_context + ring->idx; bool needs_flush = vm->use_cpu_for_update; @@ -338,7 +338,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; uint64_t updates = amdgpu_vm_tlb_seq(vm); @@ -398,7 +398,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *idle = NULL; struct amdgpu_vmid *id = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index e9f2c11ea416..be243adf3e65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -98,6 +98,8 @@ struct amdgpu_irq { struct irq_domain *domain; /* GPU irq controller domain */ unsigned virq[AMDGPU_MAX_IRQ_SRC_ID]; uint32_t srbm_soft_reset; + u32 retry_cam_doorbell_index; + bool retry_cam_enabled; }; void amdgpu_irq_disable_all(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 0e55823ef6ca..f0f00466b59f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1434,13 +1434,31 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) struct amdgpu_firmware_info *info; char ucode_prefix[30]; char fw_name[40]; + bool need_retry = false; int r; - amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", - ucode_prefix, - pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, + sizeof(ucode_prefix)); + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", + ucode_prefix, + pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); + need_retry = true; + } else { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", + ucode_prefix, + pipe == AMDGPU_MES_SCHED_PIPE ? 
"" : "1"); + } + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); + if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", + ucode_prefix); + DRM_INFO("try to fall back to %s\n", fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], + fw_name); + } + if (r) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4069bce9479f..3ab8a88789c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2341,7 +2341,6 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { switch (adev->ip_versions[MP0_HWIP][0]) { case IP_VERSION(13, 0, 2): - case IP_VERSION(13, 0, 10): return true; default: return false; @@ -2430,6 +2429,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) else adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); + + /* + * XGMI RAS is not supported if xgmi num physical nodes + * is zero + */ + if (!adev->gmc.xgmi.num_physical_nodes) + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index ef38f4c93df0..17b3d1992e80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -583,6 +583,10 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { return TA_RAS_BLOCK__FUSE; case AMDGPU_RAS_BLOCK__MCA: return TA_RAS_BLOCK__MCA; + case AMDGPU_RAS_BLOCK__VCN: + return TA_RAS_BLOCK__VCN; + case AMDGPU_RAS_BLOCK__JPEG: + return TA_RAS_BLOCK__JPEG; default: WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block); return TA_RAS_BLOCK__UMC; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 3106fa8a15ef..c2c2a7718613 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -107,47 +107,12 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { - if (adev->asic_type == CHIP_IP_DISCOVERY) { - switch (adev->ip_versions[MP1_HWIP][0]) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 10): - return true; - default: - return false; - } - } - - return adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_ALDEBARAN; -} - -static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, - struct amdgpu_ras_eeprom_control *control) -{ - struct atom_context *atom_ctx = adev->mode_info.atom_context; - - if (!control || !atom_ctx) - return false; - - if (strnstr(atom_ctx->vbios_version, - "D342", - sizeof(atom_ctx->vbios_version))) - control->i2c_address = EEPROM_I2C_MADDR_0; - else - control->i2c_address = EEPROM_I2C_MADDR_4; - - return true; -} - -static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev, - struct amdgpu_ras_eeprom_control *control) -{ switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */ + case IP_VERSION(11, 0, 7): /* Sienna cichlid */ case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 2): /* Aldebaran */ case IP_VERSION(13, 0, 10): - control->i2c_address = EEPROM_I2C_MADDR_4; return true; default: return false; @@ -178,29 +143,32 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; } - 
switch (adev->asic_type) { - case CHIP_VEGA20: - control->i2c_address = EEPROM_I2C_MADDR_0; + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 2): + /* VEGA20 and ARCTURUS */ + if (adev->asic_type == CHIP_VEGA20) + control->i2c_address = EEPROM_I2C_MADDR_0; + else if (strnstr(atom_ctx->vbios_version, + "D342", + sizeof(atom_ctx->vbios_version))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; return true; - - case CHIP_ARCTURUS: - return __get_eeprom_i2c_addr_arct(adev, control); - - case CHIP_SIENNA_CICHLID: + case IP_VERSION(11, 0, 7): control->i2c_address = EEPROM_I2C_MADDR_0; return true; - - case CHIP_ALDEBARAN: + case IP_VERSION(13, 0, 2): if (strnstr(atom_ctx->vbios_version, "D673", sizeof(atom_ctx->vbios_version))) control->i2c_address = EEPROM_I2C_MADDR_4; else control->i2c_address = EEPROM_I2C_MADDR_0; return true; - - case CHIP_IP_DISCOVERY: - return __get_eeprom_i2c_addr_ip_discovery(adev, control); - + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; default: return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 018f36b10de8..d8749444b689 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -165,7 +165,6 @@ struct amdgpu_ring_funcs { bool support_64bit_ptrs; bool no_user_fence; bool secure_submission_supported; - unsigned vmhub; unsigned extra_dw; /* ring read/write ptr handling */ @@ -250,6 +249,7 @@ struct amdgpu_ring { uint64_t ptr_mask; uint32_t buf_mask; u32 idx; + u32 xcc_id; u32 me; u32 pipe; u32 queue; @@ -275,6 +275,7 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned vm_hub; unsigned vm_inv_eng; struct dma_fence *vmid_wait; bool has_compute_vm_bug; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index e9b45089a28a..863b2a34b2d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -38,6 +38,7 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, { struct fd f = fdget(fd); struct amdgpu_fpriv *fpriv; + struct amdgpu_ctx_mgr *mgr; struct amdgpu_ctx *ctx; uint32_t id; int r; @@ -51,8 +52,11 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, return r; } - idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + idr_for_each_entry(&mgr->ctx_handles, ctx, id) amdgpu_ctx_priority_override(ctx, priority); + mutex_unlock(&mgr->lock); fdput(f); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 98d91ebf5c26..525dffbe046a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -233,7 +233,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, __entry->pasid = vm->pasid; __assign_str(ring, ring->name); __entry->vmid = job->vmid; - __entry->vm_hub = ring->funcs->vmhub, + __entry->vm_hub = ring->vm_hub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), @@ -427,7 +427,7 @@ TRACE_EVENT(amdgpu_vm_flush, TP_fast_assign( __assign_str(ring, ring->name); __entry->vmid = vmid; - __entry->vm_hub = ring->funcs->vmhub; + __entry->vm_hub = ring->vm_hub; __entry->pd_addr = pd_addr; ), TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx", diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 9e2e97207e53..1edf8e6aeb16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, err_data->err_addr_cnt++; } + +int amdgpu_umc_loop_channels(struct amdgpu_device *adev, + umc_func func, void *data) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + int ret = 0; + + if (adev->umc.node_inst_num) { + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + ret = func(adev, node_inst, umc_inst, ch_inst, data); + if (ret) { + dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n", + node_inst, umc_inst, ch_inst, ret); + return ret; + } + } + } else { + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + ret = func(adev, 0, umc_inst, ch_inst, data); + if (ret) { + dev_err(adev->dev, "Umc %d ch %d func returns %d\n", + umc_inst, ch_inst, ret); + return ret; + } + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index d7f1229ff11f..86133f77a9a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -47,6 +47,10 @@ #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + +typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, + uint32_t umc_inst, uint32_t ch_inst, void *data); + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -104,4 +108,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst); + +int amdgpu_umc_loop_channels(struct amdgpu_device *adev, + umc_func func, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 286e326bb4bd..3c0310576b3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -483,7 +483,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; if (job->vmid == 0) @@ -517,7 +517,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; bool spm_update_needed = job->spm_update_needed; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 516409989235..f5b5ce1051a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4461,6 +4461,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -4489,6 
+4490,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX10_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -9249,7 +9251,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_gfx, .get_wptr = gfx_v10_0_ring_get_wptr_gfx, .set_wptr = gfx_v10_0_ring_set_wptr_gfx, @@ -9304,7 +9305,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -9340,7 +9340,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ecf8ceb53311..a9da0486467a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -866,6 +866,7 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -896,6 +897,7 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX11_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4671,11 +4673,24 @@ static int gfx_v11_0_post_soft_reset(void *handle) static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL); + if (amdgpu_sriov_vf(adev)) { + clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); + clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); + clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); + } else { + clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, 
regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + } + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); return clock; @@ -6191,7 +6206,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, .set_wptr = gfx_v11_0_ring_set_wptr_gfx, @@ -6239,7 +6253,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_compute, .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, @@ -6275,7 +6288,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_compute, .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index b07a72ca25d9..068b9586a223 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -62,10 +62,18 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, return -EINVAL; } - ih_data.head = *ras_if; - dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name); - amdgpu_ras_interrupt_dispatch(adev, &ih_data); + + if (!amdgpu_sriov_vf(adev)) { + ih_data.head = *ras_if; + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + } else { + if (adev->virt.ops && adev->virt.ops->ras_poison_handler) + adev->virt.ops->ras_poison_handler(adev); + else + dev_warn(adev->dev, + "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ae09fc1cfe6b..adbcd8127c82 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -149,6 +149,16 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_UPPER_Raven 0x007a +#define mmGOLDEN_TSC_COUNT_UPPER_Raven_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_LOWER_Raven 0x007b +#define mmGOLDEN_TSC_COUNT_LOWER_Raven_BASE_IDX 0 + +#define mmGOLDEN_TSC_COUNT_UPPER_Raven2 0x0068 +#define mmGOLDEN_TSC_COUNT_UPPER_Raven2_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_LOWER_Raven2 0x0069 +#define mmGOLDEN_TSC_COUNT_LOWER_Raven2_BASE_IDX 0 + enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -1995,6 +2005,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -2094,6 +2105,7 @@ static int gfx_v9_0_sw_init(void *handle) /* disable scheduler on the real ring */ ring->no_scheduler = true; 
+ ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2111,6 +2123,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; ring->is_sw_ring = true; hw_prio = amdgpu_sw_ring_priority(i); + ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, NULL); @@ -3988,6 +4001,36 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) preempt_enable(); clock = clock_lo | (clock_hi << 32ULL); break; + case IP_VERSION(9, 1, 0): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; + case IP_VERSION(9, 2, 2): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; default: amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); @@ -6750,7 +6793,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -6804,7 +6846,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = amdgpu_sw_ring_get_rptr_gfx, .get_wptr = amdgpu_sw_ring_get_wptr_gfx, .set_wptr = amdgpu_sw_ring_set_wptr_gfx, @@ -6858,7 +6899,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -6897,7 +6937,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c new file mode 100644 index 000000000000..5f8500577c02 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -0,0 +1,430 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> + +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "soc15.h" +#include "soc15_common.h" +#include "vega10_enum.h" + +#include "gc/gc_9_4_3_offset.h" +#include "gc/gc_9_4_3_sh_mask.h" + +#include "gfx_v9_4_3.h" + +#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L + +static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock; + + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->gfx.gpu_clock_mutex); + WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); + + return clock; +} + +static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, + u32 se_num, + u32 sh_num, + u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_INDEX, instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data); +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +{ + WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK)); + return RREG32_SOC15(GC, 0, regSQ_IND_DATA); +} + +static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t regno, uint32_t num, uint32_t *out) +{ + WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); +} + +static void gfx_v9_4_3_read_wave_data(struct 
amdgpu_device *adev, + uint32_t simd, uint32_t wave, + uint32_t *dst, int *no_fields) +{ + /* type 1 wave data */ + dst[(*no_fields)++] = 1; + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); +} + +static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + wave_read_regs(adev, simd, wave, 0, + start + SQIND_WAVE_SGPRS_OFFSET, size, dst); +} + +static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t start, uint32_t size, + uint32_t *dst) +{ + wave_read_regs(adev, simd, wave, thread, + start + SQIND_WAVE_VGPRS_OFFSET, size, dst); +} + +static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 q, u32 vm) +{ + soc15_grbm_select(adev, me, pipe, q, vm); +} + +static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) +{ + uint32_t rlc_setting; + + /* if RLC is not enabled, do nothing */ + rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL); + if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return false; + + return true; +} + +static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + unsigned i; + + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } +} + +static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RLC_SAFE_MODE__CMD_MASK; + WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); +} + +static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) +{ + /* init spm vmid with 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + + return 0; +} + +static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) +{ + u32 i, j, k; + u32 mask; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + for (k = 0; k < adev->usec_timeout; k++) { + if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0) + break; + udelay(1); + } + if (k == adev->usec_timeout) { + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, + 
0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + DRM_INFO("Timeout wait for RLC serdes %u,%u\n", + i, j); + return; + } + } + } + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; + for (k = 0; k < adev->usec_timeout; k++) { + if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) + break; + udelay(1); + } +} + +static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + /* These interrupts should be enabled to drive DS clock */ + + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); + if (adev->gfx.num_gfx_rings) + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); + + WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); +} + +static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) +{ + WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + gfx_v9_4_3_wait_for_rlc_serdes(adev); +} + +static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) +{ + WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); +} + +static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) +{ +#ifdef AMDGPU_RLC_DEBUG_RETRY + u32 rlc_ucode_ver; +#endif + + WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); + + /* carrizo do enable cp interrupt after cp inited */ + if (!(adev->flags & AMD_IS_APU)) { + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); + udelay(50); + } + +#ifdef AMDGPU_RLC_DEBUG_RETRY + /* RLC_GPM_GENERAL_6 : RLC Ucode version */ + rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6); + if (rlc_ucode_ver == 0x108) { + dev_info(adev->dev, + "Using rlc debug ucode. 
regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + rlc_ucode_ver, adev->gfx.rlc_fw_version); + /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, + * default is 0x9C4 to create a 100us interval */ + WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4); + /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr + * to disable the page fault retry interrupts, default is + * 0x100 (256) */ + WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100); + } +#endif +} + +static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, + RLCG_UCODE_LOADING_START_ADDRESS); + for (i = 0; i < fw_size; i++) { + if (amdgpu_emu_mode == 1 && i % 100 == 0) { + dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i); + msleep(1); + } + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); + } + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); + + return 0; +} + +static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) +{ + int r; + + adev->gfx.rlc.funcs->stop(adev); + + /* disable CG */ + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); + + /* TODO: revisit pg function */ + /* gfx_v9_4_3_init_pg(adev);*/ + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy rlc firmware loading */ + r = gfx_v9_4_3_rlc_load_microcode(adev); + if (r) + return r; + } + + adev->gfx.rlc.funcs->start(adev); + + return 0; +} + +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +{ + u32 reg, data; + + reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + if (amdgpu_sriov_is_pp_one_vf(adev)) + data = RREG32_NO_KIQ(reg); + else + data = RREG32(reg); + + data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; + + if (amdgpu_sriov_is_pp_one_vf(adev)) + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); + else + WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); +} + +static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = { + {SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)}, + {SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)}, +}; + +static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev, + uint32_t offset, + struct soc15_reg_rlcg *entries, int arr_size) +{ + int i; + uint32_t reg; + + if (!entries) + return false; + + for (i = 0; i < arr_size; i++) { + const struct soc15_reg_rlcg *entry; + + entry = &entries[i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + if (offset == reg) + return true; + } + + return false; +} + +static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) +{ + return gfx_v9_4_3_check_rlcg_range(adev, offset, + (void *)rlcg_access_gc_9_4_3, + ARRAY_SIZE(rlcg_access_gc_9_4_3)); +} + +const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_4_3_select_se_sh, + .read_wave_data = &gfx_v9_4_3_read_wave_data, + .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, + .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, + .select_me_pipe_q = 
&gfx_v9_4_3_select_me_pipe_q, +}; + +const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { + .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, + .set_safe_mode = gfx_v9_4_3_set_safe_mode, + .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, + .init = gfx_v9_4_3_rlc_init, + .resume = gfx_v9_4_3_rlc_resume, + .stop = gfx_v9_4_3_rlc_stop, + .reset = gfx_v9_4_3_rlc_reset, + .start = gfx_v9_4_3_rlc_start, + .update_spm_vmid = gfx_v9_4_3_update_spm_vmid, + .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h new file mode 100644 index 000000000000..84e69701b81a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h @@ -0,0 +1,30 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFX_V9_4_3_H__ +#define __GFX_V9_4_3_H__ + +extern const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs; +extern const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs; + +#endif /* __GFX_V9_4_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index be0d0f47415e..13712640fa46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -417,34 +417,12 @@ static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1); WREG32_SOC15(GC, 0, regCP_DEBUG, tmp); - /** - * Set GRBM_GFX_INDEX in broad cast mode - * before programming GL1C_UTCL0_CNTL1 and SQG_CONFIG - */ - WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, regGRBM_GFX_INDEX_DEFAULT); - - /** - * Retry respond mode: RETRY - * Error (no retry) respond mode: SUCCESS - */ - tmp = RREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1); - tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_MODE, 0); - tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_FAULT_MODE, 0x2); - WREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1, tmp); - /* These registers are not accessible to VF-SRIOV. * The PF will program them instead. 
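[Editor's note] The `gfx_v9_4_3_gfx_funcs` and `gfx_v9_4_3_rlc_funcs` tables just above follow the driver's usual pattern: one const ops table per IP version, exported from the new header, and all callers dispatch through function pointers (`adev->gfx.rlc.funcs->stop(adev)` and so on). A reduced model of that dispatch, with made-up names:

```c
#include <stdio.h>

struct rlc_ops {
	void (*stop)(void);
	void (*start)(void);
};

static void rlc_stop_model(void)  { puts("stop RLC");  }
static void rlc_start_model(void) { puts("start RLC"); }

/* One const table per IP version; the device keeps a pointer to the
 * table that matches the detected hardware. */
static const struct rlc_ops rlc_ops_model = {
	.stop  = rlc_stop_model,
	.start = rlc_start_model,
};

static void rlc_restart(const struct rlc_ops *ops)
{
	ops->stop();
	ops->start();
}
```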
*/ if (amdgpu_sriov_vf(adev)) return; - /* Disable SQ XNACK interrupt for all VMIDs */ - tmp = RREG32_SOC15(GC, 0, regSQG_CONFIG); - tmp = REG_SET_FIELD(tmp, SQG_CONFIG, XNACK_INTR_MASK, - SQG_CONFIG__XNACK_INTR_MASK_MASK >> - SQG_CONFIG__XNACK_INTR_MASK__SHIFT); - WREG32_SOC15(GC, 0, regSQG_CONFIG, tmp); - tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d99821692ba3..b213dcf8ca06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -479,8 +479,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -534,7 +534,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1143,7 +1143,6 @@ static int gmc_v10_0_hw_fini(void *handle) return 0; } - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 9f4f28192c60..d95f9fe8f1c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -378,8 +378,8 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -433,7 +433,7 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; @@ -951,7 +951,6 @@ static int gmc_v11_0_hw_fini(void *handle) return 0; } - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v11_0_gart_disable(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 67ad92097a65..2fe21cefd772 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -555,32 +555,49 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, const char *mmhub_cid; const char *hub_name; u64 addr; + uint32_t 
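[Editor's note] The change running through these gmc/jpeg/sdma hunks is that the vmhub selection moves from the shared const ring funcs (`ring->funcs->vmhub`) onto the ring itself (`ring->vm_hub`), so flush and PASID-mapping helpers index the device's hub array with a per-ring field. A reduced model, with illustrative types:

```c
#include <stdint.h>

enum { GFXHUB_0, MMHUB_0, MMHUB_1, MAX_VMHUBS };

struct vmhub      { uint32_t vm_inv_req_reg; };
struct dev_model  { struct vmhub vmhub[MAX_VMHUBS]; };
struct ring_model { struct dev_model *adev; unsigned int vm_hub; };

/* Equivalent of &ring->adev->vmhub[ring->vm_hub] in the hunks above. */
static struct vmhub *ring_to_vmhub(struct ring_model *ring)
{
	return &ring->adev->vmhub[ring->vm_hub];
}
```

Keeping the hub on the ring lets a single const funcs table serve instances that sit on different hubs, which is what allows the duplicated `_2nd_mmhub` tables to be dropped later in this patch.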
cam_index = 0; + int ret; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (retry_fault) { - /* Returning 1 here also prevents sending the IV to the KFD */ + if (adev->irq.retry_cam_enabled) { + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + cam_index = entry->src_data[2] & 0x3ff; - /* Process it onyl if it's the first fault for this address */ - if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, + ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault); + WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); + if (ret) + return 1; + } else { + /* Process it onyl if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, entry->timestamp)) - return 1; + return 1; - /* Delegate it to a different ring if the hardware hasn't - * already done it. - */ - if (entry->ih == &adev->irq.ih) { - amdgpu_irq_delegate(adev, entry, 8); - return 1; - } + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } - /* Try to handle the recoverable page faults by filling page - * tables - */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) - return 1; + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) + return 1; + } } if (!printk_ratelimit()) @@ -990,9 +1007,9 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); struct amdgpu_device *adev = ring->adev; - struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -1043,10 +1060,10 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, uint32_t reg; /* Do nothing because there's no lut register for mmhub1. 
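[Editor's note] The retry-fault path above decodes everything it needs from the IV payload: `src_data[0]` carries bits 12..43 of the faulting page address, the low nibble of `src_data[1]` carries bits 44..47, and when the retry CAM is enabled the low ten bits of `src_data[2]` identify the CAM entry that must be released via the CAM doorbell after the fault is serviced. The decode as plain C:

```c
#include <stdint.h>

/* Mirror of the address/CAM decode in gmc_v9_0_process_interrupt(). */
static uint64_t iv_fault_addr(const uint32_t *src_data)
{
	uint64_t addr = (uint64_t)src_data[0] << 12;

	addr |= ((uint64_t)src_data[1] & 0xf) << 44;
	return addr;
}

static uint32_t iv_cam_index(const uint32_t *src_data)
{
	return src_data[2] & 0x3ff;	/* 10-bit retry-CAM entry id */
}
```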
*/ - if (ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (ring->vm_hub == AMDGPU_MMHUB_1) return; - if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1982,7 +1999,6 @@ static int gmc_v9_0_hw_fini(void *handle) if (adev->mmhub.funcs->update_power_gating) adev->mmhub.funcs->update_power_gating(adev, false); - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 9360204da7fb..a3076eb8af6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -376,7 +376,7 @@ static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring, static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -485,6 +485,7 @@ int jpeg_v1_0_sw_init(void *handle) return r; ring = &adev->jpeg.inst->ring_dec; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -548,7 +549,6 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .extra_dw = 64, .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index f3c1af5130ab..0eddf7c824a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -86,6 +86,7 @@ static int jpeg_v2_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -613,7 +614,7 @@ void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -762,7 +763,6 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 6b1887808782..b040f51d9aa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -127,6 +127,10 @@ static int jpeg_v2_5_sw_init(void *handle) ring = &adev->jpeg.inst[i].ring_dec; ring->use_doorbell = true; + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + 
ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; sprintf(ring->name, "jpeg_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, @@ -645,7 +649,6 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, @@ -675,7 +678,6 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a1b751d9ac06..c55e09432e26 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -100,6 +100,7 @@ static int jpeg_v3_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -559,7 +560,6 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 5f2a034b9ec0..77e1e64aa1d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -108,6 +108,7 @@ static int jpeg_v4_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = amdgpu_sriov_vf(adev) ? 
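[Editor's note] With `.vmhub` removed from the const ring funcs, the JPEG 2.5 code picks the hub at sw_init time: the 2.5.0 instances stay on MMHUB_1 while the 2.6 variant uses MMHUB_0, matching the two tables that used to exist. A sketch of that selection (version check simplified, names illustrative):

```c
#include <stdint.h>

enum vmhub_id { HUB_GFXHUB_0, HUB_MMHUB_0, HUB_MMHUB_1 };

static enum vmhub_id jpeg_pick_vm_hub(unsigned int ip_major,
				      unsigned int ip_minor)
{
	if (ip_major == 2 && ip_minor == 5)
		return HUB_MMHUB_1;	/* JPEG 2.5.0 rings live on MMHUB_1 */
	return HUB_MMHUB_0;
}
```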
(((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, @@ -429,7 +430,7 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev) MMSCH_COMMAND__END; header.version = MMSCH_VERSION; - header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2; + header.total_size = RREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE); header.jpegdec.init_status = 0; header.jpegdec.table_offset = 0; @@ -715,7 +716,6 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 5826eac270d7..45280f047180 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -33,14 +33,19 @@ #include "mes_v11_api_def.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); @@ -1089,13 +1094,14 @@ static int mes_v11_0_sw_fini(void *handle) return 0; } -static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) +static void mes_v11_0_kiq_dequeue(struct amdgpu_ring *ring) { uint32_t data; int i; + struct amdgpu_device *adev = ring->adev; mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + soc21_grbm_select(adev, 3, ring->pipe, 0, 0); /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { @@ -1121,8 +1127,6 @@ static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - - adev->mes.ring.sched.ready = false; } static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) @@ -1139,6 +1143,16 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); } +static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* tell RLC which is KIQ dequeue */ + tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); + tmp &= ~RLC_CP_SCHEDULERS__scheduler0_MASK; + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); +} + static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; @@ -1176,11 +1190,17 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) - mes_v11_0_kiq_dequeue_sched(adev); + if (adev->mes.ring.sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring); + adev->mes.ring.sched.ready = false; + } - if (!amdgpu_sriov_vf(adev)) - mes_v11_0_enable(adev, false); + if (amdgpu_sriov_vf(adev)) { + 
mes_v11_0_kiq_dequeue(&adev->gfx.kiq.ring); + mes_v11_0_kiq_clear(adev); + } + + mes_v11_0_enable(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 19455a725939..685abf57ffdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -238,7 +238,7 @@ static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev, if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); - ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 4); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 8); } else ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 47420b403871..98c826f1f89b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -531,13 +531,6 @@ static void nv_program_aspm(struct amdgpu_device *adev) } -static void nv_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version nv_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, @@ -999,6 +992,11 @@ static int nv_common_late_init(void *handle) } } + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -1038,7 +1036,7 @@ static int nv_common_hw_init(void *handle) if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ - nv_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); return 0; } @@ -1047,8 +1045,13 @@ static int nv_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - nv_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because nv_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index b5affba22156..b3cc04dd8653 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1823,6 +1823,15 @@ static int sdma_v4_0_sw_init(void *handle) /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; + /* + * On Arcturus, SDMA instance 5~7 has a different vmhub + * type(AMDGPU_MMHUB_1). 
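[Editor's note] The nbio_v7_4 hunk above widens the IH doorbell window by reprogramming the OFFSET and SIZE fields of BIF_IH_DOORBELL_RANGE via REG_SET_FIELD(). The field layout below is made up for illustration; only the mask/shift update pattern matches the driver's macro:

```c
#include <stdint.h>

/* Generic mask/shift field update, the shape REG_SET_FIELD() expands to. */
#define FIELD_SET(reg, mask, shift, val) \
	(((reg) & ~(mask)) | (((uint32_t)(val) << (shift)) & (mask)))

#define IH_DB_OFFSET_MASK  0x00ffffffu	/* illustrative layout */
#define IH_DB_OFFSET_SHIFT 0u
#define IH_DB_SIZE_MASK    0xff000000u
#define IH_DB_SIZE_SHIFT   24u

static uint32_t ih_doorbell_range(uint32_t reg, uint32_t index, uint32_t size)
{
	reg = FIELD_SET(reg, IH_DB_OFFSET_MASK, IH_DB_OFFSET_SHIFT, index);
	reg = FIELD_SET(reg, IH_DB_SIZE_MASK, IH_DB_SIZE_SHIFT, size);
	return reg;
}
```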
+ */ + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, @@ -1841,6 +1850,11 @@ static int sdma_v4_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->doorbell_index += 0x400; + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1870,7 +1884,7 @@ static int sdma_v4_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0) || + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) amdgpu_sdma_destroy_inst_ctx(adev, true); else @@ -2294,44 +2308,6 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = sdma_v4_0_ring_get_rptr, - .get_wptr = sdma_v4_0_ring_get_wptr, - .set_wptr = sdma_v4_0_ring_set_wptr, - .emit_frame_size = - 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* hdp invalidate */ - 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - /* sdma_v4_0_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + - 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ - .emit_ib = sdma_v4_0_ring_emit_ib, - .emit_fence = sdma_v4_0_ring_emit_fence, - .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, - .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, - .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .test_ring = sdma_v4_0_ring_test_ring, - .test_ib = sdma_v4_0_ring_test_ib, - .insert_nop = sdma_v4_0_ring_insert_nop, - .pad_ib = sdma_v4_0_ring_pad_ib, - .emit_wreg = sdma_v4_0_ring_emit_wreg, - .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -/* - * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1). - * So create a individual constant ring_funcs for those instances. 
- */ -static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { - .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, - .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), - .support_64bit_ptrs = true, - .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -2364,40 +2340,6 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = sdma_v4_0_ring_get_rptr, - .get_wptr = sdma_v4_0_page_ring_get_wptr, - .set_wptr = sdma_v4_0_page_ring_set_wptr, - .emit_frame_size = - 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* hdp invalidate */ - 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - /* sdma_v4_0_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + - 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ - .emit_ib = sdma_v4_0_ring_emit_ib, - .emit_fence = sdma_v4_0_ring_emit_fence, - .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, - .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, - .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .test_ring = sdma_v4_0_ring_test_ring, - .test_ib = sdma_v4_0_ring_test_ib, - .insert_nop = sdma_v4_0_ring_insert_nop, - .pad_ib = sdma_v4_0_ring_pad_ib, - .emit_wreg = sdma_v4_0_ring_emit_wreg, - .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { - .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, - .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), - .support_64bit_ptrs = true, - .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, .set_wptr = sdma_v4_0_page_ring_set_wptr, @@ -2429,19 +2371,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - adev->sdma.instance[i].ring.funcs = - &sdma_v4_0_ring_funcs_2nd_mmhub; - else - adev->sdma.instance[i].ring.funcs = - &sdma_v4_0_ring_funcs; + adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; if (adev->sdma.has_page_queue) { - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - adev->sdma.instance[i].page.funcs = - &sdma_v4_0_page_ring_funcs_2nd_mmhub; - else - adev->sdma.instance[i].page.funcs = + adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; adev->sdma.instance[i].page.me = i; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 1b04700a4d55..64dcaa2670dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1309,6 +1309,7 @@ static int sdma_v4_4_2_sw_init(void *handle) /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1327,6 +1328,7 @@ static int sdma_v4_4_2_sw_init(void *handle) */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; 
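[Editor's note] The SDMA sw_init hunks compute the ring doorbell as a dword offset: doorbell entries are 64 bits (two dwords), so the engine's slot is doubled, and the page queue sits 0x400 dwords above the gfx queue. The index math in isolation:

```c
#include <stdint.h>

/* Doorbell dword offsets as assigned in the sdma sw_init hunks above. */
static uint32_t sdma_gfx_doorbell_dw(uint32_t engine_slot)
{
	return engine_slot << 1;		/* 2 dwords per doorbell entry */
}

static uint32_t sdma_page_doorbell_dw(uint32_t engine_slot)
{
	return (engine_slot << 1) + 0x400;	/* page queue above the gfx queue */
}
```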
ring->doorbell_index += 0x400; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1741,7 +1743,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = sdma_v4_4_2_ring_get_rptr, .get_wptr = sdma_v4_4_2_ring_get_wptr, .set_wptr = sdma_v4_4_2_ring_set_wptr, @@ -1773,7 +1774,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = sdma_v4_4_2_ring_get_rptr, .get_wptr = sdma_v4_4_2_page_ring_get_wptr, .set_wptr = sdma_v4_4_2_page_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1941b3b7c5d9..92e1299be021 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1389,6 +1389,7 @@ static int sdma_v5_0_sw_init(void *handle) (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : @@ -1765,7 +1766,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_0_ring_get_rptr, .get_wptr = sdma_v5_0_ring_get_wptr, .set_wptr = sdma_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 8e445eb9dd49..ca7e8757d78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1253,6 +1253,7 @@ static int sdma_v5_2_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, @@ -1653,7 +1654,6 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_2_ring_get_rptr, .get_wptr = sdma_v5_2_ring_get_wptr, .set_wptr = sdma_v5_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 40e6b22daa22..3d9a80511a45 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -403,15 +403,26 @@ static void sdma_v6_0_rlc_stop(struct amdgpu_device *adev) } /** - * sdma_v6_0_ctx_switch_enable - stop the async dma engines context switch + * sdma_v6_0_ctxempty_int_enable - enable or disable context empty interrupts * * @adev: amdgpu_device pointer - * @enable: enable/disable the DMA MEs context switch. + * @enable: enable/disable context switching due to queue empty conditions * - * Halt or unhalt the async dma engines context switch. + * Enable or disable the async dma engines queue empty context switch. 
*/ -static void sdma_v6_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +static void sdma_v6_0_ctxempty_int_enable(struct amdgpu_device *adev, bool enable) { + u32 f32_cntl; + int i; + + if (!amdgpu_sriov_vf(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + CTXEMPTY_INT_ENABLE, enable ? 1 : 0); + WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL), f32_cntl); + } + } } /** @@ -499,10 +510,7 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); - else - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); @@ -579,10 +587,8 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) ring->sched.ready = true; - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v6_0_ctx_switch_enable(adev, true); + if (amdgpu_sriov_vf(adev)) sdma_v6_0_enable(adev, true); - } r = amdgpu_ring_test_helper(ring); if (r) { @@ -778,7 +784,6 @@ static int sdma_v6_0_start(struct amdgpu_device *adev) int r = 0; if (amdgpu_sriov_vf(adev)) { - sdma_v6_0_ctx_switch_enable(adev, false); sdma_v6_0_enable(adev, false); /* set RB registers */ @@ -799,7 +804,7 @@ static int sdma_v6_0_start(struct amdgpu_device *adev) /* unhalt the MEs */ sdma_v6_0_enable(adev, true); /* enable sdma ring preemption */ - sdma_v6_0_ctx_switch_enable(adev, true); + sdma_v6_0_ctxempty_int_enable(adev, true); /* start the gfx rings and rlc compute queues */ r = sdma_v6_0_gfx_resume(adev); @@ -1173,7 +1178,28 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); + + /* Update the PD address for this VMID. */ + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + + /* Trigger invalidation. 
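[Editor's note] The rewritten `sdma_v6_0_ring_emit_vm_flush()` first programs the per-VMID page-table-base registers: each VMID's PTB low/high pair sits `ctx_addr_distance` registers above the VMID0 pair on the ring's hub. The addressing in isolation (field names copied from the hunk, values illustrative):

```c
#include <stdint.h>

struct vmhub_layout {
	uint32_t ctx0_ptb_addr_lo32;
	uint32_t ctx0_ptb_addr_hi32;
	uint32_t ctx_addr_distance;
};

static uint32_t ptb_lo_reg(const struct vmhub_layout *hub, unsigned int vmid)
{
	return hub->ctx0_ptb_addr_lo32 + hub->ctx_addr_distance * vmid;
}

static uint32_t ptb_hi_reg(const struct vmhub_layout *hub, unsigned int vmid)
{
	return hub->ctx0_ptb_addr_hi32 + hub->ctx_addr_distance * vmid;
}
```

After the two writes, the ring emits the VM_INVALIDATION packet and waits for the ack bit of the flushed VMID, as shown in the hunk that follows.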
*/ + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) | + SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) | + SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f)); + amdgpu_ring_write(ring, req); + amdgpu_ring_write(ring, 0xFFFFFFFF); + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) | + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F)); } static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1272,6 +1298,7 @@ static int sdma_v6_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1319,7 +1346,7 @@ static int sdma_v6_0_hw_fini(void *handle) return 0; } - sdma_v6_0_ctx_switch_enable(adev, false); + sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); return 0; @@ -1528,7 +1555,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v6_0_ring_get_rptr, .get_wptr = sdma_v6_0_ring_get_wptr, .set_wptr = sdma_v6_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 81a6d5b94987..8b8086d5c864 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -40,7 +40,7 @@ static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_c adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev)) return true; #endif - return false; + return amdgpu_reset_method == AMD_RESET_METHOD_MODE2; } static struct amdgpu_reset_handler * diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 7d04c39332ad..6d15d5cd9e07 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -301,11 +301,10 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) - return 10000; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1) || + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1)) - return reference_clock / 4; + return 10000; return reference_clock; } @@ -620,13 +619,6 @@ static void soc15_program_aspm(struct amdgpu_device *adev) adev->nbio.funcs->program_aspm(adev); } -static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version vega10_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, @@ -1101,6 +1093,11 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x3c; break; + case IP_VERSION(9, 4, 3): + adev->asic_funcs = &vega20_asic_funcs; + adev->cg_flags = 0; + adev->pg_flags = 0; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1121,6 +1118,11 @@ static int 
soc15_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -1178,7 +1180,8 @@ static int soc15_common_hw_init(void *handle) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ - soc15_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); + /* HW doorbell routing policy: doorbell writing not * in SDMA/IH/MM/ACV range will be routed to CP. So * we need to init SDMA doorbell range prior @@ -1194,8 +1197,14 @@ static int soc15_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - soc15_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because soc15_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); + if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 514bfc705d5a..744be2a05623 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -450,13 +450,6 @@ static void soc21_program_aspm(struct amdgpu_device *adev) adev->nbio.funcs->program_aspm(adev); } -static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version soc21_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, @@ -764,6 +757,11 @@ static int soc21_common_late_init(void *handle) amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); } + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -797,7 +795,7 @@ static int soc21_common_hw_init(void *handle) if (adev->nbio.funcs->remap_hdp_registers) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ - soc21_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); return 0; } @@ -806,8 +804,13 @@ static int soc21_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - soc21_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because soc21_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. 
+ */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_put_irq(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 509d8a1945eb..30d0482ac466 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -84,6 +84,8 @@ enum ta_ras_block { TA_RAS_BLOCK__MP1, TA_RAS_BLOCK__FUSE, TA_RAS_BLOCK__MCA, + TA_RAS_BLOCK__VCN, + TA_RAS_BLOCK__JPEG, TA_NUM_BLOCK_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index e08e25a3a1a9..530549314ce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -160,24 +160,28 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev } } +static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + + umc_v6_7_ecc_info_query_correctable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ce_count)); + + umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ue_count)); + + return 0; +} + static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. Will add the protection */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_v6_7_ecc_info_query_correctable_error_count(adev, - umc_inst, ch_inst, - &(err_data->ce_count)); - umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, - umc_inst, ch_inst, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status); } void umc_v6_7_convert_error_address(struct amdgpu_device *adev, @@ -215,23 +219,23 @@ void umc_v6_7_convert_error_address(struct amdgpu_device *adev, } } -static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint64_t mc_umc_status, err_addr; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct ras_err_data *err_data = (struct ras_err_data *)data; eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) - return; + return 0; /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -243,25 +247,15 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, umc_v6_7_convert_error_address(adev, err_data, err_addr, ch_inst, umc_inst); } + + return 0; } static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access 
of UMC resgisters. Will add the protection - * when firmware interface is ready */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_v6_7_ecc_info_query_error_address(adev, - err_data, - ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_ecc_info_query_error_address, ras_error_status); } static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, @@ -364,11 +358,14 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev } } -static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, - uint32_t umc_reg_offset) +static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t ecc_err_cnt_addr; uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, @@ -402,58 +399,54 @@ static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, /* clear higher chip error count */ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_7_CE_CNT_INIT); + + return 0; } static void umc_v6_7_reset_error_count(struct amdgpu_device *adev) { - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; + amdgpu_umc_loop_channels(adev, + umc_v6_7_reset_error_count_per_channel, NULL); +} + +static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); + + umc_v6_7_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count), + ch_inst, umc_inst); - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); + umc_v6_7_querry_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); - umc_v6_7_reset_error_count_per_channel(adev, - umc_reg_offset); - } + return 0; } static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. 
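[Editor's note] The umc_v6_7 (and, below, umc_v8_10) changes all have the same shape: the open-coded LOOP_UMC_INST_AND_CH walks are replaced by callbacks with a fixed signature, and `amdgpu_umc_loop_channels()` does the iteration, passing an opaque data pointer that the callbacks cast back to `ras_err_data`. A reduced model of that iterator (the real helper also takes the device and per-ASIC instance counts):

```c
#include <stdint.h>

typedef int (*umc_channel_cb)(uint32_t node, uint32_t umc, uint32_t ch,
			      void *data);

static int loop_channels(uint32_t nodes, uint32_t umcs, uint32_t chans,
			 umc_channel_cb cb, void *data)
{
	uint32_t n, u, c;
	int ret;

	for (n = 0; n < nodes; n++)
		for (u = 0; u < umcs; u++)
			for (c = 0; c < chans; c++) {
				ret = cb(n, u, c, data);
				if (ret)
					return ret;
			}
	return 0;
}
```

Returning nonzero from a callback stops the walk early, which is why the converted helpers now return 0 instead of void.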
Will add the protection */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - umc_v6_7_query_correctable_error_count(adev, - umc_reg_offset, - &(err_data->ce_count), - ch_inst, umc_inst); - umc_v6_7_querry_uncorrectable_error_count(adev, - umc_reg_offset, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_query_ecc_error_count, ras_error_status); umc_v6_7_reset_error_count(adev); } -static void umc_v6_7_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v6_7_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t mc_umc_status_addr; uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr; + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -463,12 +456,12 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) { /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); - return; + return 0; } /* calculate error address if ue error is detected */ @@ -484,29 +477,15 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + + return 0; } static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC resgisters. 
Will add the protection - * when firmware interface is ready */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - umc_v6_7_query_error_address(adev, - err_data, - umc_reg_offset, ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_query_error_address, ras_error_status); } static uint32_t umc_v6_7_query_ras_poison_mode_per_channel( diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index fb55e8cb9967..d51ae0bc36f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -76,10 +76,13 @@ static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, UMC_8_NODE_DIST * node_inst; } -static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, - uint32_t umc_reg_offset) +static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); ecc_err_cnt_addr = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); @@ -87,24 +90,14 @@ static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, /* clear error count */ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); + + return 0; } static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) { - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_clear_error_count_per_channel(adev, - umc_reg_offset); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_clear_error_count_per_channel, NULL); } static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, @@ -147,29 +140,29 @@ static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev *error_count += 1; } +static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); + + umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + + return 0; +} + static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_query_correctable_error_count(adev, - umc_reg_offset, - &(err_data->ce_count)); - umc_v8_10_query_uncorrectable_error_count(adev, - umc_reg_offset, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_query_ecc_error_count, ras_error_status); umc_v8_10_clear_error_count(adev); } @@ -248,28 +241,28 @@ static void umc_v8_10_convert_error_address(struct amdgpu_device *adev, } } -static void umc_v8_10_query_error_address(struct amdgpu_device 
*adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, - uint32_t node_inst, - uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v8_10_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; uint64_t mc_umc_addrt0; + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) { /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); - return; + return 0; } /* calculate error address if ue error is detected */ @@ -287,37 +280,25 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev, /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + + return 0; } static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_query_error_address(adev, - err_data, - umc_reg_offset, - node_inst, - ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_query_error_address, ras_error_status); } -static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, - uint32_t umc_reg_offset) +static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); @@ -332,23 +313,14 @@ static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); /* set error count to initial value */ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); + + return 0; } static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) { - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_err_cnt_init_per_channel, NULL); } static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev) @@ -406,37 +378,35 @@ static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev } } +static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + + umc_v8_10_ecc_info_query_correctable_error_count(adev, + node_inst, umc_inst, ch_inst, + &(err_data->ce_count)); + 
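[Editor's note] Each umc_v8_10 callback recomputes its register window from the (node, umc, channel) triple via `get_umc_v8_10_reg_offset()`, which simply adds a fixed stride per level (channel, UMC instance, node). The mapping in isolation, with placeholder strides rather than the hardware's constants:

```c
#include <stdint.h>

#define CH_DIST    0x400u	/* illustrative strides only */
#define UMC_DIST   0x2000u
#define NODE_DIST  0x10000u

static uint32_t umc_reg_offset(uint32_t node, uint32_t umc, uint32_t ch)
{
	return ch * CH_DIST + umc * UMC_DIST + node * NODE_DIST;
}
```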
umc_v8_10_ecc_info_query_uncorrectable_error_count(adev, + node_inst, umc_inst, ch_inst, + &(err_data->ue_count)); + return 0; +} + static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /* TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. Will add the protection - */ - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_v8_10_ecc_info_query_correctable_error_count(adev, - node_inst, umc_inst, ch_inst, - &(err_data->ce_count)); - umc_v8_10_ecc_info_query_uncorrectable_error_count(adev, - node_inst, umc_inst, ch_inst, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_ecc_info_query_ecc_error_count, ras_error_status); } -static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t ch_inst, - uint32_t umc_inst, - uint32_t node_inst) +static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t eccinfo_table_idx; uint64_t mc_umc_status, err_addr; - + struct ras_err_data *err_data = (struct ras_err_data *)data; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); eccinfo_table_idx = node_inst * adev->umc.umc_inst_num * @@ -447,10 +417,10 @@ static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) - return; + return 0; /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -463,28 +433,15 @@ static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, umc_v8_10_convert_error_address(adev, err_data, err_addr, ch_inst, umc_inst, node_inst, mc_umc_status); } + + return 0; } static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /* TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC resgisters. 
Will add the protection - * when firmware interface is ready - */ - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_v8_10_ecc_info_query_error_address(adev, - err_data, - ch_inst, - umc_inst, - node_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_ecc_info_query_error_address, ras_error_status); } const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e407be6cb63c..e32b656b3dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -444,6 +444,7 @@ static int uvd_v7_0_sw_init(void *handle) continue; if (!amdgpu_sriov_vf(adev)) { ring = &adev->uvd.inst[j].ring; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "uvd_%d", ring->me); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0, @@ -454,6 +455,7 @@ static int uvd_v7_0_sw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; @@ -1397,7 +1399,7 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1440,7 +1442,7 @@ static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1802,7 +1804,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, @@ -1835,7 +1836,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 66cd3d11aa4b..57b85bb6a1e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -466,6 +466,7 @@ static int vce_v4_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i); ring = &adev->vce.ring[i]; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vce%d", i); if (amdgpu_sriov_vf(adev)) { /* DOORBELL only works under SRIOV */ @@ -1021,7 +1022,7 @@ static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1103,7 +1104,6 @@ static const struct 
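The umc_v8_10 hunks above convert the per-channel query/init routines into callbacks driven by amdgpu_umc_loop_channels instead of open-coding LOOP_UMC_EACH_NODE_INST_AND_CH in every caller. The standalone sketch below is only an illustration of that callback-iteration pattern; loop_umc_channels, demo_err_data and demo_count_errors are hypothetical names, not the kernel implementation.

#include <stdio.h>
#include <stdint.h>

/* Per-channel callback, in the style of the amdgpu_umc_loop_channels
 * refactor: coordinates in, opaque data pointer out, 0 to continue. */
typedef int (*umc_channel_cb)(uint32_t node, uint32_t inst,
                              uint32_t ch, void *data);

struct demo_err_data {          /* stand-in for struct ras_err_data */
	unsigned long ce_count;
	unsigned long ue_count;
};

/* The walker owns the node/instance/channel loop; callbacks no longer
 * duplicate it. */
static int loop_umc_channels(uint32_t nodes, uint32_t insts, uint32_t chans,
                             umc_channel_cb cb, void *data)
{
	for (uint32_t n = 0; n < nodes; n++)
		for (uint32_t i = 0; i < insts; i++)
			for (uint32_t c = 0; c < chans; c++) {
				int ret = cb(n, i, c, data);
				if (ret)
					return ret;  /* early out on error */
			}
	return 0;
}

/* Example callback: pretend each channel reports one correctable error. */
static int demo_count_errors(uint32_t node, uint32_t inst, uint32_t ch,
                             void *data)
{
	struct demo_err_data *err = data;

	(void)node; (void)inst; (void)ch;
	err->ce_count++;
	return 0;
}

int main(void)
{
	struct demo_err_data err = {0};

	loop_umc_channels(2, 2, 8, demo_count_errors, &err);
	printf("ce=%lu ue=%lu\n", err.ce_count, err.ue_count);
	return 0;
}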
amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c index 1ceda3d0cd5b..2b9ddb3d2fe1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c @@ -65,7 +65,7 @@ void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index c305b2cb8490..761c28fa6ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -120,6 +120,7 @@ static int vcn_v1_0_sw_init(void *handle) return r; ring = &adev->vcn.inst->ring_dec; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -141,6 +142,7 @@ static int vcn_v1_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst->ring_enc[i]; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_enc%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, hw_prio, NULL); @@ -1548,7 +1550,7 @@ static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1693,7 +1695,7 @@ static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1977,7 +1979,6 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .support_64bit_ptrs = false, .no_user_fence = true, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, @@ -2012,7 +2013,6 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, .set_wptr = vcn_v1_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 4b4cd88414e0..7c2b3aa48083 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -129,6 +129,7 @@ static int vcn_v2_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; + ring->vm_hub = 
AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -159,6 +160,7 @@ static int vcn_v2_0_sw_init(void *handle) ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB_0; if (!amdgpu_sriov_vf(adev)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; else @@ -1511,7 +1513,7 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1671,7 +1673,7 @@ void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -2014,7 +2016,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, .set_wptr = vcn_v2_0_dec_ring_set_wptr, @@ -2045,7 +2046,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, .set_wptr = vcn_v2_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 223e7dfe4618..ab0b45d0ead1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -186,6 +186,12 @@ static int vcn_v2_5_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (amdgpu_sriov_vf(adev) ? 2*j : 8*j); + + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "vcn_dec_%d", j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -201,6 +207,11 @@ static int vcn_v2_5_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (amdgpu_sriov_vf(adev) ? 
(1 + i + 2*j) : (2 + i + 8*j)); + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "vcn_enc_%d.%d", j, i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0, @@ -1562,38 +1573,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_1, - .get_rptr = vcn_v2_5_dec_ring_get_rptr, - .get_wptr = vcn_v2_5_dec_ring_get_wptr, - .set_wptr = vcn_v2_5_dec_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + - 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ - 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ - 6, - .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ - .emit_ib = vcn_v2_0_dec_ring_emit_ib, - .emit_fence = vcn_v2_0_dec_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, - .test_ring = vcn_v2_0_dec_ring_test_ring, - .test_ib = amdgpu_vcn_dec_ring_test_ib, - .insert_nop = vcn_v2_0_dec_ring_insert_nop, - .insert_start = vcn_v2_0_dec_ring_insert_start, - .insert_end = vcn_v2_0_dec_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -static const struct amdgpu_ring_funcs vcn_v2_6_dec_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_DEC, - .align_mask = 0xf, - .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_5_dec_ring_get_rptr, .get_wptr = vcn_v2_5_dec_ring_get_wptr, .set_wptr = vcn_v2_5_dec_ring_set_wptr, @@ -1693,7 +1672,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = vcn_v2_5_enc_ring_get_rptr, .get_wptr = vcn_v2_5_enc_ring_get_wptr, .set_wptr = vcn_v2_5_enc_ring_set_wptr, @@ -1719,36 +1697,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static const struct amdgpu_ring_funcs vcn_v2_6_enc_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_ENC, - .align_mask = 0x3f, - .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = vcn_v2_5_enc_ring_get_rptr, - .get_wptr = vcn_v2_5_enc_ring_get_wptr, - .set_wptr = vcn_v2_5_enc_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + - 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ - 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ - 1, /* vcn_v2_0_enc_ring_insert_end */ - .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ - .emit_ib = vcn_v2_0_enc_ring_emit_ib, - .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_enc_ring_test_ring, - .test_ib = amdgpu_vcn_enc_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, - .insert_end = vcn_v2_0_enc_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - static void 
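The vm_hub changes running through the UVD/VCE/VCN files replace the constant .vmhub member of the shared ring-funcs tables with a per-ring field assigned at sw_init, which is what allows the duplicated vcn_v2_6 dec/enc funcs tables above to be dropped. A minimal sketch of the before/after data layout, assuming hypothetical demo_ring and demo_ring_funcs types rather than the real amdgpu structures:

#include <stdio.h>

enum demo_hub { DEMO_MMHUB_0, DEMO_MMHUB_1 };

struct demo_ring_funcs {        /* shared, const ops table: no hub here */
	const char *name;
};

struct demo_ring {
	const struct demo_ring_funcs *funcs;
	enum demo_hub vm_hub;   /* per-ring, chosen at init time */
};

static const struct demo_ring_funcs dec_funcs = { .name = "vcn_dec" };

/* One ops table can now serve every ASIC variant; only the hub assignment
 * differs (the diff picks MMHUB_1 for VCN 2.5.0 and MMHUB_0 otherwise). */
static void demo_ring_init(struct demo_ring *ring, int uses_mmhub1)
{
	ring->funcs = &dec_funcs;
	ring->vm_hub = uses_mmhub1 ? DEMO_MMHUB_1 : DEMO_MMHUB_0;
}

int main(void)
{
	struct demo_ring a, b;

	demo_ring_init(&a, 1);
	demo_ring_init(&b, 0);
	printf("%s: hub %d / hub %d\n", a.funcs->name, a.vm_hub, b.vm_hub);
	return 0;
}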
vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) { int i; @@ -1756,10 +1704,7 @@ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; - else /* CHIP_ALDEBARAN */ - adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_6_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); } @@ -1773,10 +1718,7 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << j)) continue; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; - else /* CHIP_ALDEBARAN */ - adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_6_enc_ring_vm_funcs; + adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; adev->vcn.inst[j].ring_enc[i].me = j; } DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 66439388faee..3eab186261aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -189,6 +189,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, AMDGPU_RING_PRIO_DEFAULT, @@ -212,6 +213,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, hw_prio, &adev->vcn.inst[i].sched_score); @@ -1738,7 +1740,6 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, .set_wptr = vcn_v3_0_dec_ring_set_wptr, @@ -1899,7 +1900,6 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, .set_wptr = vcn_v3_0_dec_ring_set_wptr, @@ -2000,7 +2000,6 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_enc_ring_get_rptr, .get_wptr = vcn_v3_0_enc_ring_get_wptr, .set_wptr = vcn_v3_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 720ab36f9c92..bf0674039598 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -149,7 +149,7 @@ static int vcn_v4_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - + ring->vm_hub = 
AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_unified_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, @@ -1798,7 +1798,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 827e2768f867..536128447b71 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -38,6 +38,11 @@ #define mmIH_CHICKEN_ALDEBARAN 0x18d #define mmIH_CHICKEN_ALDEBARAN_BASE_IDX 0 +#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN 0x00ea +#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN_BASE_IDX 0 +#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE__SHIFT 0x10 +#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE_MASK 0x00010000L + static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev); /** @@ -251,36 +256,14 @@ static int vega20_ih_enable_ring(struct amdgpu_device *adev, return 0; } -/** - * vega20_ih_reroute_ih - reroute VMC/UTCL2 ih to an ih ring - * - * @adev: amdgpu_device pointer - * - * Reroute VMC and UMC interrupts on primary ih ring to - * ih ring 1 so they won't lose when bunches of page faults - * interrupts overwhelms the interrupt handler(VEGA20) - */ -static void vega20_ih_reroute_ih(struct amdgpu_device *adev) +static uint32_t vega20_setup_retry_doorbell(u32 doorbell_index) { - uint32_t tmp; + u32 val = 0; - /* vega20 ih reroute will go through psp this - * function is used for newer asics starting arcturus - */ - if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) { - /* Reroute to IH ring 1 for VMC */ - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12); - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp); - - /* Reroute IH ring 1 for UTCL2 */ - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x1B); - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp); - } + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, doorbell_index); + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1); + + return val; } /** @@ -333,8 +316,6 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(ih); i++) { if (ih[i]->ring_size) { - if (i == 1) - vega20_ih_reroute_ih(adev); ret = vega20_ih_enable_ring(adev, ih[i]); if (ret) return ret; @@ -347,6 +328,20 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) pci_set_master(adev->pdev); + /* Allocate the doorbell for IH Retry CAM */ + adev->irq.retry_cam_doorbell_index = (adev->doorbell_index.ih + 3) << 1; + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RETRY_CAM, + vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index)); + + /* Enable IH Retry CAM */ + if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) + WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, + ENABLE, 1); + else + WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL, ENABLE, 1); + + adev->irq.retry_cam_enabled = true; + /* enable interrupts */ ret = vega20_ih_toggle_interrupts(adev, true); if (ret) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
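vega20_setup_retry_doorbell above packs a doorbell offset and an enable bit into the IH_DOORBELL_RPTR layout with REG_SET_FIELD. The sketch below shows the equivalent shift-and-mask composition in plain C; the DEMO_* shift and mask values are illustrative placeholders, not the actual register definition.

#include <stdio.h>
#include <stdint.h>

/* Illustrative field layout only -- not the real IH_DOORBELL_RPTR spec. */
#define DEMO_OFFSET_SHIFT  0
#define DEMO_OFFSET_MASK   0x03FFFFFFu
#define DEMO_ENABLE_SHIFT  28
#define DEMO_ENABLE_MASK   (1u << DEMO_ENABLE_SHIFT)

/* Same idea as REG_SET_FIELD(): clear the field, then OR in the new value. */
static uint32_t set_field(uint32_t reg, uint32_t mask, uint32_t shift,
                          uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

static uint32_t setup_retry_doorbell(uint32_t doorbell_index)
{
	uint32_t val = 0;

	val = set_field(val, DEMO_OFFSET_MASK, DEMO_OFFSET_SHIFT, doorbell_index);
	val = set_field(val, DEMO_ENABLE_MASK, DEMO_ENABLE_SHIFT, 1);
	return val;
}

int main(void)
{
	/* e.g. (ih doorbell index + 3) << 1, as in the retry CAM allocation */
	printf("0x%08x\n", setup_retry_doorbell((4 + 3) << 1));
	return 0;
}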
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0bf8d75950f7..00f528eb9812 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -204,6 +204,14 @@ static void kfd_device_info_init(struct kfd_dev *kfd, /* Navi1x+ */ if (gc_version >= IP_VERSION(10, 1, 1)) kfd->device_info.needs_pci_atomics = true; + } else if (gc_version < IP_VERSION(12, 0, 0)) { + /* + * PCIe atomics support acknowledgment in GFX11 RS64 CPFW requires + * MEC version >= 509. Prior RS64 CPFW versions (and all F32) require + * PCIe atomics support. + */ + kfd->device_info.needs_pci_atomics = true; + kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0; } } else { kfd->device_info.doorbell_size = 4; @@ -315,10 +323,13 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) break; /* Aldebaran */ case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): gfx_target_version = 90010; f2g = &aldebaran_kfd2kgd; break; + case IP_VERSION(9, 4, 3): + gfx_target_version = 90400; + f2g = &aldebaran_kfd2kgd; + break; /* Navi10 */ case IP_VERSION(10, 1, 10): gfx_target_version = 100100; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 4a9af800b1f1..5aa75f72caa1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -143,6 +143,13 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + /* + * GFX11 RS64 CPFW version >= 509 supports PCIe atomics support + * acknowledgment. + */ + if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev)) + m->cp_hqd_hq_status0 |= 1 << 29; + if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; @@ -350,6 +357,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdmax_rlcx_doorbell_offset = q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum + << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT) + & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK; + m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 4dfae19714ab..fdbfd725841f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q, struct mqd_update_info *minfo) { + struct amdgpu_device *adev = (struct amdgpu_device *)mm->dev->adev; struct v9_mqd *m; m = get_mqd(mqd); @@ -269,10 +270,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_vmid = q->vmid; if (q->format == KFD_QUEUE_FORMAT_AQL) { - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + m->cp_hqd_pq_control |= 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; + if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + m->cp_hqd_pq_control |= + CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK; m->cp_hqd_pq_doorbell_control |= 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 
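kfd_device_info_init above keeps PCIe atomics mandatory on GFX11 and records MEC firmware 509 as the first RS64 release that can acknowledge atomic support. A small sketch of that decision, with a hypothetical demo_init_atomics_policy helper and struct (the field meanings follow the diff's comment, not a definitive restatement of KFD internals):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_kfd_info {
	bool needs_pci_atomics;
	/* first MEC FW version that can cope without host PCIe atomics;
	 * 0 means no such version exists (F32 firmware) */
	uint32_t no_atomic_fw_version;
};

static void demo_init_atomics_policy(struct demo_kfd_info *info, bool rs64)
{
	info->needs_pci_atomics = true;
	info->no_atomic_fw_version = rs64 ? 509 : 0;
}

int main(void)
{
	struct demo_kfd_info info;

	demo_init_atomics_policy(&info, true);
	printf("needs atomics: %d, min fw w/o atomics: %u\n",
	       info.needs_pci_atomics, info.no_atomic_fw_version);
	return 0;
}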
dc6fd6967050..96a138a39515 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2172,7 +2172,15 @@ restart: pr_debug("drain retry fault gpu %d svms %p\n", i, svms); amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, - &pdd->dev->adev->irq.ih1); + pdd->dev->adev->irq.retry_cam_enabled ? + &pdd->dev->adev->irq.ih : + &pdd->dev->adev->irq.ih1); + + if (pdd->dev->adev->irq.retry_cam_enabled) + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, + &pdd->dev->adev->irq.ih_soft); + + pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e29655a26dd4..8d17fd5a817e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1890,7 +1890,8 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) if (adev->dm.dc) dc_deinit_callbacks(adev->dm.dc); - dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv); + if (adev->dm.dc) + dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv); if (dc_enable_dmub_notifications(adev->dm.dc)) { kfree(adev->dm.dmub_notify); @@ -3127,9 +3128,12 @@ void amdgpu_dm_update_connector_after_detect( aconnector->edid); } - aconnector->timing_requested = kzalloc(sizeof(struct dc_crtc_timing), GFP_KERNEL); - if (!aconnector->timing_requested) - dm_error("%s: failed to create aconnector->requested_timing\n", __func__); + if (!aconnector->timing_requested) { + aconnector->timing_requested = + kzalloc(sizeof(struct dc_crtc_timing), GFP_KERNEL); + if (!aconnector->timing_requested) + dm_error("failed to create aconnector->requested_timing\n"); + } drm_connector_update_edid_property(connector, aconnector->edid); amdgpu_dm_update_freesync_caps(connector, aconnector->edid); @@ -7897,6 +7901,13 @@ static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state) amdgpu_dm_plane_handle_cursor_update(plane, old_plane_state); } +static inline uint32_t get_mem_type(struct drm_framebuffer *fb) +{ + struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]); + + return abo->tbo.resource ? abo->tbo.resource->mem_type : 0; +} + static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dc_state *dc_state, struct drm_device *dev, @@ -7971,6 +7982,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, continue; dc_plane = dm_new_plane_state->dc_state; + if (!dc_plane) + continue; bundle->surface_updates[planes_count].surface = dc_plane; if (new_pcrtc_state->color_mgmt_changed) { @@ -8037,11 +8050,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, /* * Only allow immediate flips for fast updates that don't - * change FB pitch, DCC state, rotation or mirroing. + * change memory domain, FB pitch, DCC state, rotation or + * mirroring. 
*/ bundle->flip_addrs[planes_count].flip_immediate = crtc->state->async_flip && - acrtc_state->update_type == UPDATE_TYPE_FAST; + acrtc_state->update_type == UPDATE_TYPE_FAST && + get_mem_type(old_plane_state->fb) == get_mem_type(fb); timestamp_ns = ktime_get_ns(); bundle->flip_addrs[planes_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000); @@ -8553,6 +8568,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + if (!adev->dm.hdcp_workqueue) + continue; + pr_debug("[HDCP_DM] -------------- i : %x ----------\n", i); if (!connector) @@ -8601,6 +8619,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + if (!adev->dm.hdcp_workqueue) + continue; + new_crtc_state = NULL; old_crtc_state = NULL; @@ -9619,8 +9640,9 @@ static int dm_update_plane_state(struct dc *dc, return -EINVAL; } + if (dm_old_plane_state->dc_state) + dc_plane_state_release(dm_old_plane_state->dc_state); - dc_plane_state_release(dm_old_plane_state->dc_state); dm_new_plane_state->dc_state = NULL; *lock_and_validation_needed = true; @@ -10157,6 +10179,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); if (ret) { DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; goto fail; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 330ab036c830..c6ce2b7123b7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -687,7 +687,6 @@ static void apply_synaptics_fifo_reset_wa(struct drm_dp_aux *aux) return; data[0] |= (1 << 1); // set bit 1 to 1 - return; if (!execute_synaptics_rc_command(aux, false, 0x31, 4, 0x221198, data)) return; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 994ba426ca66..810ab682f424 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -379,13 +379,17 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) if (aconnector->dc_sink && connector->state) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); - struct hdcp_workqueue *hdcp_work = adev->dm.hdcp_workqueue; - struct hdcp_workqueue *hdcp_w = &hdcp_work[aconnector->dc_link->link_index]; - connector->state->hdcp_content_type = - hdcp_w->hdcp_content_type[connector->index]; - connector->state->content_protection = - hdcp_w->content_protection[connector->index]; + if (adev->dm.hdcp_workqueue) { + struct hdcp_workqueue *hdcp_work = adev->dm.hdcp_workqueue; + struct hdcp_workqueue *hdcp_w = + &hdcp_work[aconnector->dc_link->link_index]; + + connector->state->hdcp_content_type = + hdcp_w->hdcp_content_type[connector->index]; + connector->state->content_protection = + hdcp_w->content_protection[connector->index]; + } } if (aconnector->dc_sink) { @@ -1406,6 +1410,7 @@ int pre_validate_dsc(struct drm_atomic_state *state, ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars); if (ret != 0) { 
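The amdgpu_dm change above adds get_mem_type() and only allows an immediate (async) flip when the old and new framebuffers live in the same memory domain. A condensed sketch of that gate, using hypothetical demo_fb/demo_mem_type names:

#include <stdbool.h>
#include <stdio.h>

enum demo_mem_type { DEMO_MEM_NONE, DEMO_MEM_GTT, DEMO_MEM_VRAM };

struct demo_fb { enum demo_mem_type mem_type; };

/* Immediate flips are only safe for fast updates that keep the buffer in
 * the same memory domain; a GTT<->VRAM move must wait for vblank. */
static bool allow_immediate_flip(bool async_requested, bool fast_update,
                                 const struct demo_fb *old_fb,
                                 const struct demo_fb *new_fb)
{
	return async_requested && fast_update &&
	       old_fb->mem_type == new_fb->mem_type;
}

int main(void)
{
	struct demo_fb vram = { DEMO_MEM_VRAM }, gtt = { DEMO_MEM_GTT };

	printf("%d %d\n",
	       allow_immediate_flip(true, true, &vram, &vram),
	       allow_immediate_flip(true, true, &vram, &gtt));
	return 0;
}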
DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; goto clean_exit; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c index 1743ca0a3641..c42aa947c969 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -89,6 +89,7 @@ void dc_fpu_begin(const char *function_name, const int line) if (*pcpu == 1) { #if defined(CONFIG_X86) + migrate_disable(); kernel_fpu_begin(); #elif defined(CONFIG_PPC64) if (cpu_has_feature(CPU_FTR_VSX_COMP)) { @@ -129,6 +130,7 @@ void dc_fpu_end(const char *function_name, const int line) if (*pcpu <= 0) { #if defined(CONFIG_X86) kernel_fpu_end(); + migrate_enable(); #elif defined(CONFIG_PPC64) if (cpu_has_feature(CPU_FTR_VSX_COMP)) { disable_kernel_vsx(); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index f0f948501e9a..cce47d3f1a13 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -515,11 +515,8 @@ static enum bp_result get_gpio_i2c_info( info->i2c_slave_address = record->i2c_slave_addr; /* TODO: check how to get register offset for en, Y, etc. */ - info->gpio_info.clk_a_register_index = - le16_to_cpu( - header->gpio_pin[table_index].data_a_reg_index); - info->gpio_info.clk_a_shift = - header->gpio_pin[table_index].gpio_bitshift; + info->gpio_info.clk_a_register_index = le16_to_cpu(pin->data_a_reg_index); + info->gpio_info.clk_a_shift = pin->gpio_bitshift; return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index af108f88b112..8d9444db092a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -399,6 +399,23 @@ static void dcn32_update_clocks_update_dentist( } +static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t dispclk_wdivider; + int disp_divider; + + REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, &dispclk_wdivider); + disp_divider = dentist_get_divider_from_did(dispclk_wdivider); + + /* Return DISPCLK freq in Khz */ + if (disp_divider) + return (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / disp_divider; + + return 0; +} + + static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) @@ -852,6 +869,7 @@ static struct clk_mgr_funcs dcn32_funcs = { .are_clock_states_equal = dcn32_are_clock_states_equal, .enable_pme_wa = dcn32_enable_pme_wa, .is_smu_present = dcn32_is_smu_present, + .get_dispclk_from_dentist = dcn32_get_dispclk_from_dentist, }; void dcn32_clk_mgr_construct( @@ -860,6 +878,8 @@ void dcn32_clk_mgr_construct( struct pp_smu_funcs *pp_smu, struct dccg *dccg) { + struct clk_log_info log_info = {0}; + clk_mgr->base.ctx = ctx; clk_mgr->base.funcs = &dcn32_funcs; if (ASICREV_IS_GC_11_0_2(clk_mgr->base.ctx->asic_id.hw_internal_rev)) { @@ -893,6 +913,7 @@ void dcn32_clk_mgr_construct( clk_mgr->base.clks.ref_dtbclk_khz = 268750; } + /* integer part is now VCO frequency in kHz */ clk_mgr->base.dentist_vco_freq_khz = dcn32_get_vco_frequency_from_reg(clk_mgr); @@ -900,6 +921,8 @@ void dcn32_clk_mgr_construct( if (clk_mgr->base.dentist_vco_freq_khz == 0) clk_mgr->base.dentist_vco_freq_khz = 
4300000; /* Updated as per HW docs */ + dcn32_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info); + if (ctx->dc->debug.disable_dtb_ref_clk_switch && clk_mgr->base.clks.ref_dtbclk_khz != clk_mgr->base.boot_snapshot.dtbclk) { clk_mgr->base.clks.ref_dtbclk_khz = clk_mgr->base.boot_snapshot.dtbclk; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 41198c729d90..30c0644d4418 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -622,7 +622,6 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) int i, j; uint8_t valid_count = 0; uint8_t dig_stream_count = 0; - int matching_stream_ptrs = 0; int eng_ids_per_ep_id[MAX_PIPES] = {0}; int ep_ids_per_eng_id[MAX_PIPES] = {0}; int valid_bitmap = 0; @@ -645,9 +644,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) struct link_enc_assignment assignment = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i]; if (assignment.valid) { - if (assignment.stream == state->streams[i]) - matching_stream_ptrs++; - else + if (assignment.stream != state->streams[i]) valid_stream_ptrs = false; } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 85d54bfb595c..117d80cb36fb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1707,6 +1707,9 @@ bool dc_remove_plane_from_context( struct dc_stream_status *stream_status = NULL; struct resource_pool *pool = dc->res_pool; + if (!plane_state) + return true; + for (i = 0; i < context->stream_count; i++) if (context->streams[i] == stream) { stream_status = &context->stream_status[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e363a3c88250..30f0ba05a6e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.229" +#define DC_VER "3.2.230" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -405,6 +405,7 @@ struct dc_config { bool force_bios_enable_lttpr; uint8_t force_bios_fixed_vs; int sdpif_request_limit_words_per_umc; + bool use_old_fixed_vs_sequence; bool disable_subvp_drr; }; @@ -875,6 +876,9 @@ struct dc_debug_options { bool override_dispclk_programming; bool disable_fpo_optimizations; bool support_eDP1_5; + uint32_t fpo_vactive_margin_us; + bool disable_fpo_vactive; + bool disable_boot_optimizations; }; struct gpu_info_soc_bounding_box_v1_0; @@ -1450,6 +1454,7 @@ struct dc_link { struct ddc_service *ddc; + enum dp_panel_mode panel_mode; bool aux_mode; /* Private to DC core */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index dd6f643254fe..a9b9490a532c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -327,6 +327,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru int i = 0, k = 0; int ramp_up_num_steps = 1; // TODO: Ramp is currently disabled. Reenable it. 
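dcn32_get_dispclk_from_dentist, added in the dcn32_clk_mgr changes above, recovers the current DISPCLK by reading the DENTIST divider and scaling the VCO frequency. The arithmetic looks roughly like the sketch below; the scale factor and divider value here are placeholders (the real decode comes from dentist_get_divider_from_did).

#include <stdio.h>
#include <stdint.h>

#define DEMO_DIVIDER_RANGE_SCALE_FACTOR 4  /* illustrative scale factor */

/* dispclk_khz = scale * vco_khz / divider; return 0 if the divider reads
 * back as 0 to avoid dividing by zero. */
static int dispclk_from_dentist(uint32_t vco_freq_khz, int disp_divider)
{
	if (!disp_divider)
		return 0;
	return (DEMO_DIVIDER_RANGE_SCALE_FACTOR * (int)vco_freq_khz) / disp_divider;
}

int main(void)
{
	/* e.g. a 4.3 GHz VCO divided down */
	printf("%d kHz\n", dispclk_from_dentist(4300000, 10));
	return 0;
}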
uint8_t visual_confirm_enabled; + int pipe_idx = 0; if (dc == NULL) return false; @@ -339,6 +340,25 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru cmd.fw_assisted_mclk_switch.config_data.fams_enabled = should_manage_pstate; cmd.fw_assisted_mclk_switch.config_data.visual_confirm_enabled = visual_confirm_enabled; + if (should_manage_pstate) { + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + /* If FAMS is being used to support P-State and there is a stream + * that does not use FAMS, we are in an FPO + VActive scenario. + * Assign vactive stretch margin in this case. + */ + if (!pipe->stream->fpo_in_use) { + cmd.fw_assisted_mclk_switch.config_data.vactive_stretch_margin_us = dc->debug.fpo_vactive_margin_us; + break; + } + pipe_idx++; + } + } + for (i = 0, k = 0; context && i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 181a3408cc61..25284006019c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -144,7 +144,7 @@ struct test_pattern { unsigned int cust_pattern_size; }; -#define SUBVP_DRR_MARGIN_US 600 // 600us for DRR margin (SubVP + DRR) +#define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR) enum mall_stream_type { SUBVP_NONE, // subvp not in use diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 9fe0ce91db00..8d2460d06bce 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -3031,10 +3031,12 @@ void dce110_enable_dp_link_output( const struct link_hwss *link_hwss = get_link_hwss(link, link_res); unsigned int i; - + /* + * Add the logic to extract BOTH power up and power down sequences + * from enable/disable link output and only call edp panel control + * in enable_link_dp and disable_link_dp once. + */ if (link->connector_signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing) - link->dc->hwss.edp_power_control(link, true); link->dc->hwss.edp_wait_for_hpd_ready(link, true); } @@ -3096,11 +3098,12 @@ void dce110_disable_link_output(struct dc_link *link, link_hwss->disable_link_output(link, link_res, signal); link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; - - if (signal == SIGNAL_TYPE_EDP && - link->dc->hwss.edp_backlight_control) - link->dc->hwss.edp_power_control(link, false); - else if (dmcu != NULL && dmcu->funcs->lock_phy) + /* + * Add the logic to extract BOTH power up and power down sequences + * from enable/disable link output and only call edp panel control + * in enable_link_dp and disable_link_dp once. 
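In dc_dmub_srv_p_state_delegate above, when FAMS is managing P-State but at least one active stream is not using FPO, the config is treated as FPO + VActive and the vactive stretch margin is handed to firmware. A compact sketch of that scan, with a hypothetical demo_stream struct and pick_vactive_margin helper:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_stream { bool active; bool fpo_in_use; };

/* Return the vactive stretch margin to program: non-zero only when some
 * active stream relies on VActive instead of FPO for the switch. */
static uint32_t pick_vactive_margin(const struct demo_stream *s, int n,
                                    uint32_t margin_us)
{
	for (int i = 0; i < n; i++) {
		if (!s[i].active)
			continue;
		if (!s[i].fpo_in_use)
			return margin_us;  /* FPO + VActive scenario */
	}
	return 0;
}

int main(void)
{
	struct demo_stream streams[2] = {
		{ .active = true, .fpo_in_use = true },
		{ .active = true, .fpo_in_use = false },
	};

	printf("margin = %u us\n", pick_vactive_margin(streams, 2, 2000));
	return 0;
}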
+ */ + if (dmcu != NULL && dmcu->funcs->lock_phy) dmcu->funcs->unlock_phy(dmcu); dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index a142a00bc432..bf399819ca80 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -755,8 +755,8 @@ bool hubp1_is_flip_pending(struct hubp *hubp) return false; } -uint32_t aperture_default_system = 1; -uint32_t context0_default_system; /* = 0;*/ +static uint32_t aperture_default_system = 1; +static uint32_t context0_default_system; /* = 0;*/ static void hubp1_set_vm_system_aperture_settings(struct hubp *hubp, struct vm_system_aperture_param *apt) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h index 893c0809cd4e..7bdc146f7cb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h @@ -205,6 +205,11 @@ type PHYDSYMCLK_GATE_DISABLE; \ type PHYESYMCLK_GATE_DISABLE; +#define DCCG314_REG_FIELD_LIST(type) \ + type DSCCLK3_DTO_PHASE;\ + type DSCCLK3_DTO_MODULO;\ + type DSCCLK3_DTO_ENABLE; + #define DCCG32_REG_FIELD_LIST(type) \ type DPSTREAMCLK0_EN;\ type DPSTREAMCLK1_EN;\ @@ -237,6 +242,7 @@ struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) DCCG3_REG_FIELD_LIST(uint8_t) DCCG31_REG_FIELD_LIST(uint8_t) + DCCG314_REG_FIELD_LIST(uint8_t) DCCG32_REG_FIELD_LIST(uint8_t) }; @@ -244,6 +250,7 @@ struct dccg_mask { DCCG_REG_FIELD_LIST(uint32_t) DCCG3_REG_FIELD_LIST(uint32_t) DCCG31_REG_FIELD_LIST(uint32_t) + DCCG314_REG_FIELD_LIST(uint32_t) DCCG32_REG_FIELD_LIST(uint32_t) }; @@ -273,6 +280,7 @@ struct dccg_registers { uint32_t DSCCLK0_DTO_PARAM; uint32_t DSCCLK1_DTO_PARAM; uint32_t DSCCLK2_DTO_PARAM; + uint32_t DSCCLK3_DTO_PARAM; uint32_t DPSTREAMCLK_ROOT_GATE_DISABLE; uint32_t DPSTREAMCLK_GATE_DISABLE; uint32_t DCCG_GATE_DISABLE_CNTL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5403e9399a46..422fbf79da64 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2113,6 +2113,15 @@ void dcn20_optimize_bandwidth( if (hubbub->funcs->program_compbuf_size) hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + dc_dmub_srv_p_state_delegate(dc, + true, context); + context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + dc->clk_mgr->clks.fw_based_mclk_switching = true; + } else { + dc->clk_mgr->clks.fw_based_mclk_switching = false; + } + dc->clk_mgr->funcs->update_clocks( dc->clk_mgr, context, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 0e071fbc9154..8263a07f265f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -983,13 +983,36 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc, } void dcn30_prepare_bandwidth(struct dc *dc, - struct dc_state *context) + struct dc_state *context) { + bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; + /* Any transition into an FPO config should disable MCLK switching first to avoid + * driver and FW P-State synchronization issues. 
+ */ + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + dc->optimized_required = true; + context->bw_ctx.bw.dcn.clk.p_state_change_support = false; + } + if (dc->clk_mgr->dc_mode_softmax_enabled) if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); dcn20_prepare_bandwidth(dc, context); + /* + * enabled -> enabled: do not disable + * enabled -> disabled: disable + * disabled -> enabled: don't care + * disabled -> disabled: don't care + */ + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) + dc_dmub_srv_p_state_delegate(dc, false, context); + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + /* After disabling P-State, restore the original value to ensure we get the correct P-State + * on the next optimize. */ + context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support; + } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 965f5ceb33f7..67a34cda3774 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -701,7 +701,9 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = 167, .nv12 = 167, .fp16 = 167 - } + }, + 16, + 16 }; static const struct dc_debug_options debug_defaults_drv = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 7d2b982506fd..4c2fdfea162f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -360,6 +360,15 @@ void dccg31_disable_dscclk(struct dccg *dccg, int inst) DSCCLK2_DTO_PHASE, 0, DSCCLK2_DTO_MODULO, 1); break; + case 3: + if (REG(DSCCLK3_DTO_PARAM)) { + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK3_DTO_ENABLE, 1); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 1); + } + break; default: BREAK_TO_DEBUGGER(); return; @@ -395,6 +404,15 @@ void dccg31_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_DTO_ENABLE, 0); break; + case 3: + if (REG(DSCCLK3_DTO_PARAM)) { + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK3_DTO_ENABLE, 0); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 0); + } + break; default: BREAK_TO_DEBUGGER(); return; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 7ac6e69cff37..62ce36c75c4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -295,6 +295,10 @@ void dcn31_init_hw(struct dc *dc) if (dc->res_pool->hubbub->funcs->init_crb) dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub); #endif + + // Get DMCUB capabilities + dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub); + dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; } void dcn31_dsc_pg_control( diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index eaaa2e01f6d0..ff8cd5076434 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ 
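dcn30_prepare_bandwidth above forces P-State switching off while transitioning into or out of an FPO config and then restores the original flag so the next optimize pass sees the real capability. A small sketch of that save/force/restore pattern, with hypothetical demo_* names standing in for the dc state:

#include <stdbool.h>
#include <stdio.h>

struct demo_clk_state {
	bool fw_based_mclk_switching;  /* FPO requested for the new state */
	bool p_state_change_support;
};

/* Force P-State off for the duration of the transition, then put the
 * original value back so later optimization is not pessimized. */
static void demo_prepare_bandwidth(struct demo_clk_state *new_state,
                                   bool currently_fpo)
{
	bool saved = new_state->p_state_change_support;
	bool in_transition = new_state->fw_based_mclk_switching || currently_fpo;

	if (in_transition)
		new_state->p_state_change_support = false;

	/* ... program watermarks/clocks with the pessimistic value here ... */

	if (in_transition)
		new_state->p_state_change_support = saved;
}

int main(void)
{
	struct demo_clk_state st = { .fw_based_mclk_switching = true,
	                             .p_state_change_support = true };

	demo_prepare_bandwidth(&st, false);
	printf("restored p-state support: %d\n", st.p_state_change_support);
	return 0;
}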
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1965,6 +1965,8 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->config.use_old_fixed_vs_sequence = true; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 081ce168f621..de7bfba2c179 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -274,6 +274,32 @@ static void dccg314_set_dpstreamclk( } } +static void dccg314_init(struct dccg *dccg) +{ + int otg_inst; + + /* Set HPO stream encoder to use refclk to avoid case where PHY is + * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which + * will cause DCN to hang. + */ + for (otg_inst = 0; otg_inst < 4; otg_inst++) + dccg31_disable_symclk32_se(dccg, otg_inst); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + for (otg_inst = 0; otg_inst < 2; otg_inst++) + dccg31_disable_symclk32_le(dccg, otg_inst); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + for (otg_inst = 0; otg_inst < 4; otg_inst++) + dccg314_set_dpstreamclk(dccg, REFCLK, otg_inst, + otg_inst); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + for (otg_inst = 0; otg_inst < 5; otg_inst++) + dccg31_set_physymclk(dccg, otg_inst, + PHYSYMCLK_FORCE_SRC_SYMCLK, false); +} + static void dccg314_set_valid_pixel_rate( struct dccg *dccg, int ref_dtbclk_khz, @@ -315,7 +341,7 @@ static const struct dccg_funcs dccg314_funcs = { .update_dpp_dto = dccg31_update_dpp_dto, .dpp_root_clock_control = dccg314_dpp_root_clock_control, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, - .dccg_init = dccg31_init, + .dccg_init = dccg314_init, .set_dpstreamclk = dccg314_set_dpstreamclk, .enable_symclk32_se = dccg31_enable_symclk32_se, .disable_symclk32_se = dccg31_disable_symclk32_se, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h index 6a35986307af..90687a9e8fdd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h @@ -68,6 +68,7 @@ SR(DSCCLK0_DTO_PARAM),\ SR(DSCCLK1_DTO_PARAM),\ SR(DSCCLK2_DTO_PARAM),\ + SR(DSCCLK3_DTO_PARAM),\ SR(DSCCLK_DTO_CTRL),\ SR(DCCG_GATE_DISABLE_CNTL2),\ SR(DCCG_GATE_DISABLE_CNTL3),\ @@ -149,12 +150,20 @@ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, 
mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh) @@ -178,6 +187,7 @@ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_ENABLE, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_ENABLE, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_ENABLE, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_ENABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_GATE_DISABLE, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 50ed7e09d5ba..2f7df8d34a91 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -885,7 +885,7 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .enable_z9_disable_interface = true, - .minimum_z8_residency_time = 3080, + .minimum_z8_residency_time = 2000, .psr_skip_crtc_disable = true, .disable_dmcu = true, .force_abm_enable = false, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index b3824287c224..1f5ee5cde6e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -721,10 +721,19 @@ static void dcn32_initialize_min_clocks(struct dc *dc) clocks->socclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].socclk_mhz * 1000; clocks->dramclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 1000; clocks->dppclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dppclk_mhz * 1000; - clocks->dispclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz * 1000; - clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; - clocks->fclk_p_state_change_support = true; - clocks->p_state_change_support = true; + if (dc->debug.disable_boot_optimizations) { + clocks->dispclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz * 1000; + } else { + /* Even though DPG_EN = 1 for the connected display, it still requires the + * correct timing so we cannot set DISPCLK to min freq or it could cause + * audio corruption. Read current DISPCLK from DENTIST and request the same + * freq to ensure that the timing is valid and unchanged. + */ + clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; + clocks->fclk_p_state_change_support = true; + clocks->p_state_change_support = true; + } dc->clk_mgr->funcs->update_clocks( dc->clk_mgr, @@ -823,7 +832,14 @@ void dcn32_init_hw(struct dc *dc) * everything down. */ if (dcb->funcs->is_accelerated_mode(dcb) || !dc->config.seamless_boot_edp_requested) { - hws->funcs.init_pipes(dc, dc->current_state); + /* Disable boot optimizations means power down everything including PHY, DIG, + * and OTG (i.e. the boot is not optimized because we do a full power down). 
+ */ + if (dc->hwss.enable_accelerated_mode && dc->debug.disable_boot_optimizations) + dc->hwss.enable_accelerated_mode(dc, dc->current_state); + else + hws->funcs.init_pipes(dc, dc->current_state); + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); @@ -932,6 +948,7 @@ void dcn32_init_hw(struct dc *dc) if (dc->ctx->dmub_srv) { dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub); dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; + dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch; } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 502f990346b1..22dd1ebea618 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -324,7 +324,6 @@ static const struct dcn10_link_enc_shift le_shift = { static const struct dcn10_link_enc_mask le_mask = { LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ - //DPCS_DCN31_MASK_SH_LIST(_MASK) }; @@ -726,6 +725,9 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_unbounded_requesting = false, .override_dispclk_programming = true, .disable_fpo_optimizations = false, + .fpo_vactive_margin_us = 2000, // 2000us + .disable_fpo_vactive = true, + .disable_boot_optimizations = false, }; static const struct dc_debug_options debug_defaults_diags = { @@ -2021,7 +2023,7 @@ int dcn32_populate_dml_pipes_from_context( // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). Only override to false // for SubVP - if (subvp_in_use) + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || subvp_in_use) context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false; else context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; @@ -2077,6 +2079,14 @@ static struct resource_funcs dcn32_res_pool_funcs = { .restore_mall_state = dcn32_restore_mall_state, }; +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* DCN32 support max 4 pipes */ + value = value & 0xf; + return value; +} + static bool dcn32_resource_construct( uint8_t num_virtual_links, @@ -2090,27 +2100,28 @@ static bool dcn32_resource_construct( uint32_t pipe_fuses = 0; uint32_t num_pipes = 4; - #undef REG_STRUCT - #define REG_STRUCT bios_regs - bios_regs_init(); - - #undef REG_STRUCT - #define REG_STRUCT clk_src_regs - clk_src_regs_init(0, A), - clk_src_regs_init(1, B), - clk_src_regs_init(2, C), - clk_src_regs_init(3, D), - clk_src_regs_init(4, E); - #undef REG_STRUCT - #define REG_STRUCT abm_regs - abm_regs_init(0), - abm_regs_init(1), - abm_regs_init(2), - abm_regs_init(3); - - #undef REG_STRUCT - #define REG_STRUCT dccg_regs - dccg_regs_init(); +#undef REG_STRUCT +#define REG_STRUCT bios_regs + bios_regs_init(); + +#undef REG_STRUCT +#define REG_STRUCT clk_src_regs + clk_src_regs_init(0, A), + clk_src_regs_init(1, B), + clk_src_regs_init(2, C), + clk_src_regs_init(3, D), + clk_src_regs_init(4, E); + +#undef REG_STRUCT +#define REG_STRUCT abm_regs + abm_regs_init(0), + abm_regs_init(1), + abm_regs_init(2), + abm_regs_init(3); + +#undef REG_STRUCT +#define REG_STRUCT dccg_regs + dccg_regs_init(); DC_FP_START(); @@ -2119,7 +2130,7 @@ static bool dcn32_resource_construct( pool->base.res_cap = &res_cap_dcn32; /* max number of pipes 
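read_pipe_fuses in the dcn32_resource changes above masks the fuse register down to the 4 pipes DCN32 can have; set fuse bits then harvest individual pipes. A toy version of counting usable pipes from such a fuse word (values purely illustrative):

#include <stdio.h>
#include <stdint.h>

/* DCN32-style: only the low 4 fuse bits are meaningful; a set bit disables
 * the corresponding pipe. */
static int usable_pipes(uint32_t fuse_reg, int max_pipes)
{
	uint32_t fuses = fuse_reg & 0xf;
	int count = 0;

	for (int i = 0; i < max_pipes; i++)
		if (!(fuses & (1u << i)))
			count++;
	return count;
}

int main(void)
{
	printf("%d of 4 pipes usable\n", usable_pipes(0x2, 4));
	return 0;
}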
for ASIC before checking for pipe fuses */ num_pipes = pool->base.res_cap->num_timing_generator; - pipe_fuses = REG_READ(CC_DC_PIPE_DIS); + pipe_fuses = read_pipe_fuses(ctx); for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) if (pipe_fuses & 1 << i) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 10a3350376e4..3937dbc1e552 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -39,6 +39,7 @@ #define DCN3_2_MBLK_HEIGHT_8BPE 64 #define DCN3_2_VMIN_DISPCLK_HZ 717000000 #define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq +#define DCN3_2_MIN_ACTIVE_SWITCH_MARGIN_FPO_US 100 // Only allow FPO + Vactive if active margin >= 100 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) @@ -146,6 +147,8 @@ void dcn32_restore_mall_state(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context); + bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe); unsigned int dcn32_calc_num_avail_chans_for_mall(struct dc *dc, int num_chans); @@ -472,6 +475,7 @@ double dcn32_determine_max_vratio_prefetch(struct dc *dc, struct dc_state *conte SRI_ARR(OTG_H_BLANK, DSCL, id), SRI_ARR(OTG_V_BLANK, DSCL, id), \ SRI_ARR(SCL_MODE, DSCL, id), SRI_ARR(LB_DATA_FORMAT, DSCL, id), \ SRI_ARR(LB_MEMORY_CTRL, DSCL, id), SRI_ARR(DSCL_AUTOCAL, DSCL, id), \ + SRI_ARR(DSCL_CONTROL, DSCL, id), \ SRI_ARR(SCL_TAP_CONTROL, DSCL, id), \ SRI_ARR(SCL_COEF_RAM_TAP_SELECT, DSCL, id), \ SRI_ARR(SCL_COEF_RAM_TAP_DATA, DSCL, id), \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 47fa51c1d3f4..eeca16faf31a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -27,6 +27,7 @@ #include "dcn32_resource.h" #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" +#include "dml/dcn32/dcn32_fpu.h" static bool is_dual_plane(enum surface_pixel_format format) { @@ -500,3 +501,158 @@ void dcn32_restore_mall_state(struct dc *dc, pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i]; } } + +#define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW) +/* + * Scaling factor for v_blank stretch calculations considering timing in + * micro-seconds and pixel clock in 100hz. + * Note: the parenthesis are necessary to ensure the correct order of + * operation where V_SCALE is used. 
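[Editor's note] A standalone userspace sketch of the fixed-point arithmetic used by get_frame_rate_at_max_stretch_100hz() introduced just below: the pixel clock is kept in 100 Hz units, the stretch budget in microseconds, and the result is the refresh rate scaled by 100 (the same "_100hz" fixed point the driver compares against min_refresh_in_uhz / 10000) that the panel would run at if the vertical blank were stretched by the full MAX_STRETCHED_V_BLANK plus any VActive margin. This is not part of the patch, and the 1920x1080@144 Hz timing used here is an illustrative assumption:

#include <stdint.h>
#include <stdio.h>

#define MAX_STRETCHED_V_BLANK 1000	/* us, must match FW (value from the patch) */

/* Mirrors the arithmetic of get_frame_rate_at_max_stretch_100hz(). */
static uint32_t frame_rate_at_max_stretch_100hz(uint32_t pix_clk_100hz,
						uint32_t h_total,
						uint32_t v_total,
						uint32_t v_addressable,
						uint32_t fpo_vactive_margin_us)
{
	uint32_t v_scale = 10000 / (MAX_STRETCHED_V_BLANK + fpo_vactive_margin_us);
	uint32_t sec_per_100_lines = pix_clk_100hz / h_total + 1;
	uint32_t max_v_blank = sec_per_100_lines / v_scale + 1;
	uint32_t curr_v_blank = v_total - v_addressable;
	uint32_t v_stretch_max = max_v_blank > curr_v_blank ? max_v_blank - curr_v_blank : 0;
	uint32_t stretched_frame_pix_cnt = (v_stretch_max + v_total) * h_total;
	uint32_t scaled_stretched_frame_pix_cnt = stretched_frame_pix_cnt / 10000;

	return pix_clk_100hz / scaled_stretched_frame_pix_cnt + 1;
}

int main(void)
{
	/* Assumed 1920x1080@144 Hz timing: 2200x1125 total, 356.4 MHz pixel clock. */
	uint32_t rate_100hz = frame_rate_at_max_stretch_100hz(3564000, 2200, 1125, 1080, 0);

	printf("refresh at max stretch ~ %u.%02u Hz\n", rate_100hz / 100, rate_100hz % 100);
	return 0;
}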
+ */ +#define V_SCALE (10000 / MAX_STRETCHED_V_BLANK) + +static int get_frame_rate_at_max_stretch_100hz( + struct dc_stream_state *fpo_candidate_stream, + uint32_t fpo_vactive_margin_us) +{ + struct dc_crtc_timing *timing = NULL; + uint32_t sec_per_100_lines; + uint32_t max_v_blank; + uint32_t curr_v_blank; + uint32_t v_stretch_max; + uint32_t stretched_frame_pix_cnt; + uint32_t scaled_stretched_frame_pix_cnt; + uint32_t scaled_refresh_rate; + uint32_t v_scale; + + if (fpo_candidate_stream == NULL) + return 0; + + /* check if refresh rate at least 120hz */ + timing = &fpo_candidate_stream->timing; + if (timing == NULL) + return 0; + + v_scale = 10000 / (MAX_STRETCHED_V_BLANK + fpo_vactive_margin_us); + + sec_per_100_lines = timing->pix_clk_100hz / timing->h_total + 1; + max_v_blank = sec_per_100_lines / v_scale + 1; + curr_v_blank = timing->v_total - timing->v_addressable; + v_stretch_max = (max_v_blank > curr_v_blank) ? (max_v_blank - curr_v_blank) : (0); + stretched_frame_pix_cnt = (v_stretch_max + timing->v_total) * timing->h_total; + scaled_stretched_frame_pix_cnt = stretched_frame_pix_cnt / 10000; + scaled_refresh_rate = (timing->pix_clk_100hz) / scaled_stretched_frame_pix_cnt + 1; + + return scaled_refresh_rate; + +} + +static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch( + struct dc_stream_state *fpo_candidate_stream, uint32_t fpo_vactive_margin_us) +{ + int refresh_rate_max_stretch_100hz; + int min_refresh_100hz; + + if (fpo_candidate_stream == NULL) + return false; + + refresh_rate_max_stretch_100hz = get_frame_rate_at_max_stretch_100hz(fpo_candidate_stream, fpo_vactive_margin_us); + min_refresh_100hz = fpo_candidate_stream->timing.min_refresh_in_uhz / 10000; + + if (refresh_rate_max_stretch_100hz < min_refresh_100hz) + return false; + + return true; +} + +static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream) +{ + int refresh_rate = 0; + int h_v_total = 0; + struct dc_crtc_timing *timing = NULL; + + if (fpo_candidate_stream == NULL) + return 0; + + /* check if refresh rate at least 120hz */ + timing = &fpo_candidate_stream->timing; + if (timing == NULL) + return 0; + + h_v_total = timing->h_total * timing->v_total; + if (h_v_total == 0) + return 0; + + refresh_rate = ((timing->pix_clk_100hz * 100) / (h_v_total)) + 1; + return refresh_rate; +} + +/** + * dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch - Determines if config can support FPO + * + * @param [in]: dc - current dc state + * @param [in]: context - new dc state + * + * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL + */ +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context) +{ + int refresh_rate = 0; + const int minimum_refreshrate_supported = 120; + struct dc_stream_state *fpo_candidate_stream = NULL; + bool is_fpo_vactive = false; + uint32_t fpo_vactive_margin_us = 0; + + if (context == NULL) + return NULL; + + if (dc->debug.disable_fams) + return NULL; + + if (!dc->caps.dmub_caps.mclk_sw) + return NULL; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down) + return NULL; + + /* For FPO we can support up to 2 display configs if: + * - first display uses FPO + * - Second display switches in VACTIVE */ + if (context->stream_count > 2) + return NULL; + else if (context->stream_count == 2) { + DC_FP_START(); + dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream); + DC_FP_END(); + + DC_FP_START(); + is_fpo_vactive = 
dcn32_find_vactive_pipe(dc, context, DCN3_2_MIN_ACTIVE_SWITCH_MARGIN_FPO_US); + DC_FP_END(); + if (!is_fpo_vactive || dc->debug.disable_fpo_vactive) + return NULL; + } else + fpo_candidate_stream = context->streams[0]; + + if (!fpo_candidate_stream) + return NULL; + + if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams) + return NULL; + + refresh_rate = get_refresh_rate(fpo_candidate_stream); + if (refresh_rate < minimum_refreshrate_supported) + return NULL; + + fpo_vactive_margin_us = is_fpo_vactive ? dc->debug.fpo_vactive_margin_us : 0; // For now hardcode the FPO + Vactive stretch margin to be 2000us + if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(fpo_candidate_stream, fpo_vactive_margin_us)) + return NULL; + + // check if freesync enabled + if (!fpo_candidate_stream->allow_freesync) + return NULL; + + if (fpo_candidate_stream->vrr_active_variable) + return NULL; + + return fpo_candidate_stream; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 79664ba7e7af..a60ddb343d13 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -724,6 +724,9 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_unbounded_requesting = false, .override_dispclk_programming = true, .disable_fpo_optimizations = false, + .fpo_vactive_margin_us = 2000, // 2000us + .disable_fpo_vactive = true, + .disable_boot_optimizations = false, }; static const struct dc_debug_options debug_defaults_diags = { @@ -1629,6 +1632,14 @@ static struct resource_funcs dcn321_res_pool_funcs = { .restore_mall_state = dcn32_restore_mall_state, }; +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* DCN321 support max 4 pipes */ + value = value & 0xf; + return value; +} + static bool dcn321_resource_construct( uint8_t num_virtual_links, @@ -1671,7 +1682,7 @@ static bool dcn321_resource_construct( pool->base.res_cap = &res_cap_dcn321; /* max number of pipes for ASIC before checking for pipe fuses */ num_pipes = pool->base.res_cap->num_timing_generator; - pipe_fuses = REG_READ(CC_DC_PIPE_DIS); + pipe_fuses = read_pipe_fuses(ctx); for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) if (pipe_fuses & 1 << i) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 38d1f2be8cf3..f1c1a4b5fcac 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -917,19 +917,19 @@ void dcn20_populate_dml_writeback_from_context(struct dc *dc, } void dcn20_fpu_set_wb_arb_params(struct mcif_arb_params *wb_arb_params, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, int i) + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, int i) { - int k; + int k; - dc_assert_fp_enabled(); + dc_assert_fp_enabled(); - for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) { - wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */ + for (k = 0; k < 
sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) { + wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */ } static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) @@ -1037,11 +1037,11 @@ static void dcn20_adjust_freesync_v_startup( *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); } -void dcn20_calculate_dlg_params( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) +void dcn20_calculate_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) { int i, pipe_idx; @@ -1083,6 +1083,7 @@ void dcn20_calculate_dlg_params( pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; @@ -1091,6 +1092,7 @@ void dcn20_calculate_dlg_params( context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes; context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; } + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = @@ -1118,6 +1120,7 @@ void dcn20_calculate_dlg_params( if (!context->res_ctx.pipe_ctx[i].stream) continue; + /* cstate disabled on 201 */ if (dc->ctx->dce_version == DCN_VERSION_2_01) cstate_en = false; @@ -1201,11 +1204,10 @@ static void swizzle_to_dml_params( } } -int dcn20_populate_dml_pipes_from_context( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) +int dcn20_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) { int pipe_cnt, i; bool synchronized_vblank = true; @@ -1257,6 +1259,8 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + pipes[pipe_cnt].pipe.dest.use_maximum_vstartup = dc->ctx->dce_version == DCN_VERSION_2_01; + pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; /* todo: rotation?*/ pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; @@ -1296,8 +1300,7 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.dest.pixel_rate_mhz *= 2; pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst; pipes[pipe_cnt].dout.dp_lanes = 4; - if (res_ctx->pipe_ctx[i].stream->link) - pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_na; + pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_na; pipes[pipe_cnt].dout.is_virtual = 0; pipes[pipe_cnt].pipe.dest.vtotal_min = 
res_ctx->pipe_ctx[i].stream->adjust.v_total_min; pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; @@ -1357,7 +1360,6 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.is_virtual = 1; pipes[pipe_cnt].dout.output_type = dm_dp; pipes[pipe_cnt].dout.dp_lanes = 4; - pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_hbr2; } switch (res_ctx->pipe_ctx[i].stream->timing.display_color_depth) { @@ -1507,6 +1509,7 @@ int dcn20_populate_dml_pipes_from_context( default: break; } + pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y; pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y; pipes[pipe_cnt].pipe.src.viewport_x_y = scl->viewport.x; @@ -1615,13 +1618,12 @@ int dcn20_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn20_calculate_wm( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *out_pipe_cnt, - int *pipe_split_from, - int vlevel, - bool fast_validate) +void dcn20_calculate_wm(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *out_pipe_cnt, + int *pipe_split_from, + int vlevel, + bool fast_validate) { int pipe_cnt, i, pipe_idx; @@ -1733,8 +1735,11 @@ void dcn20_calculate_wm( context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; } -void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb, - struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states) +void dcn20_update_bounding_box(struct dc *dc, + struct _vcs_dpi_soc_bounding_box_st *bb, + struct pp_smu_nv_clock_table *max_clocks, + unsigned int *uclk_states, + unsigned int num_states) { int num_calculated_states = 0; int min_dcfclk = 0; @@ -1796,9 +1801,8 @@ void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s bb->clock_limits[num_calculated_states].state = bb->num_states; } -void dcn20_cap_soc_clocks( - struct _vcs_dpi_soc_bounding_box_st *bb, - struct pp_smu_nv_clock_table max_clocks) +void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb, + struct pp_smu_nv_clock_table max_clocks) { int i; @@ -1954,80 +1958,80 @@ validate_out: } bool dcn20_validate_bandwidth_fp(struct dc *dc, - struct dc_state *context, - bool fast_validate) + struct dc_state *context, + bool fast_validate) { - bool voltage_supported = false; - bool full_pstate_supported = false; - bool dummy_pstate_supported = false; - double p_state_latency_us; + bool voltage_supported = false; + bool full_pstate_supported = false; + bool dummy_pstate_supported = false; + double p_state_latency_us; - dc_assert_fp_enabled(); + dc_assert_fp_enabled(); - p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; - context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support = - dc->debug.disable_dram_clock_change_vactive_support; - context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive = - dc->debug.enable_dram_clock_change_one_display_vactive; + p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; + context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support = + dc->debug.disable_dram_clock_change_vactive_support; + context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive = + dc->debug.enable_dram_clock_change_one_display_vactive; - /*Unsafe due to current pipe merge and split logic*/ - ASSERT(context != dc->current_state); + /*Unsafe due to current pipe merge and split logic*/ + ASSERT(context != 
dc->current_state); - if (fast_validate) { - return dcn20_validate_bandwidth_internal(dc, context, true); - } + if (fast_validate) { + return dcn20_validate_bandwidth_internal(dc, context, true); + } - // Best case, we support full UCLK switch latency - voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); - full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + // Best case, we support full UCLK switch latency + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); + full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; - if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || - (voltage_supported && full_pstate_supported)) { - context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported; - goto restore_dml_state; - } + if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || + (voltage_supported && full_pstate_supported)) { + context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported; + goto restore_dml_state; + } - // Fallback: Try to only support G6 temperature read latency - context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; + // Fallback: Try to only support G6 temperature read latency + context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; - voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); - dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); + dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; - if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) { - context->bw_ctx.bw.dcn.clk.p_state_change_support = false; - goto restore_dml_state; - } + if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) { + context->bw_ctx.bw.dcn.clk.p_state_change_support = false; + goto restore_dml_state; + } - // ERROR: fallback is supposed to always work. - ASSERT(false); + // ERROR: fallback is supposed to always work. + ASSERT(false); restore_dml_state: - context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; - return voltage_supported; + context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; + return voltage_supported; } void dcn20_fpu_set_wm_ranges(int i, - struct pp_smu_wm_range_sets *ranges, - struct _vcs_dpi_soc_bounding_box_st *loaded_bb) + struct pp_smu_wm_range_sets *ranges, + struct _vcs_dpi_soc_bounding_box_st *loaded_bb) { - dc_assert_fp_enabled(); + dc_assert_fp_enabled(); - ranges->reader_wm_sets[i].min_fill_clk_mhz = (i > 0) ? (loaded_bb->clock_limits[i - 1].dram_speed_mts / 16) + 1 : 0; - ranges->reader_wm_sets[i].max_fill_clk_mhz = loaded_bb->clock_limits[i].dram_speed_mts / 16; + ranges->reader_wm_sets[i].min_fill_clk_mhz = (i > 0) ? 
(loaded_bb->clock_limits[i - 1].dram_speed_mts / 16) + 1 : 0; + ranges->reader_wm_sets[i].max_fill_clk_mhz = loaded_bb->clock_limits[i].dram_speed_mts / 16; } void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v, - int vlevel, - int max_mpc_comb, - int pipe_idx, - bool is_validating_bw) + int vlevel, + int max_mpc_comb, + int pipe_idx, + bool is_validating_bw) { - dc_assert_fp_enabled(); + dc_assert_fp_enabled(); - if (is_validating_bw) - v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] *= 2; - else - v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; + if (is_validating_bw) + v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] *= 2; + else + v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; } int dcn21_populate_dml_pipes_from_context(struct dc *dc, @@ -2329,7 +2333,7 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params k++; } - memcpy(dcn2_1_soc.clock_limits, s, sizeof(dcn2_1_soc.clock_limits)); + memcpy(&dcn2_1_soc.clock_limits, s, sizeof(dcn2_1_soc.clock_limits)); if (clk_table->num_entries) { dcn2_1_soc.num_states = clk_table->num_entries + 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index 80972ee5e55b..a352c703e258 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -368,7 +368,9 @@ void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) dc_assert_fp_enabled(); if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || + context->bw_ctx.dml.soc.dram_clock_change_latency_us == 0) + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; } @@ -563,6 +565,20 @@ void dcn30_fpu_calculate_wm_and_dlg( pipe_idx++; } + // WA: restrict FPO to use first non-strobe mode (NV24 BW issue) + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && + dc->dml.soc.num_chans <= 4 && + context->bw_ctx.dml.vba.DRAMSpeed <= 1700 && + context->bw_ctx.dml.vba.DRAMSpeed >= 1500) { + + for (i = 0; i < dc->dml.soc.num_states; i++) { + if (dc->dml.soc.clock_limits[i].dram_speed_mts > 1700) { + context->bw_ctx.dml.vba.DRAMSpeed = dc->dml.soc.clock_limits[i].dram_speed_mts; + break; + } + } + } + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); if (!pstate_en) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index d0303173ce80..7d0626e42ea6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -4864,7 +4864,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursorPre[k], &v->UrgentBurstFactorLumaPre[k], - &v->UrgentBurstFactorChroma[k], + &v->UrgentBurstFactorChromaPre[k], &v->NoUrgentLatencyHidingPre[k]); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 536a63624595..bd674dc30df3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -5191,7 +5191,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursorPre[k], &v->UrgentBurstFactorLumaPre[k], - &v->UrgentBurstFactorChroma[k], + &v->UrgentBurstFactorChromaPre[k], &v->NotUrgentLatencyHidingPre[k]); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index c52b76610bd2..9e54e3d0eb78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -97,7 +97,7 @@ struct _vcs_dpi_ip_params_st dcn3_14_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { @@ -149,8 +149,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .num_states = 5, .sr_exit_time_us = 16.5, .sr_enter_plus_exit_time_us = 18.5, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 310.0, + .sr_exit_z8_time_us = 268.0, + .sr_enter_plus_exit_z8_time_us = 393.0, .writeback_latency_us = 12.0, .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index daf319370190..7eb2173b7691 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -5288,7 +5288,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_ v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursorPre[k], &v->UrgentBurstFactorLumaPre[k], - &v->UrgentBurstFactorChroma[k], + &v->UrgentBurstFactorChromaPre[k], &v->NotUrgentLatencyHidingPre[k]); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 1e26adf987cc..47beb4ea779d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -109,7 +109,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { { .state = 0, .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, + .fabricclk_mhz = 2500.0, .dispclk_mhz = 2150.0, .dppclk_mhz = 2150.0, .phyclk_mhz = 810.0, @@ -117,7 +117,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .phyclk_d32_mhz = 625.0, .socclk_mhz = 1200.0, .dscclk_mhz = 716.667, - .dram_speed_mts = 16000.0, + .dram_speed_mts = 18000.0, .dtbclk_mhz = 1564.0, }, }, @@ -148,7 +148,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .max_avg_fabric_bw_use_normal_percent = 60.0, .max_avg_dram_bw_use_normal_strobe_percent = 50.0, .max_avg_dram_bw_use_normal_percent = 15.0, - .num_chans = 8, + .num_chans = 24, .dram_channel_width_bytes = 2, .fabric_datapath_to_dcn_data_return_bytes = 64, .return_bus_width_bytes = 64, @@ -1331,6 +1331,11 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; + /* Pstate change might not be supported by hardware, but it might be 
+ * possible with firmware driven vertical blank stretching. + */ + context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching; + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000; @@ -1927,6 +1932,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int min_dram_speed_mts_margin; bool need_fclk_lat_as_dummy = false; bool is_subvp_p_drr = false; + struct dc_stream_state *fpo_candidate_stream = NULL; dc_assert_fp_enabled(); @@ -1968,8 +1974,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (!pstate_en || (!dc->debug.disable_fpo_optimizations && pstate_en && vlevel != 0)) { /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */ - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = - dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); + fpo_candidate_stream = dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); + if (fpo_candidate_stream) { + fpo_candidate_stream->fpo_in_use = true; + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = true; + } if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc, @@ -2002,6 +2011,10 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, * voltage level) */ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]) + context->streams[i]->fpo_in_use = false; + } context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); } @@ -2157,7 +2170,13 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, * DCFCLK: Min, as reported by PM FW, when available * UCLK: Min, as reported by PM FW, when available */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + + /* For set A set the correct latency values (i.e. non-dummy values) unconditionally + */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; @@ -2792,3 +2811,74 @@ double dcn32_determine_max_vratio_prefetch(struct dc *dc, struct dc_state *conte } return max_vratio_pre; } + +/** + * dcn32_assign_fpo_vactive_candidate - Assign the FPO stream candidate for FPO + VActive case + * + * This function chooses the FPO candidate stream for FPO + VActive cases (2 stream config). 
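[Editor's note] For the two-display FPO + VActive case this helper handles, dcn32_assign_fpo_vactive_candidate() picks the stream whose DRAM-clock-change active margin is <= 0 as the FPO candidate, and dcn32_find_vactive_pipe() then requires the remaining display to have at least DCN3_2_MIN_ACTIVE_SWITCH_MARGIN_FPO_US (100 us) of margin. A simplified standalone sketch of that pairing, not part of the patch; the per-stream margins are made-up example numbers:

#include <stdbool.h>
#include <stdio.h>

#define MIN_ACTIVE_SWITCH_MARGIN_FPO_US 100	/* mirrors DCN3_2_MIN_ACTIVE_SWITCH_MARGIN_FPO_US */

struct example_stream {
	const char *name;
	int active_margin_us;	/* stand-in for ActiveDRAMClockChangeLatencyMargin */
};

/* Pick the FPO candidate (margin <= 0) and check the other stream can switch in VACTIVE. */
static const struct example_stream *pick_fpo_candidate(const struct example_stream s[2],
						       bool *vactive_ok)
{
	const struct example_stream *fpo = NULL;
	const struct example_stream *other = NULL;

	if (s[0].active_margin_us <= 0) {
		fpo = &s[0];
		other = &s[1];
	} else if (s[1].active_margin_us <= 0) {
		fpo = &s[1];
		other = &s[0];
	}

	*vactive_ok = other && other->active_margin_us >= MIN_ACTIVE_SWITCH_MARGIN_FPO_US;
	return fpo;
}

int main(void)
{
	struct example_stream s[2] = { { "eDP 144Hz", -20 }, { "DP 60Hz", 450 } };
	bool vactive_ok;
	const struct example_stream *fpo = pick_fpo_candidate(s, &vactive_ok);

	printf("FPO candidate: %s, VACTIVE partner ok: %d\n",
	       fpo ? fpo->name : "none", vactive_ok);
	return 0;
}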
+ * For FPO + VAtive cases, the assumption is that one display has ActiveMargin > 0, and the + * other display has ActiveMargin <= 0. This function will choose the pipe/stream that has + * ActiveMargin <= 0 to be the FPO stream candidate if found. + * + * + * @param [in]: dc - current dc state + * @param [in]: context - new dc state + * @param [out]: fpo_candidate_stream - pointer to FPO stream candidate if one is found + * + * Return: void + */ +void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream) +{ + unsigned int i, pipe_idx; + const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { + *fpo_candidate_stream = pipe->stream; + break; + } + pipe_idx++; + } +} + +/** + * dcn32_find_vactive_pipe - Determines if the config has a pipe that can switch in VACTIVE + * + * @param [in]: dc - current dc state + * @param [in]: context - new dc state + * @param [in]: vactive_margin_req_us - The vactive marign required for a vactive pipe to be + * considered "found" + * + * Return: True if VACTIVE display is found, false otherwise + */ +bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req_us) +{ + unsigned int i, pipe_idx; + const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + bool vactive_found = false; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] >= vactive_margin_req_us) { + vactive_found = true; + break; + } + pipe_idx++; + } + return vactive_found; +} + +void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb) +{ + dc_assert_fp_enabled(); + dcn3_2_soc.clock_limits[0].dcfclk_mhz = 1200.0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index ab010e7e840b..dcf512cd3072 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -76,4 +76,10 @@ void dcn32_patch_dpm_table(struct clk_bw_params *bw_params); void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, int pipe_cnt); +void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream); + +bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req); + +void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index f74730c2abbd..13c7e7394b1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -3354,7 +3354,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /* Output */ &mode_lib->vba.UrgentBurstFactorCursorPre[k], &mode_lib->vba.UrgentBurstFactorLumaPre[k], - &mode_lib->vba.UrgentBurstFactorChroma[k], + 
&mode_lib->vba.UrgentBurstFactorChromaPre[k], &mode_lib->vba.NotUrgentLatencyHidingPre[k]); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 57b9bd896678..342a1bcb4927 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -106,16 +106,16 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .clock_limits = { { .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, + .dcfclk_mhz = 1434.0, + .fabricclk_mhz = 2250.0, + .dispclk_mhz = 1720.0, + .dppclk_mhz = 1720.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, + .phyclk_d32_mhz = 313.0, .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, + .dscclk_mhz = 573.333, + .dram_speed_mts = 16000.0, .dtbclk_mhz = 1564.0, }, }, @@ -125,14 +125,14 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .sr_exit_z8_time_us = 285.0, .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, - .round_trip_ping_latency_dcfclk_cycles = 263, + .round_trip_ping_latency_dcfclk_cycles = 207, .urgent_latency_pixel_data_only_us = 4, .urgent_latency_pixel_mixed_with_vm_data_us = 4, .urgent_latency_vm_data_only_us = 4, - .fclk_change_latency_us = 20, - .usr_retraining_latency_us = 2, - .smn_latency_us = 2, - .mall_allocated_for_dcn_mbytes = 64, + .fclk_change_latency_us = 7, + .usr_retraining_latency_us = 0, + .smn_latency_us = 0, + .mall_allocated_for_dcn_mbytes = 32, .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index 4125d3d111d1..bdf3ac6cadd5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -41,51 +41,51 @@ #include "dcn32/display_rq_dlg_calc_32.h" #include "dml_logger.h" -const struct dml_funcs dml20_funcs = { +static const struct dml_funcs dml20_funcs = { .validate = dml20_ModeSupportAndSystemConfigurationFull, .recalculate = dml20_recalculate, .rq_dlg_get_dlg_reg = dml20_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml20_rq_dlg_get_rq_reg }; -const struct dml_funcs dml20v2_funcs = { +static const struct dml_funcs dml20v2_funcs = { .validate = dml20v2_ModeSupportAndSystemConfigurationFull, .recalculate = dml20v2_recalculate, .rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg }; -const struct dml_funcs dml21_funcs = { - .validate = dml21_ModeSupportAndSystemConfigurationFull, - .recalculate = dml21_recalculate, - .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg, - .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg +static const struct dml_funcs dml21_funcs = { + .validate = dml21_ModeSupportAndSystemConfigurationFull, + .recalculate = dml21_recalculate, + .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg }; -const struct dml_funcs dml30_funcs = { +static const struct dml_funcs dml30_funcs = { .validate = dml30_ModeSupportAndSystemConfigurationFull, .recalculate = dml30_recalculate, .rq_dlg_get_dlg_reg = dml30_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml30_rq_dlg_get_rq_reg }; -const struct dml_funcs dml31_funcs = { +static const struct dml_funcs dml31_funcs = { 
.validate = dml31_ModeSupportAndSystemConfigurationFull, .recalculate = dml31_recalculate, .rq_dlg_get_dlg_reg = dml31_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg }; -const struct dml_funcs dml314_funcs = { +static const struct dml_funcs dml314_funcs = { .validate = dml314_ModeSupportAndSystemConfigurationFull, .recalculate = dml314_recalculate, .rq_dlg_get_dlg_reg = dml314_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml314_rq_dlg_get_rq_reg }; -const struct dml_funcs dml32_funcs = { +static const struct dml_funcs dml32_funcs = { .validate = dml32_ModeSupportAndSystemConfigurationFull, - .recalculate = dml32_recalculate, + .recalculate = dml32_recalculate, .rq_dlg_get_dlg_reg_v2 = dml32_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg_v2 = dml32_rq_dlg_get_rq_reg }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 591ab1389e3b..bef843cc32a1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -293,6 +293,9 @@ struct clk_mgr_funcs { /* Get SMU present */ bool (*is_smu_present)(struct clk_mgr *clk_mgr); + + int (*get_dispclk_from_dentist)(struct clk_mgr *clk_mgr_base); + }; struct clk_mgr { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index f6c5ee2d639b..2267fb097830 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -1927,6 +1927,11 @@ static void disable_link_dp(struct dc_link *link, dp_disable_link_phy(link, link_res, signal); + if (link->connector_signal == SIGNAL_TYPE_EDP) { + if (!link->dc->config.edp_no_power_sequencing) + link->dc->hwss.edp_power_control(link, false); + } + if (signal == SIGNAL_TYPE_DISPLAY_PORT_MST) /* set the sink to SST mode after disabling the link */ enable_mst_on_sink(link, false); @@ -2035,6 +2040,12 @@ static enum dc_status enable_link_dp(struct dc_state *state, uint32_t post_oui_delay = 30; // 30ms /* Reduce link bandwidth between failed link training attempts. */ bool do_fallback = false; + int lt_attempts = LINK_TRAINING_ATTEMPTS; + + // Increase retry count if attempting DP1.x on FIXED_VS link + if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) + lt_attempts = 10; // check for seamless boot for (i = 0; i < state->stream_count; i++) { @@ -2099,7 +2110,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (perform_link_training_with_retries(link_settings, skip_video_pattern, - LINK_TRAINING_ATTEMPTS, + lt_attempts, pipe_ctx, pipe_ctx->stream->signal, do_fallback)) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index c840ef17802e..ba98013fecd0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1043,6 +1043,9 @@ static enum dc_status wake_up_aux_channel(struct dc_link *link) DP_SET_POWER, &dpcd_power_state, sizeof(dpcd_power_state)); + if (status < 0) + DC_LOG_DC("%s: Failed to power up sink: %s\n", __func__, + dpcd_power_state == DP_SET_POWER_D0 ? 
"D0" : "D3"); return DC_ERROR_UNEXPECTED; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index f301c9eaf2f9..579fa222810d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -212,27 +212,36 @@ enum dpcd_training_patterns switch (pattern) { case DP_TRAINING_PATTERN_SEQUENCE_1: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS1\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1; break; case DP_TRAINING_PATTERN_SEQUENCE_2: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS2\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2; break; case DP_TRAINING_PATTERN_SEQUENCE_3: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS3\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3; break; case DP_TRAINING_PATTERN_SEQUENCE_4: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS4\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4; break; case DP_128b_132b_TPS1: + DC_LOG_HW_LINK_TRAINING("%s: Using DP 128b/132b training pattern TPS1\n", __func__); dpcd_tr_pattern = DPCD_128b_132b_TPS1; break; case DP_128b_132b_TPS2: + DC_LOG_HW_LINK_TRAINING("%s: Using DP 128b/132b training pattern TPS2\n", __func__); dpcd_tr_pattern = DPCD_128b_132b_TPS2; break; case DP_128b_132b_TPS2_CDS: + DC_LOG_HW_LINK_TRAINING("%s: Using DP 128b/132b training pattern TPS2 CDS\n", + __func__); dpcd_tr_pattern = DPCD_128b_132b_TPS2_CDS; break; case DP_TRAINING_PATTERN_VIDEOIDLE: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern videoidle\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_VIDEOIDLE; break; default: @@ -1496,7 +1505,10 @@ enum link_training_result dp_perform_link_training( * Non-LT AUX transactions inside training mode. 
*/ if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING) - status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, <_settings); + if (link->dc->config.use_old_fixed_vs_sequence) + status = dp_perform_fixed_vs_pe_training_sequence_legacy(link, link_res, <_settings); + else + status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, <_settings); else if (encoding == DP_8b_10b_ENCODING) status = dp_perform_8b_10b_link_training(link, link_res, <_settings); else if (encoding == DP_128b_132b_ENCODING) @@ -1557,9 +1569,10 @@ bool perform_link_training_with_retries( j = 0; while (j < attempts && fail_count < (attempts * 10)) { - DC_LOG_HW_LINK_TRAINING("%s: Beginning link(%d) training attempt %u of %d @ rate(%d) x lane(%d)\n", - __func__, link->link_index, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, - cur_link_settings.lane_count); + DC_LOG_HW_LINK_TRAINING("%s: Beginning link(%d) training attempt %u of %d @ rate(%d) x lane(%d) @ spread = %x\n", + __func__, link->link_index, (unsigned int)j + 1, attempts, + cur_link_settings.link_rate, cur_link_settings.lane_count, + cur_link_settings.link_spread); dp_enable_link_phy( link, @@ -1583,7 +1596,10 @@ bool perform_link_training_with_retries( * Report and continue with eDP panel mode to * perform eDP link training with right settings */ - cp_psp->funcs.enable_assr(cp_psp->handle, link); + bool result; + result = cp_psp->funcs.enable_assr(cp_psp->handle, link); + if (!result && link->panel_mode != DP_PANEL_MODE_EDP) + panel_mode = DP_PANEL_MODE_DEFAULT; } } @@ -1637,9 +1653,10 @@ bool perform_link_training_with_retries( break; } - DC_LOG_WARNING("%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) : fail reason:(%d)\n", - __func__, link->link_index, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, - cur_link_settings.lane_count, status); + DC_LOG_WARNING("%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) @ spread = %x : fail reason:(%d)\n", + __func__, link->link_index, (unsigned int)j + 1, attempts, + cur_link_settings.link_rate, cur_link_settings.lane_count, + cur_link_settings.link_spread, status); dp_disable_link_phy(link, &pipe_ctx->link_res, signal); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 14b98e096d39..3889ebb2256b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -225,8 +225,10 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( offset); /* 5. check CR done*/ - if (dp_is_cr_done(lane_count, dpcd_lane_status)) + if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); return LINK_TRAINING_SUCCESS; + } /* 6. max VS reached*/ if ((link_dp_get_encoding_format(<_settings->link_settings) == diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index ab4aafdb5e5c..4f4e899e5c46 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -401,6 +401,7 @@ static enum link_training_result dpia_training_cr_non_transparent( /* Check if clock recovery successful. 
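[Editor's note] The "Clock recovery OK" traces added above and below fire when dp_is_cr_done() sees every active lane report its CR_DONE bit; per the DisplayPort spec the LANEx_y_STATUS DPCD registers carry one nibble per lane with bit 0 = CR_DONE, bit 1 = CHANNEL_EQ_DONE, bit 2 = SYMBOL_LOCKED. A hedged standalone sketch of that per-lane check, not the driver's actual helper; the raw status nibbles are example values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LANE_CR_DONE		(1u << 0)	/* per-lane nibble from DPCD LANEx_y_STATUS */
#define LANE_CHANNEL_EQ_DONE	(1u << 1)
#define LANE_SYMBOL_LOCKED	(1u << 2)

/* Simplified analogue of dp_is_cr_done(): every active lane must report CR_DONE. */
static bool all_lanes_cr_done(const uint8_t lane_status[], unsigned int lane_count)
{
	unsigned int lane;

	for (lane = 0; lane < lane_count; lane++)
		if (!(lane_status[lane] & LANE_CR_DONE))
			return false;
	return true;
}

int main(void)
{
	/* Example per-lane nibbles, already split out of DPCD 0x202/0x203. */
	uint8_t good[4] = { 0x7, 0x7, 0x7, 0x7 };
	uint8_t bad[4]  = { 0x7, 0x7, 0x6, 0x7 };	/* lane 2 lost clock recovery */

	printf("good link CR done: %d\n", all_lanes_cr_done(good, 4));
	printf("bad link  CR done: %d\n", all_lanes_cr_done(bad, 4));
	return 0;
}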
*/ if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); result = LINK_TRAINING_SUCCESS; break; } @@ -508,6 +509,7 @@ static enum link_training_result dpia_training_cr_transparent( /* Check if clock recovery successful. */ if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); result = LINK_TRAINING_SUCCESS; break; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index a4071d2959a0..5731c4b61f9f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -223,7 +223,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq } -enum link_training_result dp_perform_fixed_vs_pe_training_sequence( +enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( struct dc_link *link, const struct link_resource *link_res, struct link_training_settings *lt_settings) @@ -577,3 +577,379 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( return status; } + +enum link_training_result dp_perform_fixed_vs_pe_training_sequence( + struct dc_link *link, + const struct link_resource *link_res, + struct link_training_settings *lt_settings) +{ + const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF}; + const uint8_t offset = dp_parse_lttpr_repeater_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + const uint8_t vendor_lttpr_write_data_intercept_en[4] = {0x1, 0x55, 0x63, 0x0}; + const uint8_t vendor_lttpr_write_data_intercept_dis[4] = {0x1, 0x55, 0x63, 0x6E}; + const uint8_t vendor_lttpr_write_data_adicora_eq1[4] = {0x1, 0x55, 0x63, 0x2E}; + const uint8_t vendor_lttpr_write_data_adicora_eq2[4] = {0x1, 0x55, 0x63, 0x01}; + const uint8_t vendor_lttpr_write_data_adicora_eq3[4] = {0x1, 0x55, 0x63, 0x68}; + uint32_t pre_disable_intercept_delay_ms = link->dc->debug.fixed_vs_aux_delay_config_wa; + uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0}; + uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0}; + + uint32_t vendor_lttpr_write_address = 0xF004F; + enum link_training_result status = LINK_TRAINING_SUCCESS; + uint8_t lane = 0; + union down_spread_ctrl downspread = {0}; + union lane_count_set lane_count_set = {0}; + uint8_t toggle_rate; + uint8_t rate; + + /* Only 8b/10b is supported */ + ASSERT(link_dp_get_encoding_format(<_settings->link_settings) == + DP_8b_10b_ENCODING); + + if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) { + status = perform_fixed_vs_pe_nontransparent_training_sequence(link, link_res, lt_settings); + return status; + } + + if (offset != 0xFF) { + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* Certain display and cable configuration require extra delay */ + if (offset > 2) + pre_disable_intercept_delay_ms = link->dc->debug.fixed_vs_aux_delay_config_wa * 2; + } + + /* Vendor specific: Reset lane settings */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_reset[0], + sizeof(vendor_lttpr_write_data_reset)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + 
&vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + /* Vendor specific: Enable intercept */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_en[0], + sizeof(vendor_lttpr_write_data_intercept_en)); + + /* 1. set link rate, lane count and spread. */ + + downspread.raw = (uint8_t)(lt_settings->link_settings.link_spread); + + lane_count_set.bits.LANE_COUNT_SET = + lt_settings->link_settings.lane_count; + + lane_count_set.bits.ENHANCED_FRAMING = lt_settings->enhanced_framing; + lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = 0; + + + if (lt_settings->pattern_for_eq < DP_TRAINING_PATTERN_SEQUENCE_4) { + lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = + link->dpcd_caps.max_ln_count.bits.POST_LT_ADJ_REQ_SUPPORTED; + } + + core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, + &downspread.raw, sizeof(downspread)); + + core_link_write_dpcd(link, DP_LANE_COUNT_SET, + &lane_count_set.raw, 1); + + rate = get_dpcd_link_rate(<_settings->link_settings); + + /* Vendor specific: Toggle link rate */ + toggle_rate = (rate == 0x6) ? 0xA : 0x6; + + if (link->vendor_specific_lttpr_link_rate_wa == rate) { + core_link_write_dpcd( + link, + DP_LINK_BW_SET, + &toggle_rate, + 1); + } + + link->vendor_specific_lttpr_link_rate_wa = rate; + + core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1); + + DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x framing = %x\n %x spread = %x\n", + __func__, + DP_LINK_BW_SET, + lt_settings->link_settings.link_rate, + DP_LANE_COUNT_SET, + lt_settings->link_settings.lane_count, + lt_settings->enhanced_framing, + DP_DOWNSPREAD_CTRL, + lt_settings->link_settings.link_spread); + + /* 2. Perform link training */ + + /* Perform Clock Recovery Sequence */ + if (status == LINK_TRAINING_SUCCESS) { + const uint8_t max_vendor_dpcd_retries = 10; + uint32_t retries_cr; + uint32_t retry_count; + uint32_t wait_time_microsec; + enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX]; + union lane_align_status_updated dpcd_lane_status_updated; + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; + enum dc_status dpcd_status = DC_OK; + uint8_t i = 0; + + retries_cr = 0; + retry_count = 0; + + memset(&dpcd_lane_status, '\0', sizeof(dpcd_lane_status)); + memset(&dpcd_lane_status_updated, '\0', + sizeof(dpcd_lane_status_updated)); + + while ((retries_cr < LINK_TRAINING_MAX_RETRY_COUNT) && + (retry_count < LINK_TRAINING_MAX_CR_RETRY)) { + + + /* 1. call HWSS to set lane settings */ + dp_set_hw_lane_settings( + link, + link_res, + lt_settings, + 0); + + /* 2. update DPCD of the receiver */ + if (!retry_count) { + /* EPR #361076 - write as a 5-byte burst, + * but only for the 1-st iteration. 
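[Editor's note] Later in this new sequence (the else branch of the clock-recovery loop, and again during channel EQ) the DPRX-requested voltage swing and pre-emphasis for all lanes are packed two bits per lane into the last byte of the four-byte vendor writes (vendor_lttpr_write_data_vs / vendor_lttpr_write_data_pe). A standalone sketch of that packing, not part of the patch; the per-lane settings are example values:

#include <stdint.h>
#include <stdio.h>

/* Pack 2-bit per-lane settings into one byte, as done for the last byte of the
 * vendor_lttpr_write_data_vs/_pe payloads. */
static uint8_t pack_lane_settings_2bit(const uint8_t per_lane[], unsigned int lane_count)
{
	uint8_t packed = 0;
	unsigned int lane;

	for (lane = 0; lane < lane_count; lane++)
		packed |= (per_lane[lane] & 0x3) << (2 * lane);
	return packed;
}

int main(void)
{
	uint8_t vs[4] = { 1, 1, 2, 2 };		/* example VOLTAGE_SWING_SET per lane */
	uint8_t pe[4] = { 0, 2, 0, 2 };		/* example PRE_EMPHASIS_SET per lane */
	uint8_t vendor_write_vs[4] = { 0x1, 0x51, 0x63, 0x0 };
	uint8_t vendor_write_pe[4] = { 0x1, 0x52, 0x63, 0x0 };

	vendor_write_vs[3] = pack_lane_settings_2bit(vs, 4);
	vendor_write_pe[3] = pack_lane_settings_2bit(pe, 4);

	printf("VS payload byte: 0x%02x, PE payload byte: 0x%02x\n",
	       vendor_write_vs[3], vendor_write_pe[3]);
	return 0;
}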
+ */ + dpcd_set_lt_pattern_and_lane_settings( + link, + lt_settings, + lt_settings->pattern_for_cr, + 0); + /* Vendor specific: Disable intercept */ + for (i = 0; i < max_vendor_dpcd_retries; i++) { + msleep(pre_disable_intercept_delay_ms); + dpcd_status = core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_dis[0], + sizeof(vendor_lttpr_write_data_intercept_dis)); + + if (dpcd_status == DC_OK) + break; + + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_en[0], + sizeof(vendor_lttpr_write_data_intercept_en)); + } + } else { + vendor_lttpr_write_data_vs[3] = 0; + vendor_lttpr_write_data_pe[3] = 0; + + for (lane = 0; lane < lane_count; lane++) { + vendor_lttpr_write_data_vs[3] |= + lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Vendor specific: Update VS and PE to DPRX requested value */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + dpcd_set_lane_settings( + link, + lt_settings, + 0); + } + + /* 3. wait receiver to lock-on*/ + wait_time_microsec = lt_settings->cr_pattern_time; + + dp_wait_for_training_aux_rd_interval( + link, + wait_time_microsec); + + /* 4. Read lane status and requested drive + * settings as set by the sink + */ + dp_get_lane_status_and_lane_adjust( + link, + lt_settings, + dpcd_lane_status, + &dpcd_lane_status_updated, + dpcd_lane_adjust, + 0); + + /* 5. check CR done*/ + if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + status = LINK_TRAINING_SUCCESS; + break; + } + + /* 6. max VS reached*/ + if (dp_is_max_vs_reached(lt_settings)) + break; + + /* 7. same lane settings */ + /* Note: settings are the same for all lanes, + * so comparing first lane is sufficient + */ + if (lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET == + dpcd_lane_adjust[0].bits.VOLTAGE_SWING_LANE) + retries_cr++; + else + retries_cr = 0; + + /* 8. update VS/PE/PC2 in lt_settings*/ + dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, + lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); + retry_count++; + } + + if (retry_count >= LINK_TRAINING_MAX_CR_RETRY) { + ASSERT(0); + DC_LOG_ERROR("%s: Link Training Error, could not get CR after %d tries. 
Possibly voltage swing issue", + __func__, + LINK_TRAINING_MAX_CR_RETRY); + + } + + status = dp_get_cr_failure(lane_count, dpcd_lane_status); + } + + /* Perform Channel EQ Sequence */ + if (status == LINK_TRAINING_SUCCESS) { + enum dc_dp_training_pattern tr_pattern; + uint32_t retries_ch_eq; + uint32_t wait_time_microsec; + enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; + union lane_align_status_updated dpcd_lane_status_updated = {0}; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; + + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_adicora_eq1[0], + sizeof(vendor_lttpr_write_data_adicora_eq1)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_adicora_eq2[0], + sizeof(vendor_lttpr_write_data_adicora_eq2)); + + /* Note: also check that TPS4 is a supported feature*/ + tr_pattern = lt_settings->pattern_for_eq; + + dp_set_hw_training_pattern(link, link_res, tr_pattern, 0); + + status = LINK_TRAINING_EQ_FAIL_EQ; + + for (retries_ch_eq = 0; retries_ch_eq <= LINK_TRAINING_MAX_RETRY_COUNT; + retries_ch_eq++) { + + dp_set_hw_lane_settings(link, link_res, lt_settings, 0); + + vendor_lttpr_write_data_vs[3] = 0; + vendor_lttpr_write_data_pe[3] = 0; + + for (lane = 0; lane < lane_count; lane++) { + vendor_lttpr_write_data_vs[3] |= + lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Vendor specific: Update VS and PE to DPRX requested value */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + /* 2. update DPCD*/ + if (!retries_ch_eq) { + /* EPR #361076 - write as a 5-byte burst, + * but only for the 1-st iteration + */ + + dpcd_set_lt_pattern_and_lane_settings( + link, + lt_settings, + tr_pattern, 0); + + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_adicora_eq3[0], + sizeof(vendor_lttpr_write_data_adicora_eq3)); + } else + dpcd_set_lane_settings(link, lt_settings, 0); + + /* 3. wait for receiver to lock-on*/ + wait_time_microsec = lt_settings->eq_pattern_time; + + dp_wait_for_training_aux_rd_interval( + link, + wait_time_microsec); + + /* 4. Read lane status and requested + * drive settings as set by the sink + */ + dp_get_lane_status_and_lane_adjust( + link, + lt_settings, + dpcd_lane_status, + &dpcd_lane_status_updated, + dpcd_lane_adjust, + 0); + + /* 5. check CR done*/ + if (!dp_is_cr_done(lane_count, dpcd_lane_status)) { + status = LINK_TRAINING_EQ_FAIL_CR; + break; + } + + /* 6. check CHEQ done*/ + if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) && + dp_is_symbol_locked(lane_count, dpcd_lane_status) && + dp_is_interlane_aligned(dpcd_lane_status_updated)) { + status = LINK_TRAINING_SUCCESS; + break; + } + + /* 7. 
update VS/PE/PC2 in lt_settings*/ + dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, + lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); + } + } + + return status; +} diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h index e61970e27661..c0d6ea329504 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h @@ -28,6 +28,11 @@ #define __DC_LINK_DP_FIXED_VS_PE_RETIMER_H__ #include "link_dp_training.h" +enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( + struct dc_link *link, + const struct link_resource *link_res, + struct link_training_settings *lt_settings); + enum link_training_result dp_perform_fixed_vs_pe_training_sequence( struct dc_link *link, const struct link_resource *link_res, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index d895046787bc..8d1df863659c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -83,6 +83,7 @@ void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode) ASSERT(result == DC_OK); } } + link->panel_mode = panel_mode; DC_LOG_DETECTION_DP_CAPS("Link: %d eDP panel mode supported: %d " "eDP panel mode enabled: %d \n", link->link_index, diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 15d26222597a..598fa1de54ce 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -362,7 +362,7 @@ union dmub_fw_boot_status { uint32_t defer_load : 1; /**< 1 if VBIOS data is deferred programmed */ uint32_t reserved : 1; uint32_t detection_required: 1; /**< if detection need to be triggered by driver */ - + uint32_t hw_power_init_done: 1; /**< 1 if hw power init is completed */ } bits; /**< status bits */ uint32_t all; /**< 32-bit access to status bits */ }; @@ -377,6 +377,7 @@ enum dmub_fw_boot_status_bit { DMUB_FW_BOOT_STATUS_BIT_RESTORE_REQUIRED = (1 << 3), /**< 1 if driver should call restore */ DMUB_FW_BOOT_STATUS_BIT_DEFERRED_LOADED = (1 << 4), /**< 1 if VBIOS data is deferred programmed */ DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED = (1 << 6), /**< 1 if detection need to be triggered by driver*/ + DMUB_FW_BOOT_STATUS_BIT_HW_POWER_INIT_DONE = (1 << 7), /**< 1 if hw power init is completed */ }; /* Register bit definition for SCRATCH5 */ @@ -1104,7 +1105,12 @@ enum dmub_cmd_idle_opt_type { /** * DCN hardware save. */ - DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT = 1 + DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT = 1, + + /** + * DCN hardware notify idle. + */ + DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2 }; /** @@ -1115,6 +1121,24 @@ struct dmub_rb_cmd_idle_opt_dcn_restore { }; /** + * struct dmub_dcn_notify_idle_cntl_data - Data passed to FW in a DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE command. + */ +struct dmub_dcn_notify_idle_cntl_data { + uint8_t driver_idle; + uint8_t d3_entry; + uint8_t trigger; + uint8_t pad[1]; +}; + +/** + * struct dmub_rb_cmd_idle_opt_dcn_notify_idle - Data passed to FW in a DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE command. 
+ */ +struct dmub_rb_cmd_idle_opt_dcn_notify_idle { + struct dmub_cmd_header header; /**< header */ + struct dmub_dcn_notify_idle_cntl_data cntl_data; +}; + +/** * struct dmub_clocks - Clock update notification. */ struct dmub_clocks { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index a76da0131add..9c20516be066 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -130,12 +130,13 @@ void dmub_dcn32_reset(struct dmub_srv *dmub) REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); REG_WRITE(DMCUB_OUTBOX1_WPTR, 0); + REG_WRITE(DMCUB_OUTBOX0_RPTR, 0); + REG_WRITE(DMCUB_OUTBOX0_WPTR, 0); REG_WRITE(DMCUB_SCRATCH0, 0); } void dmub_dcn32_reset_release(struct dmub_srv *dmub) { - REG_WRITE(DMCUB_GPINT_DATAIN1, 0); REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF); REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); diff --git a/drivers/gpu/drm/amd/display/include/signal_types.h b/drivers/gpu/drm/amd/display/include/signal_types.h index beed70179bb5..23a308c3eccb 100644 --- a/drivers/gpu/drm/amd/display/include/signal_types.h +++ b/drivers/gpu/drm/amd/display/include/signal_types.h @@ -104,6 +104,7 @@ static inline bool dc_is_audio_capable_signal(enum signal_type signal) { return (signal == SIGNAL_TYPE_DISPLAY_PORT || signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + signal == SIGNAL_TYPE_VIRTUAL || dc_is_hdmi_signal(signal)); } diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 315da61ee897..5c41a4751db4 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -1129,7 +1129,6 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, { struct core_freesync *core_freesync = NULL; unsigned int last_render_time_in_us = 0; - unsigned int average_render_time_in_us = 0; if (mod_freesync == NULL) return; @@ -1138,7 +1137,6 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, if (in_out_vrr->supported && in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE) { - unsigned int i = 0; unsigned int oldest_index = plane->time.index + 1; if (oldest_index >= DC_PLANE_UPDATE_TIMES_MAX) @@ -1147,18 +1145,6 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, last_render_time_in_us = curr_time_stamp_in_us - plane->time.prev_update_time_in_us; - /* Sum off all entries except oldest one */ - for (i = 0; i < DC_PLANE_UPDATE_TIMES_MAX; i++) { - average_render_time_in_us += - plane->time.time_elapsed_in_us[i]; - } - average_render_time_in_us -= - plane->time.time_elapsed_in_us[oldest_index]; - - /* Add render time for current flip */ - average_render_time_in_us += last_render_time_in_us; - average_render_time_in_us /= DC_PLANE_UPDATE_TIMES_MAX; - if (in_out_vrr->btr.btr_enabled) { apply_below_the_range(core_freesync, stream, diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h index bd129266ebfd..a84a7cfaf71e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h @@ -135,6 +135,8 @@ #define mmIH_RB_WPTR_ADDR_LO_BASE_IDX 0 #define mmIH_DOORBELL_RPTR 0x0087 #define mmIH_DOORBELL_RPTR_BASE_IDX 0 +#define mmIH_DOORBELL_RETRY_CAM 0x0088 
+#define mmIH_DOORBELL_RETRY_CAM_BASE_IDX 0 #define mmIH_RB_CNTL_RING1 0x008c #define mmIH_RB_CNTL_RING1_BASE_IDX 0 #define mmIH_RB_BASE_RING1 0x008d @@ -159,6 +161,8 @@ #define mmIH_RB_WPTR_RING2_BASE_IDX 0 #define mmIH_DOORBELL_RPTR_RING2 0x009f #define mmIH_DOORBELL_RPTR_RING2_BASE_IDX 0 +#define mmIH_RETRY_CAM_ACK 0x00a4 +#define mmIH_RETRY_CAM_ACK_BASE_IDX 0 #define mmIH_VERSION 0x00a5 #define mmIH_VERSION_BASE_IDX 0 #define mmIH_CNTL 0x00c0 @@ -235,6 +239,8 @@ #define mmIH_MMHUB_ERROR_BASE_IDX 0 #define mmIH_MEM_POWER_CTRL 0x00e8 #define mmIH_MEM_POWER_CTRL_BASE_IDX 0 +#define mmIH_RETRY_INT_CAM_CNTL 0x00e9 +#define mmIH_RETRY_INT_CAM_CNTL_BASE_IDX 0 #define mmIH_REGISTER_LAST_PART2 0x00ff #define mmIH_REGISTER_LAST_PART2_BASE_IDX 0 #define mmSEM_CLK_CTRL 0x0100 diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h index 3ea83ea9ce3a..75c04fc275a0 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h @@ -349,6 +349,17 @@ #define IH_DOORBELL_RPTR_RING2__ENABLE__SHIFT 0x1c #define IH_DOORBELL_RPTR_RING2__OFFSET_MASK 0x03FFFFFFL #define IH_DOORBELL_RPTR_RING2__ENABLE_MASK 0x10000000L +//IH_RETRY_INT_CAM_CNTL +#define IH_RETRY_INT_CAM_CNTL__CAM_SIZE__SHIFT 0x0 +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_SKID_VALUE__SHIFT 0x8 +#define IH_RETRY_INT_CAM_CNTL__ENABLE__SHIFT 0x10 +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_ENABLE__SHIFT 0x11 +#define IH_RETRY_INT_CAM_CNTL__PER_VF_ENTRY_SIZE__SHIFT 0x14 +#define IH_RETRY_INT_CAM_CNTL__CAM_SIZE_MASK 0x0000001FL +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_SKID_VALUE_MASK 0x00003F00L +#define IH_RETRY_INT_CAM_CNTL__ENABLE_MASK 0x00010000L +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_ENABLE_MASK 0x00020000L +#define IH_RETRY_INT_CAM_CNTL__PER_VF_ENTRY_SIZE_MASK 0x00300000L //IH_VERSION #define IH_VERSION__MINVER__SHIFT 0x0 #define IH_VERSION__MAJVER__SHIFT 0x8 diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index b8ff7456ae0b..f8008270f813 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -25,14 +25,14 @@ #define V11_STRUCTS_H_ struct v11_gfx_mqd { - uint32_t reserved_0; // offset: 0 (0x0) - uint32_t reserved_1; // offset: 1 (0x1) - uint32_t reserved_2; // offset: 2 (0x2) - uint32_t reserved_3; // offset: 3 (0x3) - uint32_t reserved_4; // offset: 4 (0x4) - uint32_t reserved_5; // offset: 5 (0x5) - uint32_t reserved_6; // offset: 6 (0x6) - uint32_t reserved_7; // offset: 7 (0x7) + uint32_t shadow_base_lo; // offset: 0 (0x0) + uint32_t shadow_base_hi; // offset: 1 (0x1) + uint32_t gds_bkup_base_lo; // offset: 2 (0x2) + uint32_t gds_bkup_base_hi; // offset: 3 (0x3) + uint32_t fw_work_area_base_lo; // offset: 4 (0x4) + uint32_t fw_work_area_base_hi; // offset: 5 (0x5) + uint32_t shadow_initialized; // offset: 6 (0x6) + uint32_t ib_vmid; // offset: 7 (0x7) uint32_t reserved_8; // offset: 8 (0x8) uint32_t reserved_9; // offset: 9 (0x9) uint32_t reserved_10; // offset: 10 (0xA) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e011041e3ec6..58c2246918fd 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3395,7 +3395,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ effective_mode &= 
~S_IWUSR; - /* not implemented yet for GC 10.3.1 APUs */ + /* In the case of APUs, this is only implemented on Vangogh */ if (((adev->family == AMDGPU_FAMILY_SI) || ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)))) && (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || @@ -3404,7 +3404,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) return 0; - /* not implemented yet for APUs having <= GC 9.3.0 */ + /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || ((adev->flags & AMD_IS_APU) && (gc_ver < IP_VERSION(9, 3, 0)))) && (attr == &sensor_dev_attr_power1_average.dev_attr.attr)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 056ac2b512eb..5633c5797e85 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1712,8 +1712,6 @@ static int smu_display_configuration_change(void *handle, const struct amd_pp_display_configuration *display_config) { struct smu_context *smu = handle; - int index = 0; - int num_of_active_display = 0; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; @@ -1724,11 +1722,6 @@ static int smu_display_configuration_change(void *handle, smu_set_min_dcef_deep_sleep(smu, display_config->min_dcef_deep_sleep_set_clk / 100); - for (index = 0; index < display_config->num_path_including_non_display; index++) { - if (display_config->displays[index].controller_id != 0) - num_of_active_display++; - } - return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 0ef37837b164..df3baaab0037 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -62,6 +62,12 @@ #define CTF_OFFSET_HOTSPOT 5 #define CTF_OFFSET_MEM 5 +extern const int pmfw_decoded_link_speed[5]; +extern const int pmfw_decoded_link_width[7]; + +#define DECODE_GEN_SPEED(gen_speed_idx) (pmfw_decoded_link_speed[gen_speed_idx]) +#define DECODE_LANE_WIDTH(lane_width_idx) (pmfw_decoded_link_width[lane_width_idx]) + struct smu_13_0_max_sustainable_clocks { uint32_t display_clock; uint32_t phy_clock; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 73175c993da9..393c6a7b9609 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -85,6 +85,9 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin"); static const int link_width[] = {0, 1, 2, 4, 8, 12, 16}; static const int link_speed[] = {25, 50, 80, 160}; +const int pmfw_decoded_link_speed[5] = {1, 2, 3, 4, 5}; +const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; + int smu_v13_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e9766fe5656e..09405ef1e3c8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1144,8 +1144,8 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_lane[i] == 5) ? "x12" : (pcie_table->pcie_lane[i] == 6) ? "x16" : "", pcie_table->clk_freq[i], - ((gen_speed - 1) == pcie_table->pcie_gen[i]) && - (lane_width == link_width[pcie_table->pcie_lane[i]]) ? 
+ (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && + (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ? "*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 1b2c82449f20..3d9ff46706fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -575,6 +575,14 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu) dpm_table); if (ret) return ret; + + if (skutable->DriverReportedClocks.GameClockAc && + (dpm_table->dpm_levels[dpm_table->count - 1].value > + skutable->DriverReportedClocks.GameClockAc)) { + dpm_table->dpm_levels[dpm_table->count - 1].value = + skutable->DriverReportedClocks.GameClockAc; + dpm_table->max = skutable->DriverReportedClocks.GameClockAc; + } } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; @@ -828,6 +836,57 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu, return ret; } +static int smu_v13_0_7_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + struct smu_13_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_13_0_dpm_table *dpm_table; + + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + /* uclk dpm table */ + dpm_table = &dpm_context->dpm_tables.uclk_table; + break; + case SMU_GFXCLK: + case SMU_SCLK: + /* gfxclk dpm table */ + dpm_table = &dpm_context->dpm_tables.gfx_table; + break; + case SMU_SOCCLK: + /* socclk dpm table */ + dpm_table = &dpm_context->dpm_tables.soc_table; + break; + case SMU_FCLK: + /* fclk dpm table */ + dpm_table = &dpm_context->dpm_tables.fclk_table; + break; + case SMU_VCLK: + case SMU_VCLK1: + /* vclk dpm table */ + dpm_table = &dpm_context->dpm_tables.vclk_table; + break; + case SMU_DCLK: + case SMU_DCLK1: + /* dclk dpm table */ + dpm_table = &dpm_context->dpm_tables.dclk_table; + break; + default: + dev_err(smu->adev->dev, "Unsupported clock type!\n"); + return -EINVAL; + } + + if (min) + *min = dpm_table->min; + if (max) + *max = dpm_table->max; + + return 0; +} + static int smu_v13_0_7_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, @@ -1074,8 +1133,8 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_lane[i] == 5) ? "x12" : (pcie_table->pcie_lane[i] == 6) ? "x16" : "", pcie_table->clk_freq[i], - (gen_speed == pcie_table->pcie_gen[i]) && - (lane_width == pcie_table->pcie_lane[i]) ? + (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && + (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ? 
"*" : ""); break; @@ -1329,9 +1388,17 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.fclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + DriverReportedClocks_t driver_clocks = + pptable->SkuTable.DriverReportedClocks; pstate_table->gfxclk_pstate.min = gfx_table->min; - pstate_table->gfxclk_pstate.peak = gfx_table->max; + if (driver_clocks.GameClockAc && + (driver_clocks.GameClockAc < gfx_table->max)) + pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc; + else + pstate_table->gfxclk_pstate.peak = gfx_table->max; pstate_table->uclk_pstate.min = mem_table->min; pstate_table->uclk_pstate.peak = mem_table->max; @@ -1348,12 +1415,12 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) pstate_table->fclk_pstate.min = fclk_table->min; pstate_table->fclk_pstate.peak = fclk_table->max; - /* - * For now, just use the mininum clock frequency. - * TODO: update them when the real pstate settings available - */ - pstate_table->gfxclk_pstate.standard = gfx_table->min; - pstate_table->uclk_pstate.standard = mem_table->min; + if (driver_clocks.BaseClockAc && + driver_clocks.BaseClockAc < gfx_table->max) + pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc; + else + pstate_table->gfxclk_pstate.standard = gfx_table->max; + pstate_table->uclk_pstate.standard = mem_table->max; pstate_table->socclk_pstate.standard = soc_table->min; pstate_table->vclk_pstate.standard = vclk_table->min; pstate_table->dclk_pstate.standard = dclk_table->min; @@ -1678,7 +1745,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable, .init_pptable_microcode = smu_v13_0_init_pptable_microcode, .populate_umd_state_clk = smu_v13_0_7_populate_umd_state_clk, - .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = smu_v13_0_7_get_dpm_ultimate_freq, .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, .read_sensor = smu_v13_0_7_read_sensor, .feature_is_enabled = smu_cmn_feature_is_enabled, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 16c539657f73..6b73fb7a83c3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -16,7 +16,6 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_drv.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_file.h> #include <drm/drm_fourcc.h> #include <drm/drm_ioctl.h> @@ -108,7 +107,6 @@ static const struct drm_driver exynos_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC | DRIVER_RENDER, .open = exynos_drm_open, - .lastclose = drm_fb_helper_lastclose, .postclose = exynos_drm_postclose, .dumb_create = exynos_drm_gem_dumb_create, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, @@ -288,19 +286,15 @@ static int exynos_drm_bind(struct device *dev) /* init kms poll for handling hpd */ drm_kms_helper_poll_init(drm); - ret = exynos_drm_fbdev_init(drm); - if (ret) - goto err_cleanup_poll; - /* register the DRM device */ ret = drm_dev_register(drm, 0); if (ret < 0) - goto err_cleanup_fbdev; + goto err_cleanup_poll; + + exynos_drm_fbdev_setup(drm); return 0; -err_cleanup_fbdev: - exynos_drm_fbdev_fini(drm); err_cleanup_poll: drm_kms_helper_poll_fini(drm); err_unbind_all: @@ -321,7 +315,6 @@ static void exynos_drm_unbind(struct 
device *dev) drm_dev_unregister(drm); - exynos_drm_fbdev_fini(drm); drm_kms_helper_poll_fini(drm); component_unbind_all(drm->dev, drm); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 6ae9056e7a18..81d501efd013 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -197,8 +197,6 @@ struct drm_exynos_file_private { * @wait: wait an atomic commit to finish */ struct exynos_drm_private { - struct drm_fb_helper *fb_helper; - struct device *g2d_dev; struct device *dma_dev; void *mapping; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 97f2dee2db29..fc1c5608db96 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -11,7 +11,6 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_framebuffer_helper.h> @@ -157,7 +156,6 @@ static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = { static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = { .fb_create = exynos_user_fb_create, - .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 4929ffe5a09a..ea4b3d248aac 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -8,16 +8,12 @@ * Seung-Woo Kim <sw0312.kim@samsung.com> */ -#include <linux/console.h> -#include <linux/dma-mapping.h> -#include <linux/vmalloc.h> - -#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_drv.h> #include <drm/drm_fb_helper.h> -#include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_prime.h> -#include <drm/drm_probe_helper.h> #include <drm/exynos_drm.h> #include "exynos_drm_drv.h" @@ -27,22 +23,26 @@ #define MAX_CONNECTOR 4 #define PREFERRED_BPP 32 -#define to_exynos_fbdev(x) container_of(x, struct exynos_drm_fbdev,\ - drm_fb_helper) +static int exynos_drm_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + struct drm_fb_helper *helper = info->par; + struct drm_gem_object *obj = drm_gem_fb_get_obj(helper->fb, 0); -struct exynos_drm_fbdev { - struct drm_fb_helper drm_fb_helper; - struct exynos_drm_gem *exynos_gem; -}; + return drm_gem_prime_mmap(obj, vma); +} -static int exynos_drm_fb_mmap(struct fb_info *info, - struct vm_area_struct *vma) +static void exynos_drm_fb_destroy(struct fb_info *info) { - struct drm_fb_helper *helper = info->par; - struct exynos_drm_fbdev *exynos_fbd = to_exynos_fbdev(helper); - struct exynos_drm_gem *exynos_gem = exynos_fbd->exynos_gem; + struct drm_fb_helper *fb_helper = info->par; + struct drm_framebuffer *fb = fb_helper->fb; + + drm_fb_helper_fini(fb_helper); - return drm_gem_prime_mmap(&exynos_gem->base, vma); + drm_framebuffer_remove(fb); + + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); } static const struct fb_ops exynos_drm_fb_ops = { @@ -54,6 +54,7 @@ static const struct fb_ops exynos_drm_fb_ops = { .fb_fillrect = drm_fb_helper_cfb_fillrect, .fb_copyarea = drm_fb_helper_cfb_copyarea, .fb_imageblit = drm_fb_helper_cfb_imageblit, + .fb_destroy = exynos_drm_fb_destroy, 
}; static int exynos_drm_fbdev_update(struct drm_fb_helper *helper, @@ -89,7 +90,6 @@ static int exynos_drm_fbdev_update(struct drm_fb_helper *helper, static int exynos_drm_fbdev_create(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { - struct exynos_drm_fbdev *exynos_fbdev = to_exynos_fbdev(helper); struct exynos_drm_gem *exynos_gem; struct drm_device *dev = helper->dev; struct drm_mode_fb_cmd2 mode_cmd = { 0 }; @@ -113,8 +113,6 @@ static int exynos_drm_fbdev_create(struct drm_fb_helper *helper, if (IS_ERR(exynos_gem)) return PTR_ERR(exynos_gem); - exynos_fbdev->exynos_gem = exynos_gem; - helper->fb = exynos_drm_framebuffer_init(dev, &mode_cmd, &exynos_gem, 1); if (IS_ERR(helper->fb)) { @@ -127,19 +125,13 @@ static int exynos_drm_fbdev_create(struct drm_fb_helper *helper, if (ret < 0) goto err_destroy_framebuffer; - return ret; + return 0; err_destroy_framebuffer: drm_framebuffer_cleanup(helper->fb); + helper->fb = NULL; err_destroy_gem: exynos_drm_gem_destroy(exynos_gem); - - /* - * if failed, all resources allocated above would be released by - * drm_mode_config_cleanup() when drm_load() had been called prior - * to any specific driver such as fimd or hdmi driver. - */ - return ret; } @@ -147,80 +139,92 @@ static const struct drm_fb_helper_funcs exynos_drm_fb_helper_funcs = { .fb_probe = exynos_drm_fbdev_create, }; -int exynos_drm_fbdev_init(struct drm_device *dev) +/* + * struct drm_client + */ + +static void exynos_drm_fbdev_client_unregister(struct drm_client_dev *client) { - struct exynos_drm_fbdev *fbdev; - struct exynos_drm_private *private = dev->dev_private; - struct drm_fb_helper *helper; - int ret; + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + + if (fb_helper->info) { + drm_fb_helper_unregister_info(fb_helper); + } else { + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); + } +} + +static int exynos_drm_fbdev_client_restore(struct drm_client_dev *client) +{ + drm_fb_helper_lastclose(client->dev); - if (!dev->mode_config.num_crtc) - return 0; + return 0; +} - fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL); - if (!fbdev) - return -ENOMEM; +static int exynos_drm_fbdev_client_hotplug(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + struct drm_device *dev = client->dev; + int ret; - private->fb_helper = helper = &fbdev->drm_fb_helper; + if (dev->fb_helper) + return drm_fb_helper_hotplug_event(dev->fb_helper); - drm_fb_helper_prepare(dev, helper, PREFERRED_BPP, &exynos_drm_fb_helper_funcs); + ret = drm_fb_helper_init(dev, fb_helper); + if (ret) + goto err_drm_err; - ret = drm_fb_helper_init(dev, helper); - if (ret < 0) { - DRM_DEV_ERROR(dev->dev, - "failed to initialize drm fb helper.\n"); - goto err_init; - } + if (!drm_drv_uses_atomic_modeset(dev)) + drm_helper_disable_unused_functions(dev); - ret = drm_fb_helper_initial_config(helper); - if (ret < 0) { - DRM_DEV_ERROR(dev->dev, - "failed to set up hw configuration.\n"); - goto err_setup; - } + ret = drm_fb_helper_initial_config(fb_helper); + if (ret) + goto err_drm_fb_helper_fini; return 0; -err_setup: - drm_fb_helper_fini(helper); -err_init: - drm_fb_helper_unprepare(helper); - private->fb_helper = NULL; - kfree(fbdev); - +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); +err_drm_err: + drm_err(dev, "Failed to setup fbdev emulation (ret=%d)\n", ret); return ret; } -static void exynos_drm_fbdev_destroy(struct drm_device *dev, - struct drm_fb_helper *fb_helper) +static 
const struct drm_client_funcs exynos_drm_fbdev_client_funcs = { + .owner = THIS_MODULE, + .unregister = exynos_drm_fbdev_client_unregister, + .restore = exynos_drm_fbdev_client_restore, + .hotplug = exynos_drm_fbdev_client_hotplug, +}; + +void exynos_drm_fbdev_setup(struct drm_device *dev) { - struct drm_framebuffer *fb; + struct drm_fb_helper *fb_helper; + int ret; - /* release drm framebuffer and real buffer */ - if (fb_helper->fb && fb_helper->fb->funcs) { - fb = fb_helper->fb; - if (fb) - drm_framebuffer_remove(fb); - } + drm_WARN(dev, !dev->registered, "Device has not been registered.\n"); + drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n"); - drm_fb_helper_unregister_info(fb_helper); + fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); + if (!fb_helper) + return; + drm_fb_helper_prepare(dev, fb_helper, PREFERRED_BPP, &exynos_drm_fb_helper_funcs); - drm_fb_helper_fini(fb_helper); -} + ret = drm_client_init(dev, &fb_helper->client, "fbdev", &exynos_drm_fbdev_client_funcs); + if (ret) + goto err_drm_client_init; -void exynos_drm_fbdev_fini(struct drm_device *dev) -{ - struct exynos_drm_private *private = dev->dev_private; - struct exynos_drm_fbdev *fbdev; + ret = exynos_drm_fbdev_client_hotplug(&fb_helper->client); + if (ret) + drm_dbg_kms(dev, "client hotplug ret=%d\n", ret); - if (!private || !private->fb_helper) - return; + drm_client_register(&fb_helper->client); - fbdev = to_exynos_fbdev(private->fb_helper); + return; - exynos_drm_fbdev_destroy(dev, private->fb_helper); - drm_fb_helper_unprepare(private->fb_helper); - kfree(fbdev); - private->fb_helper = NULL; +err_drm_client_init: + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); } - diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h index 3b1e98e84580..1e1dea627cd9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h @@ -12,27 +12,11 @@ #define _EXYNOS_DRM_FBDEV_H_ #ifdef CONFIG_DRM_FBDEV_EMULATION - -int exynos_drm_fbdev_init(struct drm_device *dev); -void exynos_drm_fbdev_fini(struct drm_device *dev); - +void exynos_drm_fbdev_setup(struct drm_device *dev); #else - -static inline int exynos_drm_fbdev_init(struct drm_device *dev) -{ - return 0; -} - -static inline void exynos_drm_fbdev_fini(struct drm_device *dev) +static inline void exynos_drm_fbdev_setup(struct drm_device *dev) { } - -static inline void exynos_drm_fbdev_restore_mode(struct drm_device *dev) -{ -} - -#define exynos_drm_output_poll_changed (NULL) - #endif #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c index 115d0997fd62..4ebc030e40d1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c @@ -606,7 +606,7 @@ gv100_disp_curs = { .user = 73, }; -const struct nvkm_disp_mthd_list +static const struct nvkm_disp_mthd_list gv100_disp_core_mthd_base = { .mthd = 0x0000, .addr = 0x000000, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c index 1e2eabec1a76..5d28d30d09d5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c @@ -47,7 +47,7 @@ ga100_mc_device_enabled(struct nvkm_mc *mc, u32 mask) return (nvkm_rd32(mc->subdev.device, 0x000600) & mask) == mask; } -const struct nvkm_mc_device_func +static const struct nvkm_mc_device_func ga100_mc_device = { .enabled = ga100_mc_device_enabled, .enable = 
ga100_mc_device_enable, diff --git a/drivers/gpu/drm/omapdrm/omap_debugfs.c b/drivers/gpu/drm/omapdrm/omap_debugfs.c index bfb2ccb40bd1..a3d470468e5b 100644 --- a/drivers/gpu/drm/omapdrm/omap_debugfs.c +++ b/drivers/gpu/drm/omapdrm/omap_debugfs.c @@ -47,15 +47,15 @@ static int fb_show(struct seq_file *m, void *arg) { struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; - struct omap_drm_private *priv = dev->dev_private; + struct drm_fb_helper *helper = dev->fb_helper; struct drm_framebuffer *fb; seq_printf(m, "fbcon "); - omap_framebuffer_describe(priv->fbdev->fb, m); + omap_framebuffer_describe(helper->fb, m); mutex_lock(&dev->mode_config.fb_lock); list_for_each_entry(fb, &dev->mode_config.fb_list, head) { - if (fb == priv->fbdev->fb) + if (fb == helper->fb) continue; seq_printf(m, "user "); diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 699ed814e021..5ed549726104 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -6,6 +6,7 @@ #include <linux/dma-mapping.h> #include <linux/platform_device.h> +#include <linux/of.h> #include <linux/sort.h> #include <linux/sys_soc.h> @@ -14,7 +15,6 @@ #include <drm/drm_bridge.h> #include <drm/drm_bridge_connector.h> #include <drm/drm_drv.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> #include <drm/drm_panel.h> @@ -24,6 +24,7 @@ #include "omap_dmm_tiler.h" #include "omap_drv.h" +#include "omap_fbdev.h" #define DRIVER_NAME MODULE_NAME #define DRIVER_DESC "OMAP DRM" @@ -219,7 +220,6 @@ static const struct drm_mode_config_helper_funcs omap_mode_config_helper_funcs = static const struct drm_mode_config_funcs omap_mode_config_funcs = { .fb_create = omap_framebuffer_create, - .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = omap_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; @@ -652,7 +652,6 @@ static const struct drm_driver omap_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC | DRIVER_RENDER, .open = dev_open, - .lastclose = drm_fb_helper_lastclose, #ifdef CONFIG_DEBUG_FS .debugfs_init = omap_debugfs_init, #endif @@ -741,8 +740,6 @@ static int omapdrm_init(struct omap_drm_private *priv, struct device *dev) goto err_cleanup_modeset; } - omap_fbdev_init(ddev); - drm_kms_helper_poll_init(ddev); /* @@ -753,12 +750,12 @@ static int omapdrm_init(struct omap_drm_private *priv, struct device *dev) if (ret) goto err_cleanup_helpers; + omap_fbdev_setup(ddev); + return 0; err_cleanup_helpers: drm_kms_helper_poll_fini(ddev); - - omap_fbdev_fini(ddev); err_cleanup_modeset: omap_modeset_fini(ddev); err_free_overlays: @@ -784,8 +781,6 @@ static void omapdrm_cleanup(struct omap_drm_private *priv) drm_kms_helper_poll_fini(ddev); - omap_fbdev_fini(ddev); - drm_atomic_helper_shutdown(ddev); omap_modeset_fini(ddev); diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 825960fd3ea9..4c7217b35f6b 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -21,7 +21,6 @@ #include "omap_crtc.h" #include "omap_encoder.h" #include "omap_fb.h" -#include "omap_fbdev.h" #include "omap_gem.h" #include "omap_irq.h" #include "omap_plane.h" @@ -77,8 +76,6 @@ struct omap_drm_private { struct drm_private_obj glob_obj; - struct drm_fb_helper *fbdev; - struct workqueue_struct *wq; /* lock for obj_list below */ diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c 
b/drivers/gpu/drm/omapdrm/omap_fbdev.c index a6c8542087ec..b950e93b3846 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -4,14 +4,17 @@ * Author: Rob Clark <rob@ti.com> */ -#include <drm/drm_crtc.h> -#include <drm/drm_util.h> +#include <drm/drm_drv.h> +#include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_file.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_util.h> #include "omap_drv.h" +#include "omap_fbdev.h" MODULE_PARM_DESC(ywrap, "Enable ywrap scrolling (omap44xx and later, default 'y')"); static bool ywrap_enabled = true; @@ -25,8 +28,6 @@ module_param_named(ywrap, ywrap_enabled, bool, 0644); struct omap_fbdev { struct drm_fb_helper base; - struct drm_framebuffer *fb; - struct drm_gem_object *bo; bool ywrap_enabled; /* for deferred dmm roll when getting called in atomic ctx */ @@ -38,12 +39,14 @@ static struct drm_fb_helper *get_fb(struct fb_info *fbi); static void pan_worker(struct work_struct *work) { struct omap_fbdev *fbdev = container_of(work, struct omap_fbdev, work); - struct fb_info *fbi = fbdev->base.info; + struct drm_fb_helper *helper = &fbdev->base; + struct fb_info *fbi = helper->info; + struct drm_gem_object *bo = drm_gem_fb_get_obj(helper->fb, 0); int npages; /* DMM roll shifts in 4K pages: */ npages = fbi->fix.line_length >> PAGE_SHIFT; - omap_gem_roll(fbdev->bo, fbi->var.yoffset * npages); + omap_gem_roll(bo, fbi->var.yoffset * npages); } static int omap_fbdev_pan_display(struct fb_var_screeninfo *var, @@ -71,6 +74,25 @@ fallback: return drm_fb_helper_pan_display(var, fbi); } +static void omap_fbdev_fb_destroy(struct fb_info *info) +{ + struct drm_fb_helper *helper = info->par; + struct drm_framebuffer *fb = helper->fb; + struct drm_gem_object *bo = drm_gem_fb_get_obj(fb, 0); + struct omap_fbdev *fbdev = to_omap_fbdev(helper); + + DBG(); + + drm_fb_helper_fini(helper); + + omap_gem_unpin(bo); + drm_framebuffer_remove(fb); + + drm_client_release(&helper->client); + drm_fb_helper_unprepare(helper); + kfree(fbdev); +} + static const struct fb_ops omap_fb_ops = { .owner = THIS_MODULE, @@ -86,6 +108,8 @@ static const struct fb_ops omap_fb_ops = { .fb_fillrect = drm_fb_helper_sys_fillrect, .fb_copyarea = drm_fb_helper_sys_copyarea, .fb_imageblit = drm_fb_helper_sys_imageblit, + + .fb_destroy = omap_fbdev_fb_destroy, }; static int omap_fbdev_create(struct drm_fb_helper *helper, @@ -98,6 +122,7 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, union omap_gem_size gsize; struct fb_info *fbi = NULL; struct drm_mode_fb_cmd2 mode_cmd = {0}; + struct drm_gem_object *bo; dma_addr_t dma_addr; int ret; @@ -128,20 +153,20 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, .bytes = PAGE_ALIGN(mode_cmd.pitches[0] * mode_cmd.height), }; DBG("allocating %d bytes for fb %d", gsize.bytes, dev->primary->index); - fbdev->bo = omap_gem_new(dev, gsize, OMAP_BO_SCANOUT | OMAP_BO_WC); - if (!fbdev->bo) { + bo = omap_gem_new(dev, gsize, OMAP_BO_SCANOUT | OMAP_BO_WC); + if (!bo) { dev_err(dev->dev, "failed to allocate buffer object\n"); ret = -ENOMEM; goto fail; } - fb = omap_framebuffer_init(dev, &mode_cmd, &fbdev->bo); + fb = omap_framebuffer_init(dev, &mode_cmd, &bo); if (IS_ERR(fb)) { dev_err(dev->dev, "failed to allocate fb\n"); /* note: if fb creation failed, we can't rely on fb destroy * to unref the bo: */ - drm_gem_object_put(fbdev->bo); + drm_gem_object_put(bo); ret = PTR_ERR(fb); goto fail; } @@ -154,7 +179,7 @@ 
static int omap_fbdev_create(struct drm_fb_helper *helper, * to it). Then we just need to be sure that we are able to re- * pin it in case of an opps. */ - ret = omap_gem_pin(fbdev->bo, &dma_addr); + ret = omap_gem_pin(bo, &dma_addr); if (ret) { dev_err(dev->dev, "could not pin framebuffer\n"); ret = -ENOMEM; @@ -170,17 +195,16 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, DBG("fbi=%p, dev=%p", fbi, dev); - fbdev->fb = fb; helper->fb = fb; fbi->fbops = &omap_fb_ops; drm_fb_helper_fill_info(fbi, helper, sizes); - fbi->screen_buffer = omap_gem_vaddr(fbdev->bo); - fbi->screen_size = fbdev->bo->size; + fbi->screen_buffer = omap_gem_vaddr(bo); + fbi->screen_size = bo->size; fbi->fix.smem_start = dma_addr; - fbi->fix.smem_len = fbdev->bo->size; + fbi->fix.smem_len = bo->size; /* if we have DMM, then we can use it for scrolling by just * shuffling pages around in DMM rather than doing sw blit. @@ -193,7 +217,7 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, DBG("par=%p, %dx%d", fbi->par, fbi->var.xres, fbi->var.yres); - DBG("allocated %dx%d fb", fbdev->fb->width, fbdev->fb->height); + DBG("allocated %dx%d fb", fb->width, fb->height); return 0; @@ -220,75 +244,94 @@ static struct drm_fb_helper *get_fb(struct fb_info *fbi) return fbi->par; } -/* initialize fbdev helper */ -void omap_fbdev_init(struct drm_device *dev) +/* + * struct drm_client + */ + +static void omap_fbdev_client_unregister(struct drm_client_dev *client) { - struct omap_drm_private *priv = dev->dev_private; - struct omap_fbdev *fbdev = NULL; - struct drm_fb_helper *helper; - int ret = 0; + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); - if (!priv->num_pipes) - return; + if (fb_helper->info) { + drm_fb_helper_unregister_info(fb_helper); + } else { + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); + } +} - fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL); - if (!fbdev) - return; +static int omap_fbdev_client_restore(struct drm_client_dev *client) +{ + drm_fb_helper_lastclose(client->dev); - INIT_WORK(&fbdev->work, pan_worker); + return 0; +} - helper = &fbdev->base; +static int omap_fbdev_client_hotplug(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + struct drm_device *dev = client->dev; + int ret; - drm_fb_helper_prepare(dev, helper, 32, &omap_fb_helper_funcs); + if (dev->fb_helper) + return drm_fb_helper_hotplug_event(dev->fb_helper); - ret = drm_fb_helper_init(dev, helper); + ret = drm_fb_helper_init(dev, fb_helper); if (ret) - goto fail; + goto err_drm_err; - ret = drm_fb_helper_initial_config(helper); + ret = drm_fb_helper_initial_config(fb_helper); if (ret) - goto fini; - - priv->fbdev = helper; - - return; + goto err_drm_fb_helper_fini; -fini: - drm_fb_helper_fini(helper); -fail: - drm_fb_helper_unprepare(helper); - kfree(fbdev); + return 0; - dev_warn(dev->dev, "omap_fbdev_init failed\n"); +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); +err_drm_err: + drm_err(dev, "Failed to setup fbdev emulation (ret=%d)\n", ret); + return ret; } -void omap_fbdev_fini(struct drm_device *dev) +static const struct drm_client_funcs omap_fbdev_client_funcs = { + .owner = THIS_MODULE, + .unregister = omap_fbdev_client_unregister, + .restore = omap_fbdev_client_restore, + .hotplug = omap_fbdev_client_hotplug, +}; + +void omap_fbdev_setup(struct drm_device *dev) { - struct omap_drm_private *priv = dev->dev_private; - struct drm_fb_helper *helper = priv->fbdev; struct omap_fbdev 
*fbdev; + struct drm_fb_helper *helper; + int ret; - DBG(); + drm_WARN(dev, !dev->registered, "Device has not been registered.\n"); + drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n"); - if (!helper) + fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL); + if (!fbdev) return; + helper = &fbdev->base; - drm_fb_helper_unregister_info(helper); + drm_fb_helper_prepare(dev, helper, 32, &omap_fb_helper_funcs); - drm_fb_helper_fini(helper); + ret = drm_client_init(dev, &helper->client, "fbdev", &omap_fbdev_client_funcs); + if (ret) + goto err_drm_client_init; + + INIT_WORK(&fbdev->work, pan_worker); - fbdev = to_omap_fbdev(helper); + ret = omap_fbdev_client_hotplug(&helper->client); + if (ret) + drm_dbg_kms(dev, "client hotplug ret=%d\n", ret); - /* unpin the GEM object pinned in omap_fbdev_create() */ - if (fbdev->bo) - omap_gem_unpin(fbdev->bo); + drm_client_register(&helper->client); - /* this will free the backing object */ - if (fbdev->fb) - drm_framebuffer_remove(fbdev->fb); + return; +err_drm_client_init: drm_fb_helper_unprepare(helper); kfree(fbdev); - - priv->fbdev = NULL; } diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.h b/drivers/gpu/drm/omapdrm/omap_fbdev.h index 74a68a5a6eab..74c691a8d45f 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.h +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.h @@ -10,16 +10,11 @@ #define __OMAPDRM_FBDEV_H__ struct drm_device; -struct drm_fb_helper; #ifdef CONFIG_DRM_FBDEV_EMULATION -void omap_fbdev_init(struct drm_device *dev); -void omap_fbdev_fini(struct drm_device *dev); +void omap_fbdev_setup(struct drm_device *dev); #else -static inline void omap_fbdev_init(struct drm_device *dev) -{ -} -static inline void omap_fbdev_fini(struct drm_device *dev) +static inline void omap_fbdev_setup(struct drm_device *dev) { } #endif diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35950.c b/drivers/gpu/drm/panel/panel-novatek-nt35950.c index abf752b36a52..8b108ac80b55 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt35950.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt35950.c @@ -585,8 +585,12 @@ static int nt35950_probe(struct mipi_dsi_device *dsi) DRM_MODE_CONNECTOR_DSI); ret = drm_panel_of_backlight(&nt->panel); - if (ret) + if (ret) { + if (num_dsis == 2) + mipi_dsi_device_unregister(nt->dsi[1]); + return dev_err_probe(dev, ret, "Failed to get backlight\n"); + } drm_panel_add(&nt->panel); @@ -602,6 +606,10 @@ static int nt35950_probe(struct mipi_dsi_device *dsi) ret = mipi_dsi_attach(nt->dsi[i]); if (ret < 0) { + /* If we fail to attach to either host, we're done */ + if (num_dsis == 2) + mipi_dsi_device_unregister(nt->dsi[1]); + return dev_err_probe(dev, ret, "Cannot attach to DSI%d host.\n", i); } diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c index b4729a94c34a..898b892f1143 100644 --- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c +++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c @@ -471,7 +471,7 @@ static int otm8009a_probe(struct mipi_dsi_device *dsi) DRM_MODE_CONNECTOR_DSI); ctx->bl_dev = devm_backlight_device_register(dev, dev_name(dev), - dsi->host->dev, ctx, + dev, ctx, &otm8009a_backlight_ops, NULL); if (IS_ERR(ctx->bl_dev)) { diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 15d04a0ec623..e0a8890a62e2 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -507,12 +507,19 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) { struct drm_sched_entity *entity = 
sched_job->entity; bool first; + ktime_t submit_ts; trace_drm_sched_job(sched_job, entity); atomic_inc(entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); + + /* + * After the sched_job is pushed into the entity queue, it may be + * completed and freed up at any time. We can no longer access it. + * Make sure to set the submit_ts first, to avoid a race. + */ + sched_job->submit_ts = submit_ts = ktime_get(); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); - sched_job->submit_ts = ktime_get(); /* first job wakes up scheduler */ if (first) { @@ -529,7 +536,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) spin_unlock(&entity->rq_lock); if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_update_fifo(entity, sched_job->submit_ts); + drm_sched_rq_update_fifo(entity, submit_ts); drm_sched_wakeup(entity->rq->sched); } diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 18c342a919a2..dfce896c4bae 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -47,11 +47,6 @@ #include "ttm_module.h" -#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) -#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) -/* Some architectures have a weird PMD_SHIFT */ -#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) - /** * struct ttm_pool_dma - Helper object for coherent DMA mappings * @@ -70,11 +65,11 @@ module_param(page_pool_size, ulong, 0644); static atomic_long_t allocated_pages; -static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; -static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; +static struct ttm_pool_type global_write_combined[MAX_ORDER]; +static struct ttm_pool_type global_uncached[MAX_ORDER]; -static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; -static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; +static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; +static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; @@ -449,7 +444,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, else gfp_flags |= GFP_HIGHUSER; - for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); + for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); num_pages; order = min_t(unsigned int, order, __fls(num_pages))) { struct ttm_pool_type *pt; @@ -568,7 +563,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, if (use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < TTM_DIM_ORDER; ++j) + for (j = 0; j < MAX_ORDER; ++j) ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } @@ -588,7 +583,7 @@ void ttm_pool_fini(struct ttm_pool *pool) if (pool->use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < TTM_DIM_ORDER; ++j) + for (j = 0; j < MAX_ORDER; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } @@ -642,7 +637,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) unsigned int i; seq_puts(m, "\t "); - for (i = 0; i < TTM_DIM_ORDER; ++i) + for (i = 0; i < MAX_ORDER; ++i) seq_printf(m, " ---%2u---", i); seq_puts(m, "\n"); } @@ -653,7 +648,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, { unsigned int i; - for (i = 0; i < TTM_DIM_ORDER; ++i) + for (i = 0; i < MAX_ORDER; ++i) seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); seq_puts(m, "\n"); } @@ -756,16 +751,13 @@ int ttm_pool_mgr_init(unsigned long 
num_pages) { unsigned int i; - BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); - BUILD_BUG_ON(TTM_DIM_ORDER < 1); - if (!page_pool_size) page_pool_size = num_pages; spin_lock_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); - for (i = 0; i < TTM_DIM_ORDER; ++i) { + for (i = 0; i < MAX_ORDER; ++i) { ttm_pool_type_init(&global_write_combined[i], NULL, ttm_write_combined, i); ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); @@ -798,7 +790,7 @@ void ttm_pool_mgr_fini(void) { unsigned int i; - for (i = 0; i < TTM_DIM_ORDER; ++i) { + for (i = 0; i < MAX_ORDER; ++i) { ttm_pool_type_fini(&global_write_combined[i]); ttm_pool_type_fini(&global_uncached[i]); diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 1e6db0121ccd..563b3dfeb9b9 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -57,8 +57,6 @@ V3D_INT_FLDONE | \ V3D_INT_FRDONE) -DECLARE_WAIT_QUEUE_HEAD(render_wait); - static void vc4_overflow_mem_work(struct work_struct *work) { diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c index 991857125bb4..5ce70dd946aa 100644 --- a/drivers/gpu/drm/vkms/vkms_output.c +++ b/drivers/gpu/drm/vkms/vkms_output.c @@ -4,21 +4,19 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> #include <drm/drm_probe_helper.h> -#include <drm/drm_simple_kms_helper.h> - -static void vkms_connector_destroy(struct drm_connector *connector) -{ - drm_connector_cleanup(connector); -} static const struct drm_connector_funcs vkms_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = vkms_connector_destroy, + .destroy = drm_connector_cleanup, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; +static const struct drm_encoder_funcs vkms_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + static int vkms_conn_get_modes(struct drm_connector *connector) { int count; @@ -91,7 +89,8 @@ int vkms_output_init(struct vkms_device *vkmsdev, int index) drm_connector_helper_add(connector, &vkms_conn_helper_funcs); - ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL); + ret = drm_encoder_init(dev, encoder, &vkms_encoder_funcs, + DRM_MODE_ENCODER_VIRTUAL, NULL); if (ret) { DRM_ERROR("Failed to init encoder\n"); goto err_encoder; diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c index b3f8a115cc23..c41cec7dcb70 100644 --- a/drivers/gpu/drm/vkms/vkms_plane.c +++ b/drivers/gpu/drm/vkms/vkms_plane.c @@ -132,7 +132,6 @@ static int vkms_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct drm_crtc_state *crtc_state; - bool can_position = false; int ret; if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc)) @@ -143,20 +142,13 @@ static int vkms_plane_atomic_check(struct drm_plane *plane, if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); - if (plane->type != DRM_PLANE_TYPE_PRIMARY) - can_position = true; - ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, DRM_PLANE_NO_SCALING, DRM_PLANE_NO_SCALING, - can_position, true); + true, true); if (ret != 0) return ret; - /* for now primary plane must be visible and full screen */ - if (!new_plane_state->visible && !can_position) - return -EINVAL; - return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 
2588615a2a38..8b24ecf60e3e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -45,6 +45,9 @@ #include <drm/ttm/ttm_placement.h> #include <generated/utsrelease.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif #include <linux/cc_platform.h> #include <linux/dma-mapping.h> #include <linux/module.h> @@ -897,6 +900,16 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) cap2_names, ARRAY_SIZE(cap2_names)); } + if (!vmwgfx_supported(dev_priv)) { + vmw_disable_backdoor(); + drm_err_once(&dev_priv->drm, + "vmwgfx seems to be running on an unsupported hypervisor."); + drm_err_once(&dev_priv->drm, + "This configuration is likely broken."); + drm_err_once(&dev_priv->drm, + "Please switch to a supported graphics device to avoid problems."); + } + ret = vmw_dma_select_mode(dev_priv); if (unlikely(ret != 0)) { drm_info(&dev_priv->drm, @@ -1320,6 +1333,22 @@ static void vmw_master_drop(struct drm_device *dev, vmw_kms_legacy_hotspot_clear(dev_priv); } +bool vmwgfx_supported(struct vmw_private *vmw) +{ +#if defined(CONFIG_X86) + return hypervisor_is_type(X86_HYPER_VMWARE); +#elif defined(CONFIG_ARM64) + /* + * On aarch64 only svga3 is supported + */ + return vmw->pci_id == VMWGFX_PCI_ID_SVGA3; +#else + drm_warn_once(&vmw->drm, + "vmwgfx is running on an unknown architecture."); + return false; +#endif +} + /** * __vmw_svga_enable - Enable SVGA mode, FIFO and use of VRAM. * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index fb8f0c0642c0..3810a9984a7f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -773,6 +773,7 @@ static inline u32 vmw_max_num_uavs(struct vmw_private *dev_priv) extern void vmw_svga_enable(struct vmw_private *dev_priv); extern void vmw_svga_disable(struct vmw_private *dev_priv); +bool vmwgfx_supported(struct vmw_private *vmw); /** @@ -1358,6 +1359,7 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, struct vmw_diff_cpy *diff); /* Host messaging -vmwgfx_msg.c: */ +void vmw_disable_backdoor(void); int vmw_host_get_guestinfo(const char *guest_info_param, char *buffer, size_t *length); __printf(1, 2) int vmw_host_printf(const char *fmt, ...); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 5162a7a12792..b62207be3363 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1396,70 +1396,10 @@ static void vmw_framebuffer_bo_destroy(struct drm_framebuffer *framebuffer) kfree(vfbd); } -static int vmw_framebuffer_bo_dirty(struct drm_framebuffer *framebuffer, - struct drm_file *file_priv, - unsigned int flags, unsigned int color, - struct drm_clip_rect *clips, - unsigned int num_clips) -{ - struct vmw_private *dev_priv = vmw_priv(framebuffer->dev); - struct vmw_framebuffer_bo *vfbd = - vmw_framebuffer_to_vfbd(framebuffer); - struct drm_clip_rect norect; - int ret, increment = 1; - - drm_modeset_lock_all(&dev_priv->drm); - - if (!num_clips) { - num_clips = 1; - clips = &norect; - norect.x1 = norect.y1 = 0; - norect.x2 = framebuffer->width; - norect.y2 = framebuffer->height; - } else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) { - num_clips /= 2; - increment = 2; - } - - switch (dev_priv->active_display_unit) { - case vmw_du_legacy: - ret = vmw_kms_ldu_do_bo_dirty(dev_priv, &vfbd->base, 0, 0, - clips, num_clips, increment); - break; - default: - ret = -EINVAL; - WARN_ONCE(true, "Dirty called with invalid display system.\n"); - break; - } - - 
vmw_cmd_flush(dev_priv, false); - - drm_modeset_unlock_all(&dev_priv->drm); - - return ret; -} - -static int vmw_framebuffer_bo_dirty_ext(struct drm_framebuffer *framebuffer, - struct drm_file *file_priv, - unsigned int flags, unsigned int color, - struct drm_clip_rect *clips, - unsigned int num_clips) -{ - struct vmw_private *dev_priv = vmw_priv(framebuffer->dev); - - if (dev_priv->active_display_unit == vmw_du_legacy && - vmw_cmd_supported(dev_priv)) - return vmw_framebuffer_bo_dirty(framebuffer, file_priv, flags, - color, clips, num_clips); - - return drm_atomic_helper_dirtyfb(framebuffer, file_priv, flags, color, - clips, num_clips); -} - static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { .create_handle = vmw_framebuffer_bo_create_handle, .destroy = vmw_framebuffer_bo_destroy, - .dirty = vmw_framebuffer_bo_dirty_ext, + .dirty = drm_atomic_helper_dirtyfb, }; /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 3de7b4b6a230..db81e635dc06 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -507,11 +507,6 @@ void vmw_du_connector_destroy_state(struct drm_connector *connector, */ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv); int vmw_kms_ldu_close_display(struct vmw_private *dev_priv); -int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, - struct vmw_framebuffer *framebuffer, - unsigned int flags, unsigned int color, - struct drm_clip_rect *clips, - unsigned int num_clips, int increment); int vmw_kms_update_proxy(struct vmw_resource *res, const struct drm_clip_rect *clips, unsigned num_clips, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index c0e42f2ed144..a82fa9700370 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -275,6 +275,7 @@ static const struct drm_crtc_funcs vmw_legacy_crtc_funcs = { .atomic_duplicate_state = vmw_du_crtc_duplicate_state, .atomic_destroy_state = vmw_du_crtc_destroy_state, .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, }; @@ -314,6 +315,12 @@ static const struct drm_connector_helper_funcs vmw_ldu_connector_helper_funcs = { }; +static int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, + struct vmw_framebuffer *framebuffer, + unsigned int flags, unsigned int color, + struct drm_mode_rect *clips, + unsigned int num_clips); + /* * Legacy Display Plane Functions */ @@ -332,7 +339,6 @@ vmw_ldu_primary_plane_atomic_update(struct drm_plane *plane, struct drm_framebuffer *fb; struct drm_crtc *crtc = new_state->crtc ?: old_state->crtc; - ldu = vmw_crtc_to_ldu(crtc); dev_priv = vmw_priv(plane->dev); fb = new_state->fb; @@ -345,8 +351,31 @@ vmw_ldu_primary_plane_atomic_update(struct drm_plane *plane, vmw_ldu_del_active(dev_priv, ldu); vmw_ldu_commit_list(dev_priv); -} + if (vfb && vmw_cmd_supported(dev_priv)) { + struct drm_mode_rect fb_rect = { + .x1 = 0, + .y1 = 0, + .x2 = vfb->base.width, + .y2 = vfb->base.height + }; + struct drm_mode_rect *damage_rects = drm_plane_get_damage_clips(new_state); + u32 rect_count = drm_plane_get_damage_clips_count(new_state); + int ret; + + if (!damage_rects) { + damage_rects = &fb_rect; + rect_count = 1; + } + + ret = vmw_kms_ldu_do_bo_dirty(dev_priv, vfb, 0, 0, damage_rects, rect_count); + + drm_WARN_ONCE(plane->dev, ret, + "vmw_kms_ldu_do_bo_dirty failed with: ret=%d\n", ret); + + vmw_cmd_flush(dev_priv, false); + } +} static const struct drm_plane_funcs vmw_ldu_plane_funcs = { 
.update_plane = drm_atomic_helper_update_plane, @@ -577,11 +606,11 @@ int vmw_kms_ldu_close_display(struct vmw_private *dev_priv) } -int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, - struct vmw_framebuffer *framebuffer, - unsigned int flags, unsigned int color, - struct drm_clip_rect *clips, - unsigned int num_clips, int increment) +static int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, + struct vmw_framebuffer *framebuffer, + unsigned int flags, unsigned int color, + struct drm_mode_rect *clips, + unsigned int num_clips) { size_t fifo_size; int i; @@ -597,7 +626,7 @@ int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, return -ENOMEM; memset(cmd, 0, fifo_size); - for (i = 0; i < num_clips; i++, clips += increment) { + for (i = 0; i < num_clips; i++, clips++) { cmd[i].header = SVGA_CMD_UPDATE; cmd[i].body.x = clips->x1; cmd[i].body.y = clips->y1; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index e76976a95a1e..2651fe0ef518 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -703,32 +703,6 @@ static inline void hypervisor_ppn_remove(PPN64 pfn) #define MKSSTAT_KERNEL_DESCRIPTION "vmwgfx" /** - * mksstat_init_record: Initializes an MKSGuestStatCounter-based record - * for the respective mksGuestStat index. - * - * @stat_idx: Index of the MKSGuestStatCounter-based mksGuestStat record. - * @pstat: Pointer to array of MKSGuestStatCounterTime. - * @pinfo: Pointer to array of MKSGuestStatInfoEntry. - * @pstrs: Pointer to current end of the name/description sequence. - * Return: Pointer to the new end of the names/description sequence. - */ - -static inline char *mksstat_init_record(mksstat_kern_stats_t stat_idx, - MKSGuestStatCounterTime *pstat, MKSGuestStatInfoEntry *pinfo, char *pstrs) -{ - char *const pstrd = pstrs + strlen(mksstat_kern_name_desc[stat_idx][0]) + 1; - strcpy(pstrs, mksstat_kern_name_desc[stat_idx][0]); - strcpy(pstrd, mksstat_kern_name_desc[stat_idx][1]); - - pinfo[stat_idx].name.s = pstrs; - pinfo[stat_idx].description.s = pstrd; - pinfo[stat_idx].flags = MKS_GUEST_STAT_FLAG_NONE; - pinfo[stat_idx].stat.counter = (MKSGuestStatCounter *)&pstat[stat_idx]; - - return pstrd + strlen(mksstat_kern_name_desc[stat_idx][1]) + 1; -} - -/** * mksstat_init_record_time: Initializes an MKSGuestStatCounterTime-based record * for the respective mksGuestStat index. * @@ -1205,3 +1179,12 @@ int vmw_mksstat_remove_ioctl(struct drm_device *dev, void *data, return -EAGAIN; } + +/** + * vmw_disable_backdoor: Disables all backdoor communication + * with the hypervisor. + */ +void vmw_disable_backdoor(void) +{ + vmw_msg_enabled = 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index 8d171d71cb8a..7e112319a23c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -53,12 +53,6 @@ struct vmw_overlay { struct vmw_stream stream[VMW_MAX_NUM_STREAMS]; }; -static inline struct vmw_overlay *vmw_overlay(struct drm_device *dev) -{ - struct vmw_private *dev_priv = vmw_priv(dev); - return dev_priv ? dev_priv->overlay_priv : NULL; -} - struct vmw_escape_header { uint32_t cmd; SVGAFifoCmdEscape body; |