diff options
Diffstat (limited to 'drivers/gpu/drm')
570 files changed, 12277 insertions, 4660 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 610e159d362a..1160a439e92a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -185,7 +185,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" depends on STACKTRACE_SUPPORT select STACKDEPOT - depends on DRM_KMS_HELPER + select DRM_KMS_HELPER depends on DEBUG_KERNEL depends on EXPERT help @@ -211,6 +211,18 @@ config DRM_DEBUG_MODESET_LOCK If in doubt, say "N". +config DRM_CLIENT_SELECTION + bool + depends on DRM + select DRM_CLIENT_SETUP if DRM_FBDEV_EMULATION + help + Drivers that support in-kernel DRM clients have to select this + option. + +config DRM_CLIENT_SETUP + bool + depends on DRM_CLIENT_SELECTION + config DRM_FBDEV_EMULATION bool "Enable legacy fbdev support for your modesetting driver" depends on DRM @@ -486,6 +498,10 @@ config DRM_HYPERV config DRM_EXPORT_FOR_TESTS bool +# Separate option as not all DRM drivers use it +config DRM_PANEL_BACKLIGHT_QUIRKS + tristate + config DRM_LIB_RANDOM bool default n diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 784229d4504d..1ec44529447a 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -93,6 +93,7 @@ drm-$(CONFIG_DRM_PANIC_SCREEN_QR_CODE) += drm_panic_qr.o obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o +obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o # # Memory-management helpers @@ -143,8 +144,12 @@ drm_kms_helper-y := \ drm_rect.o \ drm_self_refresh_helper.o \ drm_simple_kms_helper.o +drm_kms_helper-$(CONFIG_DRM_CLIENT_SETUP) += \ + drm_client_setup.o drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o -drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o +drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += \ + drm_fbdev_client.o \ + drm_fb_helper.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o # diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9b1e0ede05a4..7edf8d67a0fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -350,7 +350,6 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 @@ -854,6 +853,7 @@ struct amdgpu_device { bool need_swiotlb; bool accel_working; struct notifier_block acpi_nb; + struct notifier_block pm_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct debugfs_blob_wrapper debugfs_vbios_blob; struct debugfs_blob_wrapper debugfs_discovery_blob; @@ -1571,11 +1571,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b8d4e07d2043..bebfbc1497d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; - - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; -} - #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e41318bfbf45..84e5364d1f67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -715,8 +715,9 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; - if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && - ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { + if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) || + (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) { pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f9d119448442..581fe1a48f37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -192,7 +192,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); +void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem); int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, @@ -212,9 +212,8 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) } static inline -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo) { - return 0; } static inline diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 3bc0cbf45bc5..a46d6dd6de32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1133,8 +1133,7 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, uint32_t low, high; uint64_t queue_addr = 0; - if (!adev->debug_exp_resets && - !adev->gfx.num_gfx_rings) + if (!amdgpu_gpu_recovery) return 0; kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); @@ -1185,6 +1184,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, uint32_t low, high, pipe_reset_data = 0; uint64_t queue_addr = 0; + if (!amdgpu_gpu_recovery) + return 0; + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); amdgpu_gfx_rlc_enter_safe_mode(adev, inst); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index fa572ba7f9fc..1465b3adacb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -370,40 +370,32 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, return 0; } -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +/** + * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences + * @bo: the BO where to remove the evictions fences from. + * + * This functions should only be used on release when all references to the BO + * are already dropped. We remove the eviction fence from the private copy of + * the dma_resv object here since that is what is used during release to + * determine of the BO is idle or not. + */ +void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo) { - struct amdgpu_bo *root = bo; - struct amdgpu_vm_bo_base *vm_bo; - struct amdgpu_vm *vm; - struct amdkfd_process_info *info; - struct amdgpu_amdkfd_fence *ef; - int ret; - - /* we can always get vm_bo from root PD bo.*/ - while (root->parent) - root = root->parent; + struct dma_resv *resv = &bo->tbo.base._resv; + struct dma_fence *fence, *stub; + struct dma_resv_iter cursor; - vm_bo = root->vm_bo; - if (!vm_bo) - return 0; + dma_resv_assert_held(resv); - vm = vm_bo->vm; - if (!vm) - return 0; - - info = vm->process_info; - if (!info || !info->eviction_fence) - return 0; - - ef = container_of(dma_fence_get(&info->eviction_fence->base), - struct amdgpu_amdkfd_fence, base); - - BUG_ON(!dma_resv_trylock(bo->tbo.base.resv)); - ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef); - dma_resv_unlock(bo->tbo.base.resv); + stub = dma_fence_get_stub(); + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { + if (!to_amdgpu_amdkfd_fence(fence)) + continue; - dma_fence_put(&ef->base); - return ret; + dma_resv_replace_fences(resv, fence->context, stub, + DMA_RESV_USAGE_BOOKKEEP); + } + dma_fence_put(stub); } static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 45affc02548c..a3a7d20ab4fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -437,6 +437,13 @@ success: return true; } +static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev) +{ + struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE]; + + return (res->flags & IORESOURCE_ROM_SHADOW); +} + static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) { @@ -455,14 +462,27 @@ static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) goto success; } - if (amdgpu_read_platform_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from platform\n"); - goto success; - } + if (amdgpu_prefer_rom_resource(adev)) { + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } - if (amdgpu_read_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); - goto success; + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + } else { + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } } if (amdgpu_read_bios_from_rom(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b6..dfb6cfd83760 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -88,8 +88,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); @@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 9da4414de617..81f16e4447f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1902,7 +1902,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence) == fence) + if (preempted && (&job->hw_fence.base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 45e28726e148..8cf224fd4ff2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -145,6 +145,8 @@ const char *amdgpu_asic_name[] = { }; static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data); /** * DOC: pcie_replay_count @@ -168,6 +170,24 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); +static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) +{ + int ret = 0; + + if (!amdgpu_sriov_vf(adev)) + ret = sysfs_create_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); + + return ret; +} + +static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + sysfs_remove_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); +} + static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) @@ -1542,6 +1562,13 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ + if ((amdgpu_runtime_pm != 0) && + adev->pdev->vendor == PCI_VENDOR_ID_ATI && + adev->pdev->device == 0x731f && + adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) + return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) DRM_WARN("System can't access extended configuration space, please check!!\n"); @@ -3315,6 +3342,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); amdgpu_seq64_fini(adev); + amdgpu_doorbell_fini(adev); } r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); @@ -4020,11 +4048,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) } #endif -static const struct attribute *amdgpu_dev_attributes[] = { - &dev_attr_pcie_replay_count.attr, - NULL -}; - static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) { if (amdgpu_mcbp == 1) @@ -4121,7 +4144,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); - mutex_init(&adev->virt.rlcg_reg_lock); hash_init(adev->mn_hash); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); @@ -4147,6 +4169,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); + spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); INIT_LIST_HEAD(&adev->reset_list); @@ -4467,7 +4490,7 @@ fence_driver_init: } else adev->ucode_sysfs_en = true; - r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); + r = amdgpu_device_attr_sysfs_init(adev); if (r) dev_err(adev->dev, "Could not create amdgpu device attr\n"); @@ -4511,6 +4534,11 @@ fence_driver_init: amdgpu_device_check_iommu_direct_map(adev); + adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; + r = register_pm_notifier(&adev->pm_nb); + if (r) + goto failed; + return 0; release_ras_con: @@ -4575,6 +4603,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) drain_workqueue(adev->mman.bdev.wq); adev->shutdown = true; + unregister_pm_notifier(&adev->pm_nb); + /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test */ @@ -4597,7 +4627,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); - sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); + amdgpu_device_attr_sysfs_fini(adev); amdgpu_fru_sysfs_fini(adev); amdgpu_reg_state_sysfs_fini(adev); @@ -4647,6 +4677,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) kfree(adev->fru_info); adev->fru_info = NULL; + kfree(adev->xcp_mgr); + adev->xcp_mgr = NULL; + px = amdgpu_device_supports_px(adev_to_drm(adev)); if (px || (!dev_is_removable(&adev->pdev->dev) && @@ -4663,7 +4696,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); drm_dev_exit(idx); } @@ -4706,6 +4738,33 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * Suspend & resume. */ /** + * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events + * @nb: notifier block + * @mode: suspend mode + * @data: data + * + * This function is called when the system is about to suspend or hibernate. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. + */ +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data) +{ + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); + + switch (mode) { + case PM_HIBERNATION_PREPARE: + adev->in_s4 = true; + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; + break; + } + + return NOTIFY_DONE; +} + +/** * amdgpu_device_prepare - prepare for device suspend * * @dev: drm dev pointer @@ -4719,15 +4778,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4738,15 +4795,10 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = false; - - return r; } /** @@ -4902,6 +4954,8 @@ exit: dev->dev->power.disable_depth--; #endif } + + amdgpu_vram_mgr_clear_reset_blocks(adev); adev->in_suspend = false; if (adev->enable_mes) @@ -5809,7 +5863,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -6568,18 +6622,26 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, { struct dma_fence *old = NULL; + dma_fence_get(gang); do { dma_fence_put(old); old = amdgpu_device_get_gang(adev); if (old == gang) break; - if (!dma_fence_is_signaled(old)) + if (!dma_fence_is_signaled(old)) { + dma_fence_put(gang); return old; + } } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, old, gang) != old); + /* + * Drop it once for the exchanged reference in adev and once for the + * thread local reference acquired in amdgpu_device_get_gang(). + */ + dma_fence_put(old); dma_fence_put(old); return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ca8091fd3a24..eee434743deb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -111,8 +111,15 @@ #include "amdgpu_isp.h" #endif -#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" -MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); +MODULE_FIRMWARE("amdgpu/ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin"); #define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 @@ -295,25 +302,19 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, return ret; } -static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) +static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, + uint8_t *binary, + const char *fw_name) { const struct firmware *fw; - const char *fw_name; int r; - switch (amdgpu_discovery) { - case 2: - fw_name = FIRMWARE_IP_DISCOVERY; - break; - default: - dev_warn(adev->dev, "amdgpu_discovery is not set properly\n"); - return -EINVAL; - } - - r = request_firmware(&fw, fw_name, adev->dev); + r = firmware_request_nowarn(&fw, fw_name, adev->dev); if (r) { - dev_err(adev->dev, "can't load firmware \"%s\"\n", - fw_name); + if (amdgpu_discovery == 2) + dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name); + else + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name); return r; } @@ -402,10 +403,39 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, return 0; } +static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev) +{ + if (amdgpu_discovery == 2) + return "amdgpu/ip_discovery.bin"; + + switch (adev->asic_type) { + case CHIP_VEGA10: + return "amdgpu/vega10_ip_discovery.bin"; + case CHIP_VEGA12: + return "amdgpu/vega12_ip_discovery.bin"; + case CHIP_RAVEN: + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + return "amdgpu/raven2_ip_discovery.bin"; + else if (adev->apu_flags & AMD_APU_IS_PICASSO) + return "amdgpu/picasso_ip_discovery.bin"; + else + return "amdgpu/raven_ip_discovery.bin"; + case CHIP_VEGA20: + return "amdgpu/vega20_ip_discovery.bin"; + case CHIP_ARCTURUS: + return "amdgpu/arcturus_ip_discovery.bin"; + case CHIP_ALDEBARAN: + return "amdgpu/aldebaran_ip_discovery.bin"; + default: + return NULL; + } +} + static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; + const char *fw_name; uint16_t offset; uint16_t size; uint16_t checksum; @@ -417,17 +447,14 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) return -ENOMEM; /* Read from file if it is the preferred option */ - if (amdgpu_discovery == 2) { - dev_info(adev->dev, "use ip discovery information from file"); - r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); - - if (r) { - dev_err(adev->dev, "failed to read ip discovery binary from file\n"); - r = -EINVAL; + fw_name = amdgpu_discovery_get_fw_name(adev); + if (fw_name != NULL) { + drm_dbg(&adev->ddev, "use ip discovery information from file"); + r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); + if (r) goto out; - } - } else { + drm_dbg(&adev->ddev, "use ip discovery information from memory"); r = amdgpu_discovery_read_binary_from_mem( adev, adev->mman.discovery_bin); if (r) @@ -1285,10 +1312,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) int r; r = amdgpu_discovery_init(adev); - if (r) { - DRM_ERROR("amdgpu_discovery_init failed\n"); + if (r) return r; - } adev->gfx.xcc_mask = 0; adev->sdma.sdma_mask = 0; @@ -2430,6 +2455,40 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_RAVEN: + case CHIP_VEGA20: + case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: + /* this is not fatal. We have a fallback below + * if the new firmwares are not present. some of + * this will be overridden below to keep things + * consistent with the current behavior. + */ + r = amdgpu_discovery_reg_base_init(adev); + if (!r) { + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + } + break; + default: + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + drm_err(&adev->ddev, "discovery failed: %d\n", r); + return r; + } + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + break; + } + + switch (adev->asic_type) { + case CHIP_VEGA10: vega10_reg_base_init(adev); adev->sdma.num_instances = 2; adev->gmc.num_umc = 4; @@ -2590,14 +2649,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); break; default: - r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; - - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 8e81a83d37d8..e63a32c21447 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,6 +42,29 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. + */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -53,11 +76,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; return 0; @@ -181,7 +206,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { - if (sgt->sgl->page_link) { + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); @@ -456,6 +481,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; + if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 81d9877c8735..93c3de2d27d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -118,9 +118,11 @@ * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support * - 3.59.0 - Cleared VRAM + * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) + * - 3.61.0 - Contains fix for RV/PCO compute queues */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 59 +#define KMS_DRIVER_MINOR 61 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -170,6 +172,7 @@ uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; bool enforce_isolation; +int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer * migration and restoration of backing memory when handling @@ -1036,6 +1039,13 @@ module_param(enforce_isolation, bool, 0444); MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); /** + * DOC: modeset (int) + * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto). + */ +MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)"); +module_param_named(modeset, amdgpu_modeset, int, 0444); + +/** * DOC: seamless (int) * Seamless boot will keep the image on the screen during the boot process. */ @@ -1793,7 +1803,6 @@ static const u16 amdgpu_unsupported_pciidlist[] = { }; static const struct pci_device_id pciidlist[] = { -#ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1866,8 +1875,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU}, @@ -1950,7 +1957,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, -#endif /* topaz */ {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, @@ -2250,6 +2256,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0, i; bool supports_atomic = false; + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { + if (drm_firmware_drivers_only() && amdgpu_modeset == -1) + return -EINVAL; + } + /* skip devices which are owned by radeon */ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { if (amdgpu_unsupported_pciidlist[i] == pdev->device) @@ -2282,14 +2294,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENOTSUPP; } + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: #ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: + if (!amdgpu_si_support) { dev_info(&pdev->dev, "SI support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2297,16 +2309,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without SI support.\n"); + return -ENODEV; #endif + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: #ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: + if (!amdgpu_cik_support) { dev_info(&pdev->dev, "CIK support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2314,8 +2328,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without CIK support.\n"); + return -ENODEV; #endif + default: + break; + } adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) @@ -2629,9 +2649,7 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int r; - adev->in_s4 = true; r = amdgpu_device_suspend(drm_dev, true); - adev->in_s4 = false; if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 2f24a6aa13bf..569e0e537392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -41,22 +41,6 @@ #include "amdgpu_trace.h" #include "amdgpu_reset.h" -/* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. */ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); if (am_fence == NULL) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; } else { /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = &job->hw_fence; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; if (job && job->job_run_counter) { @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. */ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c index 2d4b67175b55..328a1b963548 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -122,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return 0; + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3)) + return 0; + if (adev->asic_type >= CHIP_SIENNA_CICHLID) return 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 156abd2ba5a6..3c2ac5f4e814 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1426,9 +1426,11 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct drm_gpu_scheduler *sched = &ring->sched; struct drm_sched_entity entity; + static atomic_t counter; struct dma_fence *f; struct amdgpu_job *job; struct amdgpu_ib *ib; + void *owner; int i, r; /* Initialize the scheduler entity */ @@ -1439,9 +1441,15 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; } - r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, - 64, 0, - &job); + /* + * Use some unique dummy value as the owner to make sure we execute + * the cleaner shader on each submission. The value just need to change + * for each submission and is otherwise meaningless. + */ + owner = (void *)(unsigned long)atomic_inc_return(&counter); + + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, + 64, 0, &job); if (r) goto err; @@ -1837,6 +1845,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 idx; + bool sched_work = false; if (!adev->gfx.enable_cleaner_shader) return; @@ -1852,15 +1861,19 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) mutex_lock(&adev->enforce_isolation_mutex); if (adev->enforce_isolation[idx]) { if (adev->kfd.init_complete) - amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); + sched_work = true; } mutex_unlock(&adev->enforce_isolation_mutex); + + if (sched_work) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); } void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 idx; + bool sched_work = false; if (!adev->gfx.enable_cleaner_shader) return; @@ -1876,7 +1889,10 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) mutex_lock(&adev->enforce_isolation_mutex); if (adev->enforce_isolation[idx]) { if (adev->kfd.init_complete) - amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); + sched_work = true; } mutex_unlock(&adev->enforce_isolation_mutex); + + if (sched_work) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 17a19d49d30a..9d130d3af0b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -678,12 +678,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst) { - u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : - adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - int r; + int r, cnt = 0; uint32_t seq; /* @@ -740,10 +738,21 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) { dev_err(adev->dev, "timeout waiting for kiq fence\n"); r = -ETIME; - } + } else + r = 0; } error_unlock_reset: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 8b512dc28df8..071f187f5e28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -193,8 +193,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || - (amdgpu_sriov_vf(adev) && need_ctx_switch) || - amdgpu_vm_need_pipeline_sync(ring, job))) { + need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) { + need_pipe_sync = true; if (tmp) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 508f02eb0cf8..7de10208e8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -78,6 +78,9 @@ struct amdgpu_ih_ring { #define amdgpu_ih_ts_after(t1, t2) \ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL) +#define amdgpu_ih_ts_after_or_equal(t1, t2) \ + (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) >= 0LL) + /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1ce20a19be8b..7e6057a6e7f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -259,8 +259,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) /* Check if any fences where initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.ops) - f = &job->hw_fence; + else if (job->hw_fence.base.ops) + f = &job->hw_fence.base; else f = NULL; @@ -277,10 +277,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -309,10 +309,10 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967ff..4fe033d8f356 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -48,7 +48,7 @@ struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct dma_fence hw_fence; + struct amdgpu_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 7d4b540340e0..41b88e0ea98b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -860,7 +860,9 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); queue_input.wptr_addr = ring->wptr_gpu_addr; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to map legacy queue\n"); @@ -883,7 +885,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, queue_input.trail_fence_addr = gpu_addr; queue_input.trail_fence_data = seq; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to unmap legacy queue\n"); @@ -910,7 +914,9 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, queue_input.vmid = vmid; queue_input.use_mmio = use_mmio; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to reset legacy queue\n"); @@ -931,7 +937,9 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to read reg (0x%x)\n", reg); else @@ -957,7 +965,9 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to write reg (0x%x)\n", reg); @@ -984,7 +994,9 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to reg_write_reg_wait\n"); @@ -1009,7 +1021,9 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to reg_write_reg_wait\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 971419e3a9bb..fc588ef598c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -161,8 +161,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * When GTT is just an alternative to VRAM make sure that we * only use it as fallback and still try to fill up VRAM first. */ - if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM && - !(adev->flags & AMD_IS_APU)) + if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) && + domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } @@ -1246,28 +1246,36 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); - /* We only remove the fence if the resv has individualized. */ - WARN_ON_ONCE(bo->type == ttm_bo_type_kernel - && bo->base.resv != &bo->base._resv); - if (bo->base.resv == &bo->base._resv) - amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); + /* + * We lock the private dma_resv object here and since the BO is about to + * be released nobody else should have a pointer to it. + * So when this locking here fails something is wrong with the reference + * counting. + */ + if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv))) + return; + + amdgpu_amdkfd_remove_all_eviction_fences(abo); if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM || !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev))) - return; + goto out; - if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) - return; + r = dma_resv_reserve_fences(&bo->base._resv, 1); + if (r) + goto out; - r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); - if (!WARN_ON(r)) { - amdgpu_vram_mgr_set_cleared(bo->resource); - amdgpu_bo_fence(abo, fence, false); - dma_fence_put(fence); - } + r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true); + if (WARN_ON(r)) + goto out; + + amdgpu_vram_mgr_set_cleared(bo->resource); + dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); - dma_resv_unlock(bo->base.resv); +out: + dma_resv_unlock(&bo->base._resv); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0b28b2cf1517..3d42f6c3308e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -44,7 +44,7 @@ #include "amdgpu_securedisplay.h" #include "amdgpu_atomfirmware.h" -#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*3) +#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*16) static int psp_load_smu_fw(struct psp_context *psp); static int psp_rap_terminate(struct psp_context *psp); @@ -531,7 +531,6 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd = psp->cmd; psp_memory_training_fini(psp); @@ -541,8 +540,8 @@ static int psp_sw_fini(void *handle) amdgpu_ucode_release(&psp->cap_fw); amdgpu_ucode_release(&psp->toc_fw); - kfree(cmd); - cmd = NULL; + kfree(psp->cmd); + psp->cmd = NULL; psp_free_shared_bufs(psp); @@ -3431,7 +3430,10 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3673,7 +3675,10 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name); if (err) return err; @@ -3713,9 +3718,10 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) if (err == -ENODEV) { dev_warn(adev->dev, "cap microcode does not exist, skip\n"); err = 0; - goto out; + } else { + dev_err(adev->dev, "fail to initialize cap microcode\n"); } - dev_err(adev->dev, "fail to initialize cap microcode\n"); + goto out; } info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 690976665cf6..10da6e550d76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -439,6 +439,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { unsigned long flags; ktime_t deadline; + bool ret; if (unlikely(ring->adev->debug_disable_soft_recovery)) return false; @@ -453,12 +454,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, dma_fence_set_error(fence, -ENODATA); spin_unlock_irqrestore(fence->lock, flags); - atomic_inc(&ring->adev->gpu_reset_counter); while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) ring->funcs->soft_recovery(ring, vmid); - return dma_fence_is_signaled(fence); + ret = dma_fence_is_signaled(fence); + /* increment the counter only if soft reset worked */ + if (ret) + atomic_inc(&ring->adev->gpu_reset_counter); + + return ret; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index f93f51002201..9af2cda676ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -126,6 +126,22 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +/* + * Fences mark an event in the GPUs pipeline and are used + * for GPU/CPU synchronization. When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. + */ + +struct amdgpu_fence { + struct dma_fence base; + + /* RB, DMA, etc. */ + struct amdgpu_ring *ring; + ktime_t start_timestamp; +}; + extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index e22cb2b5cd92..dba8051b8c14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -133,7 +133,7 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) vm = &fpriv->vm; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 9f922ec50ea2..1c8ac4cf08c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -309,7 +309,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { uint64_t from, to, cur_size, tiling_flags; - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_compress_disable; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -340,9 +340,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + write_compress_disable = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE); copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | - AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format)); + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) | + AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE, + write_compress_disable)); } r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, @@ -2065,6 +2069,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); @@ -2275,7 +2280,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor cursor; u64 addr; - int r; + int r = 0; if (!adev->mman.buffer_funcs_enabled) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 138d80017f35..3c883f1cf068 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -118,6 +118,8 @@ struct amdgpu_copy_mem { #define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1 #define AMDGPU_COPY_FLAGS_SET(field, value) \ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT) @@ -151,6 +153,7 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, uint64_t start, uint64_t size); int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev); bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, struct ttm_resource *res); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 4c7b53648a50..eb83d7c1f784 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -30,6 +30,10 @@ #define AMDGPU_UCODE_NAME_MAX (128) +static const struct kicker_device kicker_device_list[] = { + {0x744B, 0x00}, +}; + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); @@ -1383,6 +1387,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return NULL; } +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) { + if (adev->pdev->device == kicker_device_list[i].device && + adev->pdev->revision == kicker_device_list[i].revision) + return true; + } + + return false; +} + void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) { int maj, min, rev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4e23419b92d4..fd08b015b2a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -595,6 +595,11 @@ struct amdgpu_firmware { uint64_t fw_buf_mc; }; +struct kicker_device{ + unsigned short device; + u8 revision; +}; + void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr); @@ -622,5 +627,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index bb7b9b2eaac1..8da0bddab3d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -383,6 +383,45 @@ int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, return 0; } +static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func, + void *data) +{ + uint32_t umc_node_inst; + uint32_t node_inst; + uint32_t umc_inst; + uint32_t ch_inst; + int ret; + + /* + * This loop is done based on the following - + * umc.active mask = mask of active umc instances across all nodes + * umc.umc_inst_num = maximum number of umc instancess per node + * umc.node_inst_num = maximum number of node instances + * Channel instances are not assumed to be harvested. + */ + dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d", + adev->umc.active_mask, adev->umc.umc_inst_num); + for_each_set_bit(umc_node_inst, &(adev->umc.active_mask), + adev->umc.node_inst_num * adev->umc.umc_inst_num) { + node_inst = umc_node_inst / adev->umc.umc_inst_num; + umc_inst = umc_node_inst % adev->umc.umc_inst_num; + LOOP_UMC_CH_INST(ch_inst) { + dev_dbg(adev->dev, + "node_inst :%d umc_inst: %d ch_inst: %d", + node_inst, umc_inst, ch_inst); + ret = func(adev, node_inst, umc_inst, ch_inst, data); + if (ret) { + dev_err(adev->dev, + "Node %d umc %d ch %d func returns %d\n", + node_inst, umc_inst, ch_inst, ret); + return ret; + } + } + } + + return 0; +} + int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data) { @@ -391,6 +430,9 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, uint32_t ch_inst = 0; int ret = 0; + if (adev->aid_mask) + return amdgpu_umc_loop_all_aid(adev, func, data); + if (adev->umc.node_inst_num) { LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { ret = func(adev, node_inst, umc_inst, ch_inst, data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 6162582d0aa2..d5125523bfa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -584,7 +584,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: - break; + return -EINVAL; } r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 2a1f3dbb14d3..3b6254de2c0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b6397d3229e1..01dccd489a80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1010,6 +1010,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f void *scratch_reg2; void *scratch_reg3; void *spare_int; + unsigned long flags; if (!adev->gfx.rlc.rlcg_reg_access_supported) { dev_err(adev->dev, @@ -1031,7 +1032,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2; scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3; - mutex_lock(&adev->virt.rlcg_reg_lock); + spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags); if (reg_access_ctrl->spare_int) spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int; @@ -1090,7 +1091,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f ret = readl(scratch_reg0); - mutex_unlock(&adev->virt.rlcg_reg_lock); + spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b650a2032c42..6a2087abfb7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -275,7 +275,8 @@ struct amdgpu_virt { /* the ucode id to signal the autoload */ uint32_t autoload_ucode_id; - struct mutex rlcg_reg_lock; + /* Spinlock to protect access to the RLCG register interface */ + spinlock_t rlcg_reg_lock; }; struct amdgpu_video_codec_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 73e02141a6e2..37d53578825b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2434,8 +2434,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->freed); INIT_LIST_HEAD(&vm->done); - INIT_LIST_HEAD(&vm->pt_freed); - INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); @@ -2607,8 +2605,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - flush_work(&vm->pt_free_work); - root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); amdgpu_vm_put_task_info(vm->task_info); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 52dd7cdfdc81..ee893527a4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -360,10 +360,6 @@ struct amdgpu_vm { /* BOs which are invalidated, has been updated in the PTs */ struct list_head done; - /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */ - struct list_head pt_freed; - struct work_struct pt_free_work; - /* contains the page directory */ struct amdgpu_vm_bo_base root; struct dma_fence *last_update; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index f78a0434a48f..54ae0e9bc6d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -546,27 +546,6 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) amdgpu_bo_unref(&entry->bo); } -void amdgpu_vm_pt_free_work(struct work_struct *work) -{ - struct amdgpu_vm_bo_base *entry, *next; - struct amdgpu_vm *vm; - LIST_HEAD(pt_freed); - - vm = container_of(work, struct amdgpu_vm, pt_free_work); - - spin_lock(&vm->status_lock); - list_splice_init(&vm->pt_freed, &pt_freed); - spin_unlock(&vm->status_lock); - - /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */ - amdgpu_bo_reserve(vm->root.bo, true); - - list_for_each_entry_safe(entry, next, &pt_freed, vm_status) - amdgpu_vm_pt_free(entry); - - amdgpu_bo_unreserve(vm->root.bo); -} - /** * amdgpu_vm_pt_free_list - free PD/PT levels * @@ -579,19 +558,15 @@ void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, struct amdgpu_vm_update_params *params) { struct amdgpu_vm_bo_base *entry, *next; - struct amdgpu_vm *vm = params->vm; bool unlocked = params->unlocked; if (list_empty(¶ms->tlb_flush_waitlist)) return; - if (unlocked) { - spin_lock(&vm->status_lock); - list_splice_init(¶ms->tlb_flush_waitlist, &vm->pt_freed); - spin_unlock(&vm->status_lock); - schedule_work(&vm->pt_free_work); - return; - } + /* + * unlocked unmap clear page table leaves, warning to free the page entry. + */ + WARN_ON(unlocked); list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status) amdgpu_vm_pt_free(entry); @@ -899,7 +874,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); pe_start = ((cursor.pfn >> shift) & mask) * 8; - entry_end = ((uint64_t)mask + 1) << shift; + + if (cursor.level < AMDGPU_VM_PTB && params->unlocked) + /* + * MMU notifier callback unlocked unmap huge page, leave is PDE entry, + * only clear one entry. Next entry search again for PDE or PTE leave. + */ + entry_end = 1ULL << shift; + else + entry_end = ((uint64_t)mask + 1) << shift; entry_end += cursor.pfn & ~(entry_end - 1); entry_end = min(entry_end, end); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index ff5e52025266..ea4df412decf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -463,7 +463,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, int r; lpfn = (u64)place->lpfn << PAGE_SHIFT; - if (!lpfn) + if (!lpfn || lpfn > man->size) lpfn = man->size; fpfn = (u64)place->fpfn << PAGE_SHIFT; @@ -648,9 +648,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, list_for_each_entry(block, &vres->blocks, link) vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks, vres->flags); + amdgpu_vram_mgr_do_reserve(man); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); @@ -783,6 +782,23 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr) } /** + * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks + * + * @adev: amdgpu device pointer + * + * Reset the cleared drm buddy blocks. + */ +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_reset_clear(mm, false); + mutex_unlock(&mgr->lock); +} + +/** * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection * * @man: TTM memory type manager diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 45ed97038df0..9a1c9dbad126 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5998,7 +5998,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -6076,7 +6076,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -6153,7 +6153,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -6528,7 +6528,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -9510,9 +9510,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; @@ -9559,9 +9558,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d3e8be82a172..96e5c520af31 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -63,10 +63,28 @@ #define regPC_CONFIG_CNTL_1 0x194d #define regPC_CONFIG_CNTL_1_BASE_IDX 1 +#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 +#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000 +#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 + MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); @@ -717,6 +735,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->pdev->revision == 0xCE) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, "amdgpu/gc_11_0_0_rlc_1.bin"); + else if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, "amdgpu/%s_rlc.bin", ucode_prefix); @@ -1549,7 +1570,7 @@ static int gfx_v11_0_sw_init(void *handle) adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; break; @@ -2327,7 +2348,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2371,7 +2392,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2416,7 +2437,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -3051,7 +3072,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -3269,7 +3290,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -3896,7 +3917,7 @@ static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) priority = 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); mqd->cp_gfx_hqd_queue_priority = tmp; } @@ -3918,14 +3939,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set up mqd control */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = regCP_GFX_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); mqd->cp_gfx_mqd_control = tmp; /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = regCP_GFX_HQD_VMID_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; @@ -3933,7 +3954,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); /* set up time quantum */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); mqd->cp_gfx_hqd_quantum = tmp; @@ -3955,7 +3976,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = regCP_GFX_HQD_CNTL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); #ifdef __BIG_ENDIAN @@ -3964,7 +3985,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ - tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -3976,7 +3997,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_rb_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; /* active the queue */ mqd->cp_gfx_hqd_active = 1; @@ -4062,14 +4083,14 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -4098,7 +4119,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = regCP_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -4108,7 +4129,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -4134,7 +4155,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = 0; /* enable the doorbell if requested */ if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -4149,17 +4170,17 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = regCP_HQD_IB_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; @@ -4487,7 +4508,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 6c19626ec59e..adcfcf594286 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -52,6 +52,24 @@ #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 +#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000 +#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 + + MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin"); @@ -1332,7 +1350,7 @@ static int gfx_v12_0_sw_init(void *handle) adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; break; @@ -2264,7 +2282,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -2395,7 +2413,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) (void **)&adev->gfx.me.me_fw_data_ptr); if (r) { dev_err(adev->dev, "(%d) failed to create me data bo\n", r); - gfx_v12_0_pfp_fini(adev); + gfx_v12_0_me_fini(adev); return r; } @@ -2408,7 +2426,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -2851,25 +2869,25 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set up mqd control */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = regCP_GFX_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); mqd->cp_gfx_mqd_control = tmp; /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = regCP_GFX_HQD_VMID_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; /* set up default queue priority level * 0x0 = low priority, 0x1 = high priority */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); mqd->cp_gfx_hqd_queue_priority = tmp; /* set up time quantum */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); mqd->cp_gfx_hqd_quantum = tmp; @@ -2891,7 +2909,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = regCP_GFX_HQD_CNTL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); #ifdef __BIG_ENDIAN @@ -2900,7 +2918,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ - tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -2912,7 +2930,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_rb_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; /* active the queue */ mqd->cp_gfx_hqd_active = 1; @@ -3007,14 +3025,14 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -3043,7 +3061,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = regCP_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -3053,7 +3071,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -3078,7 +3096,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = 0; /* enable the doorbell if requested */ if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -3093,17 +3111,17 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = regCP_HQD_IB_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; @@ -3429,7 +3447,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; @@ -3981,17 +3999,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); - - data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); - data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - - /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ - if (adev->sdma.num_instances > 1) { - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); - } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 9d741695ca07..1f675d67a1a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4652,6 +4652,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e7cd51c95141..91af1adbf5e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1269,6 +1269,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.mec_fw_write_wait = false; if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && + (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) && ((adev->gfx.mec_fw_version < 0x000001a5) || (adev->gfx.mec_feature_version < 46) || (adev->gfx.pfp_fw_version < 0x000000b7) || @@ -7251,10 +7252,6 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (!adev->debug_exp_resets && - !adev->gfx.num_gfx_rings) - return -EINVAL; - if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -7321,8 +7318,8 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { DRM_ERROR("fail to remap queue\n"); return r; @@ -7419,6 +7416,34 @@ static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } +static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(adev, AMD_PG_STATE_UNGATE); +} + +static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7595,8 +7620,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v9_0_ring_begin_use_compute, + .end_use = gfx_v9_0_ring_end_use_compute, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ffdb966c4127..f27ccb8f3c8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3062,9 +3062,6 @@ static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; uint32_t value = 0; - if (!adev->debug_exp_resets) - return; - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); @@ -3580,9 +3577,6 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int r; - if (!adev->debug_exp_resets) - return -EINVAL; - if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -3646,9 +3640,8 @@ pipe_reset: } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { dev_err(adev->dev, "fail to remap queue\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 0e3ddea7b8e0..a7bfc9f41d0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -92,12 +92,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; - /* Program the AGP BAR */ - WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) { + /* Program the AGP BAR */ + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + /* Program the system aperture low logical page number. */ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 9784a2892185..c6e742921282 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -265,7 +265,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -966,7 +966,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) adev->hdp.funcs->init_registers(adev); /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 2797fd84432b..87aaf5f1224f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -226,7 +226,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -747,6 +747,18 @@ static int gmc_v11_0_sw_init(void *handle) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; + /* The mall_size is already calculated as mall_size_per_umc * num_umc. + * However, for gfx1151, which features a 2-to-1 UMC mapping, + * the result must be multiplied by 2 to determine the actual mall size. + */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 5, 1): + adev->gmc.mall_size *= 2; + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): @@ -893,7 +905,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index edcb5351f8cc..525e435ee22d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -294,7 +294,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -498,9 +498,6 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, uint64_t *flags) { struct amdgpu_bo *bo = mapping->bo_va->base.bo; - struct amdgpu_device *bo_adev; - bool coherent, is_system; - *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -516,25 +513,11 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_VALID; } - if (!bo) - return; - - if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT | - AMDGPU_GEM_CREATE_UNCACHED)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); - - bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); - coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || - (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); - if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; - /* WA for HW bug */ - if (is_system || ((bo_adev != adev) && coherent)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); - + if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); } static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) @@ -879,7 +862,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7a45f3fdc734..78c527b56f7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1461,7 +1461,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; - adev->umc.active_mask = adev->aid_mask; adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; @@ -2351,7 +2350,7 @@ static int gmc_v9_0_hw_init(void *handle) adev->hdp.funcs->init_registers(adev); /* After HDP is initialized, flush HDP.*/ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 194026e9be33..1ca1bbe7784e 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -42,7 +42,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index d3962d469088..40705e13ca56 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -33,7 +33,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index f52552c5fa27..6b9f2e1d9d69 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -34,7 +34,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index 6948fe9956ce..20da813299f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -36,7 +36,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 63820329f67e..f7ecdd15d528 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -33,7 +33,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index d4f72e47ae9e..c4f5cbf1ecd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -32,6 +32,7 @@ #include "gc/gc_11_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); @@ -50,7 +51,10 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 231a3d490ea8..49113df8baef 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -690,7 +690,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) { - int size = 128 * PAGE_SIZE; + int size = 128 * AMDGPU_GPU_PAGE_SIZE; int ret = 0; struct amdgpu_device *adev = mes->adev; union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; @@ -859,6 +859,10 @@ static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) { int pipe; + /* return early if we have already fetched these */ + if (adev->mes.sched_version && adev->mes.kiq_version) + return; + /* get MES scheduler/KIQ versions */ mutex_lock(&adev->srbm_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b3175ff676f3..459f7b8d72b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1225,17 +1225,20 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, mes_v12_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); + if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) || + ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) { + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, pipe, 0, 0); - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 9689e2b5d4e5..2adee2b94c37 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -172,6 +172,30 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp); } +/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */ +static void mmhub_v1_7_init_snoop_override_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i; + uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + + for (i = 0; i < 5; i++) { /* DAGB instances */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance); + tmp |= (1 << 15); /* SDMA client is BIT15 */ + WREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance); + tmp |= (1 << 15); + WREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance, tmp); + } + +} + static void mmhub_v1_7_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; @@ -337,6 +361,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev) mmhub_v1_7_init_system_aperture_regs(adev); mmhub_v1_7_init_tlb_regs(adev); mmhub_v1_7_init_cache_regs(adev); + mmhub_v1_7_init_snoop_override_regs(adev); mmhub_v1_7_enable_system_domain(adev); mmhub_v1_7_disable_identity_aperture(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index b01bb759d0f4..2276c644a697 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -214,6 +214,32 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) } } +/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */ +static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev) +{ + uint32_t tmp, inst_mask; + int i, j; + uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + for (j = 0; j < 5; j++) { /* DAGB instances */ + tmp = RREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance); + tmp |= (1 << 15); /* SDMA client is BIT15 */ + WREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance); + tmp |= (1 << 15); + WREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance, tmp); + } + } +} + static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp, inst_mask; @@ -419,6 +445,7 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) mmhub_v1_8_init_system_aperture_regs(adev); mmhub_v1_8_init_tlb_regs(adev); mmhub_v1_8_init_cache_regs(adev); + mmhub_v1_8_init_snoop_override_regs(adev); mmhub_v1_8_enable_system_domain(adev); mmhub_v1_8_disable_identity_aperture(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index ff1b58e44689..fe0710b55c3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -198,6 +198,36 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); } +/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */ +static void mmhub_v9_4_init_snoop_override_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + int i; + uint32_t distance = mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + uint32_t huboffset = hubid * MMHUB_INSTANCE_REGISTER_OFFSET; + + for (i = 0; i < 5 - (2 * hubid); i++) { + /* DAGB instances 0 to 4 are in hub0 and 5 to 7 are in hub1 */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, + huboffset + i * distance); + tmp |= (1 << 15); /* SDMA client is BIT15 */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, + huboffset + i * distance, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, + huboffset + i * distance); + tmp |= (1 << 15); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, + huboffset + i * distance, tmp); + } + +} + static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) { uint32_t tmp; @@ -392,6 +422,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) mmhub_v9_4_init_cache_regs(adev, i); + mmhub_v9_4_init_snoop_override_regs(adev, i); mmhub_v9_4_enable_system_domain(adev, i); if (!amdgpu_sriov_vf(adev)) mmhub_v9_4_disable_identity_aperture(adev, i); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 41421da63a08..a11f556b3ff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -361,7 +361,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index d1bd79bbae53..8e401f8b2a05 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -32,9 +32,6 @@ #define NPS_MODE_MASK 0x000000FFL -/* Core 0 Port 0 counter */ -#define smnPCIEP_NAK_COUNTER 0x1A340218 - static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -453,22 +450,6 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) } } -static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) -{ - u32 val, nak_r, nak_g; - - if (adev->flags & AMD_IS_APU) - return 0; - - /* Get the number of NAKs received and generated */ - val = RREG32_PCIE(smnPCIEP_NAK_COUNTER); - nak_r = val & 0xFFFF; - nak_g = val >> 16; - - /* Add the total number of NAKs, i.e the number of replays */ - return (nak_r + nak_g); -} - #define MMIO_REG_HOLE_OFFSET 0x1A000 static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev) @@ -509,7 +490,6 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode, .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .init_registers = nbio_v7_9_init_registers, - .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, .set_reg_remap = nbio_v7_9_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 73065a85e0d2..ab0eecbab412 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -78,12 +78,12 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = { /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -104,10 +104,10 @@ static const struct amdgpu_video_codecs sc_video_codecs_encode = { }; static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -115,10 +115,10 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] }; static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -141,23 +141,23 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = { }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 2395f1856962..e77a467af7ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -532,7 +532,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 51e470e8d67d..124f74e862d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); @@ -600,7 +602,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 4d33c95a5116..89f6c06946c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -488,7 +488,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index c77889040760..1e4ce06f5f2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -485,7 +485,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, sdma_cntl; int i; for_each_inst(i, inst_mask) { @@ -497,6 +497,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0); + WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl); if (sdma[i]->use_doorbell) { doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); @@ -953,10 +956,13 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); - /* enable context empty interrupt during initialization */ - temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); WREG32_SDMA(i, regSDMA_CNTL, temp); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { + /* enable context empty interrupt during initialization */ + temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); + } if (!amdgpu_sriov_vf(adev)) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* unhalt engine */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 9288f37a3cc5..843e6b46deee 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1688,11 +1688,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, uint32_t byte_count, uint32_t copy_flags) { - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_cm; max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED); data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT); num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE); + write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | @@ -1709,7 +1710,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED))) ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) | ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | - ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | + ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); else ib->ptr[ib->length_dw++] = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 307185c0e1b8..4cbe0da100d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -103,12 +103,11 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_decode = @@ -120,12 +119,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, }; @@ -138,10 +137,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index bba35880badb..04a1b2a46368 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -117,23 +117,17 @@ static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = { }; static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index bfd067e2d2f1..f0edaabdcde5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 04e9e806e318..e4d0c0310e76 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 65dd68b32280..be86f86b49e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 26c6f10a8c8f..f391f0c54043 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -575,7 +576,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 6fca2915ea8f..77542dabec59 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -44,6 +44,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), @@ -943,6 +944,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { vcn_inst = GET_INST(VCN, i); + vcn_v4_0_3_fw_shared_init(adev, vcn_inst); + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header)); header.version = MMSCH_VERSION; header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 9d4f5352a62c..3d114ea7049f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000) #define VCN_HARVEST_MMSCH 0 @@ -984,6 +985,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL); + return 0; } @@ -1166,6 +1171,10 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index c305386358b4..d19eec4d4790 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -488,7 +488,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c new file mode 100644 index 000000000000..cdefd7fcb0da --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -0,0 +1,1624 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" +#include "vcn_v4_0_3.h" +#include "mmsch_v5_0.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +#include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" + +#include <drm/drm_drv.h> + +static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev); +static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring); +static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev); +/** + * vcn_v5_0_1_early_init - set function pointers and load microcode + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + /* re-use enc ring as unified ring */ + adev->vcn.inst[i].num_enc_rings = 1; + + vcn_v5_0_1_set_unified_ring_funcs(adev); + vcn_v5_0_1_set_irq_funcs(adev); + vcn_v5_0_1_set_ras_funcs(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].set_pg_state = vcn_v5_0_1_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; +} + +static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + struct amdgpu_vcn5_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + + if (fw_shared->sq.is_enabled) + return; + fw_shared->present_flag_0 = + cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); +} + +/** + * vcn_v5_0_1_sw_init - sw init for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Load firmware and sw initialization + */ +static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); + if (r) + return r; + + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq); + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + vcn_inst = GET_INST(VCN, i); + + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; + + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + vcn_v5_0_1_fw_shared_init(adev, i); + } + + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + + vcn_v5_0_0_alloc_ip_dump(adev); + + return amdgpu_vcn_sysfs_reset_mask_init(adev); +} + +/** + * vcn_v5_0_1_sw_fini - sw fini for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } + + amdgpu_vcn_sysfs_reset_mask_fini(adev); + + kfree(adev->vcn.ip_dump); + + return 0; +} + +/** + * vcn_v5_0_1_hw_init - start and test VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + if (amdgpu_sriov_vf(adev)) { + r = vcn_v5_0_1_start_sriov(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v5_0_1_unified_ring_set_wptr(ring); + ring->sched.ready = true; + } + } else { + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst), + adev->vcn.inst[i].aid_id); + + /* Re-init fw_shared, if required */ + vcn_v5_0_1_fw_shared_init(adev, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + } + + return 0; +} + +/** + * vcn_v5_0_1_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); + if (vinst->cur_state != AMD_PG_STATE_GATE) + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); + } + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0); + + return 0; +} + +/** + * vcn_v5_0_1_suspend - suspend VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r, i; + + r = vcn_v5_0_1_hw_fini(ip_block); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } + + return r; +} + +/** + * vcn_v5_0_1_resume - resume VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r, i; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + if (amdgpu_in_reset(adev)) + vinst->cur_state = AMD_PG_STATE_GATE; + + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } + + r = vcn_v5_0_1_hw_init(ip_block); + + return r; +} + +/** + * vcn_v5_0_1_mc_resume - memory controller programming + * + * @vinst: VCN instance + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; + uint32_t offset, size, vcn_inst; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + vcn_inst = GET_INST(VCN, inst); + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared))); +} + +/** + * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @vinst: VCN instance + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating + * + * @vinst: VCN instance + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_vcn_inst *vinst) +{ +} + +/** + * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating + * + * @vinst: VCN instance + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) +{ +} + +/** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + +/** + * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode + * + * @vinst: VCN instance + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + volatile struct amdgpu_vcn5_fw_shared *fw_shared = + adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; + int vcn_inst; + uint32_t tmp; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); + + if (indirect) { + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */ + WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF, + adev->vcn.inst[inst_idx].aid_id, 0, true); + } + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_1_mc_resume_dpg_mode(vinst, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + + /* resetting ring, fw should not check RB ring */ + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting done, fw can check RB ring */ + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + return 0; +} + +static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev) +{ + int i, vcn_inst; + struct amdgpu_ring *ring_enc; + uint64_t cache_addr; + uint64_t rb_enc_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + uint32_t enabled_vcn; + + struct mmsch_v5_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v5_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v5_0_cmd_end end = { {0} }; + struct mmsch_v5_0_init_header header; + + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + end.cmd_header.command_type = MMSCH_COMMAND__END; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + vcn_inst = GET_INST(VCN, i); + + vcn_v5_0_1_fw_shared_init(adev, vcn_inst); + + memset(&header, 0, sizeof(struct mmsch_v5_0_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + table_size = 0; + + MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + + offset = 0; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), 0); + } else { + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET1), 0); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET2), 0); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); + + fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr; + rb_setup = &fw_shared->rb_setup; + + ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0]; + ring_enc->wptr = 0; + rb_enc_addr = ring_enc->gpu_addr; + + rb_setup->is_rb_enabled_flags |= RB_ENABLED; + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + MMSCH_V5_0_INSERT_END(); + + header.vcn0.init_status = 0; + header.vcn0.table_offset = header.total_size; + header.vcn0.table_size = table_size; + header.total_size += table_size; + + /* Send init table to mmsch */ + size = sizeof(struct mmsch_v5_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x00000001; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + while (resp != expected) { + resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP); + if (resp != 0) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0; + init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status; + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) { + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\ + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status); + } + } + + return 0; +} + +/** + * vcn_v5_0_1_start - VCN start + * + * @vinst: VCN instance + * + * Start VCN block + */ +static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int j, k, r, vcn_inst; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); + + vcn_inst = GET_INST(VCN, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_1_mc_resume(vinst); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + if (status & 2) + break; + mdelay(100); + if (amdgpu_emu_mode == 1) + msleep(20); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + + return 0; +} + +/** + * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode + * + * @vinst: VCN instance + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t tmp; + int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); +} + +/** + * vcn_v5_0_1_stop - VCN stop + * + * @vinst: VCN instance + * + * Stop VCN block + */ +static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + uint32_t tmp; + int r = 0, vcn_inst; + + vcn_inst = GET_INST(VCN, i); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_1_stop_dpg_mode(vinst); + return 0; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + + return 0; +} + +/** + * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR); +} + +/** + * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR); +} + +/** + * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR, + lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v5_0_1_unified_ring_get_rptr, + .get_wptr = vcn_v5_0_1_unified_ring_get_wptr, + .set_wptr = vcn_v5_0_1_unified_ring_set_wptr, + .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring functions + */ +static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + vcn_inst = GET_INST(VCN, i); + adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid; + } +} + +/** + * vcn_v5_0_1_is_idle - check VCN block is idle + * + * @ip_block: Pointer to the amdgpu_ip_block structure + * + * Check whether VCN block is idle + */ +static bool vcn_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE); + + return ret; +} + +/** + * vcn_v5_0_1_wait_for_idle - wait for VCN block idle + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Wait for VCN block idle + */ +static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = state == AMD_CG_STATE_GATE; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + if (enable) { + if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v5_0_1_enable_clock_gating(vinst); + } else { + vcn_v5_0_1_disable_clock_gating(vinst); + } + } + + return 0; +} + +static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = vinst->adev; + int ret = 0; + + /* for SRIOV, guest should not control VCN Power-gating + * MMSCH FW should control Power-gating and clock-gating + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + vinst->cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + + if (state == vinst->cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v5_0_1_stop(vinst); + else + ret = vcn_v5_0_1_start(vinst); + + if (!ret) + vinst->cur_state = state; + + return ret; +} + +/** + * vcn_v5_0_1_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t i, inst; + + i = node_id_to_phys_map[entry->node_id]; + + DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n"); + + for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) + if (adev->vcn.inst[inst].aid_id == i) + break; + if (inst >= adev->vcn.num_vcn_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown VCN instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static int vcn_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = { + .process = vcn_v5_0_1_process_interrupt, +}; + +static const struct amdgpu_irq_src_funcs vcn_v5_0_1_ras_irq_funcs = { + .set = vcn_v5_0_1_set_ras_interrupt_state, + .process = amdgpu_vcn_process_poison_irq, +}; + + +/** + * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + adev->vcn.inst->irq.num_types++; + + adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs; + + adev->vcn.inst->ras_poison_irq.num_types = 1; + adev->vcn.inst->ras_poison_irq.funcs = &vcn_v5_0_1_ras_irq_funcs; + +} + +static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { + .name = "vcn_v5_0_1", + .early_init = vcn_v5_0_1_early_init, + .late_init = NULL, + .sw_init = vcn_v5_0_1_sw_init, + .sw_fini = vcn_v5_0_1_sw_fini, + .hw_init = vcn_v5_0_1_hw_init, + .hw_fini = vcn_v5_0_1_hw_fini, + .suspend = vcn_v5_0_1_suspend, + .resume = vcn_v5_0_1_resume, + .is_idle = vcn_v5_0_1_is_idle, + .wait_for_idle = vcn_v5_0_1_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v5_0_1_set_clockgating_state, + .set_powergating_state = vcn_set_powergating_state, + .dump_ip_state = vcn_v5_0_0_dump_ip_state, + .print_ip_state = vcn_v5_0_0_print_ip_state, +}; + +const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 5, + .minor = 0, + .rev = 1, + .funcs = &vcn_v5_0_1_ip_funcs, +}; + +static uint32_t vcn_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_VCN_V5_0_1_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v5_0_1_query_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v5_0_1_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +static const struct amdgpu_ras_block_hw_ops vcn_v5_0_1_ras_hw_ops = { + .query_poison_status = vcn_v5_0_1_query_poison_status, +}; + +static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int vcn_v5_0_1_err_codes[] = { + 14, 15, /* VCN */ +}; + +static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + vcn_v5_0_1_err_codes, + ARRAY_SIZE(vcn_v5_0_1_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops vcn_v5_0_1_aca_bank_ops = { + .aca_bank_parser = vcn_v5_0_1_aca_bank_parser, + .aca_bank_is_valid = vcn_v5_0_1_aca_bank_is_valid, +}; + +static const struct aca_info vcn_v5_0_1_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &vcn_v5_0_1_aca_bank_ops, +}; + +static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN, + &vcn_v5_0_1_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + +static struct amdgpu_vcn_ras vcn_v5_0_1_ras = { + .ras_block = { + .hw_ops = &vcn_v5_0_1_ras_hw_ops, + .ras_late_init = vcn_v5_0_1_ras_late_init, + }, +}; + +static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ras = &vcn_v5_0_1_ras; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 792b2eb6bbac..48ab93e715c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -167,16 +167,16 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] = { { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 3, }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 5, }, { @@ -188,9 +188,9 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] = }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 4, }, }; @@ -206,16 +206,16 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = { { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 3, }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 5, }, { @@ -227,9 +227,9 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 4, }, { @@ -239,13 +239,6 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = .max_pixels_per_frame = 4096 * 4096, .max_level = 186, }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, }; static const struct amdgpu_video_codecs cz_video_codecs_decode = diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 02f7ba8c93cd..6c8c9935a0f2 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -3640,7 +3640,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { }; static const uint32_t cwsr_trap_gfx12_hex[] = { - 0xbfa00001, 0xbfa0024b, + 0xbfa00001, 0xbfa002a2, 0xb0804009, 0xb8f8f804, 0x9178ff78, 0x00008c00, 0xb8fbf811, 0x8b6eff78, @@ -3714,7 +3714,15 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x00011677, 0xd7610000, 0x00011a79, 0xd7610000, 0x00011c7e, 0xd7610000, - 0x00011e7f, 0xbefe00ff, + 0x00011e7f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xbefe00ff, 0x00003fff, 0xbeff0080, 0xee0a407a, 0x000c0000, 0x00004000, 0xd760007a, @@ -3751,38 +3759,46 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x00000200, 0xbef600ff, 0x01000000, 0x7e000280, 0x7e020280, 0x7e040280, - 0xbefd0080, 0xbe804ec2, - 0xbf94fffe, 0xb8faf804, - 0x8b7a847a, 0x91788478, - 0x8c787a78, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xd7610002, 0x0000fa6c, - 0x807d817d, 0x917aff6d, - 0x80000000, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6e, - 0x807d817d, 0xd7610002, - 0x0000fa6f, 0x807d817d, - 0xd7610002, 0x0000fa78, - 0x807d817d, 0xb8faf811, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xd7610002, - 0x0000fa7b, 0x807d817d, - 0xb8f1f801, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f814, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f815, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f812, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f813, 0xd7610002, - 0x0000fa71, 0x807d817d, + 0xbe804ec2, 0xbf94fffe, + 0xb8faf804, 0x8b7a847a, + 0x91788478, 0x8c787a78, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x00010071, + 0xd7610002, 0x0001026c, + 0xd7610002, 0x0001047a, + 0xd7610002, 0x0001066e, + 0xd7610002, 0x0001086f, + 0xd7610002, 0x00010a78, + 0xd7610002, 0x00010e7b, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xb8faf811, 0xd7610002, + 0x00010c7a, 0xb8faf801, + 0xd7610002, 0x0001107a, + 0xb8faf814, 0xd7610002, + 0x0001127a, 0xb8faf815, + 0xd7610002, 0x0001147a, + 0xb8faf812, 0xd7610002, + 0x0001167a, 0xb8faf813, + 0xd7610002, 0x0001187a, 0xb8faf802, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xbefa50c1, 0xbfc70000, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xbefe00ff, + 0x00011a7a, 0xbefa50c1, + 0xbfc70000, 0xd7610002, + 0x00011c7a, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xbefe00ff, 0x0000ffff, 0xbeff0080, 0xc4068070, 0x008ce802, 0x00000000, 0xbefe00c1, @@ -3797,328 +3813,356 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0xbe824102, 0xbe844104, 0xbe864106, 0xbe884108, 0xbe8a410a, 0xbe8c410c, - 0xbe8e410e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, - 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, - 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, - 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, - 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, - 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, - 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbfa10007, 0xc4068070, + 0xbe8e410e, 0xbf068079, + 0xbfa10032, 0xd7610002, + 0x00010000, 0xd7610002, + 0x00010201, 0xd7610002, + 0x00010402, 0xd7610002, + 0x00010603, 0xd7610002, + 0x00010804, 0xd7610002, + 0x00010a05, 0xd7610002, + 0x00010c06, 0xd7610002, + 0x00010e07, 0xd7610002, + 0x00011008, 0xd7610002, + 0x00011209, 0xd7610002, + 0x0001140a, 0xd7610002, + 0x0001160b, 0xd7610002, + 0x0001180c, 0xd7610002, + 0x00011a0d, 0xd7610002, + 0x00011c0e, 0xd7610002, + 0x00011e0f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0x80799079, + 0xbfa00038, 0xd7610002, + 0x00012000, 0xd7610002, + 0x00012201, 0xd7610002, + 0x00012402, 0xd7610002, + 0x00012603, 0xd7610002, + 0x00012804, 0xd7610002, + 0x00012a05, 0xd7610002, + 0x00012c06, 0xd7610002, + 0x00012e07, 0xd7610002, + 0x00013008, 0xd7610002, + 0x00013209, 0xd7610002, + 0x0001340a, 0xd7610002, + 0x0001360b, 0xd7610002, + 0x0001380c, 0xd7610002, + 0x00013a0d, 0xd7610002, + 0x00013c0e, 0xd7610002, + 0x00013e0f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0x80799079, + 0xc4068070, 0x008ce802, + 0x00000000, 0x8070ff70, + 0x00000080, 0xbef90080, + 0x7e040280, 0x807d907d, + 0xbf0aff7d, 0x00000060, + 0xbfa2ff88, 0xbe804100, + 0xbe824102, 0xbe844104, + 0xbe864106, 0xbe884108, + 0xbe8a410a, 0xd7610002, + 0x00010000, 0xd7610002, + 0x00010201, 0xd7610002, + 0x00010402, 0xd7610002, + 0x00010603, 0xd7610002, + 0x00010804, 0xd7610002, + 0x00010a05, 0xd7610002, + 0x00010c06, 0xd7610002, + 0x00010e07, 0xd7610002, + 0x00011008, 0xd7610002, + 0x00011209, 0xd7610002, + 0x0001140a, 0xd7610002, + 0x0001160b, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xc4068070, 0x008ce802, 0x00000000, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0x8b7aff6d, + 0x80000000, 0xbfa10041, + 0x847b897b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0x8070ff70, 0x00000080, - 0xbef90080, 0x7e040280, - 0x807d907d, 0xbf0aff7d, - 0x00000060, 0xbfa2ffbb, - 0xbe804100, 0xbe824102, - 0xbe844104, 0xbe864106, - 0xbe884108, 0xbe8a410a, - 0xd7610002, 0x0000f200, - 0x80798179, 0xd7610002, - 0x0000f201, 0x80798179, - 0xd7610002, 0x0000f202, - 0x80798179, 0xd7610002, - 0x0000f203, 0x80798179, - 0xd7610002, 0x0000f204, - 0x80798179, 0xd7610002, - 0x0000f205, 0x80798179, - 0xd7610002, 0x0000f206, - 0x80798179, 0xd7610002, - 0x0000f207, 0x80798179, - 0xd7610002, 0x0000f208, - 0x80798179, 0xd7610002, - 0x0000f209, 0x80798179, - 0xd7610002, 0x0000f20a, - 0x80798179, 0xd7610002, - 0x0000f20b, 0x80798179, - 0xc4068070, 0x008ce802, - 0x00000000, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8fb4306, - 0x8b7bc17b, 0xbfa10044, - 0x8b7aff6d, 0x80000000, - 0xbfa10041, 0x847b897b, - 0xbef6007b, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xd71f0000, - 0x000100c1, 0xd7200000, - 0x000200c1, 0x16000084, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa20013, 0xbe8300ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf8a0000, 0xc4068070, - 0x008ce801, 0x00000000, - 0x807d037d, 0x80700370, - 0xd5250000, 0x0001ff00, - 0x00000080, 0xbf0a7b7d, - 0xbfa2fff3, 0xbfa00012, - 0xbe8300ff, 0x00000100, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa20013, + 0xbe8300ff, 0x00000080, 0xbf800000, 0xbf800000, 0xbf800000, 0xd8d80000, 0x01000000, 0xbf8a0000, 0xc4068070, 0x008ce801, 0x00000000, 0x807d037d, 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, + 0x0001ff00, 0x00000080, 0xbf0a7b7d, 0xbfa2fff3, - 0xbefe00c1, 0x857d9973, - 0x8b7d817d, 0xbf06817d, - 0xbfa20004, 0xbef000ff, - 0x00000200, 0xbeff0080, - 0xbfa00003, 0xbef000ff, - 0x00000400, 0xbeff00c1, - 0xb8fb3b05, 0x807b817b, - 0x847b827b, 0x857d9973, - 0x8b7d817d, 0xbf06817d, - 0xbfa2001b, 0xbef600ff, - 0x01000000, 0xbefd0084, - 0xbf0a7b7d, 0xbfa10040, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, - 0xc4068070, 0x008ce800, - 0x00000000, 0xc4068070, - 0x008ce801, 0x00008000, - 0xc4068070, 0x008ce802, - 0x00010000, 0xc4068070, - 0x008ce803, 0x00018000, - 0x807d847d, 0x8070ff70, - 0x00000200, 0xbf0a7b7d, - 0xbfa2ffeb, 0xbfa0002a, + 0xbfa00012, 0xbe8300ff, + 0x00000100, 0xbf800000, + 0xbf800000, 0xbf800000, + 0xd8d80000, 0x01000000, + 0xbf8a0000, 0xc4068070, + 0x008ce801, 0x00000000, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff3, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa2001b, 0xbef600ff, 0x01000000, 0xbefd0084, 0xbf0a7b7d, - 0xbfa10015, 0x7e008700, + 0xbfa10040, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xc4068070, 0x008ce800, 0x00000000, 0xc4068070, 0x008ce801, - 0x00010000, 0xc4068070, - 0x008ce802, 0x00020000, + 0x00008000, 0xc4068070, + 0x008ce802, 0x00010000, 0xc4068070, 0x008ce803, - 0x00030000, 0x807d847d, - 0x8070ff70, 0x00000400, + 0x00018000, 0x807d847d, + 0x8070ff70, 0x00000200, 0xbf0a7b7d, 0xbfa2ffeb, - 0xb8fb1e06, 0x8b7bc17b, - 0xbfa1000d, 0x847b837b, - 0x807b7d7b, 0xbefe00c1, - 0xbeff0080, 0x7e008700, + 0xbfa0002a, 0xbef600ff, + 0x01000000, 0xbefd0084, + 0xbf0a7b7d, 0xbfa10015, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, 0xc4068070, 0x008ce800, - 0x00000000, 0x807d817d, - 0x8070ff70, 0x00000080, - 0xbf0a7b7d, 0xbfa2fff7, - 0xbfa0016e, 0xbef4007e, - 0x8b75ff7f, 0x0000ffff, - 0x8c75ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x10807fac, 0xbef1007f, - 0xb8f20742, 0x84729972, - 0x8b6eff7f, 0x04000000, - 0xbfa1003b, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef4306, - 0x8b6fc16f, 0xbfa10030, - 0x846f896f, 0xbef6006f, + 0x00000000, 0xc4068070, + 0x008ce801, 0x00010000, + 0xc4068070, 0x008ce802, + 0x00020000, 0xc4068070, + 0x008ce803, 0x00030000, + 0x807d847d, 0x8070ff70, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffeb, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000d, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xc4068070, + 0x008ce800, 0x00000000, + 0x807d817d, 0x8070ff70, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff7, 0xbfa0016e, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007f, 0xb8f20742, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003b, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa10030, 0x846f896f, + 0xbef6006f, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa2000d, + 0xc4050078, 0x0080e800, + 0x00000000, 0xbf8a0000, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff4, + 0xbfa0000c, 0xc4050078, + 0x0080e800, 0x00000000, + 0xbf8a0000, 0xdac00000, + 0x00000000, 0x807dff7d, + 0x00000100, 0x8078ff78, + 0x00000100, 0xbf0a6f7d, + 0xbfa2fff4, 0xbef80080, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef3b05, 0x806f816f, + 0x846f826f, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa2002c, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000200, + 0xbefd0084, 0xbf0a6f7d, + 0xbfa10061, 0xc4050078, + 0x008ce800, 0x00000000, + 0xc4050078, 0x008ce801, + 0x00008000, 0xc4050078, + 0x008ce802, 0x00010000, + 0xc4050078, 0x008ce803, + 0x00018000, 0xbf8a0000, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807d847d, 0x8078ff78, + 0x00000200, 0xbf0a6f7d, + 0xbfa2ffea, 0xc405006e, + 0x008ce800, 0x00000000, + 0xc405006e, 0x008ce801, + 0x00008000, 0xc405006e, + 0x008ce802, 0x00010000, + 0xc405006e, 0x008ce803, + 0x00018000, 0xbf8a0000, + 0xbfa0003d, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefd0084, 0xbf0a6f7d, + 0xbfa10016, 0xc4050078, + 0x008ce800, 0x00000000, + 0xc4050078, 0x008ce801, + 0x00010000, 0xc4050078, + 0x008ce802, 0x00020000, + 0xc4050078, 0x008ce803, + 0x00030000, 0xbf8a0000, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807d847d, 0x8078ff78, + 0x00000400, 0xbf0a6f7d, + 0xbfa2ffea, 0xb8ef1e06, + 0x8b6fc16f, 0xbfa1000f, + 0x846f836f, 0x806f7d6f, + 0xbefe00c1, 0xbeff0080, + 0xc4050078, 0x008ce800, + 0x00000000, 0xbf8a0000, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff6, + 0xbeff00c1, 0xc405006e, + 0x008ce800, 0x00000000, + 0xc405006e, 0x008ce801, + 0x00010000, 0xc405006e, + 0x008ce802, 0x00020000, + 0xc405006e, 0x008ce803, + 0x00030000, 0xbf8a0000, 0xb8f83b05, 0x80788178, 0xbf0d9972, 0xbfa20002, 0x84788978, 0xbfa00001, 0x84788a78, 0xb8ee1e06, 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0x8078ff78, 0x00000080, - 0xbef600ff, 0x01000000, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa2000d, 0xc4050078, - 0x0080e800, 0x00000000, - 0xbf8a0000, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000080, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff4, 0xbfa0000c, - 0xc4050078, 0x0080e800, - 0x00000000, 0xbf8a0000, - 0xdac00000, 0x00000000, - 0x807dff7d, 0x00000100, - 0x8078ff78, 0x00000100, - 0xbf0a6f7d, 0xbfa2fff4, - 0xbef80080, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef3b05, - 0x806f816f, 0x846f826f, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa2002c, + 0x80f8ff78, 0x00000050, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000200, 0xbefd0084, - 0xbf0a6f7d, 0xbfa10061, - 0xc4050078, 0x008ce800, - 0x00000000, 0xc4050078, - 0x008ce801, 0x00008000, - 0xc4050078, 0x008ce802, - 0x00010000, 0xc4050078, - 0x008ce803, 0x00018000, - 0xbf8a0000, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807d847d, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf462403a, + 0xf0000000, 0xbf8a0000, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf462603a, + 0xf0000000, 0xbf8a0000, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf462803a, + 0xf0000000, 0xbf8a0000, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbf0a6f7d, 0xbfa2ffea, - 0xc405006e, 0x008ce800, - 0x00000000, 0xc405006e, - 0x008ce801, 0x00008000, - 0xc405006e, 0x008ce802, - 0x00010000, 0xc405006e, - 0x008ce803, 0x00018000, - 0xbf8a0000, 0xbfa0003d, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefd0084, - 0xbf0a6f7d, 0xbfa10016, - 0xc4050078, 0x008ce800, - 0x00000000, 0xc4050078, - 0x008ce801, 0x00010000, - 0xc4050078, 0x008ce802, - 0x00020000, 0xc4050078, - 0x008ce803, 0x00030000, - 0xbf8a0000, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807d847d, - 0x8078ff78, 0x00000400, - 0xbf0a6f7d, 0xbfa2ffea, - 0xb8ef1e06, 0x8b6fc16f, - 0xbfa1000f, 0x846f836f, - 0x806f7d6f, 0xbefe00c1, - 0xbeff0080, 0xc4050078, - 0x008ce800, 0x00000000, - 0xbf8a0000, 0x7e008500, - 0x807d817d, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff6, 0xbeff00c1, - 0xc405006e, 0x008ce800, - 0x00000000, 0xc405006e, - 0x008ce801, 0x00010000, - 0xc405006e, 0x008ce802, - 0x00020000, 0xc405006e, - 0x008ce803, 0x00030000, - 0xbf8a0000, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef600ff, - 0x01000000, 0xbefd00ff, - 0x0000006c, 0x80f89078, - 0xf462403a, 0xf0000000, - 0xbf8a0000, 0x80fd847d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0x80f8a078, - 0xf462603a, 0xf0000000, - 0xbf8a0000, 0x80fd887d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0x80f8c078, - 0xf462803a, 0xf0000000, - 0xbf8a0000, 0x80fd907d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0xbe884308, - 0xbe8a430a, 0xbe8c430c, - 0xbe8e430e, 0xbf06807d, - 0xbfa1fff0, 0xb980f801, - 0x00000000, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0xbef600ff, - 0x01000000, 0xbeff0071, - 0xf4621bfa, 0xf0000000, - 0x80788478, 0xf4621b3a, + 0xbeff0071, 0xf4621bfa, 0xf0000000, 0x80788478, - 0xf4621b7a, 0xf0000000, - 0x80788478, 0xf4621c3a, + 0xf4621b3a, 0xf0000000, + 0x80788478, 0xf4621b7a, 0xf0000000, 0x80788478, - 0xf4621c7a, 0xf0000000, - 0x80788478, 0xf4621eba, + 0xf4621c3a, 0xf0000000, + 0x80788478, 0xf4621c7a, 0xf0000000, 0x80788478, - 0xf4621efa, 0xf0000000, - 0x80788478, 0xf4621e7a, + 0xf4621eba, 0xf0000000, + 0x80788478, 0xf4621efa, 0xf0000000, 0x80788478, - 0xf4621cfa, 0xf0000000, - 0x80788478, 0xf4621bba, + 0xf4621e7a, 0xf0000000, + 0x80788478, 0xf4621cfa, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xb96ef814, 0xf4621bba, 0xf0000000, 0x80788478, 0xbf8a0000, - 0xb96ef815, 0xf4621bba, + 0xb96ef814, 0xf4621bba, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xb96ef812, + 0xbf8a0000, 0xb96ef815, 0xf4621bba, 0xf0000000, 0x80788478, 0xbf8a0000, - 0xb96ef813, 0x8b6eff7f, - 0x04000000, 0xbfa1000d, - 0x80788478, 0xf4621bba, + 0xb96ef812, 0xf4621bba, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xbf0d806e, - 0xbfa10006, 0x856e906e, - 0x8b6e6e6e, 0xbfa10003, - 0xbe804ec1, 0x816ec16e, - 0xbfa0fffb, 0xbefd006f, - 0xbefe0070, 0xbeff0071, - 0xb97b2011, 0x857b867b, - 0xb97b0191, 0x857b827b, - 0xb97bba11, 0xb973f801, - 0xb8ee3b05, 0x806e816e, - 0xbf0d9972, 0xbfa20002, - 0x846e896e, 0xbfa00001, - 0x846e8a6e, 0xb8ef1e06, - 0x846f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x8b6fff6f, 0x0000ffff, - 0xf4605c37, 0xf8000050, - 0xf4605d37, 0xf8000060, - 0xf4601e77, 0xf8000074, - 0xbf8a0000, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb97af804, + 0xbf8a0000, 0xb96ef813, + 0x8b6eff7f, 0x04000000, + 0xbfa1000d, 0x80788478, + 0xf4621bba, 0xf0000000, + 0x80788478, 0xbf8a0000, + 0xbf0d806e, 0xbfa10006, + 0x856e906e, 0x8b6e6e6e, + 0xbfa10003, 0xbe804ec1, + 0x816ec16e, 0xbfa0fffb, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0xb97b2011, + 0x857b867b, 0xb97b0191, + 0x857b827b, 0xb97bba11, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4605c37, + 0xf8000050, 0xf4605d37, + 0xf8000060, 0xf4601e77, + 0xf8000074, 0xbf8a0000, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb97af804, 0xbe804ec2, + 0xbf94fffe, 0xbe804a6c, 0xbe804ec2, 0xbf94fffe, - 0xbe804a6c, 0xbfb10000, + 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 44772eec9ef4..96fbb16ceb21 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -34,41 +34,24 @@ * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3 * sp3 gfx11.sp3 -hex gfx11.hex * - * gfx12: - * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3 - * sp3 gfx12.sp3 -hex gfx12.hex */ #define CHIP_NAVI10 26 #define CHIP_SIENNA_CICHLID 30 #define CHIP_PLUM_BONITO 36 -#define CHIP_GFX12 37 #define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID) #define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID) #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) -#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12) +#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO) #define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised -#if ASIC_FAMILY < CHIP_GFX12 #define S_COHERENCE glc:1 #define V_COHERENCE slc:1 glc:1 #define S_WAITCNT_0 s_waitcnt 0 -#else -#define S_COHERENCE scope:SCOPE_SYS -#define V_COHERENCE scope:SCOPE_SYS -#define S_WAITCNT_0 s_wait_idle - -#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO -#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI -#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC -#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC -#define HW_REG_MODE HW_REG_WAVE_MODE -#endif -#if ASIC_FAMILY < CHIP_GFX12 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 @@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 var S_SAVE_PC_HI_HT_MASK = 0x01000000 -#else -var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 -var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 -var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 -var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 -var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 -var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 -var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 -var SQ_WAVE_STATUS_WAVE64_SIZE = 1 -var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 -var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV -var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK -var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK -var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 -#endif var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 @@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 #endif -#if ASIC_FAMILY < CHIP_GFX12 var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 @@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT var S_TRAPSTS_HWREG = HW_REG_TRAPSTS var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT -#else -var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF -var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 -var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 -var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 -var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 -var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 -var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 -var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 -var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 -var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 -var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 - -var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV -var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK -var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT -var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK -var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT -var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT -var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT -var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT -var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT -var BARRIER_STATE_SIGNAL_OFFSET = 16 -var BARRIER_STATE_VALID_OFFSET = 0 -#endif // bits [31:24] unused by SPI debug data var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 @@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER: L_HALTED: // Host trap may occur while wave is halted. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK -#else - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK -#endif s_cbranch_scc1 L_FETCH_2ND_TRAP L_CHECK_SAVE: @@ -336,7 +266,6 @@ L_NOT_HALTED: // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. // Maskable exceptions only cause the wave to enter the trap handler if // their respective bit in mode.excp_en is set. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK s_cbranch_scc0 L_CHECK_TRAP_ID @@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH: s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT s_and_b32 ttmp2, ttmp2, ttmp3 s_cbranch_scc1 L_FETCH_2ND_TRAP -#else - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK - s_cbranch_scc0 L_NOT_ADDR_WATCH - s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK - -L_NOT_ADDR_WATCH: - s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) - s_and_b32 ttmp2, ttmp3, ttmp2 - s_cbranch_scc1 L_FETCH_2ND_TRAP -#endif L_CHECK_TRAP_ID: // Check trap_id != 0 @@ -369,13 +287,8 @@ L_CHECK_TRAP_ID: #if SINGLE_STEP_MISSED_WORKAROUND // Prioritize single step exception over context save. // Second-level trap will halt wave and RFE, re-entering for SAVECTX. -#if ASIC_FAMILY < CHIP_GFX12 s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK -#else - // WAVE_TRAP_CTRL is already in ttmp3. - s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK -#endif s_cbranch_scc1 L_FETCH_2ND_TRAP #endif @@ -425,12 +338,7 @@ L_NO_NEXT_TRAP: s_cbranch_scc1 L_TRAP_CASE // Host trap will not cause trap re-entry. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK -#else - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK -#endif s_cbranch_scc1 L_EXIT_TRAP s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK @@ -457,16 +365,7 @@ L_EXIT_TRAP: s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 -#if ASIC_FAMILY < CHIP_GFX12 s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status -#else - // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. - // Only restore fields which the trap handler changes. - s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT - s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ - SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status -#endif - s_rfe_b64 [ttmp0, ttmp1] L_SAVE: @@ -478,14 +377,6 @@ L_SAVE: s_endpgm L_HAVE_VGPRS: #endif -#if ASIC_FAMILY >= CHIP_GFX12 - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT - s_cbranch_scc0 L_HAVE_VGPRS - s_endpgm -L_HAVE_VGPRS: -#endif - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] s_mov_b32 s_save_tmp, 0 s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit @@ -671,19 +562,6 @@ L_SAVE_HWREG: s_mov_b32 m0, 0x0 //Next lane of v2 to write to #endif -#if ASIC_FAMILY >= CHIP_GFX12 - // Ensure no further changes to barrier or LDS state. - // STATE_PRIV.BARRIER_COMPLETE may change up to this point. - s_barrier_signal -2 - s_barrier_wait -2 - - // Re-read final state of BARRIER_COMPLETE field for save. - s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG) - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK - s_or_b32 s_save_status, s_save_status, s_save_tmp -#endif - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK @@ -707,21 +585,6 @@ L_SAVE_HWREG: s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI) write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) -#if ASIC_FAMILY >= CHIP_GFX12 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) - - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) - - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) - - s_get_barrier_state s_save_tmp, -1 - s_wait_kmcnt (0) - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) -#endif - #if NO_SQC_STORE // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. s_mov_b32 exec_lo, 0xFFFF @@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL: s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE -#if ASIC_FAMILY < CHIP_GFX12 s_barrier //LDS is used? wait for other waves in the same TG -#endif s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK s_cbranch_scc0 L_SAVE_LDS_DONE @@ -1081,11 +942,6 @@ L_RESTORE: s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC -#if ASIC_FAMILY >= CHIP_GFX12 - // Save s_restore_spi_init_hi for later use. - s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi -#endif - //determine it is wave32 or wave64 get_wave_size2(s_restore_size) @@ -1320,9 +1176,7 @@ L_RESTORE_SGPR: // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception. // Clear DEBUG_EN before and restore MODE after the barrier. s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0 -#if ASIC_FAMILY < CHIP_GFX12 s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG -#endif /* restore HW registers */ L_RESTORE_HWREG: @@ -1334,11 +1188,6 @@ L_RESTORE_HWREG: s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes -#if ASIC_FAMILY >= CHIP_GFX12 - // Restore s_restore_spi_init_hi before the saved value gets clobbered. - s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save -#endif - read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) @@ -1358,44 +1207,6 @@ L_RESTORE_HWREG: s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch -#if ASIC_FAMILY >= CHIP_GFX12 - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp - - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp - - // Only the first wave needs to restore the workgroup barrier. - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - - // Skip over WAVE_STATUS, since there is no state to restore from it - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 - - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - - s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - - // extract the saved signal count from s_restore_tmp - s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET - - // We need to call s_barrier_signal repeatedly to restore the signal - // count of the work group barrier. The member count is already - // initialized with the number of waves in the work group. -L_BARRIER_RESTORE_LOOP: - s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - s_barrier_signal -1 - s_add_i32 s_restore_tmp, s_restore_tmp, -1 - s_branch L_BARRIER_RESTORE_LOOP - -L_SKIP_BARRIER_RESTORE: -#endif - s_mov_b32 m0, s_restore_m0 s_mov_b32 exec_lo, s_restore_exec_lo s_mov_b32 exec_hi, s_restore_exec_hi @@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV: s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu -#if ASIC_FAMILY >= CHIP_GFX12 - // Make barrier and LDS state visible to all waves in the group. - // STATE_PRIV.BARRIER_COMPLETE may change after this point. - s_barrier_signal -2 - s_barrier_wait -2 -#endif - s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: @@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes end function get_wave_size2(s_reg) -#if ASIC_FAMILY < CHIP_GFX12 s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) -#else - s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) -#endif s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm new file mode 100644 index 000000000000..5a1a1b1f897f --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -0,0 +1,1136 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* To compile this assembly code: + * + * gfx12: + * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3 + * sp3 gfx12.sp3 -hex gfx12.hex + */ + +#define CHIP_GFX12 37 + +#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised +#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12) + +var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 +var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 +var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 +var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 +var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 +var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 +var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 +var SQ_WAVE_STATUS_WAVE64_SIZE = 1 +var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 +var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK +var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 +var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 + +var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF +var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 +var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 +var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 +var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 +var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 +var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 +var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 +var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 + +var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT +var BARRIER_STATE_SIGNAL_OFFSET = 16 +var BARRIER_STATE_VALID_OFFSET = 0 + +var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 + +// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] +// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 +var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31 + +var s_sgpr_save_num = 108 + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi +var s_save_pc_lo = ttmp0 +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_state_priv = ttmp12 +var s_save_excp_flag_priv = ttmp15 +var s_save_xnack_mask = s_save_excp_flag_priv +var s_wave_size = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 +var s_save_mem_offset = ttmp4 +var s_save_alloc_size = s_save_excp_flag_priv +var s_save_tmp = ttmp14 +var s_save_m0 = ttmp5 +var s_save_ttmps_lo = s_save_tmp +var s_save_ttmps_hi = s_save_excp_flag_priv + +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 +var S_WAVE_SIZE = 25 + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi +var s_restore_mem_offset = ttmp12 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp2 +var s_restore_mem_offset_save = s_restore_tmp +var s_restore_m0 = s_restore_alloc_size +var s_restore_mode = ttmp7 +var s_restore_flat_scratch = s_restore_tmp +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = ttmp4 +var s_restore_exec_hi = ttmp5 +var s_restore_state_priv = ttmp14 +var s_restore_excp_flag_priv = ttmp15 +var s_restore_xnack_mask = ttmp13 +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 +var s_restore_size = ttmp6 +var s_restore_ttmps_lo = s_restore_tmp +var s_restore_ttmps_hi = s_restore_alloc_size +var s_restore_spi_init_hi_save = s_restore_exec_hi + +shader main + asic(DEFAULT) + type(CS) + wave_size(32) + + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE + +L_SKIP_RESTORE: + s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC + + // Clear SPI_PRIO: do not save with elevated priority. + // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. + s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK + + s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + + s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK + s_cbranch_scc0 L_NOT_HALTED + +L_HALTED: + // Host trap may occur while wave is halted. + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_SAVE: + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK + s_cbranch_scc1 L_SAVE + + // Wave is halted but neither host trap nor SAVECTX is raised. + // Caused by instruction fetch memory violation. + // Spin wait until context saved to prevent interrupt storm. + s_sleep 0x10 + s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + s_branch L_CHECK_SAVE + +L_NOT_HALTED: + // Let second-level handle non-SAVECTX exception or trap. + // Any concurrent SAVECTX will be handled upon re-entry once halted. + + // Check non-maskable exceptions. memory_violation, illegal_instruction + // and xnack_error exceptions always cause the wave to enter the trap + // handler. + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + + // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. + // Maskable exceptions only cause the wave to enter the trap handler if + // their respective bit in mode.excp_en is set. + s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) + s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK + s_cbranch_scc0 L_NOT_ADDR_WATCH + s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK + +L_NOT_ADDR_WATCH: + s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) + s_and_b32 ttmp2, ttmp3, ttmp2 + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_TRAP_ID: + // Check trap_id != 0 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +#if SINGLE_STEP_MISSED_WORKAROUND + // Prioritize single step exception over context save. + // Second-level trap will halt wave and RFE, re-entering for SAVECTX. + // WAVE_TRAP_CTRL is already in ttmp3. + s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP +#endif + + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK + s_cbranch_scc1 L_SAVE + +L_FETCH_2ND_TRAP: + // Read second-level TBA/TMA from first-level TMA and jump if available. + // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) + // ttmp12 holds SQ_WAVE_STATUS + s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA) + s_wait_idle + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + + s_bitcmp1_b32 ttmp15, 0xF + s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA + s_or_b32 ttmp15, ttmp15, 0xFFFF0000 +L_NO_SIGN_EXTEND_TMA: + + s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag + s_wait_idle + s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK + s_or_b32 ttmp11, ttmp11, ttmp2 + + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA + s_wait_idle + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA + s_wait_idle + + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] + s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set + s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler + +L_NO_NEXT_TRAP: + // If not caused by trap then halt wave to prevent re-entry. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_TRAP_CASE + + // Host trap will not cause trap re-entry. + s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK + s_cbranch_scc1 L_EXIT_TRAP + s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set. + // Rewind the PC to prevent this from occurring. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + + s_branch L_EXIT_TRAP + +L_TRAP_CASE: + // Advance past trap instruction to prevent re-entry. + s_add_u32 ttmp0, ttmp0, 0x4 + s_addc_u32 ttmp1, ttmp1, 0x0 + +L_EXIT_TRAP: + s_and_b32 ttmp1, ttmp1, 0xFFFF + + // Restore SQ_WAVE_STATUS. + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + + // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. + // Only restore fields which the trap handler changes. + s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ + SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv + + s_rfe_b64 [ttmp0, ttmp1] + +L_SAVE: + // If VGPRs have been deallocated then terminate the wavefront. + // It has no remaining program to run and cannot save without VGPRs. + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) + s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT + s_cbranch_scc0 L_HAVE_VGPRS + s_endpgm +L_HAVE_VGPRS: + + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_mov_b32 s_save_tmp, 0 + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit + + /* inform SPI the readiness and wait for SPI's go signal */ + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI + s_mov_b32 s_save_exec_hi, exec_hi + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive + + s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE) + s_wait_idle + + // Save first_wave flag so we can clear high bits of save address. + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK + s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT) + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + + // Trap temporaries must be saved via VGPR but all VGPRs are in use. + // There is no ttmp space to hold the resource constant for VGPR save. + // Save v0 by itself since it requires only two SGPRs. + s_mov_b32 s_save_ttmps_lo, exec_lo + s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0xFFFFFFFF + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS + v_mov_b32 v0, 0x0 + s_mov_b32 exec_lo, s_save_ttmps_lo + s_mov_b32 exec_hi, s_save_ttmps_hi + + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 + get_wave_size2(s_save_ttmps_hi) + get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi) + get_svgpr_size_bytes(s_save_ttmps_hi) + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi + s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes() + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo + s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0 + + v_writelane_b32 v0, ttmp4, 0x4 + v_writelane_b32 v0, ttmp5, 0x5 + v_writelane_b32 v0, ttmp6, 0x6 + v_writelane_b32 v0, ttmp7, 0x7 + v_writelane_b32 v0, ttmp8, 0x8 + v_writelane_b32 v0, ttmp9, 0x9 + v_writelane_b32 v0, ttmp10, 0xA + v_writelane_b32 v0, ttmp11, 0xB + v_writelane_b32 v0, ttmp13, 0xD + v_writelane_b32 v0, exec_lo, 0xE + v_writelane_b32 v0, exec_hi, 0xF + valu_sgpr_hazard() + + s_mov_b32 exec_lo, 0x3FFF + s_mov_b32 exec_hi, 0x0 + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS + v_readlane_b32 ttmp14, v0, 0xE + v_readlane_b32 ttmp15, v0, 0xF + s_mov_b32 exec_lo, ttmp14 + s_mov_b32 exec_hi, ttmp15 + + /* setup Resource Contants */ + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + + s_mov_b32 s_save_m0, m0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 + get_wave_size2(s_wave_size) + + /* save first 4 VGPRs, needed for SGPR save */ + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_SAVE_4VGPR_WAVE32 +L_ENABLE_SAVE_4VGPR_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF + s_branch L_SAVE_4VGPR_WAVE64 +L_SAVE_4VGPR_WAVE32: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 + s_branch L_SAVE_HWREG + +L_SAVE_4VGPR_WAVE64: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 + + /* save HW registers */ + +L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) + get_svgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() + + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource + v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource + v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store + + // Ensure no further changes to barrier or LDS state. + // STATE_PRIV.BARRIER_COMPLETE may change up to this point. + s_barrier_signal -2 + s_barrier_wait -2 + + // Re-read final state of BARRIER_COMPLETE field for save. + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV) + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK + s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK + s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp + + s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK + v_writelane_b32 v2, s_save_m0, 0x0 + v_writelane_b32 v2, s_save_pc_lo, 0x1 + v_writelane_b32 v2, s_save_tmp, 0x2 + v_writelane_b32 v2, s_save_exec_lo, 0x3 + v_writelane_b32 v2, s_save_exec_hi, 0x4 + v_writelane_b32 v2, s_save_state_priv, 0x5 + v_writelane_b32 v2, s_save_xnack_mask, 0x7 + valu_sgpr_hazard() + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + v_writelane_b32 v2, s_save_tmp, 0x6 + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_MODE) + v_writelane_b32 v2, s_save_tmp, 0x8 + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) + v_writelane_b32 v2, s_save_tmp, 0x9 + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) + v_writelane_b32 v2, s_save_tmp, 0xA + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) + v_writelane_b32 v2, s_save_tmp, 0xB + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL) + v_writelane_b32 v2, s_save_tmp, 0xC + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) + v_writelane_b32 v2, s_save_tmp, 0xD + + s_get_barrier_state s_save_tmp, -1 + s_wait_kmcnt (0) + v_writelane_b32 v2, s_save_tmp, 0xE + valu_sgpr_hazard() + + // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. + s_mov_b32 exec_lo, 0xFFFF + s_mov_b32 exec_hi, 0x0 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode. + s_mov_b32 exec_lo, 0xFFFFFFFF + + /* save SGPRs */ + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... + + // SGPR SR memory offset : size(VGPR)+size(SVGPR) + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) + get_svgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + s_nop 0x0 //Manually inserted wait states +L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + s_cmp_eq_u32 ttmp13, 0x0 + s_cbranch_scc0 L_WRITE_V2_SECOND_HALF + write_16sgpr_to_v2(s0, 0x0) + s_branch L_SAVE_SGPR_SKIP_TCP_STORE +L_WRITE_V2_SECOND_HALF: + write_16sgpr_to_v2(s0, 0x10) + + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80 + s_mov_b32 ttmp13, 0x0 + v_mov_b32 v2, 0x0 +L_SAVE_SGPR_SKIP_TCP_STORE: + + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete? + + //save the rest 12 SGPR + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + write_12sgpr_to_v2(s0) + + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + /* save LDS */ + +L_SAVE_LDS: + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_SAVE_LDS_NORMAL +L_ENABLE_SAVE_LDS_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF +L_SAVE_LDS_NORMAL: + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) + get_svgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + //load 0~63*4(byte address) to vgpr v0 + v_mbcnt_lo_u32_b32 v0, -1, 0 + v_mbcnt_hi_u32_b32 v0, -1, v0 + v_mul_u32_u24 v0, 4, v0 + + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_mov_b32 m0, 0x0 + s_cbranch_scc1 L_SAVE_LDS_W64 + +L_SAVE_LDS_W32: + s_mov_b32 s3, 128 + s_nop 0 + s_nop 0 + s_nop 0 +L_SAVE_LDS_LOOP_W32: + ds_read_b32 v1, v0 + s_wait_idle + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 + v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete? + + s_branch L_SAVE_LDS_DONE + +L_SAVE_LDS_W64: + s_mov_b32 s3, 256 + s_nop 0 + s_nop 0 + s_nop 0 +L_SAVE_LDS_LOOP_W64: + ds_read_b32 v1, v0 + s_wait_idle + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 + v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete? + +L_SAVE_LDS_DONE: + /* save VGPRs - set the Rest VGPRs */ +L_SAVE_VGPR: + // VGPR SR memory offset: 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI + s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs + s_mov_b32 exec_hi, 0x00000000 + s_branch L_SAVE_VGPR_NORMAL +L_ENABLE_SAVE_VGPR_EXEC_HI: + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs + s_mov_b32 exec_hi, 0xFFFFFFFF +L_SAVE_VGPR_NORMAL: + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + //determine it is wave32 or wave64 + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_SAVE_VGPR_WAVE64 + + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + +L_SAVE_VGPR_W32_LOOP: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + v_movrels_b32 v1, v1 //v1 = v[1+m0] + v_movrels_b32 v2, v2 //v2 = v[2+m0] + v_movrels_b32 v3, v3 //v3 = v[3+m0] + + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete? + + s_branch L_SAVE_VGPR_END + +L_SAVE_VGPR_WAVE64: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_SHARED_VGPR + +L_SAVE_VGPR_W64_LOOP: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + v_movrels_b32 v1, v1 //v1 = v[1+m0] + v_movrels_b32 v2, v2 //v2 = v[2+m0] + v_movrels_b32 v3, v3 //v3 = v[3+m0] + + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete? + +L_SAVE_SHARED_VGPR: + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? + s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) + //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. + //save shared_vgpr will start from the index of m0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0x00000000 + +L_SAVE_SHARED_VGPR_WAVE64_LOOP: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + s_add_u32 m0, m0, 1 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete? + +L_SAVE_VGPR_END: + s_branch L_END_PGM + +L_RESTORE: + /* Setup Resource Contants */ + s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo + s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC + + // Save s_restore_spi_init_hi for later use. + s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi + + //determine it is wave32 or wave64 + get_wave_size2(s_restore_size) + + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_RESTORE_VGPR + + /* restore LDS */ +L_RESTORE_LDS: + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_RESTORE_LDS_NORMAL +L_ENABLE_RESTORE_LDS_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF +L_RESTORE_LDS_NORMAL: + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_mov_b32 m0, 0x0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 + +L_RESTORE_LDS_LOOP_W32: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_wait_idle + ds_store_addtid_b32 v0 + s_add_u32 m0, m0, 128 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete? + s_branch L_RESTORE_VGPR + +L_RESTORE_LDS_LOOP_W64: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_wait_idle + ds_store_addtid_b32 v0 + s_add_u32 m0, m0, 256 // 256 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete? + + /* restore VGPRs */ +L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_RESTORE_VGPR_NORMAL +L_ENABLE_RESTORE_VGPR_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF +L_RESTORE_VGPR_NORMAL: + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + //determine it is wave32 or wave64 + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE64 + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 + s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SGPR + +L_RESTORE_VGPR_WAVE32_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3 + s_wait_idle + v_movreld_b32 v0, v0 //v[0+m0] = v0 + v_movreld_b32 v1, v1 + v_movreld_b32 v2, v2 + v_movreld_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete? + + /* VGPR restore on v0 */ + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3 + s_wait_idle + + s_branch L_RESTORE_SGPR + +L_RESTORE_VGPR_WAVE64: + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SHARED_VGPR + +L_RESTORE_VGPR_WAVE64_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3 + s_wait_idle + v_movreld_b32 v0, v0 //v[0+m0] = v0 + v_movreld_b32 v1, v1 + v_movreld_b32 v2, v2 + v_movreld_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? + +L_RESTORE_SHARED_VGPR: + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? + s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used? + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) + //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. + //restore shared_vgpr will start from the index of m0 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0 + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0x00000000 +L_RESTORE_SHARED_VGPR_WAVE64_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS + s_wait_idle + v_movreld_b32 v0, v0 //v[0+m0] = v0 + s_add_u32 m0, m0, 1 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? + + s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!! + + /* VGPR restore on v0 */ +L_RESTORE_V0: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3 + s_wait_idle + + /* restore SGPRs */ + //will be 2+8+16*6 + // SGPR SR memory offset : size(VGPR)+size(SVGPR) +L_RESTORE_SGPR: + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + s_mov_b32 m0, s_sgpr_save_num + + read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + + read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP + + // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception. + // Clear DEBUG_EN before and restore MODE after the barrier. + s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0 + + /* restore HW registers */ +L_RESTORE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // Restore s_restore_spi_init_hi before the saved value gets clobbered. + s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch + + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp + + // Only the first wave needs to restore the workgroup barrier. + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + + // Skip over WAVE_STATUS, since there is no state to restore from it + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + + // extract the saved signal count from s_restore_tmp + s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET + + // We need to call s_barrier_signal repeatedly to restore the signal + // count of the work group barrier. The member count is already + // initialized with the number of waves in the work group. +L_BARRIER_RESTORE_LOOP: + s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + s_barrier_signal -1 + s_add_i32 s_restore_tmp, s_restore_tmp, -1 + s_branch L_BARRIER_RESTORE_LOOP + +L_SKIP_BARRIER_RESTORE: + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed. + // Only restore the other fields to avoid clobbering them. + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv + + s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode + + // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size) + get_svgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes() + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF + s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS + s_wait_idle + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + + s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu + + // Make barrier and LDS state visible to all waves in the group. + // STATE_PRIV.BARRIER_COMPLETE may change after this point. + s_barrier_signal -2 + s_barrier_wait -2 + + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + +L_END_PGM: + // Make sure that no wave of the workgroup can exit the trap handler + // before the workgroup barrier state is saved. + s_barrier_signal -2 + s_barrier_wait -2 + s_endpgm_saved +end + +function write_16sgpr_to_v2(s, lane_offset) + // Copy into VGPR for later TCP store. + for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++ + v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset + end + valu_sgpr_hazard() + s_add_u32 ttmp13, ttmp13, 0x10 +end + +function write_12sgpr_to_v2(s) + // Copy into VGPR for later TCP store. + for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++ + v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + end + valu_sgpr_hazard() +end + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS +end + +function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_sub_u32 s_mem_offset, s_mem_offset, 4*8 + s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS +end + +function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_sub_u32 s_mem_offset, s_mem_offset, 4*4 + s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS +end + +function get_vgpr_size_bytes(s_vgpr_size_byte, s_size) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_bitcmp1_b32 s_size, S_WAVE_SIZE + s_cbranch_scc1 L_ENABLE_SHIFT_W64 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value) + s_branch L_SHIFT_DONE +L_ENABLE_SHIFT_W64: + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) +L_SHIFT_DONE: +end + +function get_svgpr_size_bytes(s_svgpr_size_byte) + s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) + s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7) +end + +function get_sgpr_size_bytes + return 512 +end + +function get_hwreg_size_bytes + return 128 +end + +function get_wave_size2(s_reg) + s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) + s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE +end + +function valu_sgpr_hazard +#if HAVE_VALU_SGPR_HAZARD + for var rep = 0; rep < 8; rep ++ + ds_nop + end +#endif +end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 3e6b4736a7fe..67b5f3d7ff8e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -212,6 +212,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return -EINVAL; } + if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) { + args->ring_size = KFD_MIN_QUEUE_RING_SIZE; + pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); + } + if (!access_ok((const void __user *) args->read_pointer_address, sizeof(uint32_t))) { pr_err("Can't access read pointer\n"); @@ -461,6 +466,11 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, return -EINVAL; } + if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) { + args->ring_size = KFD_MIN_QUEUE_RING_SIZE; + pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); + } + properties.queue_address = args->ring_base_address; properties.queue_size = args->ring_size; properties.queue_percent = args->queue_percentage & 0xFF; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index b05be24531e1..07eadab4c1c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -537,7 +537,8 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) kfd->cwsr_isa = cwsr_trap_gfx11_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); } else { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_gfx12_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex); } @@ -637,6 +638,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes) struct kfd_node *knode; unsigned int i; + /* + * flush_work ensures that there are no outstanding + * work-queue items that will access interrupt_ring. New work items + * can't be created because we stopped interrupt handling above. + */ + flush_workqueue(kfd->ih_wq); + destroy_workqueue(kfd->ih_wq); + for (i = 0; i < num_nodes; i++) { knode = kfd->nodes[i]; device_queue_manager_uninit(knode->dqm); @@ -1058,21 +1067,6 @@ static int kfd_resume(struct kfd_node *node) return err; } -static inline void kfd_queue_work(struct workqueue_struct *wq, - struct work_struct *work) -{ - int cpu, new_cpu; - - cpu = new_cpu = smp_processor_id(); - do { - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; - if (cpu_to_node(new_cpu) == numa_node_id()) - break; - } while (cpu != new_cpu); - - queue_work_on(new_cpu, wq, work); -} - /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { @@ -1098,7 +1092,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) patched_ihre, &is_patched) && enqueue_ih_ring_entry(node, is_patched ? patched_ihre : ih_ring_entry)) { - kfd_queue_work(node->ih_wq, &node->interrupt_work); + queue_work(node->kfd->ih_wq, &node->interrupt_work); spin_unlock_irqrestore(&node->interrupt_lock, flags); return; } @@ -1500,6 +1494,11 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev) return -EINVAL; } + if (dev->kfd->shared_resources.enable_mes) { + dev_err(dev->adev->dev, "Inducing MES hang is not supported\n"); + return -EINVAL; + } + return dqm_debugfs_hang_hws(dev->dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f5b3ed20e891..6a58dd8d2130 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -205,21 +205,6 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, if (!down_read_trylock(&adev->reset_domain->sem)) return -EIO; - if (!pdd->proc_ctx_cpu_ptr) { - r = amdgpu_amdkfd_alloc_gtt_mem(adev, - AMDGPU_MES_PROC_CTX_SIZE, - &pdd->proc_ctx_bo, - &pdd->proc_ctx_gpu_addr, - &pdd->proc_ctx_cpu_ptr, - false); - if (r) { - dev_err(adev->dev, - "failed to allocate process context bo\n"); - return r; - } - memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - } - memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); queue_input.process_id = qpd->pqm->process->pasid; queue_input.page_table_base_addr = qpd->page_table_base; @@ -1199,11 +1184,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - retval = remove_queue_mes(dqm, q, qpd); - if (retval) { + int err; + + err = remove_queue_mes(dqm, q, qpd); + if (err) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - goto out; + retval = err; } } } @@ -2290,9 +2277,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { + while (halt_if_hws_hang) + schedule(); if (reset_queues_on_hws_hang(dqm)) { - while (halt_if_hws_hang) - schedule(); dqm->is_hws_hang = true; kfd_hws_hang(dqm); retval = -ETIME; @@ -2466,14 +2453,6 @@ failed_try_destroy_debugged_queue: return retval; } -/* - * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to - * stay in user mode. - */ -#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL -/* APE1 limit is inclusive and 64K aligned. */ -#define APE1_LIMIT_ALIGNMENT 0xFFFF - static bool set_cache_memory_policy(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, @@ -2488,34 +2467,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, dqm_lock(dqm); - if (alternate_aperture_size == 0) { - /* base > limit disables APE1 */ - qpd->sh_mem_ape1_base = 1; - qpd->sh_mem_ape1_limit = 0; - } else { - /* - * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, - * SH_MEM_APE1_BASE[31:0], 0x0000 } - * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, - * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } - * Verify that the base and size parameters can be - * represented in this format and convert them. - * Additionally restrict APE1 to user-mode addresses. - */ - - uint64_t base = (uintptr_t)alternate_aperture_base; - uint64_t limit = base + alternate_aperture_size - 1; - - if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || - (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { - retval = false; - goto out; - } - - qpd->sh_mem_ape1_base = base >> 16; - qpd->sh_mem_ape1_limit = limit >> 16; - } - retval = dqm->asic_ops.set_cache_memory_policy( dqm, qpd, @@ -2524,6 +2475,9 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, alternate_aperture_base, alternate_aperture_size); + if (retval) + goto out; + if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index d4d95c7f2e5d..32bedef912b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -27,6 +27,14 @@ #include "oss/oss_2_4_sh_mask.h" #include "gca/gfx_7_2_sh_mask.h" +/* + * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to + * stay in user mode. + */ +#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL +/* APE1 limit is inclusive and 64K aligned. */ +#define APE1_LIMIT_ALIGNMENT 0xFFFF + static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, @@ -84,6 +92,36 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, { uint32_t default_mtype; uint32_t ape1_mtype; + unsigned int temp; + bool retval = true; + + if (alternate_aperture_size == 0) { + /* base > limit disables APE1 */ + qpd->sh_mem_ape1_base = 1; + qpd->sh_mem_ape1_limit = 0; + } else { + /* + * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, + * SH_MEM_APE1_BASE[31:0], 0x0000 } + * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, + * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } + * Verify that the base and size parameters can be + * represented in this format and convert them. + * Additionally restrict APE1 to user-mode addresses. + */ + + uint64_t base = (uintptr_t)alternate_aperture_base; + uint64_t limit = base + alternate_aperture_size - 1; + + if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || + (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { + retval = false; + goto out; + } + + qpd->sh_mem_ape1_base = base >> 16; + qpd->sh_mem_ape1_limit = limit >> 16; + } default_mtype = (default_policy == cache_policy_coherent) ? MTYPE_NONCACHED : @@ -97,37 +135,22 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | DEFAULT_MTYPE(default_mtype) | APE1_MTYPE(ape1_mtype); - - return true; -} - -static int update_qpd_cik(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct kfd_process_device *pdd; - unsigned int temp; - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | - DEFAULT_MTYPE(MTYPE_NONCACHED) | - APE1_MTYPE(MTYPE_NONCACHED); - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } - /* On dGPU we're always in GPUVM64 addressing mode with 64-bit * aperture addresses. */ - temp = get_sh_mem_bases_nybble_64(pdd); + temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd)); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); +out: + return retval; +} + +static int update_qpd_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c index 245a90dfc2f6..b5f5f141353b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c @@ -31,10 +31,17 @@ static int update_qpd_v10(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); void device_queue_manager_init_v10( struct device_queue_manager_asic_ops *asic_ops) { + asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10; asic_ops->update_qpd = update_qpd_v10; asic_ops->init_sdma_vm = init_sdma_vm_v10; asic_ops->mqd_manager_init = mqd_manager_init_v10; @@ -49,27 +56,27 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) private_base; } -static int update_qpd_v10(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) +static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) { - struct kfd_process_device *pdd; - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - (SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | - (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } - - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd)); pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + return true; +} +static int update_qpd_v10(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c index 2e129da7acb4..f436878d0d62 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c @@ -30,10 +30,17 @@ static int update_qpd_v11(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); void device_queue_manager_init_v11( struct device_queue_manager_asic_ops *asic_ops) { + asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11; asic_ops->update_qpd = update_qpd_v11; asic_ops->init_sdma_vm = init_sdma_vm_v11; asic_ops->mqd_manager_init = mqd_manager_init_v11; @@ -48,28 +55,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) private_base; } -static int update_qpd_v11(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) +static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) { - struct kfd_process_device *pdd; - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - (SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | - (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); - - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } + qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd)); pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + return true; +} +static int update_qpd_v11(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c index 4f3295b29dfb..62ca1c8fcbaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c @@ -30,10 +30,17 @@ static int update_qpd_v12(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); void device_queue_manager_init_v12( struct device_queue_manager_asic_ops *asic_ops) { + asic_ops->set_cache_memory_policy = set_cache_memory_policy_v12; asic_ops->update_qpd = update_qpd_v12; asic_ops->init_sdma_vm = init_sdma_vm_v12; asic_ops->mqd_manager_init = mqd_manager_init_v12; @@ -48,28 +55,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) private_base; } -static int update_qpd_v12(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) +static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) { - struct kfd_process_device *pdd; - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - (SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | - (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); - - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } + qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd)); pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + return true; +} +static int update_qpd_v12(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 210bcc048f4c..d85eadaa1e11 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -30,10 +30,17 @@ static int update_qpd_v9(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); void device_queue_manager_init_v9( struct device_queue_manager_asic_ops *asic_ops) { + asic_ops->set_cache_memory_policy = set_cache_memory_policy_v9; asic_ops->update_qpd = update_qpd_v9; asic_ops->init_sdma_vm = init_sdma_vm_v9; asic_ops->mqd_manager_init = mqd_manager_init_v9; @@ -48,10 +55,36 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) private_base; } +static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + + if (dqm->dev->kfd->noretry) + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + + if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) || + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4)) + qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd)); + + pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases, + qpd->sh_mem_config); + return true; +} + static int update_qpd_v9(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - struct kfd_process_device *pdd; + struct kfd_process_device *pdd = qpd_to_pdd(qpd); pdd = qpd_to_pdd(qpd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index b291ee0fab94..320518f41890 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -27,6 +27,14 @@ #include "gca/gfx_8_0_sh_mask.h" #include "oss/oss_3_0_sh_mask.h" +/* + * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to + * stay in user mode. + */ +#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL +/* APE1 limit is inclusive and 64K aligned. */ +#define APE1_LIMIT_ALIGNMENT 0xFFFF + static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, @@ -85,6 +93,36 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, { uint32_t default_mtype; uint32_t ape1_mtype; + unsigned int temp; + bool retval = true; + + if (alternate_aperture_size == 0) { + /* base > limit disables APE1 */ + qpd->sh_mem_ape1_base = 1; + qpd->sh_mem_ape1_limit = 0; + } else { + /* + * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, + * SH_MEM_APE1_BASE[31:0], 0x0000 } + * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, + * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } + * Verify that the base and size parameters can be + * represented in this format and convert them. + * Additionally restrict APE1 to user-mode addresses. + */ + + uint64_t base = (uintptr_t)alternate_aperture_base; + uint64_t limit = base + alternate_aperture_size - 1; + + if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || + (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { + retval = false; + goto out; + } + + qpd->sh_mem_ape1_base = base >> 16; + qpd->sh_mem_ape1_limit = limit >> 16; + } default_mtype = (default_policy == cache_policy_coherent) ? MTYPE_UC : @@ -100,40 +138,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; - return true; -} - -static int update_qpd_vi(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct kfd_process_device *pdd; - unsigned int temp; - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | - MTYPE_UC << - SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | - MTYPE_UC << - SH_MEM_CONFIG__APE1_MTYPE__SHIFT; - - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } - /* On dGPU we're always in GPUVM64 addressing mode with 64-bit * aperture addresses. */ - temp = get_sh_mem_bases_nybble_64(pdd); + temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd)); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", temp, qpd->sh_mem_bases); +out: + return retval; +} +static int update_qpd_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index ea3792249209..6798510c4a70 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -1315,6 +1315,7 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid) user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id); if (unlikely(user_gpu_id == -EINVAL)) { WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id); + kfd_unref_process(p); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 9b6b6e882593..15b4b70cf199 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node) return r; } - node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); - if (unlikely(!node->ih_wq)) { - kfifo_free(&node->ih_fifo); - dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); - return -ENOMEM; + if (!node->kfd->ih_wq) { + node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND, + node->kfd->num_nodes); + if (unlikely(!node->kfd->ih_wq)) { + kfifo_free(&node->ih_fifo); + dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); + return -ENOMEM; + } } spin_lock_init(&node->interrupt_lock); @@ -96,16 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node) spin_lock_irqsave(&node->interrupt_lock, flags); node->interrupts_active = false; spin_unlock_irqrestore(&node->interrupt_lock, flags); - - /* - * flush_work ensures that there are no outstanding - * work-queue items that will access interrupt_ring. New work items - * can't be created because we stopped interrupt handling above. - */ - flush_workqueue(node->ih_wq); - - destroy_workqueue(node->ih_wq); - kfifo_free(&node->ih_fifo); } @@ -162,7 +155,7 @@ static void interrupt_wq(struct work_struct *work) /* If we spent more than a second processing signals, * reschedule the worker to avoid soft-lockup warnings */ - queue_work(dev->ih_wq, &dev->interrupt_work); + queue_work(dev->kfd->ih_wq, &dev->interrupt_work); break; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 2eff37aaf827..1695dd78ede8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -167,10 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 68dbc0399c87..3c0ae28c5923 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -154,6 +154,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -221,10 +223,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index 2b72d5b4949b..565858b9044d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -121,6 +121,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -184,10 +186,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 84e8ea3a8a0c..217af36dc097 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -182,6 +182,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -244,7 +247,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 1f9f5bfeaf86..d87b895660c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -237,7 +237,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.gws_control_queue = q->gws ? 1 : 0; + packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0; packet->bitfields2.extended_engine_sel = extended_engine_sel__mes_map_queues__legacy_engine_sel; packet->bitfields2.queue_type = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 26e48fdc8728..75523f30cd38 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -273,7 +273,6 @@ struct kfd_node { /* Interrupts */ struct kfifo ih_fifo; - struct workqueue_struct *ih_wq; struct work_struct interrupt_work; spinlock_t interrupt_lock; @@ -366,6 +365,8 @@ struct kfd_dev { struct kfd_node *nodes[MAX_KFD_NODES]; unsigned int num_nodes; + struct workqueue_struct *ih_wq; + /* Kernel doorbells for KFD device */ struct amdgpu_bo *doorbells; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 264bd764f6f2..45923da7709f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include "amdgpu_amdkfd.h" #include "amdgpu.h" +#include "amdgpu_reset.h" struct mm_struct; @@ -841,6 +842,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) return ERR_PTR(-EINVAL); } + /* If the process just called exec(3), it is possible that the + * cleanup of the kfd_process (following the release of the mm + * of the old process image) is still in the cleanup work queue. + * Make sure to drain any job before trying to recreate any + * resource for this process. + */ + flush_workqueue(kfd_process_wq); + /* * take kfd processes mutex before starting of process creation * so there won't be a case where two threads of the same process @@ -859,14 +868,6 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) if (process) { pr_debug("Process already found\n"); } else { - /* If the process just called exec(3), it is possible that the - * cleanup of the kfd_process (following the release of the mm - * of the old process image) is still in the cleanup work queue. - * Make sure to drain any job before trying to recreate any - * resource for this process. - */ - flush_workqueue(kfd_process_wq); - process = create_process(thread); if (IS_ERR(process)) goto out; @@ -1140,6 +1141,17 @@ static void kfd_process_remove_sysfs(struct kfd_process *p) p->kobj = NULL; } +/* + * If any GPU is ongoing reset, wait for reset complete. + */ +static void kfd_process_wait_gpu_reset_complete(struct kfd_process *p) +{ + int i; + + for (i = 0; i < p->n_pdds; i++) + flush_workqueue(p->pdds[i]->dev->adev->reset_domain->wq); +} + /* No process locking is needed in this function, because the process * is not findable any more. We must assume that no other thread is * using it any more, otherwise we couldn't safely free the process @@ -1154,6 +1166,11 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); + /* + * If GPU in reset, user queues may still running, wait for reset complete. + */ + kfd_process_wait_gpu_reset_complete(p); + /* Signal the eviction fence after user mode queues are * destroyed. This allows any BOs to be freed without * triggering pointless evictions or waiting for fences. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index ead4317a2168..4078a8176187 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) if (pdd->already_dequeued) return; - + /* The MES context flush needs to filter out the case which the + * KFD process is created without setting up the MES context and + * queue for creating a compute queue. + */ dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); - if (dev->kfd->shared_resources.enable_mes && + if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr && down_read_trylock(&dev->adev->reset_domain->sem)) { amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); @@ -295,7 +298,7 @@ static int init_user_queue(struct process_queue_manager *pqm, return 0; free_gang_ctx_bo: - amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; @@ -358,10 +361,26 @@ int pqm_create_queue(struct process_queue_manager *pqm, if (retval != 0) return retval; + /* Register process if this is the first queue */ if (list_empty(&pdd->qpd.queues_list) && list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); + /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */ + if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) { + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); + if (retval) { + dev_err(dev->adev->dev, "failed to allocate process context bo\n"); + return retval; + } + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); + } + pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); if (!pqn) { retval = -ENOMEM; @@ -527,7 +546,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n", pqm->process->pasid, pqn->q->properties.queue_id, retval); - if (retval != -ETIME) + if (retval != -ETIME && retval != -EIO) goto err_destroy_queue; } kfd_procfs_del_queue(pqn->q); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ad29634f8b44..29d7cb4cfe69 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -233,6 +233,7 @@ void kfd_queue_buffer_put(struct amdgpu_bo **bo) int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { struct kfd_topology_device *topo_dev; + u64 expected_queue_size; struct amdgpu_vm *vm; u32 total_cwsr_size; int err; @@ -241,6 +242,15 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope if (!topo_dev) return -EINVAL; + /* AQL queues on GFX7 and GFX8 appear twice their actual size */ + if (properties->type == KFD_QUEUE_TYPE_COMPUTE && + properties->format == KFD_QUEUE_FORMAT_AQL && + topo_dev->node_props.gfx_target_version >= 70000 && + topo_dev->node_props.gfx_target_version < 90000) + expected_queue_size = properties->queue_size / 2; + else + expected_queue_size = properties->queue_size; + vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); if (err) @@ -255,7 +265,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, - &properties->ring_bo, properties->queue_size); + &properties->ring_bo, expected_queue_size); if (err) goto out_err_unreserve; @@ -266,8 +276,8 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { - pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", - properties->eop_buf_bo->tbo.base.size, + pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n", + properties->eop_ring_buffer_size, topo_dev->node_props.eop_buffer_size); err = -EINVAL; goto out_err_unreserve; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 1893c27746a5..3e9e0f36cd3f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1170,13 +1170,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start, } static void -svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, - struct svm_range *pchild, enum svm_work_list_ops op) +svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op) { pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", pchild, pchild->start, pchild->last, prange, op); - pchild->work_item.mm = mm; + pchild->work_item.mm = NULL; pchild->work_item.op = op; list_add_tail(&pchild->child_list, &prange->child_list); } @@ -1276,13 +1275,7 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): - if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_node != node) - mapping_flags |= AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } + mapping_flags |= AMDGPU_VM_MTYPE_NC; break; default: mapping_flags |= coherent ? @@ -2390,15 +2383,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, prange->work_item.op != SVM_OP_UNMAP_RANGE) prange->work_item.op = op; } else { - prange->work_item.op = op; - - /* Pairs with mmput in deferred_list_work */ - mmget(mm); - prange->work_item.mm = mm; - list_add_tail(&prange->deferred_list, - &prange->svms->deferred_range_list); - pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", - prange, prange->start, prange->last, op); + /* Pairs with mmput in deferred_list_work. + * If process is exiting and mm is gone, don't update mmu notifier. + */ + if (mmget_not_zero(mm)) { + prange->work_item.mm = mm; + prange->work_item.op = op; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } } spin_unlock(&svms->deferred_list_lock); } @@ -2412,8 +2407,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms) } static void -svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, - struct svm_range *prange, unsigned long start, +svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start, unsigned long last) { struct svm_range *head; @@ -2434,12 +2428,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, svm_range_split(tail, last + 1, tail->last, &head); if (head != prange && tail != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE); } else if (tail != prange) { - svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE); } else if (head != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); } else if (parent != prange) { prange->work_item.op = SVM_OP_UNMAP_RANGE; } @@ -2516,14 +2510,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, l = min(last, pchild->last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); - svm_range_unmap_split(mm, prange, pchild, start, last); + svm_range_unmap_split(prange, pchild, start, last); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); - svm_range_unmap_split(mm, prange, prange, start, last); + svm_range_unmap_split(prange, prange, start, last); if (unmap_parent) svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); @@ -2566,8 +2560,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; - if (!mmget_not_zero(mni->mm)) - return true; start = mni->interval_tree.start; last = mni->interval_tree.last; @@ -2594,7 +2586,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); - mmput(mni->mm); return true; } @@ -2998,19 +2989,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - /* check if this page fault time stamp is before svms->checkpoint_ts */ - if (svms->checkpoint_ts[gpuidx] != 0) { - if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); - r = 0; - goto out; - } else - /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts - * to zero to avoid following ts wrap around give wrong comparing - */ - svms->checkpoint_ts[gpuidx] = 0; - } - if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -3030,6 +3008,21 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = -EAGAIN; + goto out_unlock_svms; + } else { + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + } + prange = svm_range_from_addr(svms, addr, NULL); if (!prange) { pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", @@ -3154,7 +3147,8 @@ out_unlock_svms: mutex_unlock(&svms->lock); mmap_read_unlock(mm); - svm_range_count_fault(node, p, gpuidx); + if (r != -EAGAIN) + svm_range_count_fault(node, p, gpuidx); mmput(mm); out: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3871591c9aec..82da568604b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1683,17 +1683,32 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, int cache_type, unsigned int cu_processor_id, struct kfd_node *knode) { - unsigned int cu_sibling_map_mask; + unsigned int cu_sibling_map_mask = 0; int first_active_cu; int i, j, k, xcc, start, end; int num_xcc = NUM_XCC(knode->xcc_mask); struct kfd_cache_properties *pcache = NULL; enum amdgpu_memory_partition mode; struct amdgpu_device *adev = knode->adev; + bool found = false; start = ffs(knode->xcc_mask) - 1; end = start + num_xcc; - cu_sibling_map_mask = cu_info->bitmap[start][0][0]; + + /* To find the bitmap in the first active cu in the first + * xcc, it is based on the assumption that evrey xcc must + * have at least one active cu. + */ + for (i = 0; i < gfx_info->max_shader_engines && !found; i++) { + for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) { + if (cu_info->bitmap[start][i % 4][j % 4]) { + cu_sibling_map_mask = + cu_info->bitmap[start][i % 4][j % 4]; + found = true; + } + } + } + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -2002,10 +2017,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) - dev->node_props.capability |= - HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ea403fece839..33a3e5e28fbc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -243,6 +243,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, + int bl_idx, + u32 user_brightness); + static bool is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state, struct drm_crtc_state *new_crtc_state); @@ -361,6 +365,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { + if (new_state->stream->adjust.timing_adjust_pending) + return true; if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) return true; else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state)) @@ -1036,8 +1042,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port, continue; *enabled = true; + mutex_lock(&connector->eld_mutex); ret = drm_eld_size(connector->eld); memcpy(buf, connector->eld, min(max_bytes, ret)); + mutex_unlock(&connector->eld_mutex); break; } @@ -1591,75 +1599,151 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } -static const struct dmi_system_id hpd_disconnect_quirk_table[] = { +struct amdgpu_dm_quirks { + bool aux_hpd_discon; + bool support_edp0_on_dp1; +}; + +static struct amdgpu_dm_quirks quirk_entries = { + .aux_hpd_discon = false, + .support_edp0_on_dp1 = false +}; + +static int edp0_on_dp1_callback(const struct dmi_system_id *id) +{ + quirk_entries.support_edp0_on_dp1 = true; + return 0; +} + +static int aux_hpd_discon_callback(const struct dmi_system_id *id) +{ + quirk_entries.aux_hpd_discon = true; + return 0; +} + +static const struct dmi_system_id dmi_quirk_table[] = { { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"), + }, + }, {} /* TODO: refactor this from a fixed table to a dynamic option */ }; -static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +static void retrieve_dmi_info(struct amdgpu_display_manager *dm, struct dc_init_data *init_data) { - const struct dmi_system_id *dmi_id; + int dmi_id; + struct drm_device *dev = dm->ddev; dm->aux_hpd_discon_quirk = false; + init_data->flags.support_edp0_on_dp1 = false; + + dmi_id = dmi_check_system(dmi_quirk_table); - dmi_id = dmi_first_match(hpd_disconnect_quirk_table); - if (dmi_id) { + if (!dmi_id) + return; + + if (quirk_entries.aux_hpd_discon) { dm->aux_hpd_discon_quirk = true; - DRM_INFO("aux_hpd_discon_quirk attached\n"); + drm_info(dev, "aux_hpd_discon_quirk attached\n"); + } + if (quirk_entries.support_edp0_on_dp1) { + init_data->flags.support_edp0_on_dp1 = true; + drm_info(dev, "support_edp0_on_dp1 attached\n"); } } @@ -1805,26 +1889,6 @@ static enum dmub_ips_disable_type dm_get_default_ips_mode( switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(3, 5, 0): - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) - /* - * Other ASICs with DCN35 that have residency issues with - * IPS2 in idle. - * We want them to use IPS2 only in display off cases. - */ - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - break; case IP_VERSION(3, 5, 1): ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; break; @@ -1967,7 +2031,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; - retrieve_dmi_info(&adev->dm); + retrieve_dmi_info(&adev->dm, &init_data); if (adev->dm.bb_from_dmub) init_data.bb_from_dmub = adev->dm.bb_from_dmub; @@ -3134,16 +3198,16 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, for (k = 0; k < dc_state->stream_count; k++) { bundle->stream_update.stream = dc_state->streams[k]; - for (m = 0; m < dc_state->stream_status->plane_count; m++) { + for (m = 0; m < dc_state->stream_status[k].plane_count; m++) { bundle->surface_updates[m].surface = - dc_state->stream_status->plane_states[m]; + dc_state->stream_status[k].plane_states[m]; bundle->surface_updates[m].surface->force_full_update = true; } update_planes_and_stream_adapter(dm->dc, UPDATE_TYPE_FULL, - dc_state->stream_status->plane_count, + dc_state->stream_status[k].plane_count, dc_state->streams[k], &bundle->stream_update, bundle->surface_updates); @@ -3170,8 +3234,7 @@ static int dm_resume(void *handle) struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state); enum dc_connection_type new_connection_type = dc_connection_none; struct dc_state *dc_state; - int i, r, j, ret; - bool need_hotplug = false; + int i, r, j; struct dc_commit_streams_params commit_params = {}; if (dm->dc->caps.ips_support) { @@ -3238,6 +3301,12 @@ static int dm_resume(void *handle) mutex_unlock(&dm->dc_lock); + /* set the backlight after a reset */ + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i]) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } + return 0; } /* Recreate dc_state - DC invalidates it when setting power state to S3. */ @@ -3360,23 +3429,16 @@ static int dm_resume(void *handle) aconnector->mst_root) continue; - ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true); - - if (ret < 0) { - dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, - aconnector->dc_link); - need_hotplug = true; - } + drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); } drm_connector_list_iter_end(&iter); - if (need_hotplug) - drm_kms_helper_hotplug_event(ddev); - amdgpu_dm_irq_resume_late(adev); amdgpu_dm_smu_write_watermarks_table(adev); + drm_kms_helper_hotplug_event(ddev); + return 0; } @@ -4765,6 +4827,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) dm->backlight_dev[aconnector->bl_idx] = backlight_device_register(bl_name, aconnector->base.kdev, dm, &amdgpu_dm_backlight_ops, &props); + dm->brightness[aconnector->bl_idx] = props.brightness; if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) { DRM_ERROR("DM: Backlight registration failed!\n"); @@ -4832,7 +4895,6 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, aconnector->bl_idx = bl_idx; amdgpu_dm_update_backlight_caps(dm, bl_idx); - dm->brightness[bl_idx] = AMDGPU_MAX_BL_LEVEL; dm->backlight_link[bl_idx] = link; dm->num_of_edps++; @@ -5161,7 +5223,8 @@ fail: static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm) { - drm_atomic_private_obj_fini(&dm->atomic_obj); + if (dm->atomic_obj.state) + drm_atomic_private_obj_fini(&dm->atomic_obj); } /****************************************************************************** @@ -5479,9 +5542,9 @@ fill_plane_color_attributes(const struct drm_plane_state *plane_state, case DRM_COLOR_YCBCR_BT2020: if (full_range) - *color_space = COLOR_SPACE_2020_YCBCR; + *color_space = COLOR_SPACE_2020_YCBCR_FULL; else - return -EINVAL; + *color_space = COLOR_SPACE_2020_YCBCR_LIMITED; break; default: @@ -5497,8 +5560,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, const u64 tiling_flags, struct dc_plane_info *plane_info, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) + bool tmz_surface) { const struct drm_framebuffer *fb = plane_state->fb; const struct amdgpu_framebuffer *afb = @@ -5597,7 +5659,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, &plane_info->tiling_info, &plane_info->plane_size, &plane_info->dcc, address, - tmz_surface, force_disable_dcc); + tmz_surface); if (ret) return ret; @@ -5618,7 +5680,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, struct dc_scaling_info scaling_info; struct dc_plane_info plane_info; int ret; - bool force_disable_dcc = false; ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, plane_state, &scaling_info); if (ret) @@ -5629,13 +5690,11 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->clip_rect = scaling_info.clip_rect; dc_plane_state->scaling_quality = scaling_info.scaling_quality; - force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend; ret = fill_dc_plane_info_and_addr(adev, plane_state, afb->tiling_flags, &plane_info, &dc_plane_state->address, - afb->tmz_surface, - force_disable_dcc); + afb->tmz_surface); if (ret) return ret; @@ -5981,7 +6040,7 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing, if (dc_crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB) color_space = COLOR_SPACE_2020_RGB_FULLRANGE; else - color_space = COLOR_SPACE_2020_YCBCR; + color_space = COLOR_SPACE_2020_YCBCR_LIMITED; break; case DRM_MODE_COLORIMETRY_DEFAULT: // ITU601 default: @@ -7330,12 +7389,12 @@ cleanup: } struct dc_stream_state * -create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, +create_validate_stream_for_sink(struct drm_connector *connector, const struct drm_display_mode *drm_mode, const struct dm_connector_state *dm_state, const struct dc_stream_state *old_stream) { - struct drm_connector *connector = &aconnector->base; + struct amdgpu_dm_connector *aconnector = NULL; struct amdgpu_device *adev = drm_to_adev(connector->dev); struct dc_stream_state *stream; const struct drm_connector_state *drm_state = dm_state ? &dm_state->base : NULL; @@ -7346,8 +7405,12 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (!dm_state) return NULL; - if (aconnector->dc_link->connector_signal == SIGNAL_TYPE_HDMI_TYPE_A || - aconnector->dc_link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + aconnector = to_amdgpu_dm_connector(connector); + + if (aconnector && + (aconnector->dc_link->connector_signal == SIGNAL_TYPE_HDMI_TYPE_A || + aconnector->dc_link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER)) bpc_limit = 8; do { @@ -7359,10 +7422,11 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, break; } - if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + dc_result = dc_validate_stream(adev->dm.dc, stream); + + if (!aconnector) /* writeback connector */ return stream; - dc_result = dc_validate_stream(adev->dm.dc, stream); if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream); @@ -7392,7 +7456,7 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, __func__, __LINE__); aconnector->force_yuv420_output = true; - stream = create_validate_stream_for_sink(aconnector, drm_mode, + stream = create_validate_stream_for_sink(connector, drm_mode, dm_state, old_stream); aconnector->force_yuv420_output = false; } @@ -7407,6 +7471,9 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec struct dc_sink *dc_sink; /* TODO: Unhardcode stream count */ struct dc_stream_state *stream; + /* we always have an amdgpu_dm_connector here since we got + * here via the amdgpu_dm_connector_helper_funcs + */ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); if ((mode->flags & DRM_MODE_FLAG_INTERLACE) || @@ -7431,7 +7498,7 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec drm_mode_set_crtcinfo(mode, 0); - stream = create_validate_stream_for_sink(aconnector, mode, + stream = create_validate_stream_for_sink(connector, mode, to_dm_connector_state(connector->state), NULL); if (stream) { @@ -8211,7 +8278,7 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, int i; int result = -EIO; - if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) + if (!ddc_service->ddc_pin) return result; cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); @@ -8398,14 +8465,39 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, int offdelay; if (acrtc_state) { - if (amdgpu_ip_version(adev, DCE_HWIP, 0) < - IP_VERSION(3, 5, 0) || - acrtc_state->stream->link->psr_settings.psr_version < - DC_PSR_VERSION_UNSUPPORTED || - !(adev->flags & AMD_IS_APU)) { - timing = &acrtc_state->stream->timing; - - /* at least 2 frames */ + timing = &acrtc_state->stream->timing; + + /* + * Depending on when the HW latching event of double-buffered + * registers happen relative to the PSR SDP deadline, and how + * bad the Panel clock has drifted since the last ALPM off + * event, there can be up to 3 frames of delay between sending + * the PSR exit cmd to DMUB fw, and when the panel starts + * displaying live frames. + * + * We can set: + * + * 20/100 * offdelay_ms = 3_frames_ms + * => offdelay_ms = 5 * 3_frames_ms + * + * This ensures that `3_frames_ms` will only be experienced as a + * 20% delay on top how long the display has been static, and + * thus make the delay less perceivable. + */ + if (acrtc_state->stream->link->psr_settings.psr_version < + DC_PSR_VERSION_UNSUPPORTED) { + offdelay = DIV64_U64_ROUND_UP((u64)5 * 3 * 10 * + timing->v_total * + timing->h_total, + timing->pix_clk_100hz); + config.offdelay_ms = offdelay ?: 30; + } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) < + IP_VERSION(3, 5, 0) || + !(adev->flags & AMD_IS_APU)) { + /* + * Older HW and DGPU have issues with instant off; + * use a 2 frame offdelay. + */ offdelay = DIV64_U64_ROUND_UP((u64)20 * timing->v_total * timing->h_total, @@ -8413,6 +8505,8 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, config.offdelay_ms = offdelay ?: 30; } else { + /* offdelay_ms = 0 will never disable vblank */ + config.offdelay_ms = 1; config.disable_immediate = true; } @@ -8889,6 +8983,7 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach, struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; struct amdgpu_dm_connector *aconn = (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context; + bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state); if (acrtc_state->update_type > UPDATE_TYPE_FAST) { if (pr->config.replay_supported && !pr->replay_feature_enabled) @@ -8915,14 +9010,15 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach, * adequate number of fast atomic commits to notify KMD * of update events. See `vblank_control_worker()`. */ - if (acrtc_attach->dm_irq_params.allow_sr_entry && + if (!vrr_active && + acrtc_attach->dm_irq_params.allow_sr_entry && #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) && #endif (current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) { if (pr->replay_feature_enabled && !pr->replay_allow_active) amdgpu_dm_replay_enable(acrtc_state->stream, true); - if (psr->psr_version >= DC_PSR_VERSION_SU_1 && + if (psr->psr_version == DC_PSR_VERSION_SU_1 && !psr->psr_allow_active && !aconn->disallow_edp_enter_psr) amdgpu_dm_psr_enable(acrtc_state->stream); } @@ -9059,7 +9155,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, afb->tiling_flags, &bundle->plane_infos[planes_count], &bundle->flip_addrs[planes_count].address, - afb->tmz_surface, false); + afb->tmz_surface); drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n", new_plane_state->plane->index, @@ -9093,7 +9189,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns = timestamp_ns; if (acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); + amdgpu_dm_psr_disable(acrtc_state->stream, true); mutex_unlock(&dm->dc_lock); } } @@ -9259,11 +9355,11 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->stream_update.abm_level = &acrtc_state->abm_level; mutex_lock(&dm->dc_lock); - if (acrtc_state->update_type > UPDATE_TYPE_FAST) { + if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) { if (acrtc_state->stream->link->replay_settings.replay_allow_active) amdgpu_dm_replay_disable(acrtc_state->stream); if (acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); + amdgpu_dm_psr_disable(acrtc_state->stream, true); } mutex_unlock(&dm->dc_lock); @@ -10422,7 +10518,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) goto skip_modeset; - new_stream = create_validate_stream_for_sink(aconnector, + new_stream = create_validate_stream_for_sink(connector, &new_crtc_state->mode, dm_new_conn_state, dm_old_crtc_state->stream); @@ -10665,6 +10761,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, state->allow_modeset) return true; + if (amdgpu_in_reset(adev) && state->allow_modeset) + return true; + /* Exit early if we know that we're adding or removing the plane. */ if (old_plane_state->crtc != new_plane_state->crtc) return true; @@ -11370,6 +11469,25 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev, return 0; } +static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, + struct drm_atomic_state *state, + struct drm_crtc_state *crtc_state) +{ + struct drm_plane *plane; + struct drm_plane_state *new_plane_state, *old_plane_state; + + drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) { + new_plane_state = drm_atomic_get_plane_state(state, plane); + old_plane_state = drm_atomic_get_plane_state(state, plane); + + if (old_plane_state->fb && new_plane_state->fb && + get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb)) + return true; + } + + return false; +} + /** * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. * @@ -11567,10 +11685,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, /* Remove exiting planes if they are modified */ for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) { - if (old_plane_state->fb && new_plane_state->fb && - get_mem_type(old_plane_state->fb) != - get_mem_type(new_plane_state->fb)) - lock_and_validation_needed = true; ret = dm_update_plane_state(dc, state, plane, old_plane_state, @@ -11865,9 +11979,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, /* * Only allow async flips for fast updates that don't change - * the FB pitch, the DCC state, rotation, etc. + * the FB pitch, the DCC state, rotation, mem_type, etc. */ - if (new_crtc_state->async_flip && lock_and_validation_needed) { + if (new_crtc_state->async_flip && + (lock_and_validation_needed || + amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] async flips are only supported for fast updates\n", crtc->base.id, crtc->name); @@ -12416,7 +12532,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. */ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); @@ -12425,22 +12541,15 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. */ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + /*write req may receive a byte indicating partially written number as well*/ + if (p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index a0bc2c0ac04d..9603352ee094 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -697,6 +697,8 @@ struct amdgpu_dm_connector { struct drm_dp_mst_port *mst_output_port; struct amdgpu_dm_connector *mst_root; struct drm_dp_aux *dsc_aux; + uint32_t mst_local_bw; + uint16_t vc_full_pbn; struct mutex handle_mst_msg_ready; /* TODO see if we can merge with ddc_bus or make a dm_connector */ @@ -985,7 +987,7 @@ int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int struct set_config_cmd_payload *payload, enum set_config_status *operation_result); struct dc_stream_state * - create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, + create_validate_stream_for_sink(struct drm_connector *connector, const struct drm_display_mode *drm_mode, const struct dm_connector_state *dm_state, const struct dc_stream_state *old_stream); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index f936a35fa9eb..0f6ba7b1575d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -30,6 +30,7 @@ #include "amdgpu_dm.h" #include "dc.h" #include "amdgpu_securedisplay.h" +#include "amdgpu_dm_psr.h" static const char *const pipe_crc_sources[] = { "none", @@ -224,6 +225,10 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); + /* For PSR1, check that the panel has exited PSR */ + if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) + amdgpu_dm_psr_wait_disable(stream_state); + /* Enable or disable CRTC CRC generation */ if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { if (!dc_stream_configure_crc(stream_state->ctx->dc, @@ -357,6 +362,17 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) } + /* + * Reading the CRC requires the vblank interrupt handler to be + * enabled. Keep a reference until CRC capture stops. + */ + enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); + if (!enabled && enable) { + ret = drm_crtc_vblank_get(crtc); + if (ret) + goto cleanup; + } + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) /* Reset secure_display when we change crc source from debugfs */ amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream); @@ -367,16 +383,7 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) goto cleanup; } - /* - * Reading the CRC requires the vblank interrupt handler to be - * enabled. Keep a reference until CRC capture stops. - */ - enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); if (!enabled && enable) { - ret = drm_crtc_vblank_get(crtc); - if (ret) - goto cleanup; - if (dm_is_crc_source_dprx(source)) { if (drm_dp_start_crc(aux, crtc)) { DRM_DEBUG_DRIVER("dp start crc failed\n"); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 9be87b532517..9a31e5da3687 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -93,7 +93,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable) return rc; } -bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state) +bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) { return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; @@ -113,6 +113,7 @@ bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state) * * Panel Replay and PSR SU * - Enable when: + * - VRR is disabled * - vblank counter is disabled * - entry is allowed: usermode demonstrates an adequate number of fast * commits) @@ -131,19 +132,20 @@ static void amdgpu_dm_crtc_set_panel_sr_feature( bool is_sr_active = (link->replay_settings.replay_allow_active || link->psr_settings.psr_allow_active); bool is_crc_window_active = false; + bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(vblank_work->acrtc); #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY is_crc_window_active = amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base); #endif - if (link->replay_settings.replay_feature_enabled && + if (link->replay_settings.replay_feature_enabled && !vrr_active && allow_sr_entry && !is_sr_active && !is_crc_window_active) { amdgpu_dm_replay_enable(vblank_work->stream, true); } else if (vblank_enabled) { if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active) - amdgpu_dm_psr_disable(vblank_work->stream); - } else if (link->psr_settings.psr_feature_enabled && + amdgpu_dm_psr_disable(vblank_work->stream, false); + } else if (link->psr_settings.psr_feature_enabled && !vrr_active && allow_sr_entry && !is_sr_active && !is_crc_window_active) { struct amdgpu_dm_connector *aconn = @@ -729,7 +731,16 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, * support programmable degamma anywhere. */ is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch; - drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0, + /* Dont't enable DRM CRTC degamma property for DCN401 since the + * pre-blending degamma LUT doesn't apply to cursor, and therefore + * can't work similar to a post-blending degamma LUT as in other hw + * versions. + * TODO: revisit it once KMS plane color API is merged. + */ + drm_crtc_enable_color_mgmt(&acrtc->base, + (is_dcn && + dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01) ? + MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES); drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h index 17e948753f59..c1212947a77b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h @@ -37,7 +37,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable); bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc); -bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state); +bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state); int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index db56b0aa5454..15d94d2a0e2f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1145,7 +1145,7 @@ static int amdgpu_current_colorspace_show(struct seq_file *m, void *data) case COLOR_SPACE_2020_RGB_FULLRANGE: seq_puts(m, "BT2020_RGB"); break; - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: seq_puts(m, "BT2020_YCC"); break; default: @@ -3638,7 +3638,7 @@ static int crc_win_update_set(void *data, u64 val) /* PSR may write to OTG CRC window control register, * so close it before starting secure_display. */ - amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream); + amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true); spin_lock_irq(&adev_to_drm(adev)->event_lock); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index e339c7a8d541..10ba4d7bf632 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -172,7 +172,10 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, struct mod_hdcp_display_adjustment display_adjust; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -209,7 +212,6 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, mod_hdcp_update_display(&hdcp_w->hdcp, conn_index, &link_adjust, &display_adjust, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, @@ -220,8 +222,7 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, struct drm_connector_state *conn_state = aconnector->base.state; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; + guard(mutex)(&hdcp_w->mutex); /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -237,9 +238,11 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -247,7 +250,7 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; unsigned int conn_index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_reset_connection(&hdcp_w->hdcp, &hdcp_w->output); @@ -256,11 +259,13 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); - - mutex_unlock(&hdcp_w->mutex); } void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -277,7 +282,7 @@ static void event_callback(struct work_struct *work) hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue, callback_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->callback_dwork); @@ -285,8 +290,6 @@ static void event_callback(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_property_update(struct work_struct *work) @@ -323,7 +326,7 @@ static void event_property_update(struct work_struct *work) continue; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); if (conn_state->commit) { ret = wait_for_completion_interruptible_timeout(&conn_state->commit->hw_done, @@ -355,7 +358,6 @@ static void event_property_update(struct work_struct *work) drm_hdcp_update_content_protection(connector, DRM_MODE_CONTENT_PROTECTION_DESIRED); } - mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); } } @@ -368,7 +370,7 @@ static void event_property_validate(struct work_struct *work) struct amdgpu_dm_connector *aconnector; unsigned int conn_index; - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { @@ -408,8 +410,6 @@ static void event_property_validate(struct work_struct *work) schedule_work(&hdcp_work->property_update_work); } } - - mutex_unlock(&hdcp_work->mutex); } static void event_watchdog_timer(struct work_struct *work) @@ -420,7 +420,7 @@ static void event_watchdog_timer(struct work_struct *work) struct hdcp_workqueue, watchdog_timer_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->watchdog_timer_dwork); @@ -429,8 +429,6 @@ static void event_watchdog_timer(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_cpirq(struct work_struct *work) @@ -439,13 +437,11 @@ static void event_cpirq(struct work_struct *work) hdcp_work = container_of(work, struct hdcp_workqueue, cpirq_work); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CPIRQ, &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) @@ -455,6 +451,7 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) for (i = 0; i < hdcp_work->max_link; i++) { cancel_delayed_work_sync(&hdcp_work[i].callback_dwork); cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork); + cancel_delayed_work_sync(&hdcp_work[i].property_validate_dwork); } sysfs_remove_bin_file(kobj, &hdcp_work[0].attr); @@ -478,7 +475,7 @@ static bool enable_assr(void *handle, struct dc_link *link) dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; - mutex_lock(&psp->dtm_context.mutex); + guard(mutex)(&psp->dtm_context.mutex); memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE; @@ -493,8 +490,6 @@ static bool enable_assr(void *handle, struct dc_link *link) res = false; } - mutex_unlock(&psp->dtm_context.mutex); - return res; } @@ -503,6 +498,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -556,13 +552,14 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); - } /** diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 3390f0d8420a..a215234151ac 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,6 +894,15 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int irq_type; + int i; + + /* First, clear all hpd and hpdrx interrupts */ + for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) { + if (!dc_interrupt_set(adev->dm.dc, i, false)) + drm_err(dev, "Failed to clear hpd(rx) source=%d on init\n", + i); + } drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -907,10 +916,31 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; + /* + * Get a base driver irq reference for hpd ints for the lifetime + * of dm. Note that only hpd interrupt types are registered with + * base driver; hpd_rx types aren't. IOW, amdgpu_irq_get/put on + * hpd_rx isn't available. DM currently controls hpd_rx + * explicitly with dc_interrupt_set() + */ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - true); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + /* + * TODO: There's a mismatch between mode_info.num_hpd + * and what bios reports as the # of connectors with hpd + * sources. Since the # of hpd source types registered + * with base driver == mode_info.num_hpd, we have to + * fallback to dc_interrupt_set for the remaining types. + */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_get(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + true); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { @@ -935,6 +965,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int irq_type; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -948,9 +979,18 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - false); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + + /* TODO: See same TODO in amdgpu_dm_hpd_init() */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_put(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + false); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 32b025c92c63..92158009cfa7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -51,6 +51,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -59,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum aux_return_code_type operation_result; struct amdgpu_device *adev; struct ddc_service *ddc; + uint8_t copy[16]; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -74,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, (msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0; payload.defer_delay = 0; + if (payload.write) { + memcpy(copy, msg->buffer, msg->size); + payload.data = copy; + } + result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); @@ -87,15 +96,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes*/ + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = payload.data[0]; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -114,6 +133,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } @@ -155,6 +181,17 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) return 0; } + +static inline void +amdgpu_dm_mst_reset_mst_connector_setting(struct amdgpu_dm_connector *aconnector) +{ + aconnector->edid = NULL; + aconnector->dsc_aux = NULL; + aconnector->mst_output_port->passthrough_aux = NULL; + aconnector->mst_local_bw = 0; + aconnector->vc_full_pbn = 0; +} + static void amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) { @@ -182,9 +219,7 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) dc_sink_release(dc_sink); aconnector->dc_sink = NULL; - aconnector->edid = NULL; - aconnector->dsc_aux = NULL; - port->passthrough_aux = NULL; + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); } aconnector->mst_status = MST_STATUS_DEFAULT; @@ -500,9 +535,7 @@ dm_dp_mst_detect(struct drm_connector *connector, dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; - aconnector->edid = NULL; - aconnector->dsc_aux = NULL; - port->passthrough_aux = NULL; + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); amdgpu_dm_set_mst_status(&aconnector->mst_status, MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD, @@ -1613,7 +1646,6 @@ int pre_validate_dsc(struct drm_atomic_state *state, if (ind >= 0) { struct drm_connector *connector; - struct amdgpu_dm_connector *aconnector; struct drm_connector_state *drm_new_conn_state; struct dm_connector_state *dm_new_conn_state; struct dm_crtc_state *dm_old_crtc_state; @@ -1621,15 +1653,14 @@ int pre_validate_dsc(struct drm_atomic_state *state, connector = amdgpu_dm_find_first_crtc_matching_connector(state, state->crtcs[ind].ptr); - aconnector = to_amdgpu_dm_connector(connector); drm_new_conn_state = drm_atomic_get_new_connector_state(state, - &aconnector->base); + connector); dm_new_conn_state = to_dm_connector_state(drm_new_conn_state); dm_old_crtc_state = to_dm_crtc_state(state->crtcs[ind].old_state); local_dc_state->streams[i] = - create_validate_stream_for_sink(aconnector, + create_validate_stream_for_sink(connector, &state->crtcs[ind].new_state->mode, dm_new_conn_state, dm_old_crtc_state->stream); @@ -1684,16 +1715,16 @@ clean_exit: return ret; } -static unsigned int kbps_from_pbn(unsigned int pbn) +static uint32_t kbps_from_pbn(unsigned int pbn) { - unsigned int kbps = pbn; + uint64_t kbps = (uint64_t)pbn; kbps *= (1000000 / PEAK_FACTOR_X1000); kbps *= 8; kbps *= 54; kbps /= 64; - return kbps; + return (uint32_t)kbps; } static bool is_dsc_common_config_possible(struct dc_stream_state *stream, @@ -1815,9 +1846,18 @@ enum dc_status dm_dp_mst_is_port_support_mode( struct drm_dp_mst_port *immediate_upstream_port = NULL; uint32_t end_link_bw = 0; - /*Get last DP link BW capability*/ - if (dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw)) { - if (stream_kbps > end_link_bw) { + /*Get last DP link BW capability. Mode shall be supported by Legacy peer*/ + if (aconnector->mst_output_port->pdt != DP_PEER_DEVICE_DP_LEGACY_CONV && + aconnector->mst_output_port->pdt != DP_PEER_DEVICE_NONE) { + if (aconnector->vc_full_pbn != aconnector->mst_output_port->full_pbn) { + dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw); + aconnector->vc_full_pbn = aconnector->mst_output_port->full_pbn; + aconnector->mst_local_bw = end_link_bw; + } else { + end_link_bw = aconnector->mst_local_bw; + } + + if (end_link_bw > 0 && stream_kbps > end_link_bw) { DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." "Mode required bw can't fit into last link\n"); return DC_FAIL_BANDWIDTH_VALIDATE; @@ -1831,11 +1871,15 @@ enum dc_status dm_dp_mst_is_port_support_mode( if (immediate_upstream_port) { virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn); virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); - if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { - DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." - "Max dsc compression can't fit into MST available bw\n"); - return DC_FAIL_BANDWIDTH_VALIDATE; - } + } else { + /* For topology LCT 1 case - only one mstb*/ + virtual_channel_bw_in_kbps = root_link_bw_in_kbps; + } + + if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." + "Max dsc compression can't fit into MST available bw\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 495e3cd70426..62e30942f735 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -275,8 +275,11 @@ static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev, if (!dcc->enable) return 0; - if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || - !dc->cap_funcs.get_dcc_compression_cap) + if (adev->family < AMDGPU_FAMILY_GC_12_0_0 && + format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return -EINVAL; + + if (!dc->cap_funcs.get_dcc_compression_cap) return -EINVAL; input.format = format; @@ -309,8 +312,7 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg const struct plane_size *plane_size, union dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) + struct dc_plane_address *address) { const uint64_t modifier = afb->base.modifier; int ret = 0; @@ -318,7 +320,7 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); - if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) { + if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { uint64_t dcc_address = afb->address + afb->base.offsets[1]; bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); @@ -360,8 +362,7 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd const struct plane_size *plane_size, union dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) + struct dc_plane_address *address) { const uint64_t modifier = afb->base.modifier; int ret = 0; @@ -371,7 +372,7 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); - if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) { + if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier); dcc->enable = 1; @@ -839,8 +840,7 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) + bool tmz_surface) { const struct drm_framebuffer *fb = &afb->base; int ret; @@ -900,16 +900,14 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, ret = amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(adev, afb, format, rotation, plane_size, tiling_info, dcc, - address, - force_disable_dcc); + address); if (ret) return ret; } else if (adev->family >= AMDGPU_FAMILY_AI) { ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, rotation, plane_size, tiling_info, dcc, - address, - force_disable_dcc); + address); if (ret) return ret; } else { @@ -1000,14 +998,13 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { struct dc_plane_state *plane_state = dm_plane_state_new->dc_state; - bool force_disable_dcc = !plane_state->dcc.enable; amdgpu_dm_plane_fill_plane_buffer_attributes( adev, afb, plane_state->format, plane_state->rotation, afb->tiling_flags, &plane_state->tiling_info, &plane_state->plane_size, &plane_state->dcc, &plane_state->address, - afb->tmz_surface, force_disable_dcc); + afb->tmz_surface); } return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 6498359bff6f..2eef13b1c05a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -51,8 +51,7 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc); + bool tmz_surface); int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index f40240aafe98..d63038ec4ec7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -54,7 +54,8 @@ static bool link_supports_psrsu(struct dc_link *link) if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU) return false; - return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); + /* Temporarily disable PSR-SU to avoid glitches */ + return false; } /* @@ -126,8 +127,10 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) psr_config.allow_multi_disp_optimizations = (amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT); - if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) - return false; + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) { + if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) + return false; + } ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context); @@ -201,14 +204,13 @@ void amdgpu_dm_psr_enable(struct dc_stream_state *stream) * * Return: true if success */ -bool amdgpu_dm_psr_disable(struct dc_stream_state *stream) +bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait) { - unsigned int power_opt = 0; bool psr_enable = false; DRM_DEBUG_DRIVER("Disabling psr...\n"); - return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt); + return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL); } /* @@ -251,3 +253,33 @@ bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm) return allow_active; } + +/** + * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR + * @stream: stream state attached to the eDP link + * + * Waits for a max of 500ms for the eDP panel to exit PSR. + * + * Return: true if panel exited PSR, false otherwise. + */ +bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream) +{ + enum dc_psr_state psr_state = PSR_STATE0; + struct dc_link *link = stream->link; + int retry_count; + + if (link == NULL) + return false; + + for (retry_count = 0; retry_count <= 1000; retry_count++) { + dc_link_get_psr_state(link, &psr_state); + if (psr_state == PSR_STATE0) + break; + udelay(500); + } + + if (retry_count == 1000) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index cd2d45c2b5ef..e2366321a3c1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -34,8 +34,9 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link); void amdgpu_dm_psr_enable(struct dc_stream_state *stream); bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); -bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); +bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait); bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm); +bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream); #endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/basics/dc_common.c b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c index b2fc4f8e6482..a51c2701da24 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dc_common.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c @@ -40,7 +40,8 @@ bool is_rgb_cspace(enum dc_color_space output_color_space) case COLOR_SPACE_YCBCR709: case COLOR_SPACE_YCBCR601_LIMITED: case COLOR_SPACE_YCBCR709_LIMITED: - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: + case COLOR_SPACE_2020_YCBCR_FULL: return false; default: /* Add a case to switch */ diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 7d18f372ce7a..6bc59b7ef007 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -101,7 +101,6 @@ static void init_dig_encoder_control(struct bios_parser *bp) bp->cmd_tbl.dig_encoder_control = encoder_control_digx_v1_5; break; default: - dm_output_to_console("Don't have dig_encoder_control for v%d\n", version); bp->cmd_tbl.dig_encoder_control = encoder_control_fallback; break; } @@ -238,7 +237,6 @@ static void init_transmitter_control(struct bios_parser *bp) bp->cmd_tbl.transmitter_control = transmitter_control_v1_7; break; default: - dm_output_to_console("Don't have transmitter_control for v%d\n", crev); bp->cmd_tbl.transmitter_control = transmitter_control_fallback; break; } @@ -408,8 +406,6 @@ static void init_set_pixel_clock(struct bios_parser *bp) bp->cmd_tbl.set_pixel_clock = set_pixel_clock_v7; break; default: - dm_output_to_console("Don't have set_pixel_clock for v%d\n", - BIOS_CMD_TABLE_PARA_REVISION(setpixelclock)); bp->cmd_tbl.set_pixel_clock = set_pixel_clock_fallback; break; } @@ -554,7 +550,6 @@ static void init_set_crtc_timing(struct bios_parser *bp) set_crtc_using_dtd_timing_v3; break; default: - dm_output_to_console("Don't have set_crtc_timing for v%d\n", dtd_version); bp->cmd_tbl.set_crtc_timing = NULL; break; } @@ -671,8 +666,6 @@ static void init_enable_crtc(struct bios_parser *bp) bp->cmd_tbl.enable_crtc = enable_crtc_v1; break; default: - dm_output_to_console("Don't have enable_crtc for v%d\n", - BIOS_CMD_TABLE_PARA_REVISION(enablecrtc)); bp->cmd_tbl.enable_crtc = NULL; break; } @@ -864,8 +857,6 @@ static void init_set_dce_clock(struct bios_parser *bp) bp->cmd_tbl.set_dce_clock = set_dce_clock_v2_1; break; default: - dm_output_to_console("Don't have set_dce_clock for v%d\n", - BIOS_CMD_TABLE_PARA_REVISION(setdceclock)); bp->cmd_tbl.set_dce_clock = NULL; break; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c index 73458e295103..df8139bda142 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c @@ -87,8 +87,7 @@ bool dal_bios_parser_init_cmd_tbl_helper2( return true; default: - /* Unsupported DCE */ - BREAK_TO_DEBUGGER(); + *h = dal_cmd_tbl_helper_dce112_get_table2(); return false; } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index ab1132bc896a..d9955c5d2e5e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -174,7 +174,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) ############################################################################### # DCN35 ############################################################################### -CLK_MGR_DCN35 = dcn35_smu.o dcn35_clk_mgr.o +CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35)) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 0e243f4344d0..4c3e58c730b1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -355,8 +355,11 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p BREAK_TO_DEBUGGER(); return NULL; } + if (ctx->dce_version == DCN_VERSION_3_51) + dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + else + dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); - dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base.base; } break; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index e93df3d6222e..bc123f1884da 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -50,12 +50,13 @@ #include "link.h" #include "logger_types.h" + + +#include "yellow_carp_offset.h" #undef DC_LOGGER #define DC_LOGGER \ clk_mgr->base.base.ctx->logger -#include "yellow_carp_offset.h" - #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 29eff386505a..91d872d6d392 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -53,9 +53,6 @@ #include "logger_types.h" -#undef DC_LOGGER -#define DC_LOGGER \ - clk_mgr->base.base.ctx->logger #define MAX_INSTANCE 7 @@ -77,6 +74,9 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, { { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } }, { { 0x0001B400, 0x0242E000, 0, 0, 0, 0, 0, 0 } } } }; +#undef DC_LOGGER +#define DC_LOGGER \ + clk_mgr->base.base.ctx->logger #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index a0fb4481d2f1..e4d22f74f986 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -130,7 +130,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; + int display_count = 0; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; @@ -194,8 +194,6 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow. if (new_clocks->dppclk_khz < MIN_DPP_DISP_CLK) new_clocks->dppclk_khz = MIN_DPP_DISP_CLK; - if (new_clocks->dispclk_khz < MIN_DPP_DISP_CLK) - new_clocks->dispclk_khz = MIN_DPP_DISP_CLK; if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) @@ -204,15 +202,19 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - /* No need to apply the w/a if we haven't taken over from bios yet */ - if (clk_mgr_base->clks.dispclk_khz) - dcn315_disable_otg_wa(clk_mgr_base, context, true); + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; + dcn315_disable_otg_wa(clk_mgr_base, context, true); + + /* Clamp the requested clock to PMFW based on their limit. */ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + + dcn315_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn315_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - if (clk_mgr_base->clks.dispclk_khz) - dcn315_disable_otg_wa(clk_mgr_base, context, false); + dcn315_disable_otg_wa(clk_mgr_base, context, false); update_dispclk = true; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index c3e50c3aaa60..49efea0c8fcf 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -140,7 +140,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; + int display_count = 0; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; @@ -201,8 +201,6 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow. if (new_clocks->dppclk_khz < 100000) new_clocks->dppclk_khz = 100000; - if (new_clocks->dispclk_khz < 100000) - new_clocks->dispclk_khz = 100000; if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) @@ -211,11 +209,18 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + /* Clamp the requested clock to PMFW based on their limit. */ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + + dcn316_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c new file mode 100644 index 000000000000..6a6ae618650b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c @@ -0,0 +1,140 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "core_types.h" +#include "dcn35_clk_mgr.h" + +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define mmCLK1_CLK_PLL_REQ 0x16E37 + +#define mmCLK1_CLK0_DFS_CNTL 0x16E69 +#define mmCLK1_CLK1_DFS_CNTL 0x16E6C +#define mmCLK1_CLK2_DFS_CNTL 0x16E6F +#define mmCLK1_CLK3_DFS_CNTL 0x16E72 +#define mmCLK1_CLK4_DFS_CNTL 0x16E75 +#define mmCLK1_CLK5_DFS_CNTL 0x16E78 + +#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC +#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD +#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE +#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF +#define mmCLK1_CLK4_CURRENT_CNT 0x16F00 +#define mmCLK1_CLK5_CURRENT_CNT 0x16F01 + +#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A +#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 +#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C +#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 +#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE +#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 + +#define mmCLK1_CLK0_DS_CNTL 0x16E83 +#define mmCLK1_CLK1_DS_CNTL 0x16E8C +#define mmCLK1_CLK2_DS_CNTL 0x16E95 +#define mmCLK1_CLK3_DS_CNTL 0x16E9E +#define mmCLK1_CLK4_DS_CNTL 0x16EA7 +#define mmCLK1_CLK5_DS_CNTL 0x16EB0 + +#define mmCLK1_CLK0_ALLOW_DS 0x16E84 +#define mmCLK1_CLK1_ALLOW_DS 0x16E8D +#define mmCLK1_CLK2_ALLOW_DS 0x16E96 +#define mmCLK1_CLK3_ALLOW_DS 0x16E9F +#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 +#define mmCLK1_CLK5_ALLOW_DS 0x16EB1 + +#define mmCLK5_spll_field_8 0x1B04B +#define mmDENTIST_DISPCLK_CNTL 0x0124 +#define regDENTIST_DISPCLK_CNTL 0x0064 +#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L + +// DENTIST_DISPCLK_CNTL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L + +#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L + +#define REG(reg) \ + (clk_mgr->regs->reg) + +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define CLK_SR_DCN35(reg_name)\ + .reg_name = mm ## reg_name + +static const struct clk_mgr_registers clk_mgr_regs_dcn351 = { + CLK_REG_LIST_DCN35() +}; + +static const struct clk_mgr_shift clk_mgr_shift_dcn351 = { + CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask_dcn351 = { + CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define TO_CLK_MGR_DCN35(clk_mgr)\ + container_of(clk_mgr, struct clk_mgr_dcn35, base) + + +void dcn351_clk_mgr_construct( + struct dc_context *ctx, + struct clk_mgr_dcn35 *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct dccg *dccg) +{ + /*register offset changed*/ + clk_mgr->base.regs = &clk_mgr_regs_dcn351; + clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351; + clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351; + + dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + +} + + diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 3bd0d46c1701..a4ac601a30c3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -36,15 +36,11 @@ #include "dcn20/dcn20_clk_mgr.h" - - #include "reg_helper.h" #include "core_types.h" #include "dcn35_smu.h" #include "dm_helpers.h" -/* TODO: remove this include once we ported over remaining clk mgr functions*/ -#include "dcn30/dcn30_clk_mgr.h" #include "dcn31/dcn31_clk_mgr.h" #include "dc_dmub_srv.h" @@ -55,34 +51,102 @@ #define DC_LOGGER \ clk_mgr->base.base.ctx->logger -#define regCLK1_CLK_PLL_REQ 0x0237 -#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define mmCLK1_CLK_PLL_REQ 0x16E37 + +#define mmCLK1_CLK0_DFS_CNTL 0x16E69 +#define mmCLK1_CLK1_DFS_CNTL 0x16E6C +#define mmCLK1_CLK2_DFS_CNTL 0x16E6F +#define mmCLK1_CLK3_DFS_CNTL 0x16E72 +#define mmCLK1_CLK4_DFS_CNTL 0x16E75 +#define mmCLK1_CLK5_DFS_CNTL 0x16E78 + +#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB +#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC +#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD +#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE +#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF +#define mmCLK1_CLK5_CURRENT_CNT 0x16F00 + +#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A +#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 +#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C +#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 +#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE +#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 + +#define mmCLK1_CLK0_DS_CNTL 0x16E83 +#define mmCLK1_CLK1_DS_CNTL 0x16E8C +#define mmCLK1_CLK2_DS_CNTL 0x16E95 +#define mmCLK1_CLK3_DS_CNTL 0x16E9E +#define mmCLK1_CLK4_DS_CNTL 0x16EA7 +#define mmCLK1_CLK5_DS_CNTL 0x16EB0 + +#define mmCLK1_CLK0_ALLOW_DS 0x16E84 +#define mmCLK1_CLK1_ALLOW_DS 0x16E8D +#define mmCLK1_CLK2_ALLOW_DS 0x16E96 +#define mmCLK1_CLK3_ALLOW_DS 0x16E9F +#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 +#define mmCLK1_CLK5_ALLOW_DS 0x16EB1 + +#define mmCLK5_spll_field_8 0x1B24B +#define mmDENTIST_DISPCLK_CNTL 0x0124 +#define regDENTIST_DISPCLK_CNTL 0x0064 +#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +// DENTIST_DISPCLK_CNTL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L + +#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L -#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 +#undef FN +#define FN(reg_name, field_name) \ + clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name -#define regCLK1_CLK2_BYPASS_CNTL 0x029c -#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 +#define REG(reg) \ + (clk_mgr->regs->reg) -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg -#define regCLK5_0_CLK5_spll_field_8 0x464b -#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0 +#define BASE(seg) BASE_INNER(seg) -#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd -#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name -#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 +#define CLK_SR_DCN35(reg_name)\ + .reg_name = mm ## reg_name -#define REG(reg_name) \ - (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) +static const struct clk_mgr_registers clk_mgr_regs_dcn35 = { + CLK_REG_LIST_DCN35() +}; + +static const struct clk_mgr_shift clk_mgr_shift_dcn35 = { + CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask_dcn35 = { + CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; #define TO_CLK_MGR_DCN35(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn35, base) @@ -388,14 +452,19 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); - if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) - new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; + /* Clamp the requested clock to PMFW based on their limit. */ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + dcn35_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; @@ -443,7 +512,6 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) struct fixed31_32 pll_req; unsigned int fbmult_frac_val = 0; unsigned int fbmult_int_val = 0; - struct dc_context *ctx = clk_mgr->base.ctx; /* * Register value of fbmult is in 8.16 format, we are converting to 314.32 @@ -503,12 +571,12 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - struct dc_context *ctx = clk_mgr->base.ctx; + uint32_t ssc_enable; - REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable); + ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; - return ssc_enable == 1; + return ssc_enable != 0; } static void init_clk_states(struct clk_mgr *clk_mgr) @@ -633,10 +701,10 @@ static struct dcn35_ss_info_table ss_info_table = { static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) { - struct dc_context *ctx = clk_mgr->base.ctx; - uint32_t clock_source; + uint32_t clock_source = 0; + + clock_source = REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK; - REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); // If it's DFS mode, clock_source is 0. if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; @@ -1106,6 +1174,12 @@ void dcn35_clk_mgr_construct( clk_mgr->base.dprefclk_ss_divider = 1000; clk_mgr->base.ss_on_dprefclk = false; clk_mgr->base.dfs_ref_freq_khz = 48000; + if (ctx->dce_version == DCN_VERSION_3_5) { + clk_mgr->base.regs = &clk_mgr_regs_dcn35; + clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35; + clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35; + } + clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h index 1203dc605b12..a12a9bf90806 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h @@ -60,4 +60,8 @@ void dcn35_clk_mgr_construct(struct dc_context *ctx, void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); +void dcn351_clk_mgr_construct(struct dc_context *ctx, + struct clk_mgr_dcn35 *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct dccg *dccg); #endif //__DCN35_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 8cfc5f435937..2ee034879f9f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -24,6 +24,8 @@ #include "dml/dcn401/dcn401_fpu.h" +#define DCN_BASE__INST0_SEG1 0x000000C0 + #define mmCLK01_CLK0_CLK_PLL_REQ 0x16E37 #define mmCLK01_CLK0_CLK0_DFS_CNTL 0x16E69 #define mmCLK01_CLK0_CLK1_DFS_CNTL 0x16E6C @@ -1698,7 +1700,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL); if (!clk_mgr->base.bw_params) { BREAK_TO_DEBUGGER(); - kfree(clk_mgr); + kfree(clk_mgr401); return NULL; } @@ -1709,6 +1711,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( if (!clk_mgr->wm_range_table) { BREAK_TO_DEBUGGER(); kfree(clk_mgr->base.bw_params); + kfree(clk_mgr401); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 6d4ee8fe615c..f5d938b9504c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -276,6 +276,7 @@ static bool create_links( link->link_id.type = OBJECT_TYPE_CONNECTOR; link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); if (!link->link_enc) { @@ -438,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * Don't adjust DRR while there's bandwidth optimizations pending to * avoid conflicting with firmware updates. */ - if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->ctx->dce_version > DCE_VERSION_MAX) { + if (dc->optimized_required || dc->wm_optimized_required) { + stream->adjust.timing_adjust_pending = true; return false; + } + } dc_exit_ips_for_hw_access(dc); @@ -452,6 +456,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, if (dc->caps.max_v_total != 0 && (adjust->v_total_max > dc->caps.max_v_total || adjust->v_total_min > dc->caps.max_v_total)) { + stream->adjust.timing_adjust_pending = false; if (adjust->allow_otg_v_count_halt) return set_long_vtotal(dc, stream, adjust); else @@ -465,7 +470,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, dc->hwss.set_drr(&pipe, 1, *adjust); - + stream->adjust.timing_adjust_pending = false; return true; } } @@ -2032,7 +2037,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); - if (context->stream_count > get_seamless_boot_stream_count(context) || + if (get_seamless_boot_stream_count(context) == 0 || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ hwss_wait_for_no_pipes_pending(dc, context); @@ -2975,8 +2980,14 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_active_fixed) stream->vrr_active_fixed = *update->vrr_active_fixed; - if (update->crtc_timing_adjust) + if (update->crtc_timing_adjust) { + if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || + stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max || + stream->adjust.timing_adjust_pending) + update->crtc_timing_adjust->timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; + update->crtc_timing_adjust->timing_adjust_pending = false; + } if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -4734,7 +4745,8 @@ static bool full_update_required(struct dc *dc, stream_update->lut3d_func || stream_update->pending_test_pattern || stream_update->crtc_timing_adjust || - stream_update->scaler_sharpener_update)) + stream_update->scaler_sharpener_update || + stream_update->hw_cursor_req)) return true; if (stream) { @@ -5109,8 +5121,8 @@ bool dc_update_planes_and_stream(struct dc *dc, else ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - - if (ret) + if (ret && (dc->ctx->dce_version >= DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_01)) clear_update_flags(srf_updates, surface_count, stream); return ret; @@ -5141,7 +5153,7 @@ void dc_commit_updates_for_stream(struct dc *dc, ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, stream_update, state); - if (ret) + if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2) clear_update_flags(srf_updates, surface_count, stream); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index bb766c2a7417..d62b00314682 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -176,7 +176,7 @@ static bool is_ycbcr2020_type( { bool ret = false; - if (color_space == COLOR_SPACE_2020_YCBCR) + if (color_space == COLOR_SPACE_2020_YCBCR_LIMITED || color_space == COLOR_SPACE_2020_YCBCR_FULL) ret = true; return ret; } @@ -247,7 +247,8 @@ void color_space_to_black_color( case COLOR_SPACE_YCBCR709_BLACK: case COLOR_SPACE_YCBCR601_LIMITED: case COLOR_SPACE_YCBCR709_LIMITED: - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: + case COLOR_SPACE_2020_YCBCR_FULL: *black_color = black_color_format[BLACK_COLOR_FORMAT_YUV_CV]; break; @@ -508,6 +509,7 @@ void set_p_state_switch_method( if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba) return; + pipe_ctx->p_state_type = P_STATE_UNKNOWN; if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) { /* MCLK switching is supported */ @@ -554,6 +556,21 @@ void set_p_state_switch_method( } } +void set_drr_and_clear_adjust_pending( + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream, + struct drr_params *params) +{ + /* params can be null.*/ + if (pipe_ctx && pipe_ctx->stream_res.tg && + pipe_ctx->stream_res.tg->funcs->set_drr) + pipe_ctx->stream_res.tg->funcs->set_drr( + pipe_ctx->stream_res.tg, params); + + if (stream) + stream->adjust.timing_adjust_pending = false; +} + void get_fams2_visual_confirm_color( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d915020a4295..6dbf139c51f7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1455,7 +1455,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Invalid input */ - if (!plane_state->dst_rect.width || + if (!plane_state || + !plane_state->dst_rect.width || !plane_state->dst_rect.height || !plane_state->src_rect.width || !plane_state->src_rect.height) { @@ -3387,10 +3388,13 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing) break; case COLOR_DEPTH_121212: normalized_pix_clk = (pix_clk * 36) / 24; - break; + break; + case COLOR_DEPTH_141414: + normalized_pix_clk = (pix_clk * 42) / 24; + break; case COLOR_DEPTH_161616: normalized_pix_clk = (pix_clk * 48) / 24; - break; + break; default: ASSERT(0); break; @@ -4211,7 +4215,7 @@ static void set_avi_info_frame( break; case COLOR_SPACE_2020_RGB_FULLRANGE: case COLOR_SPACE_2020_RGB_LIMITEDRANGE: - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR; hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; break; @@ -4225,7 +4229,7 @@ static void set_avi_info_frame( break; } - if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR && + if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR_LIMITED && stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) { hdmi_info.bits.EC0_EC2 = 0; hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 41bd95e9177a..223c3d55544b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -959,6 +959,14 @@ union dp_128b_132b_supported_lttpr_link_rates { uint8_t raw; }; +union dp_alpm_lttpr_cap { + struct { + uint8_t AUX_LESS_ALPM_SUPPORTED :1; + uint8_t RESERVED :7; + } bits; + uint8_t raw; +}; + union dp_sink_video_fallback_formats { struct { uint8_t dp_1024x768_60Hz_24bpp_support :1; @@ -1103,6 +1111,7 @@ struct dc_lttpr_caps { uint8_t max_ext_timeout; union dp_main_link_channel_coding_lttpr_cap main_link_channel_coding; union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates; + union dp_alpm_lttpr_cap alpm; uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1]; }; @@ -1352,6 +1361,9 @@ struct dp_trace { #ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP #define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c #endif +#ifndef DP_LTTPR_ALPM_CAPABILITIES +#define DP_LTTPR_ALPM_CAPABILITIES 0xF0009 +#endif #ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index c10567ec1c81..6fd94c5f6da5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -641,7 +641,8 @@ enum dc_color_space { COLOR_SPACE_YCBCR709_LIMITED, COLOR_SPACE_2020_RGB_FULLRANGE, COLOR_SPACE_2020_RGB_LIMITEDRANGE, - COLOR_SPACE_2020_YCBCR, + COLOR_SPACE_2020_YCBCR_LIMITED, + COLOR_SPACE_2020_YCBCR_FULL, COLOR_SPACE_ADOBERGB, COLOR_SPACE_DCIP3, COLOR_SPACE_DISPLAYNATIVE, @@ -649,6 +650,7 @@ enum dc_color_space { COLOR_SPACE_APPCTRL, COLOR_SPACE_CUSTOMPOINTS, COLOR_SPACE_YCBCR709_BLACK, + COLOR_SPACE_2020_YCBCR = COLOR_SPACE_2020_YCBCR_LIMITED, }; enum dc_dither_option { @@ -1000,6 +1002,7 @@ struct dc_crtc_timing_adjust { uint32_t v_total_mid; uint32_t v_total_mid_frame_num; uint32_t allow_otg_v_count_halt; + uint8_t timing_adjust_pending; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index c8bdbbba44ef..1aca9e96c474 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1009,6 +1009,13 @@ struct psr_settings { unsigned int psr_sdp_transmit_line_num_deadline; uint8_t force_ffu_mode; unsigned int psr_power_opt; + + /** + * Some panels cannot handle idle pattern during PSR entry. + * To power down phy before disable stream to avoid sending + * idle pattern. + */ + uint8_t power_down_phy_before_disable_stream; }; enum replay_coasting_vtotal_type { diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index 0b889004509a..62402c7be0a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -580,9 +580,6 @@ static void dccg401_set_dpstreamclk( int otg_inst, int dp_hpo_inst) { - /* set the dtbclk_p source */ - dccg401_set_dtbclk_p_src(dccg, src, otg_inst); - /* enabled to select one of the DTBCLKs for pipe */ if (src == REFCLK) dccg401_disable_dpstreamclk(dccg, dp_hpo_inst); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 5c2825bc9a87..654b919465f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -420,7 +420,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( dynamic_range_rgb = 1; /*limited range*/ break; case COLOR_SPACE_2020_RGB_FULLRANGE: - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: case COLOR_SPACE_XR_RGB: case COLOR_SPACE_MSREF_SCRGB: case COLOR_SPACE_ADOBERGB: @@ -432,6 +432,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_APPCTRL: case COLOR_SPACE_CUSTOMPOINTS: case COLOR_SPACE_UNKNOWN: + default: /* do nothing */ break; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index bf636b28e3e1..d37ecfdde4f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,11 +63,26 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { + /* ASIC doesn't support DMUB */ + if (!link->ctx->dmub_srv) + return false; + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) return true; if (link->replay_settings.replay_feature_enabled) return true; + /* only use HW lock for PSR1 on single eDP */ + if (link->psr_settings.psr_version == DC_PSR_VERSION_1) { + struct dc_link *edp_links[MAX_NUM_EDP]; + int edp_num; + + dc_get_edp_links(link->dc, edp_links, &edp_num); + + if (edp_num == 1) + return true; + } + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index cae18f8c1c9a..8821153d0ac3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -419,6 +419,10 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->relock_delay_frame_cnt = 0; if (link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_001CF8) copy_settings_data->relock_delay_frame_cnt = 2; + + copy_settings_data->power_down_phy_before_disable_stream = + link->psr_settings.power_down_phy_before_disable_stream; + copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height; dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c index e5fb0e8333e4..e691a1cf3356 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c @@ -239,6 +239,7 @@ static const struct timing_generator_funcs dce60_tg_funcs = { dce60_timing_generator_enable_advanced_request, .configure_crc = dce60_configure_crc, .get_crc = dce110_get_crc, + .is_two_pixels_per_container = dce110_is_two_pixels_per_container, }; void dce60_timing_generator_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c index f496e952ceec..f8f1e98f646e 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c @@ -393,7 +393,7 @@ void enc1_stream_encoder_dp_set_stream_attribute( break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: case COLOR_SPACE_2020_RGB_FULLRANGE: - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: case COLOR_SPACE_XR_RGB: case COLOR_SPACE_MSREF_SCRGB: case COLOR_SPACE_ADOBERGB: @@ -406,6 +406,7 @@ void enc1_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_CUSTOMPOINTS: case COLOR_SPACE_UNKNOWN: case COLOR_SPACE_YCBCR709_BLACK: + default: /* do nothing */ break; } diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c index 0a27e0942a12..0008816cf155 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c @@ -634,7 +634,7 @@ void enc401_stream_encoder_dp_set_stream_attribute( break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: case COLOR_SPACE_2020_RGB_FULLRANGE: - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: case COLOR_SPACE_XR_RGB: case COLOR_SPACE_MSREF_SCRGB: case COLOR_SPACE_ADOBERGB: @@ -647,6 +647,7 @@ void enc401_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_CUSTOMPOINTS: case COLOR_SPACE_UNKNOWN: case COLOR_SPACE_YCBCR709_BLACK: + default: /* do nothing */ break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 1c10ba4dcdde..abe51cf3aab2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -281,10 +281,10 @@ static void CalculateDynamicMetadataParameters( double DISPCLK, double DCFClkDeepSleep, double PixelClock, - long HTotal, - long VBlank, - long DynamicMetadataTransmittedBytes, - long DynamicMetadataLinesBeforeActiveRequired, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP, double *Tsetup, @@ -3277,8 +3277,8 @@ static double CalculateWriteBackDelay( static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK, - double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes, - long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP, + double DCFClkDeepSleep, double PixelClock, unsigned int HTotal, unsigned int VBlank, unsigned int DynamicMetadataTransmittedBytes, + int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP, double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks) { double TotalRepeaterDelayTime = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index beed7adbbd43..c90dee4e9116 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -367,6 +367,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dram_speed_mts = clock_limits[i].dram_speed_mts; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index a201dbb743d7..79d921adc215 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -401,6 +401,7 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dram_speed_mts = clock_limits[i].dram_speed_mts; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index c4378e620cbf..986a69c5bd4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -29,7 +29,11 @@ dml2_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) +frame_warn_flag := -Wframe-larger-than=4096 +else frame_warn_flag := -Wframe-larger-than=3072 +endif else frame_warn_flag := -Wframe-larger-than=2048 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 8dabb1ac0b68..6822b0795120 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6301,9 +6301,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.meta_row_bandwidth_this_state, mode_lib->ms.dpte_row_bandwidth_this_state, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor); + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j]); s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( &mode_lib->ms.soc, @@ -6434,7 +6434,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) /* Output */ &mode_lib->ms.UrgentBurstFactorCursorPre[k], &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], &mode_lib->ms.NotUrgentLatencyHidingPre[k]); mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * @@ -6458,9 +6458,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw_pre, mode_lib->ms.prefetch_vmrow_bw, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre, @@ -6517,9 +6517,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw, mode_lib->ms.cursor_bw_pre, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre); @@ -6586,9 +6586,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw_pre, mode_lib->ms.prefetch_vmrow_bw, mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre, @@ -7809,9 +7809,9 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) mode_lib->ms.DETBufferSizeYThisState[k], mode_lib->ms.DETBufferSizeCThisState[k], /* Output */ - &mode_lib->ms.UrgentBurstFactorCursor[k], - &mode_lib->ms.UrgentBurstFactorLuma[k], - &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.UrgentBurstFactorCursor[j][k], + &mode_lib->ms.UrgentBurstFactorLuma[j][k], + &mode_lib->ms.UrgentBurstFactorChroma[j][k], &mode_lib->ms.NotUrgentLatencyHiding[k]); } @@ -9190,6 +9190,8 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc &locals->FractionOfUrgentBandwidth, &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport + + if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) { dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); @@ -9204,6 +9206,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc } } + if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) { locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( mode_lib->ms.num_active_planes, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h index f951936bb579..504c427b3b31 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h @@ -884,11 +884,11 @@ struct mode_support_st { dml_uint_t meta_row_height[__DML_NUM_PLANES__]; dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__]; dml_float_t UrgLatency; - dml_float_t UrgentBurstFactorCursor[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorCursor[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorCursorPre[__DML_NUM_PLANES__]; - dml_float_t UrgentBurstFactorLuma[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorLuma[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorLumaPre[__DML_NUM_PLANES__]; - dml_float_t UrgentBurstFactorChroma[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorChroma[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorChromaPre[__DML_NUM_PLANES__]; dml_float_t MaximumSwathWidthInLineBufferLuma; dml_float_t MaximumSwathWidthInLineBufferChroma; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 8dee0d397e03..f3aa93ddbf9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -762,6 +762,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->pixel_format = dml2_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: plane->pixel_format = dml2_444_64; @@ -887,7 +888,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm } //TODO : Could be possibly moved to a common helper layer. -static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id) +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) { int i, j; @@ -895,10 +896,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str return false; for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } } } } @@ -921,14 +924,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d return location; } -static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, +static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context) { unsigned int plane_id; int i = 0; int location = -1; - if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) { + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { ASSERT(false); return -1; } @@ -994,7 +997,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_streams++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index]); @@ -1013,18 +1016,18 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; } else { for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { - disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context); + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; - if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; /* apply forced pstate policy */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index d35dd507cb9f..dcbe327209d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include <linux/vmalloc.h> #include "dml2_internal_types.h" #include "dml_top.h" @@ -13,11 +14,11 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) { - *dml_ctx = (struct dml2_context *)kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + *dml_ctx = vzalloc(sizeof(struct dml2_context)); if (!(*dml_ctx)) return false; - (*dml_ctx)->v21.dml_init.dml2_instance = (struct dml2_instance *)kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); if (!((*dml_ctx)->v21.dml_init.dml2_instance)) return false; @@ -27,7 +28,7 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; - (*dml_ctx)->v21.mode_programming.programming = (struct dml2_display_cfg_programming *)kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); if (!((*dml_ctx)->v21.mode_programming.programming)) return false; @@ -87,6 +88,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co /* Store configuration options */ (*dml_ctx)->config = *config; + DC_FP_START(); + /*Initialize SOCBB and DCNIP params */ dml21_initialize_soc_bb_params(&(*dml_ctx)->v21.dml_init, config, in_dc); dml21_initialize_ip_params(&(*dml_ctx)->v21.dml_init, config, in_dc); @@ -97,6 +100,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co /*Initialize DML21 instance */ dml2_initialize_instance(&(*dml_ctx)->v21.dml_init); + + DC_FP_END(); } bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) @@ -112,8 +117,8 @@ bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const s void dml21_destroy(struct dml2_context *dml2) { - kfree(dml2->v21.dml_init.dml2_instance); - kfree(dml2->v21.mode_programming.programming); + vfree(dml2->v21.dml_init.dml2_instance); + vfree(dml2->v21.mode_programming.programming); } static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, @@ -216,7 +221,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s if (!result) return false; + DC_FP_START(); result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); if (!result) return false; @@ -266,7 +273,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); if (!is_supported) return false; @@ -278,10 +287,11 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml bool out = false; /* Use dml_validate_only for fast_validate path */ - if (fast_validate) { + if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); - } else + else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); + return out; } @@ -420,8 +430,12 @@ void dml21_copy(struct dml2_context *dst_dml_ctx, dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming; + DC_FP_START(); + /* need to initialize copied instance for internal references to be correct */ dml2_initialize_instance(&dst_dml_ctx->v21.dml_init); + + DC_FP_END(); } bool dml21_create_copy(struct dml2_context **dst_dml_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 0aa4e4d343b0..2c1316d1b6eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -139,9 +139,8 @@ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) core->clean_me_up.mode_lib.ip.subvp_fw_processing_delay_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us; core->clean_me_up.mode_lib.ip.subvp_swath_height_margin_lines = core_dcn4_ip_caps_base.subvp_swath_height_margin_lines; } else { - memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params)); + memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params)); patch_ip_params_with_ip_caps(&core->clean_me_up.mode_lib.ip, in_out->ip_caps); - core->clean_me_up.mode_lib.ip.imall_supported = false; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 3ea54fd52e46..157903115f3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -578,8 +578,8 @@ static void CalculateBytePerPixelAndBlockSizes( { *BytePerPixelDETY = 0; *BytePerPixelDETC = 0; - *BytePerPixelY = 0; - *BytePerPixelC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 1; if (SourcePixelFormat == dml2_444_64) { *BytePerPixelDETY = 8; @@ -3630,13 +3630,12 @@ static unsigned int CalculateMaxVStartup( double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000); unsigned int vblank_actual = timing->v_total - timing->v_active; unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0); - unsigned int vblank_nom_input = (unsigned int)math_min2(timing->vblank_nom, vblank_nom_default_in_line); - unsigned int vblank_avail = (vblank_nom_input == 0) ? vblank_nom_default_in_line : vblank_nom_input; + unsigned int vblank_avail = (timing->vblank_nom == 0) ? vblank_nom_default_in_line : (unsigned int)timing->vblank_nom; vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail); if (timing->interlaced && !ptoi_supported) - max_vstartup_lines = (unsigned int)(math_floor2(vblank_size / 2.0, 1.0)); + max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0)); else max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); #ifdef __DML_VBA_DEBUG__ @@ -4652,7 +4651,10 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. if (p->setup_for_tdlut) { *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); - *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + else + *p->tdlut_opt_time = 0; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 1ed21c1b86a5..a966abd40788 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -532,26 +532,6 @@ static void calculate_odm_slices(const struct dc_stream_state *stream, unsigned odm_slice_end_x[odm_factor - 1] = stream->src.width - 1; } -static bool is_plane_in_odm_slice(const struct dc_plane_state *plane, unsigned int slice_index, unsigned int *odm_slice_end_x, unsigned int num_slices) -{ - unsigned int slice_start_x, slice_end_x; - - if (slice_index == 0) - slice_start_x = 0; - else - slice_start_x = odm_slice_end_x[slice_index - 1] + 1; - - slice_end_x = odm_slice_end_x[slice_index]; - - if (plane->clip_rect.x + plane->clip_rect.width < slice_start_x) - return false; - - if (plane->clip_rect.x > slice_end_x) - return false; - - return true; -} - static void add_odm_slice_to_odm_tree(struct dml2_context *ctx, struct dc_state *state, struct dc_pipe_mapping_scratch *scratch, @@ -791,12 +771,6 @@ static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state sort_pipes_for_splitting(&scratch->pipe_pool); for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { - // We build the tree for one ODM slice at a time. - // Each ODM slice shares a common OPP - if (!is_plane_in_odm_slice(plane, odm_slice_index, scratch->odm_info.odm_slice_end_x, scratch->odm_info.odm_factor)) { - continue; - } - // Now we have a list of all pipes to be used for this plane/stream, now setup the tree. scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index] = add_plane_to_blend_tree(ctx, state, plane, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index bde4250853b1..405aefd14d9b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -746,7 +746,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -909,6 +909,7 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p out->SourcePixelFormat[location] = dml_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: out->SourcePixelFormat[location] = dml_444_64; @@ -929,7 +930,9 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context, struct scaler_data *out) +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) { int i; struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; @@ -950,7 +953,7 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc } ASSERT(i < MAX_PIPES); - memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); + return &temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, @@ -1013,11 +1016,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out const struct dc_plane_state *in, struct dc_state *context, const struct soc_bounding_box_st *soc) { - struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); - if (!scaler_data) - return; - - get_scaler_data_for_plane(in, context, scaler_data); + struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context); out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; @@ -1082,8 +1081,6 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->DynamicMetadataTransmittedBytes[location] = 0; out->NumberOfCursors[location] = 1; - - kfree(scaler_data); } static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, @@ -1303,7 +1300,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_timings++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2); @@ -1343,7 +1340,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_surfaces++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); populate_dml_plane_cfg_from_plane_state( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 866b0abcff1b..03812f862b3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -24,6 +24,8 @@ * */ +#include <linux/vmalloc.h> + #include "display_mode_core.h" #include "dml2_internal_types.h" #include "dml2_utils.h" @@ -533,14 +535,21 @@ static bool optimize_pstate_with_svp_and_drr(struct dml2_context *dml2, struct d static bool call_dml_mode_support_and_programming(struct dc_state *context) { unsigned int result = 0; - unsigned int min_state; + unsigned int min_state = 0; int min_state_for_g6_temp_read = 0; + + + if (!context) + return false; + struct dml2_context *dml2 = context->bw_ctx.dml2; struct dml2_wrapper_scratch *s = &dml2->v20.scratch; - min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); - ASSERT(min_state_for_g6_temp_read >= 0); + ASSERT(min_state_for_g6_temp_read >= 0); + } if (!dml2->config.use_native_pstate_optimization) { result = optimize_pstate_with_svp_and_drr(dml2, context); @@ -551,14 +560,20 @@ static bool call_dml_mode_support_and_programming(struct dc_state *context) /* Upon trying to sett certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1. This leads to an invalid value of min_state causing crashes later on. * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly. */ - if (min_state_for_g6_temp_read >= 0) - min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; - else - min_state = s->mode_support_params.out_lowest_state_idx; - - if (result) - result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + if (min_state_for_g6_temp_read >= 0) + min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; + else + min_state = s->mode_support_params.out_lowest_state_idx; + } + if (result) { + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + } else { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + } + } return result; } @@ -687,6 +702,8 @@ static bool dml2_validate_only(struct dc_state *context) build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config); + if (!dml2->config.skip_hw_state_mapping) + dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config); result = pack_and_call_dml_mode_support_ex(dml2, &dml2->v20.scratch.cur_display_config, @@ -719,17 +736,22 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2 return out; } + DC_FP_START(); + /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml2_validate_only(context); else out = dml2_validate_and_build_resource(in_dc, context); + + DC_FP_END(); + return out; } static inline struct dml2_context *dml2_allocate_memory(void) { - return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); } static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) @@ -764,11 +786,15 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op break; } + DC_FP_START(); + initialize_dml2_ip_params(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.ip); initialize_dml2_soc_bbox(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc); initialize_dml2_soc_states(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc, &(*dml2)->v20.dml_core_ctx.states); + + DC_FP_END(); } bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) @@ -796,7 +822,7 @@ void dml2_destroy(struct dml2_context *dml2) if (dml2->architecture == dml2_architecture_21) dml21_destroy(dml2); - kfree(dml2); + vfree(dml2); } void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 0f944fcfd5a5..785226945699 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -159,6 +159,7 @@ struct dml2_clks_table_entry { unsigned int dtbclk_mhz; unsigned int dispclk_mhz; unsigned int dppclk_mhz; + unsigned int dram_speed_mts; /*which is based on wck_ratio*/ }; struct dml2_clks_num_entries { diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c index e1da48b05d00..75fb77bca83b 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c @@ -194,6 +194,9 @@ void dpp_reset(struct dpp *dpp_base) dpp->filter_h = NULL; dpp->filter_v = NULL; + memset(&dpp_base->pos, 0, sizeof(dpp_base->pos)); + memset(&dpp_base->att, 0, sizeof(dpp_base->att)); + memset(&dpp->scl_data, 0, sizeof(dpp->scl_data)); memset(&dpp->pwl_data, 0, sizeof(dpp->pwl_data)); } @@ -480,10 +483,11 @@ void dpp1_set_cursor_position( if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ - REG_UPDATE(CURSOR0_CONTROL, - CUR0_ENABLE, cur_en); + if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); - dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } } void dpp1_cnv_set_optional_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index 40acebd13e46..abf439e743f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -425,11 +425,6 @@ bool dpp3_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - if (scl_data->viewport.width > scl_data->h_active && - dpp->ctx->dc->debug.max_downscale_src_width != 0 && - scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width) - return false; - /* * Set default taps if none are provided * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling @@ -467,6 +462,12 @@ bool dpp3_get_optimal_number_of_taps( else scl_data->taps.h_taps_c = in_taps->h_taps_c; + // Avoid null data in the scl data with this early return, proceed non-adaptive calcualtion first + if (scl_data->viewport.width > scl_data->h_active && + dpp->ctx->dc->debug.max_downscale_src_width != 0 && + scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width) + return false; + /*Ensure we can support the requested number of vtaps*/ min_taps_y = dc_fixpt_ceil(scl_data->ratios.vert); min_taps_c = dc_fixpt_ceil(scl_data->ratios.vert_c); diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 3b6ca7974e18..712aff7e17f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; - // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - cur_rom_en = 1; + if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } } REG_UPDATE_3(CURSOR0_CONTROL, @@ -154,9 +155,11 @@ void dpp401_set_cursor_position( struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); uint32_t cur_en = pos->enable ? 1 : 0; - REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); + if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); - dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } } void dpp401_set_optional_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c index 678db949cfe3..759b453385c4 100644 --- a/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c @@ -323,7 +323,7 @@ static void dcn31_hpo_dp_stream_enc_set_stream_attribute( break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: case COLOR_SPACE_2020_RGB_FULLRANGE: - case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_2020_YCBCR_LIMITED: case COLOR_SPACE_XR_RGB: case COLOR_SPACE_MSREF_SCRGB: case COLOR_SPACE_ADOBERGB: @@ -336,6 +336,7 @@ static void dcn31_hpo_dp_stream_enc_set_stream_attribute( case COLOR_SPACE_CUSTOMPOINTS: case COLOR_SPACE_UNKNOWN: case COLOR_SPACE_YCBCR709_BLACK: + default: /* do nothing */ break; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c index fe741100c0f8..d347bb06577a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c @@ -129,7 +129,8 @@ bool hubbub3_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index 7fb5523f9722..b98505b240a7 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -750,7 +750,8 @@ static bool hubbub31_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 5264dc26cce1..32a6be543105 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -786,7 +786,8 @@ static bool hubbub32_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 5eb3da8d5206..dce7269959ce 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -326,7 +326,8 @@ static bool hubbub35_program_watermarks( DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, 0xA);/*hw delta*/ REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DCHUBBUB_ARB_MAX_QOS_COMMIT_THRESHOLD, 0xF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index 22ac2b7e49ae..da963f73829f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -532,6 +532,12 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable, SECONDARY_SURFACE_DCC_IND_64B_BLK, dcc_ind_64b_blk); } +void hubp_reset(struct hubp *hubp) +{ + memset(&hubp->pos, 0, sizeof(hubp->pos)); + memset(&hubp->att, 0, sizeof(hubp->att)); +} + void hubp1_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, @@ -1337,8 +1343,9 @@ static void hubp1_wait_pipe_read_start(struct hubp *hubp) void hubp1_init(struct hubp *hubp) { - //do nothing + hubp_reset(hubp); } + static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_program_surface_flip_and_addr = hubp1_program_surface_flip_and_addr, @@ -1351,6 +1358,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_set_vm_context0_settings = hubp1_set_vm_context0_settings, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_hubp_blank_en = hubp1_set_hubp_blank_en, .set_cursor_attributes = hubp1_cursor_set_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h index 69119b2fdce2..193e48b440ef 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h @@ -746,6 +746,8 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size independent_64b_blks); +void hubp_reset(struct hubp *hubp); + bool hubp1_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 0637e4c552d8..c74ee2d50a69 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -1044,11 +1044,13 @@ void hubp2_cursor_set_position( if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ - if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) - hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { + if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, + REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); + } REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, pos->x, @@ -1660,6 +1662,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp2_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c index cd2bfcc51276..6efcb10abf3d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c @@ -121,6 +121,7 @@ static struct hubp_funcs dcn201_hubp_funcs = { .set_cursor_position = hubp1_cursor_set_position, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .hubp_clk_cntl = hubp1_clk_cntl, .hubp_vtg_sel = hubp1_vtg_sel, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c index e13d69a22c1c..4e2d9d381db3 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c @@ -811,6 +811,8 @@ static void hubp21_init(struct hubp *hubp) struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + + hubp_reset(hubp); } static struct hubp_funcs dcn21_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, @@ -823,6 +825,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp21_set_vm_system_aperture_settings, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = hubp21_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp1_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index 60a64d290352..5cf7e6771cb4 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -483,6 +483,10 @@ void hubp3_init(struct hubp *hubp) struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); + + hubp_reset(hubp); } static struct hubp_funcs dcn30_hubp_funcs = { @@ -497,6 +501,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index 8394e8c06919..c671908ba7d0 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -44,7 +44,7 @@ void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable) struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, enable); - REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, enable); + REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, 1); } void hubp31_soft_reset(struct hubp *hubp, bool reset) @@ -79,6 +79,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index ca5b4b28a664..c4f41350d1b3 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -168,6 +168,8 @@ void hubp32_init(struct hubp *hubp) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8); + + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); } static struct hubp_funcs dcn32_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, @@ -181,6 +183,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index d1f05b82b3dd..e7625290c0e4 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -199,6 +199,7 @@ static struct hubp_funcs dcn35_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index b1ebf5053b4f..7013c124efcf 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -141,7 +141,7 @@ void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor void hubp401_init(struct hubp *hubp) { - //For now nothing to do, HUBPREQ_DEBUG_DB register is removed on DCN4x. + hubp_reset(hubp); } void hubp401_vready_at_or_After_vsync(struct hubp *hubp, @@ -718,11 +718,13 @@ void hubp401_cursor_set_position( dc_fixpt_from_int(dst_x_offset), param->h_scale_ratio)); - if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) - hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { + if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, - CURSOR_ENABLE, cur_en); + REG_UPDATE(CURSOR_CONTROL, + CURSOR_ENABLE, cur_en); + } REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, x_pos, @@ -974,6 +976,7 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, + .hubp_reset = hubp_reset, .mem_program_viewport = hubp401_set_viewport, .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp401_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 4fbed0298adf..03b22e9115ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -951,8 +951,8 @@ void dce110_edp_backlight_control( struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; uint8_t pwrseq_instance = 0; - unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; - unsigned int post_T7_delay = OLED_POST_T7_DELAY; + unsigned int pre_T11_delay = (link->dpcd_sink_ext_caps.bits.oled ? OLED_PRE_T11_DELAY : 0); + unsigned int post_T7_delay = (link->dpcd_sink_ext_caps.bits.oled ? OLED_POST_T7_DELAY : 0); if (dal_graphics_object_id_get_connector_id(link->link_enc->connector) != CONNECTOR_ID_EDP) { @@ -1064,9 +1064,11 @@ void dce110_edp_backlight_control( DC_LOG_DC("edp_receiver_ready_T9 skipped\n"); } - if (!enable && link->dpcd_sink_ext_caps.bits.oled) { + if (!enable) { + /*follow oem panel config's requirement*/ pre_T11_delay += link->panel_config.pps.extra_pre_t11_ms; - msleep(pre_T11_delay); + if (pre_T11_delay) + msleep(pre_T11_delay); } } @@ -1215,7 +1217,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) struct dce_hwseq *hws = link->dc->hwseq; if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { - if (!link->skip_implict_edp_power_control) + if (!link->skip_implict_edp_power_control && hws) hws->funcs.edp_backlight_control(link, false); link->dc->hwss.set_abm_immediate_disable(pipe_ctx); } @@ -1653,9 +1655,7 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( params.vertical_total_min = stream->adjust.v_total_min; params.vertical_total_max = stream->adjust.v_total_max; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx, stream, ¶ms); // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) @@ -2103,8 +2103,7 @@ static void set_drr(struct pipe_ctx **pipe_ctx, struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; if ((tg != NULL) && tg->funcs) { - if (tg->funcs->set_drr) - tg->funcs->set_drr(tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) if (tg->funcs->set_static_screen_control) tg->funcs->set_static_screen_control( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index a6a1db5ba8ba..00be0b26689d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1112,9 +1112,7 @@ static void dcn10_reset_back_end_for_pipe( pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false); - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, NULL); + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; } @@ -1286,6 +1284,7 @@ void dcn10_plane_atomic_power_down(struct dc *dc, if (hws->funcs.hubp_pg_control) hws->funcs.hubp_pg_control(hws, hubp->inst, false); + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); REG_SET(DC_IP_REQUEST_CNTL, 0, @@ -1447,6 +1446,7 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; @@ -1990,20 +1990,11 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx) dc->hwss.get_position(&pipe_ctx, 1, &position); vpos = position.vertical_count; - /* Avoid wraparound calculation issues */ - vupdate_start += stream->timing.v_total; - vupdate_end += stream->timing.v_total; - vpos += stream->timing.v_total; - if (vpos <= vupdate_start) { /* VPOS is in VACTIVE or back porch. */ lines_to_vupdate = vupdate_start - vpos; - } else if (vpos > vupdate_end) { - /* VPOS is in the front porch. */ - return; } else { - /* VPOS is in VUPDATE. */ - lines_to_vupdate = 0; + lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start; } /* Calculate time until VUPDATE in microseconds. */ @@ -2011,13 +2002,18 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx) stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz; us_to_vupdate = lines_to_vupdate * us_per_line; + /* Stall out until the cursor update completes. */ + if (vupdate_end < vupdate_start) + vupdate_end += stream->timing.v_total; + + /* Position is in the range of vupdate start and end*/ + if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start) + us_to_vupdate = 0; + /* 70 us is a conservative estimate of cursor update time*/ if (us_to_vupdate > 70) return; - /* Stall out until the cursor update completes. */ - if (vupdate_end < vupdate_start) - vupdate_end += stream->timing.v_total; us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line; udelay(us_to_vupdate + us_vupdate); } @@ -3219,8 +3215,7 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; if ((tg != NULL) && tg->funcs) { - if (tg->funcs->set_drr) - tg->funcs->set_drr(tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) if (tg->funcs->set_static_screen_control) tg->funcs->set_static_screen_control( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 36d12db8d022..08fc2a2c399f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -273,14 +273,13 @@ void dcn20_setup_gsl_group_as_lock( } /* at this point we want to program whether it's to enable or disable */ - if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL && - pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) { + if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL) { pipe_ctx->stream_res.tg->funcs->set_gsl( pipe_ctx->stream_res.tg, &gsl); - - pipe_ctx->stream_res.tg->funcs->set_gsl_source_select( - pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0); + if (pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) + pipe_ctx->stream_res.tg->funcs->set_gsl_source_select( + pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0); } else BREAK_TO_DEBUGGER(); } @@ -946,15 +945,13 @@ enum dc_status dcn20_enable_stream_timing( return DC_ERROR_UNEXPECTED; } - hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp); + fsleep(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz)); params.vertical_total_min = stream->adjust.v_total_min; params.vertical_total_max = stream->adjust.v_total_max; params.vertical_total_mid = stream->adjust.v_total_mid; params.vertical_total_mid_frame_num = stream->adjust.v_total_mid_frame_num; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx, stream, ¶ms); // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) @@ -2822,9 +2819,7 @@ void dcn20_reset_back_end_for_pipe( pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, NULL); + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); /* TODO - convert symclk_ref_cnts for otg to a bit map to solve * the case where the same symclk is shared across multiple otg * instances @@ -3003,7 +2998,11 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); phyd32clk = get_phyd32clk_src(link); - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + if (link->cur_link_settings.link_rate == LINK_RATE_UNKNOWN) { + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + } else { + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + } } else { if (dccg->funcs->enable_symclk_se) dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 3d4b31bd9946..9aa925a0b3b4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -528,9 +528,7 @@ static void dcn31_reset_back_end_for_pipe( if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, NULL); + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); link = pipe_ctx->stream->link; /* DPMS may already disable or */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index bd309dbdf7b2..21aff7fa6375 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -236,7 +236,8 @@ void dcn35_init_hw(struct dc *dc) } hws->funcs.init_pipes(dc, dc->current_state); - if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control && + !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } @@ -787,6 +788,7 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; @@ -940,6 +942,7 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) /*to do, need to support both case*/ hubp->power_gated = true; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream = NULL; @@ -1016,8 +1019,22 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_MPCC][pipe_ctx->plane_res.mpcc_inst] = false; - if (pipe_ctx->stream_res.dsc) + if (pipe_ctx->stream_res.dsc) { update_state->pg_pipe_res_update[PG_DSC][pipe_ctx->stream_res.dsc->inst] = false; + if (dc->caps.sequential_ono) { + update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; + update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp && + pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = false; + update_state->pg_pipe_res_update[PG_DPP][j] = false; + } + } + } + } if (pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false; @@ -1162,6 +1179,25 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; if (dc->caps.sequential_ono) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + + if (new_pipe->stream_res.dsc && !new_pipe->top_pipe && + update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) { + update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true; + update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (new_pipe->plane_res.hubp && + new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = true; + update_state->pg_pipe_res_update[PG_DPP][j] = true; + } + } + } + } + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { @@ -1449,8 +1485,7 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, num_frames = 2 * (frame_rate % 60); } } - if (tg->funcs->set_drr) - tg->funcs->set_drr(tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) if (tg->funcs->set_static_screen_control) tg->funcs->set_static_screen_control( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 0b743669f23b..3279f347660c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -844,6 +844,13 @@ enum dc_status dcn401_enable_stream_timing( odm_slice_width, last_odm_slice_width); } + /* set DTBCLK_P */ + if (dc->res_pool->dccg->funcs->set_dtbclk_p_src) { + if (dc_is_dp_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) { + dc->res_pool->dccg->funcs->set_dtbclk_p_src(dc->res_pool->dccg, DPREFCLK, pipe_ctx->stream_res.tg->inst); + } + } + /* HW program guide assume display already disable * by unplug sequence. OTG assume stop. */ @@ -895,10 +902,7 @@ enum dc_status dcn401_enable_stream_timing( } hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp); - - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx, stream, ¶ms); /* Event triggers and num frames initialized for DRR, but can be * later updated for PSR use. Note DRR trigger events are generated @@ -1001,11 +1005,12 @@ void dcn401_enable_stream(struct pipe_ctx *pipe_ctx) if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { dccg->funcs->set_dpstreamclk(dccg, DPREFCLK, tg->inst, dp_hpo_inst); - - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + if (link->cur_link_settings.link_rate == LINK_RATE_UNKNOWN) { + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + } else { + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + } } else { - /* need to set DTBCLK_P source to DPREFCLK for DP8B10B */ - dccg->funcs->set_dtbclk_p_src(dccg, DPREFCLK, tg->inst); dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); } @@ -1770,3 +1775,128 @@ void dcn401_program_outstanding_updates(struct dc *dc, if (hubbub->funcs->program_compbuf_segments) hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true); } + +void dcn401_reset_back_end_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context) +{ + int i; + struct dc_link *link = pipe_ctx->stream->link; + const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + + DC_LOGGER_INIT(dc->ctx->logger); + if (pipe_ctx->stream_res.stream_enc == NULL) { + pipe_ctx->stream = NULL; + return; + } + + /* DPMS may already disable or */ + /* dpms_off status is incorrect due to fastboot + * feature. When system resume from S4 with second + * screen only, the dpms_off would be true but + * VBIOS lit up eDP, so check link status too. + */ + if (!pipe_ctx->stream->dpms_off || link->link_status.link_active) + dc->link_srv->set_dpms_off(pipe_ctx); + else if (pipe_ctx->stream_res.audio) + dc->hwss.disable_audio_stream(pipe_ctx); + + /* free acquired resources */ + if (pipe_ctx->stream_res.audio) { + /*disable az_endpoint*/ + pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); + + /*free audio*/ + if (dc->caps.dynamic_audio == true) { + /*we have to dynamic arbitrate the audio endpoints*/ + /*we free the resource, need reset is_audio_acquired*/ + update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, + pipe_ctx->stream_res.audio, false); + pipe_ctx->stream_res.audio = NULL; + } + } + + /* by upper caller loop, parent pipe: pipe0, will be reset last. + * back end share by all pipes and will be disable only when disable + * parent pipe. + */ + if (pipe_ctx->top_pipe == NULL) { + + dc->hwss.set_abm_immediate_disable(pipe_ctx); + + pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); + + pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false); + if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass) + pipe_ctx->stream_res.tg->funcs->set_odm_bypass( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); + + /* TODO - convert symclk_ref_cnts for otg to a bit map to solve + * the case where the same symclk is shared across multiple otg + * instances + */ + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + link->phy_state.symclk_ref_cnts.otg = 0; + if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) { + link_hwss->disable_link_output(link, + &pipe_ctx->link_res, pipe_ctx->stream->signal); + link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; + } + + /* reset DTBCLK_P */ + if (dc->res_pool->dccg->funcs->set_dtbclk_p_src) + dc->res_pool->dccg->funcs->set_dtbclk_p_src(dc->res_pool->dccg, REFCLK, pipe_ctx->stream_res.tg->inst); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (&dc->current_state->res_ctx.pipe_ctx[i] == pipe_ctx) + break; + + if (i == dc->res_pool->pipe_count) + return; + +/* + * In case of a dangling plane, setting this to NULL unconditionally + * causes failures during reset hw ctx where, if stream is NULL, + * it is expected that the pipe_ctx pointers to pipes and plane are NULL. + */ + pipe_ctx->stream = NULL; + DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n", + pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst); +} + +void dcn401_reset_hw_ctx_wrap( + struct dc *dc, + struct dc_state *context) +{ + int i; + struct dce_hwseq *hws = dc->hwseq; + + /* Reset Back End*/ + for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) { + struct pipe_ctx *pipe_ctx_old = + &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || + pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { + struct clock_source *old_clk = pipe_ctx_old->clock_source; + + if (hws->funcs.reset_back_end_for_pipe) + hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); + if (hws->funcs.enable_stream_gating) + hws->funcs.enable_stream_gating(dc, pipe_ctx_old); + if (old_clk) + old_clk->funcs->cs_power_down(old_clk); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index a27e62081685..6256429c8a4f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -84,4 +84,11 @@ void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); void dcn401_program_outstanding_updates(struct dc *dc, struct dc_state *context); +void dcn401_reset_back_end_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); +void dcn401_reset_hw_ctx_wrap( + struct dc *dc, + struct dc_state *context); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index a2ca07235c83..d6f36b8e1a26 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -111,7 +111,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .power_down = dce110_power_down, .enable_display_power_gating = dcn10_dummy_display_power_gating, .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .reset_hw_ctx_wrap = dcn401_reset_hw_ctx_wrap, .enable_stream_timing = dcn401_enable_stream_timing, .edp_backlight_control = dce110_edp_backlight_control, .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, @@ -136,7 +136,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .update_mall_sel = dcn32_update_mall_sel, .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, - .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, + .reset_back_end_for_pipe = dcn401_reset_back_end_for_pipe, .populate_mcm_luts = NULL, }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index ac9205625623..9ae6259f2db1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -46,6 +46,7 @@ struct dce_hwseq; struct link_resource; struct dc_dmub_cmd; struct pg_block_update; +struct drr_params; struct subvp_pipe_control_lock_fast_params { struct dc *dc; @@ -509,6 +510,11 @@ void set_p_state_switch_method( struct dc_state *context, struct pipe_ctx *pipe_ctx); +void set_drr_and_clear_adjust_pending( + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream, + struct drr_params *params); + void hwss_execute_sequence(struct dc *dc, struct block_sequence block_sequence[], int num_steps); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index e1e3142cdc00..62fb2009b302 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -621,7 +621,7 @@ struct dc_state { */ struct bw_context bw_ctx; - struct block_sequence block_sequence[50]; + struct block_sequence block_sequence[100]; unsigned int block_sequence_steps; struct dc_dmub_cmd dc_dmub_cmd[10]; unsigned int dmub_cmd_count; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index c2dd061892f4..221645c023b5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -166,6 +166,41 @@ enum dentist_divider_range { CLK_SR_DCN32(CLK1_CLK4_CURRENT_CNT), \ CLK_SR_DCN32(CLK4_CLK0_CURRENT_CNT) +#define CLK_REG_LIST_DCN35() \ + CLK_SR_DCN35(CLK1_CLK_PLL_REQ), \ + CLK_SR_DCN35(CLK1_CLK0_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK5_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK1_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK2_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK3_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK4_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK5_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK0_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_BYPASS_CNTL),\ + CLK_SR_DCN35(CLK1_CLK5_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK5_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK1_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK2_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK3_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \ + CLK_SR_DCN35(CLK5_spll_field_8), \ + SR(DENTIST_DISPCLK_CNTL), \ + #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \ CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\ CLK_SF(CLK1_CLK_PLL_REQ, FbMult_int, mask_sh),\ @@ -186,6 +221,7 @@ enum dentist_divider_range { CLK_SF(CLK0_CLK_PLL_REQ, FbMult_frac, mask_sh) #define CLK_REG_LIST_DCN401() \ + SR(DENTIST_DISPCLK_CNTL), \ CLK_SR_DCN401(CLK0_CLK_PLL_REQ, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK0_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK1_DFS_CNTL, CLK01, 0), \ @@ -236,6 +272,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_DFS_CNTL; uint32_t CLK1_CLK3_DFS_CNTL; uint32_t CLK1_CLK4_DFS_CNTL; + uint32_t CLK1_CLK5_DFS_CNTL; uint32_t CLK2_CLK2_DFS_CNTL; uint32_t CLK1_CLK0_CURRENT_CNT; @@ -243,11 +280,34 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_CURRENT_CNT; uint32_t CLK1_CLK3_CURRENT_CNT; uint32_t CLK1_CLK4_CURRENT_CNT; + uint32_t CLK1_CLK5_CURRENT_CNT; uint32_t CLK0_CLK0_DFS_CNTL; uint32_t CLK0_CLK1_DFS_CNTL; uint32_t CLK0_CLK3_DFS_CNTL; uint32_t CLK0_CLK4_DFS_CNTL; + uint32_t CLK1_CLK0_BYPASS_CNTL; + uint32_t CLK1_CLK1_BYPASS_CNTL; + uint32_t CLK1_CLK2_BYPASS_CNTL; + uint32_t CLK1_CLK3_BYPASS_CNTL; + uint32_t CLK1_CLK4_BYPASS_CNTL; + uint32_t CLK1_CLK5_BYPASS_CNTL; + + uint32_t CLK1_CLK0_DS_CNTL; + uint32_t CLK1_CLK1_DS_CNTL; + uint32_t CLK1_CLK2_DS_CNTL; + uint32_t CLK1_CLK3_DS_CNTL; + uint32_t CLK1_CLK4_DS_CNTL; + uint32_t CLK1_CLK5_DS_CNTL; + + uint32_t CLK1_CLK0_ALLOW_DS; + uint32_t CLK1_CLK1_ALLOW_DS; + uint32_t CLK1_CLK2_ALLOW_DS; + uint32_t CLK1_CLK3_ALLOW_DS; + uint32_t CLK1_CLK4_ALLOW_DS; + uint32_t CLK1_CLK5_ALLOW_DS; + uint32_t CLK5_spll_field_8; + }; struct clk_mgr_shift { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 0150f2581ee4..0c5675d1c593 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -119,10 +119,14 @@ static const struct dpp_input_csc_matrix __maybe_unused dpp_input_csc_matrix[] = { 0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, 0x2568, 0x43ee, 0xdbb2 } }, - { COLOR_SPACE_2020_YCBCR, + { COLOR_SPACE_2020_YCBCR_FULL, { 0x2F30, 0x2000, 0, 0xE869, 0xEDB7, 0x2000, 0xFABC, 0xBC6, 0, 0x2000, 0x3C34, 0xE1E6 } }, + { COLOR_SPACE_2020_YCBCR_LIMITED, + { 0x35B9, 0x2543, 0, 0xE2B2, + 0xEB2F, 0x2543, 0xFA01, 0x0B1F, + 0, 0x2543, 0x4489, 0xDB42 } }, { COLOR_SPACE_2020_RGB_LIMITEDRANGE, { 0x35E0, 0x255F, 0, 0xE2B3, 0xEB20, 0x255F, 0xF9FD, 0xB1E, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 16580d624278..eec16b0a199d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -152,6 +152,8 @@ struct hubp_funcs { void (*dcc_control)(struct hubp *hubp, bool enable, enum hubp_ind_block_size blk_size); + void (*hubp_reset)(struct hubp *hubp); + void (*mem_program_viewport)( struct hubp *hubp, const struct rect *viewport, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index c4e03482ba9a..9d740659521a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -140,7 +140,8 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) } } - if ((!link->wa_flags.dp_keep_receiver_powered) || hw_init) + if (((!link->wa_flags.dp_keep_receiver_powered) || hw_init) && + (link->type != dc_connection_none)) dpcd_write_rx_power_ctrl(link, false); } } @@ -148,6 +149,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) void link_set_all_streams_dpms_off_for_link(struct dc_link *link) { struct pipe_ctx *pipes[MAX_PIPES]; + struct dc_stream_state *streams[MAX_PIPES]; struct dc_state *state = link->dc->current_state; uint8_t count; int i; @@ -160,10 +162,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link) link_get_master_pipes_with_dpms_on(link, state, &count, pipes); + /* The subsequent call to dc_commit_updates_for_stream for a full update + * will release the current state and swap to a new state. Releasing the + * current state results in the stream pointers in the pipe_ctx structs + * to be zero'd. Hence, cache all streams prior to dc_commit_updates_for_stream. + */ + for (i = 0; i < count; i++) + streams[i] = pipes[i]->stream; + for (i = 0; i < count; i++) { - stream_update.stream = pipes[i]->stream; + stream_update.stream = streams[i]; dc_commit_updates_for_stream(link->ctx->dc, NULL, 0, - pipes[i]->stream, &stream_update, + streams[i], &stream_update, state); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index d78c8ec4de79..842636c7922b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -51,9 +51,10 @@ #include "dc_dmub_srv.h" #include "gpio_service_interface.h" +#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ + #define DC_LOGGER \ link->ctx->logger -#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ #ifndef MAX #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) @@ -249,21 +250,21 @@ static uint32_t intersect_frl_link_bw_support( { uint32_t supported_bw_in_kbps = max_supported_frl_bw_in_kbps; - // HDMI_ENCODED_LINK_BW bits are only valid if HDMI Link Configuration bit is 1 (FRL mode) - if (hdmi_encoded_link_bw.bits.FRL_MODE) { - if (hdmi_encoded_link_bw.bits.BW_48Gbps) - supported_bw_in_kbps = 48000000; - else if (hdmi_encoded_link_bw.bits.BW_40Gbps) - supported_bw_in_kbps = 40000000; - else if (hdmi_encoded_link_bw.bits.BW_32Gbps) - supported_bw_in_kbps = 32000000; - else if (hdmi_encoded_link_bw.bits.BW_24Gbps) - supported_bw_in_kbps = 24000000; - else if (hdmi_encoded_link_bw.bits.BW_18Gbps) - supported_bw_in_kbps = 18000000; - else if (hdmi_encoded_link_bw.bits.BW_9Gbps) - supported_bw_in_kbps = 9000000; - } + /* Skip checking FRL_MODE bit, as certain PCON will clear + * it despite supporting the link BW indicated in the other bits. + */ + if (hdmi_encoded_link_bw.bits.BW_48Gbps) + supported_bw_in_kbps = 48000000; + else if (hdmi_encoded_link_bw.bits.BW_40Gbps) + supported_bw_in_kbps = 40000000; + else if (hdmi_encoded_link_bw.bits.BW_32Gbps) + supported_bw_in_kbps = 32000000; + else if (hdmi_encoded_link_bw.bits.BW_24Gbps) + supported_bw_in_kbps = 24000000; + else if (hdmi_encoded_link_bw.bits.BW_18Gbps) + supported_bw_in_kbps = 18000000; + else if (hdmi_encoded_link_bw.bits.BW_9Gbps) + supported_bw_in_kbps = 9000000; return supported_bw_in_kbps; } @@ -944,6 +945,9 @@ bool link_decide_link_settings(struct dc_stream_state *stream, * TODO: add MST specific link training routine */ decide_mst_link_settings(link, link_setting); + } else if (stream->signal == SIGNAL_TYPE_VIRTUAL) { + link_setting->lane_count = LANE_COUNT_FOUR; + link_setting->link_rate = LINK_RATE_HIGH3; } else if (link->connector_signal == SIGNAL_TYPE_EDP) { /* enable edp link optimization for DSC eDP case */ if (stream->timing.flags.DSC) { @@ -966,9 +970,6 @@ bool link_decide_link_settings(struct dc_stream_state *stream, } else { edp_decide_link_settings(link, link_setting, req_bw); } - } else if (stream->signal == SIGNAL_TYPE_VIRTUAL) { - link_setting->lane_count = LANE_COUNT_FOUR; - link_setting->link_rate = LINK_RATE_HIGH3; } else { decide_dp_link_settings(link, link_setting, req_bw); } @@ -1494,7 +1495,7 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) { - uint8_t lttpr_dpcd_data[8] = {0}; + uint8_t lttpr_dpcd_data[10] = {0}; enum dc_status status; bool is_lttpr_present; @@ -1544,6 +1545,10 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + link->dpcd_caps.lttpr_caps.alpm.raw = + lttpr_dpcd_data[DP_LTTPR_ALPM_CAPABILITIES - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + /* If this chip cap is set, at least one retimer must exist in the chain * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c index bafa52a0165a..17c57cf98ec5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c @@ -75,7 +75,8 @@ void dp_disable_link_phy(struct dc_link *link, struct dc *dc = link->ctx->dc; if (!link->wa_flags.dp_keep_receiver_powered && - !link->skip_implict_edp_power_control) + !link->skip_implict_edp_power_control && + link->type != dc_connection_none) dpcd_write_rx_power_ctrl(link, false); dc->hwss.disable_link_output(link, link_res, signal); @@ -163,8 +164,9 @@ enum dc_status dp_set_fec_ready(struct dc_link *link, const struct link_resource } else { if (link->fec_state == dc_link_fec_ready) { fec_config = 0; - core_link_write_dpcd(link, DP_FEC_CONFIGURATION, - &fec_config, sizeof(fec_config)); + if (link->type != dc_connection_none) + core_link_write_dpcd(link, DP_FEC_CONFIGURATION, + &fec_config, sizeof(fec_config)); link_enc->funcs->fec_set_ready(link_enc, false); link->fec_state = dc_link_fec_not_ready; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 27b881f947e8..9385a32a471b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -1769,13 +1769,10 @@ bool perform_link_training_with_retries( is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) && (cur_link_settings.lane_count <= LANE_COUNT_ONE)); - if (is_link_bw_low) { + if (is_link_bw_low) DC_LOG_WARNING( "%s: Link(%d) bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n", __func__, link->link_index, req_bw, link_bw); - - return false; - } } msleep(delay_between_attempts); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 3bdce32a85e3..ae95ec48e572 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -36,7 +36,8 @@ link->ctx->logger static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, - const struct dc_link_settings *link_settings) + const struct dc_link_settings *link_settings, + enum lttpr_mode lttpr_mode) { union training_aux_rd_interval training_rd_interval; uint32_t wait_in_micro_secs = 100; @@ -49,6 +50,8 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, DP_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; } @@ -110,7 +113,6 @@ void decide_8b_10b_training_settings( */ lt_settings->link_settings.link_spread = link->dp_ss_off ? LINK_SPREAD_DISABLED : LINK_SPREAD_05_DOWNSPREAD_30KHZ; - lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting); lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting); lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting); lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting); @@ -119,6 +121,7 @@ void decide_8b_10b_training_settings( lt_settings->disallow_per_lane_settings = true; lt_settings->always_match_dpcd_with_hw_lane_settings = true; lt_settings->lttpr_mode = dp_decide_8b_10b_lttpr_mode(link); + lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting, lt_settings->lttpr_mode); dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 3aa05a2be6c0..fa642f4b88c2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -674,6 +674,18 @@ bool edp_setup_psr(struct dc_link *link, if (!link) return false; + //Clear PSR cfg + memset(&psr_configuration, 0, sizeof(psr_configuration)); + dm_helpers_dp_write_dpcd( + link->ctx, + link, + DP_PSR_EN_CFG, + &psr_configuration.raw, + sizeof(psr_configuration.raw)); + + if (link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) + return false; + dc = link->ctx->dc; dmcu = dc->res_pool->dmcu; psr = dc->res_pool->psr; @@ -684,9 +696,6 @@ bool edp_setup_psr(struct dc_link *link, if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) return false; - - memset(&psr_configuration, 0, sizeof(psr_configuration)); - psr_configuration.bits.ENABLE = 1; psr_configuration.bits.CRC_VERIFICATION = 1; psr_configuration.bits.FRAME_CAPTURE_INDICATION = @@ -950,6 +959,16 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream if (!link) return false; + //Clear Replay config + dm_helpers_dp_write_dpcd(link->ctx, link, + DP_SINK_PR_ENABLE_AND_CONFIGURATION, + (uint8_t *)&(replay_config.raw), sizeof(uint8_t)); + + if (!(link->replay_settings.config.replay_supported)) + return false; + + link->replay_settings.config.replay_error_status.raw = 0; + dc = link->ctx->dc; replay = dc->res_pool->replay; diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c index 37ab5a4eefc7..0f531cfd3c49 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c @@ -571,7 +571,7 @@ void mpc401_get_gamut_remap(struct mpc *mpc, struct mpc_grph_gamut_adjustment *adjust) { uint16_t arr_reg_val[12] = {0}; - uint32_t mode_select; + uint32_t mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_0; read_gamut_remap(mpc, mpcc_id, arr_reg_val, adjust->mpcc_gamut_remap_block_id, &mode_select); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 7d04739c3ba1..4bbbe07ecde7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -671,9 +671,9 @@ static const struct dc_plane_cap plane_cap = { /* 6:1 downscaling ratio: 1000/6 = 166.666 */ .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 + .argb8888 = 358, + .nv12 = 358, + .fp16 = 358 }, 64, 64 @@ -694,7 +694,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ + .max_downscale_src_width = 4096,/*upto true 4k*/ .scl_reset_length10 = true, .sanity_checks = false, .underflow_assert_delay_us = 0xFFFFFFFF, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 80386f698ae4..0ca6358a9782 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -891,7 +891,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, .enable_legacy_fast_update = true, .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ - .dml_hostvm_override = DML_HOSTVM_NO_OVERRIDE, + .dml_hostvm_override = DML_HOSTVM_OVERRIDE_FALSE, .using_dml2 = false, }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 01d95108ce66..585c3e8a2194 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -927,6 +927,7 @@ static const struct dc_debug_options debug_defaults_drv = { .seamless_boot_odm_combine = true, .enable_legacy_fast_update = true, .using_dml2 = false, + .disable_dsc_power_gate = true, }; static const struct dc_panel_config panel_config_defaults = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index f2ce687c0e03..9cb72805b8d1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1699,7 +1699,7 @@ static int dcn315_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.dsc_input_bpc = 0; DC_FP_START(); dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - if (pixel_rate_crb && !pipe->top_pipe && !pipe->prev_odm_pipe) { + if (pixel_rate_crb) { int bpp = source_format_to_bpp(pipes[pipe_cnt].pipe.src.source_format); /* Ceil to crb segment size */ int approx_det_segs_required_for_pstate = dcn_get_approx_det_segs_required_for_pstate( @@ -1756,28 +1756,26 @@ static int dcn315_populate_dml_pipes_from_context( continue; } - if (!pipe->top_pipe && !pipe->prev_odm_pipe) { - bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc) - || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); - - if (remaining_det_segs > MIN_RESERVED_DET_SEGS && crb_pipes != 0) - pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes + - (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0); - if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) { - /* Clamp to 2 pipe split max det segments */ - remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS); - pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS; - } - if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) { - /* If we are splitting we must have an even number of segments */ - remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2; - pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2; - } - /* Convert segments into size for DML use */ - pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB; - - crb_idx++; + bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc) + || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); + + if (remaining_det_segs > MIN_RESERVED_DET_SEGS && crb_pipes != 0) + pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes + + (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0); + if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) { + /* Clamp to 2 pipe split max det segments */ + remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS); + pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS; + } + if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) { + /* If we are splitting we must have an even number of segments */ + remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2; + pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2; } + /* Convert segments into size for DML use */ + pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB; + + crb_idx++; pipe_cnt++; } } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 014e8a296f0c..54c7d6aecf51 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -875,8 +875,8 @@ static bool spl_get_optimal_number_of_taps( bool *enable_isharp) { int num_part_y, num_part_c; - int max_taps_y, max_taps_c; - int min_taps_y, min_taps_c; + unsigned int max_taps_y, max_taps_c; + unsigned int min_taps_y, min_taps_c; enum lb_memory_config lb_config; bool skip_easf = false; diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 2a74ff5fdfdb..a2c28949ec47 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -479,7 +479,7 @@ struct spl_sharpness_range { }; struct adaptive_sharpness { bool enable; - int sharpness_level; + unsigned int sharpness_level; struct spl_sharpness_range sharpness_range; }; enum linear_light_scaling { // convert it in translation logic diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7835100b37c4..d74336830368 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2869,6 +2869,12 @@ struct dmub_cmd_psr_copy_settings_data { * Some panels request main link off before xth vertical line */ uint16_t poweroff_before_vertical_line; + /** + * Some panels cannot handle idle pattern during PSR entry. + * To power down phy before disable stream to avoid sending + * idle pattern. + */ + uint8_t power_down_phy_before_disable_stream; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index d9f31b191c69..1a68b5782cac 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -83,8 +83,8 @@ static inline void dmub_dcn31_translate_addr(const union dmub_addr *addr_in, void dmub_dcn31_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; - const uint32_t timeout = 100; - uint32_t in_reset, scratch, i, pwait_mode; + const uint32_t timeout = 100000; + uint32_t in_reset, is_enabled, scratch, i, pwait_mode; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); @@ -108,7 +108,7 @@ void dmub_dcn31_reset(struct dmub_srv *dmub) } for (i = 0; i < timeout; ++i) { - scratch = dmub->hw_funcs.get_gpint_response(dmub); + scratch = REG_READ(DMCUB_SCRATCH7); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; @@ -125,9 +125,14 @@ void dmub_dcn31_reset(struct dmub_srv *dmub) /* Force reset in case we timed out, DMCUB is likely hung. */ } - REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); + + if (is_enabled) { + REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + } + REG_WRITE(DMCUB_INBOX1_RPTR, 0); REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 2ccad79053c5..01e83c6ce701 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -88,23 +88,19 @@ static inline void dmub_dcn35_translate_addr(const union dmub_addr *addr_in, void dmub_dcn35_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; - const uint32_t timeout = 100; + const uint32_t timeout = 100000; uint32_t in_reset, is_enabled, scratch, i, pwait_mode; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); - if (in_reset == 0) { + if (in_reset == 0 && is_enabled != 0) { cmd.bits.status = 1; cmd.bits.command_code = DMUB_GPINT__STOP_FW; cmd.bits.param = 0; dmub->hw_funcs.set_gpint(dmub, cmd); - /** - * Timeout covers both the ACK and the wait - * for remaining work to finish. - */ - for (i = 0; i < timeout; ++i) { if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) break; @@ -113,7 +109,7 @@ void dmub_dcn35_reset(struct dmub_srv *dmub) } for (i = 0; i < timeout; ++i) { - scratch = dmub->hw_funcs.get_gpint_response(dmub); + scratch = REG_READ(DMCUB_SCRATCH7); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; @@ -130,11 +126,9 @@ void dmub_dcn35_reset(struct dmub_srv *dmub) /* Force reset in case we timed out, DMCUB is likely hung. */ } - REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); - if (is_enabled) { REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + udelay(1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); } @@ -160,11 +154,7 @@ void dmub_dcn35_reset_release(struct dmub_srv *dmub) LONO_SOCCLK_GATE_DISABLE, 1, LONO_DMCUBCLK_GATE_DISABLE, 1); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); - udelay(1); REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); - REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - udelay(1); REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 0); } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index 39a8cb6d7523..e1c4fe1c6e3e 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -63,8 +63,10 @@ static inline void dmub_dcn401_translate_addr(const union dmub_addr *addr_in, void dmub_dcn401_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; - const uint32_t timeout = 30; - uint32_t in_reset, scratch, i; + const uint32_t timeout_us = 1 * 1000 * 1000; //1s + const uint32_t poll_delay_us = 1; //1us + uint32_t i = 0; + uint32_t in_reset, scratch, pwait_mode; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); @@ -75,32 +77,35 @@ void dmub_dcn401_reset(struct dmub_srv *dmub) dmub->hw_funcs.set_gpint(dmub, cmd); - /** - * Timeout covers both the ACK and the wait - * for remaining work to finish. - * - * This is mostly bound by the PHY disable sequence. - * Each register check will be greater than 1us, so - * don't bother using udelay. - */ - - for (i = 0; i < timeout; ++i) { + for (i = 0; i < timeout_us; i++) { if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) break; + + udelay(poll_delay_us); } - for (i = 0; i < timeout; ++i) { + for (; i < timeout_us; i++) { scratch = dmub->hw_funcs.get_gpint_response(dmub); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; + + udelay(poll_delay_us); } - /* Force reset in case we timed out, DMCUB is likely hung. */ + for (; i < timeout_us; i++) { + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode); + if (pwait_mode & (1 << 0)) + break; + + udelay(poll_delay_us); + } + } + + if (i >= timeout_us) { + /* timeout should never occur */ + BREAK_TO_DEBUGGER(); } - REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); REG_WRITE(DMCUB_INBOX1_RPTR, 0); REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); @@ -131,7 +136,10 @@ void dmub_dcn401_backdoor_load(struct dmub_srv *dmub, dmub_dcn401_get_fb_base_offset(dmub, &fb_base, &fb_offset); + /* reset and disable DMCUB and MMHUBBUB DMUIF */ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); dmub_dcn401_translate_addr(&cw0->offset, fb_base, fb_offset, &offset); @@ -151,6 +159,7 @@ void dmub_dcn401_backdoor_load(struct dmub_srv *dmub, DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top, DMCUB_REGION3_CW1_ENABLE, 1); + /* release DMCUB reset only to prevent premature execution */ REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID, 0x20); } @@ -161,7 +170,10 @@ void dmub_dcn401_backdoor_load_zfb_mode(struct dmub_srv *dmub, { union dmub_addr offset; + /* reset and disable DMCUB and MMHUBBUB DMUIF */ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); offset = cw0->offset; @@ -181,6 +193,7 @@ void dmub_dcn401_backdoor_load_zfb_mode(struct dmub_srv *dmub, DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top, DMCUB_REGION3_CW1_ENABLE, 1); + /* release DMCUB reset only to prevent premature execution */ REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID, 0x20); } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h index 4c8843b79695..31f95b27e227 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -169,7 +169,8 @@ struct dmub_srv; DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN) \ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK) \ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_STAT) \ - DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN) + DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN) \ + DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS) struct dmub_srv_dcn401_reg_offset { #define DMUB_SR(reg) uint32_t reg; diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index 8c137d7c032e..e58e7b93810b 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -368,6 +368,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp) struct mod_hdcp_display *display = get_first_active_display(hdcp); enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + mutex_lock(&psp->hdcp_context.mutex); hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index a344e2e49b0e..b3d55cac3569 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -383,10 +383,10 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, colorimetryFormat = ColorimetryYCC_DP_ITU709; else if (cs == COLOR_SPACE_ADOBERGB) colorimetryFormat = ColorimetryYCC_DP_AdobeYCC; - else if (cs == COLOR_SPACE_2020_YCBCR) + else if (cs == COLOR_SPACE_2020_YCBCR_LIMITED) colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr; - if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22) + if (cs == COLOR_SPACE_2020_YCBCR_LIMITED && tf == TRANSFER_FUNC_GAMMA_22) colorimetryFormat = ColorimetryYCC_DP_ITU709; break; diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h index c488d4a50cf4..b2252deabc17 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h @@ -203,6 +203,10 @@ #define mmDAGB0_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB0_WR_MISC_CREDIT 0x0058 #define mmDAGB0_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE 0x005b +#define mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x005c +#define mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB0_WRCLI_ASK_PENDING 0x005d #define mmDAGB0_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB0_WRCLI_GO_PENDING 0x005e @@ -455,6 +459,10 @@ #define mmDAGB1_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB1_WR_MISC_CREDIT 0x00d8 #define mmDAGB1_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE 0x00db +#define mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x00dc +#define mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB1_WRCLI_ASK_PENDING 0x00dd #define mmDAGB1_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB1_WRCLI_GO_PENDING 0x00de @@ -707,6 +715,10 @@ #define mmDAGB2_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB2_WR_MISC_CREDIT 0x0158 #define mmDAGB2_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB2_WRCLI_GPU_SNOOP_OVERRIDE 0x015b +#define mmDAGB2_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB2_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x015c +#define mmDAGB2_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB2_WRCLI_ASK_PENDING 0x015d #define mmDAGB2_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB2_WRCLI_GO_PENDING 0x015e @@ -959,6 +971,10 @@ #define mmDAGB3_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB3_WR_MISC_CREDIT 0x01d8 #define mmDAGB3_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB3_WRCLI_GPU_SNOOP_OVERRIDE 0x01db +#define mmDAGB3_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB3_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x01dc +#define mmDAGB3_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB3_WRCLI_ASK_PENDING 0x01dd #define mmDAGB3_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB3_WRCLI_GO_PENDING 0x01de @@ -1211,6 +1227,10 @@ #define mmDAGB4_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB4_WR_MISC_CREDIT 0x0258 #define mmDAGB4_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB4_WRCLI_GPU_SNOOP_OVERRIDE 0x025b +#define mmDAGB4_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB4_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x025c +#define mmDAGB4_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB4_WRCLI_ASK_PENDING 0x025d #define mmDAGB4_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB4_WRCLI_GO_PENDING 0x025e @@ -4793,6 +4813,10 @@ #define mmDAGB5_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB5_WR_MISC_CREDIT 0x3058 #define mmDAGB5_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB5_WRCLI_GPU_SNOOP_OVERRIDE 0x305b +#define mmDAGB5_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB5_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x305c +#define mmDAGB5_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB5_WRCLI_ASK_PENDING 0x305d #define mmDAGB5_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB5_WRCLI_GO_PENDING 0x305e @@ -5045,6 +5069,10 @@ #define mmDAGB6_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB6_WR_MISC_CREDIT 0x30d8 #define mmDAGB6_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB6_WRCLI_GPU_SNOOP_OVERRIDE 0x30db +#define mmDAGB6_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB6_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x30dc +#define mmDAGB6_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB6_WRCLI_ASK_PENDING 0x30dd #define mmDAGB6_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB6_WRCLI_GO_PENDING 0x30de @@ -5297,6 +5325,10 @@ #define mmDAGB7_WR_DATA_CREDIT_BASE_IDX 1 #define mmDAGB7_WR_MISC_CREDIT 0x3158 #define mmDAGB7_WR_MISC_CREDIT_BASE_IDX 1 +#define mmDAGB7_WRCLI_GPU_SNOOP_OVERRIDE 0x315b +#define mmDAGB7_WRCLI_GPU_SNOOP_OVERRIDE_BASE_IDX 1 +#define mmDAGB7_WRCLI_GPU_SNOOP_OVERRIDE_VALUE 0x315c +#define mmDAGB7_WRCLI_GPU_SNOOP_OVERRIDE_VALUE_BASE_IDX 1 #define mmDAGB7_WRCLI_ASK_PENDING 0x315d #define mmDAGB7_WRCLI_ASK_PENDING_BASE_IDX 1 #define mmDAGB7_WRCLI_GO_PENDING 0x315e diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h index 2969fbf282b7..5069d2fd467f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h @@ -1532,6 +1532,12 @@ //DAGB0_WRCLI_DBUS_GO_PENDING #define DAGB0_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB0_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB0_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB0_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB0_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB0_DAGB_DLY #define DAGB0_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB0_DAGB_DLY__CLI__SHIFT 0x8 @@ -3207,6 +3213,12 @@ //DAGB1_WRCLI_DBUS_GO_PENDING #define DAGB1_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB1_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB1_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB1_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB1_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB1_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB1_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB1_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB1_DAGB_DLY #define DAGB1_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB1_DAGB_DLY__CLI__SHIFT 0x8 @@ -4882,6 +4894,12 @@ //DAGB2_WRCLI_DBUS_GO_PENDING #define DAGB2_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB2_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB2_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB2_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB2_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB2_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB2_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB2_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB2_DAGB_DLY #define DAGB2_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB2_DAGB_DLY__CLI__SHIFT 0x8 @@ -6557,6 +6575,12 @@ //DAGB3_WRCLI_DBUS_GO_PENDING #define DAGB3_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB3_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB3_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB3_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB3_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB3_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB3_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB3_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB3_DAGB_DLY #define DAGB3_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB3_DAGB_DLY__CLI__SHIFT 0x8 @@ -8232,6 +8256,12 @@ //DAGB4_WRCLI_DBUS_GO_PENDING #define DAGB4_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB4_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB4_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB4_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB4_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB4_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB4_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB4_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB4_DAGB_DLY #define DAGB4_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB4_DAGB_DLY__CLI__SHIFT 0x8 @@ -28737,6 +28767,12 @@ //DAGB5_WRCLI_DBUS_GO_PENDING #define DAGB5_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB5_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB5_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB5_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB5_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB5_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB5_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB5_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB5_DAGB_DLY #define DAGB5_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB5_DAGB_DLY__CLI__SHIFT 0x8 @@ -30412,6 +30448,12 @@ //DAGB6_WRCLI_DBUS_GO_PENDING #define DAGB6_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB6_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB6_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB6_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB6_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB6_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB6_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB6_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB6_DAGB_DLY #define DAGB6_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB6_DAGB_DLY__CLI__SHIFT 0x8 @@ -32087,6 +32129,12 @@ //DAGB7_WRCLI_DBUS_GO_PENDING #define DAGB7_WRCLI_DBUS_GO_PENDING__BUSY__SHIFT 0x0 #define DAGB7_WRCLI_DBUS_GO_PENDING__BUSY_MASK 0xFFFFFFFFL +//DAGB7_WRCLI_GPU_SNOOP_OVERRIDE +#define DAGB7_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE__SHIFT 0x0 +#define DAGB7_WRCLI_GPU_SNOOP_OVERRIDE__ENABLE_MASK 0xFFFFFFFFL +//DAGB7_WRCLI_GPU_SNOOP_OVERRIDE_VALUE +#define DAGB7_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE__SHIFT 0x0 +#define DAGB7_WRCLI_GPU_SNOOP_OVERRIDE_VALUE__ENABLE_MASK 0xFFFFFFFFL //DAGB7_DAGB_DLY #define DAGB7_DAGB_DLY__DLY__SHIFT 0x0 #define DAGB7_DAGB_DLY__CLI__SHIFT 0x8 diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 0fa6fbee1978..c4fdd82a0042 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1490,6 +1490,8 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, if (ret) return -EINVAL; parameter_size++; + if (!tmp_str) + break; while (isspace(*tmp_str)) tmp_str++; } @@ -2493,6 +2495,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): *states = ATTR_STATE_SUPPORTED; break; default: @@ -3851,6 +3855,9 @@ static int parse_input_od_command_lines(const char *buf, return -EINVAL; parameter_size++; + if (!tmp_str) + break; + while (isspace(*tmp_str)) tmp_str++; } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index e8b6989a40f3..6b34a33d788f 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -3043,6 +3043,7 @@ static int kv_dpm_hw_init(void *handle) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); if (ret) @@ -3050,6 +3051,8 @@ static int kv_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); + return ret; } @@ -3067,32 +3070,42 @@ static int kv_dpm_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ kv_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } return 0; } static int kv_dpm_resume(void *handle) { - int ret; + int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); /* asic init will reset to the boot state */ kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + return ret; } static bool kv_dpm_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index e861355ebd75..c7518b13e787 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -1009,9 +1009,12 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; int temp, size = sizeof(temp); - if (!adev->pm.dpm_enabled) - return; + mutex_lock(&adev->pm.mutex); + if (!adev->pm.dpm_enabled) { + mutex_unlock(&adev->pm.mutex); + return; + } if (!pp_funcs->read_sensor(adev->powerplay.pp_handle, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&temp, @@ -1033,4 +1036,5 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) adev->pm.dpm.state = dpm_state; amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle); + mutex_unlock(&adev->pm.mutex); } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index a1baa13ab2c2..a5ad1b60597e 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7783,6 +7783,7 @@ static int si_dpm_hw_init(void *handle) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); if (ret) @@ -7790,6 +7791,7 @@ static int si_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); return ret; } @@ -7807,32 +7809,44 @@ static int si_dpm_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ si_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } + return 0; } static int si_dpm_resume(void *handle) { - int ret; + int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { /* asic init will reset to the boot state */ + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + + return ret; } static bool si_dpm_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index a71c6117d7e5..0115d26b5af9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -51,6 +51,11 @@ static int amd_powerplay_create(struct amdgpu_device *adev) hwmgr->adev = adev; hwmgr->not_vf = !amdgpu_sriov_vf(adev); hwmgr->device = amdgpu_cgs_create_device(adev); + if (!hwmgr->device) { + kfree(hwmgr); + return -ENOMEM; + } + mutex_init(&hwmgr->msg_lock); hwmgr->chip_family = adev->family; hwmgr->chip_id = adev->asic_type; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index b56298d9da98..a76fc15a55f5 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -144,6 +144,10 @@ int atomctrl_initialize_mc_reg_table( vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); @@ -181,6 +185,10 @@ int atomctrl_initialize_mc_reg_table_v2_2( vram_info = (ATOM_VRAM_INFO_HEADER_V2_2 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); @@ -1420,6 +1428,8 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr GetIndexIntoMasterTable(DATA, SMU_Info), &size, &frev, &crev); + if (!psmu_info) + return -EINVAL; for (i = 0; i < psmu_info->ucSclkEntryNum; i++) { table->entry[i].ucVco_setting = psmu_info->asSclkFcwRangeEntry[i].ucVco_setting; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c index a8fc0fa44db6..ba5c1237fcfe 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c @@ -267,10 +267,10 @@ int smu7_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) if (hwmgr->thermal_controller.fanInfo.bNoFan || (hwmgr->thermal_controller.fanInfo. ucTachometerPulsesPerRevolution == 0) || - speed == 0 || + (!speed || speed > UINT_MAX/8) || (speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) || (speed > hwmgr->thermal_controller.fanInfo.ulMaxRPM)) - return 0; + return -EINVAL; if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) smu7_fan_ctrl_stop_smc_fan_control(hwmgr); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c index 79a566f3564a..c305ea4ec17d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c @@ -149,7 +149,7 @@ int phm_wait_on_indirect_register(struct pp_hwmgr *hwmgr, } cgs_write_register(hwmgr->device, indirect_port, index); - return phm_wait_on_register(hwmgr, indirect_port + 1, mask, value); + return phm_wait_on_register(hwmgr, indirect_port + 1, value, mask); } int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c index 3007b054c873..776d58ea63ae 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c @@ -1120,13 +1120,14 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT); if (0 != result) - return result; + goto exit_safe_mode; vega10_didt_set_mask(hwmgr, false); +exit_safe_mode: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - return 0; + return result; } static int vega10_disable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c index 379012494da5..56423aedf3fa 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c @@ -307,10 +307,10 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) int result = 0; if (hwmgr->thermal_controller.fanInfo.bNoFan || - speed == 0 || + (!speed || speed > UINT_MAX/8) || (speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) || (speed > hwmgr->thermal_controller.fanInfo.ulMaxRPM)) - return -1; + return -EINVAL; if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) result = vega10_fan_ctrl_stop_smc_fan_control(hwmgr); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c index a3331ffb2daf..1b1c88590156 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c @@ -191,7 +191,7 @@ int vega20_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) uint32_t tach_period, crystal_clock_freq; int result = 0; - if (!speed) + if (!speed || speed > UINT_MAX/8) return -EINVAL; if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 0c0b9aa44dfa..3fd8da5dc761 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -607,7 +607,8 @@ static int smu_sys_set_pp_table(void *handle, return -EIO; } - if (!smu_table->hardcode_pptable) { + if (!smu_table->hardcode_pptable || smu_table->power_play_table_size < size) { + kfree(smu_table->hardcode_pptable); smu_table->hardcode_pptable = kzalloc(size, GFP_KERNEL); if (!smu_table->hardcode_pptable) return -ENOMEM; @@ -2771,6 +2772,7 @@ int smu_get_power_limit(void *handle, switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 11): diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index fc1297fecc62..d4b954b22441 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1267,6 +1267,9 @@ static int arcturus_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; + if (!speed || speed > UINT_MAX/8) + return -EINVAL; + tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); WREG32_SOC15(THM, 0, mmCG_TACH_CTRL_ARCT, REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL_ARCT), diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 16fcd9dcd202..6c61e87359dd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1199,7 +1199,7 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; - if (speed == 0) + if (!speed || speed > UINT_MAX/8) return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 9bca748ac2e9..3d3765815e24 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -664,7 +664,6 @@ static int vangogh_print_clk_levels(struct smu_context *smu, { DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_t metrics; - struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; bool cur_value_match_level = false; @@ -680,31 +679,25 @@ static int vangogh_print_clk_levels(struct smu_context *smu, switch (clk_type) { case SMU_OD_SCLK: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); - size += sysfs_emit_at(buf, size, "0: %10uMhz\n", - (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); - size += sysfs_emit_at(buf, size, "1: %10uMhz\n", - (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); - } + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", + (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", + (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); break; case SMU_OD_CCLK: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); - size += sysfs_emit_at(buf, size, "0: %10uMhz\n", - (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); - size += sysfs_emit_at(buf, size, "1: %10uMhz\n", - (smu->cpu_actual_soft_max_freq > 0) ? smu->cpu_actual_soft_max_freq : smu->cpu_default_soft_max_freq); - } + size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", + (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", + (smu->cpu_actual_soft_max_freq > 0) ? smu->cpu_actual_soft_max_freq : smu->cpu_default_soft_max_freq); break; case SMU_OD_RANGE: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); - size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", - smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); - size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", - smu->cpu_default_soft_min_freq, smu->cpu_default_soft_max_freq); - } + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); + size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", + smu->cpu_default_soft_min_freq, smu->cpu_default_soft_max_freq); break; case SMU_SOCCLK: /* the level 3 ~ 6 of socclk use the same frequency for vangogh */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 2c35eb31475a..5a1f24438e47 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1731,7 +1731,6 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; - gpu_metrics->average_mm_activity = 0; /* Valid power data is available only from primary die */ if (aldebaran_is_primary(smu)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 2024a85fa11b..c5bca3019de0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -58,6 +58,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_0.bin"); +MODULE_FIRMWARE("amdgpu/smu_13_0_0_kicker.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin"); @@ -92,7 +93,7 @@ const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; int smu_v13_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -103,7 +104,10 @@ int smu_v13_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); if (err) goto out; @@ -1228,7 +1232,7 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t tach_period; int ret; - if (!speed) + if (!speed || speed > UINT_MAX/8) return -EINVAL; ret = smu_v13_0_auto_fan_control(smu, 0); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index cd2cf0ffc0f5..5a0a10144a73 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2549,11 +2549,12 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, &backend_workload_mask); /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ - if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && - ((smu->adev->pm.fw_version == 0x004e6601) || - (smu->adev->pm.fw_version >= 0x004e7300))) || - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && - smu->adev->pm.fw_version >= 0x00504500)) { + if ((workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) && + ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && + ((smu->adev->pm.fw_version == 0x004e6601) || + (smu->adev->pm.fw_version >= 0x004e7300))) || + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + smu->adev->pm.fw_version >= 0x00504500))) { workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, PP_SMC_POWER_PROFILE_POWERSAVING); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 55ed6247eb61..9ac694c4f1f7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -275,8 +275,9 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; - /* No need to load P2S tables in IOV mode */ - if (amdgpu_sriov_vf(adev)) + /* No need to load P2S tables in IOV mode or for smu v13.0.12 */ + if (amdgpu_sriov_vf(adev) || + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12))) return 0; if (!(adev->flags & AMD_IS_APU)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 452589adaf04..e5f619c979d8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1883,16 +1883,6 @@ static int smu_v14_0_allow_ih_interrupt(struct smu_context *smu) NULL); } -static int smu_v14_0_process_pending_interrupt(struct smu_context *smu) -{ - int ret = 0; - - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) - ret = smu_v14_0_allow_ih_interrupt(smu); - - return ret; -} - int smu_v14_0_enable_thermal_alert(struct smu_context *smu) { int ret = 0; @@ -1904,7 +1894,7 @@ int smu_v14_0_enable_thermal_alert(struct smu_context *smu) if (ret) return ret; - return smu_v14_0_process_pending_interrupt(smu); + return smu_v14_0_allow_ih_interrupt(smu); } int smu_v14_0_disable_thermal_alert(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 9ec53431f2c3..e98a6a2f3e6a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1206,16 +1206,9 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, PP_OD_FEATURE_GFXCLK_BIT)) break; - PPTable_t *pptable = smu->smu_table.driver_pptable; - const OverDriveLimits_t * const overdrive_upperlimits = - &pptable->SkuTable.OverDriveLimitsBasicMax; - const OverDriveLimits_t * const overdrive_lowerlimits = - &pptable->SkuTable.OverDriveLimitsBasicMin; - size += sysfs_emit_at(buf, size, "OD_SCLK_OFFSET:\n"); - size += sysfs_emit_at(buf, size, "0: %dMhz\n1: %uMhz\n", - overdrive_lowerlimits->GfxclkFoffset, - overdrive_upperlimits->GfxclkFoffset); + size += sysfs_emit_at(buf, size, "%dMhz\n", + od_table->OverDriveTable.GfxclkFoffset); break; case SMU_OD_MCLK: @@ -1350,12 +1343,8 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { smu_v14_0_2_get_od_setting_limits(smu, - PP_OD_FEATURE_GFXCLK_FMIN, - &min_value, - NULL); - smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_GFXCLK_FMAX, - NULL, + &min_value, &max_value); size += sysfs_emit_at(buf, size, "SCLK_OFFSET: %7dMhz %10uMhz\n", min_value, max_value); @@ -1639,6 +1628,39 @@ out: adev->unique_id = ((uint64_t)upper32 << 32) | lower32; } +static int smu_v14_0_2_get_fan_speed_pwm(struct smu_context *smu, + uint32_t *speed) +{ + int ret; + + if (!speed) + return -EINVAL; + + ret = smu_v14_0_2_get_smu_metrics_data(smu, + METRICS_CURR_FANPWM, + speed); + if (ret) { + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); + return ret; + } + + /* Convert the PMFW output which is in percent to pwm(255) based */ + *speed = min(*speed * 255 / 100, (uint32_t)255); + + return 0; +} + +static int smu_v14_0_2_get_fan_speed_rpm(struct smu_context *smu, + uint32_t *speed) +{ + if (!speed) + return -EINVAL; + + return smu_v14_0_2_get_smu_metrics_data(smu, + METRICS_CURR_FANSPEED, + speed); +} + static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *current_power_limit, uint32_t *default_power_limit, @@ -2429,36 +2451,24 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, return -ENOTSUPP; } - for (i = 0; i < size; i += 2) { - if (i + 2 > size) { - dev_info(adev->dev, "invalid number of input parameters %d\n", size); - return -EINVAL; - } - - switch (input[i]) { - case 1: - smu_v14_0_2_get_od_setting_limits(smu, - PP_OD_FEATURE_GFXCLK_FMAX, - &minimum, - &maximum); - if (input[i + 1] < minimum || - input[i + 1] > maximum) { - dev_info(adev->dev, "GfxclkFmax (%ld) must be within [%u, %u]!\n", - input[i + 1], minimum, maximum); - return -EINVAL; - } - - od_table->OverDriveTable.GfxclkFoffset = input[i + 1]; - od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; - break; + if (size != 1) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } - default: - dev_info(adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); - dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); - return -EINVAL; - } + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "GfxclkFoffset must be within [%d, %u]!\n", + minimum, maximum); + return -EINVAL; } + od_table->OverDriveTable.GfxclkFoffset = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; break; case PP_OD_EDIT_MCLK_VDDC_TABLE: @@ -2817,6 +2827,8 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .set_performance_level = smu_v14_0_set_performance_level, .gfx_off_control = smu_v14_0_gfx_off_control, .get_unique_id = smu_v14_0_2_get_unique_id, + .get_fan_speed_pwm = smu_v14_0_2_get_fan_speed_pwm, + .get_fan_speed_rpm = smu_v14_0_2_get_fan_speed_rpm, .get_power_limit = smu_v14_0_2_get_power_limit, .set_power_limit = smu_v14_0_2_set_power_limit, .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 0d71db7be325..0ce1766c859f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -459,8 +459,7 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, } if (read_arg) { smu_cmn_read_arg(smu, read_arg); - dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x,\ - readval: 0x%08x\n", + dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x, readval: 0x%08x\n", smu_get_message_name(smu, msg), index, param, reg, *read_arg); } else { dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x\n", diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index ebccb74306a7..f30b3d5eeca5 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -160,6 +160,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, kwb_conn->wb_layer->layer_type, &n_formats); + if (!formats) { + kfree(kwb_conn); + return -ENOMEM; + } err = drm_writeback_connector_init(&kms->base, wb_conn, &komeda_wb_connector_funcs, diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 00b364f9a71e..5dadc895e7f2 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -17,6 +17,12 @@ static bool ast_astdp_is_connected(struct ast_device *ast) { if (!ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xDF, AST_IO_VGACRDF_HPD)) return false; + /* + * HPD might be set even if no monitor is connected, so also check that + * the link training was successful. + */ + if (!ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xDC, AST_IO_VGACRDC_LINK_SUCCESS)) + return false; return true; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index ed496fb32bf3..162dc0698f4a 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -131,7 +131,7 @@ static bool ast_get_vbios_mode_info(const struct drm_format_info *format, return false; } - switch (mode->crtc_hdisplay) { + switch (mode->hdisplay) { case 640: vbios_mode->enh_table = &res_640x480[refresh_rate_index]; break; @@ -145,7 +145,7 @@ static bool ast_get_vbios_mode_info(const struct drm_format_info *format, vbios_mode->enh_table = &res_1152x864[refresh_rate_index]; break; case 1280: - if (mode->crtc_vdisplay == 800) + if (mode->vdisplay == 800) vbios_mode->enh_table = &res_1280x800[refresh_rate_index]; else vbios_mode->enh_table = &res_1280x1024[refresh_rate_index]; @@ -157,7 +157,7 @@ static bool ast_get_vbios_mode_info(const struct drm_format_info *format, vbios_mode->enh_table = &res_1440x900[refresh_rate_index]; break; case 1600: - if (mode->crtc_vdisplay == 900) + if (mode->vdisplay == 900) vbios_mode->enh_table = &res_1600x900[refresh_rate_index]; else vbios_mode->enh_table = &res_1600x1200[refresh_rate_index]; @@ -166,7 +166,7 @@ static bool ast_get_vbios_mode_info(const struct drm_format_info *format, vbios_mode->enh_table = &res_1680x1050[refresh_rate_index]; break; case 1920: - if (mode->crtc_vdisplay == 1080) + if (mode->vdisplay == 1080) vbios_mode->enh_table = &res_1920x1080[refresh_rate_index]; else vbios_mode->enh_table = &res_1920x1200[refresh_rate_index]; @@ -210,6 +210,7 @@ static bool ast_get_vbios_mode_info(const struct drm_format_info *format, hborder = (vbios_mode->enh_table->flags & HBorder) ? 8 : 0; vborder = (vbios_mode->enh_table->flags & VBorder) ? 8 : 0; + adjusted_mode->crtc_hdisplay = vbios_mode->enh_table->hde; adjusted_mode->crtc_htotal = vbios_mode->enh_table->ht; adjusted_mode->crtc_hblank_start = vbios_mode->enh_table->hde + hborder; adjusted_mode->crtc_hblank_end = vbios_mode->enh_table->ht - hborder; @@ -219,6 +220,7 @@ static bool ast_get_vbios_mode_info(const struct drm_format_info *format, vbios_mode->enh_table->hfp + vbios_mode->enh_table->hsync); + adjusted_mode->crtc_vdisplay = vbios_mode->enh_table->vde; adjusted_mode->crtc_vtotal = vbios_mode->enh_table->vt; adjusted_mode->crtc_vblank_start = vbios_mode->enh_table->vde + vborder; adjusted_mode->crtc_vblank_end = vbios_mode->enh_table->vt - vborder; @@ -1317,9 +1319,9 @@ static void ast_mode_config_helper_atomic_commit_tail(struct drm_atomic_state *s /* * Concurrent operations could possibly trigger a call to - * drm_connector_helper_funcs.get_modes by trying to read the - * display modes. Protect access to I/O registers by acquiring - * the I/O-register lock. Released in atomic_flush(). + * drm_connector_helper_funcs.get_modes by reading the display + * modes. Protect access to registers by acquiring the modeset + * lock. */ mutex_lock(&ast->modeset_lock); drm_atomic_helper_commit_tail(state); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c index 8f786592143b..24e1e11acf69 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c @@ -244,7 +244,9 @@ static const struct hdmi_codec_pdata codec_data = { .ops = &adv7511_codec_ops, .max_i2s_channels = 2, .i2s = 1, + .no_i2s_capture = 1, .spdif = 1, + .no_spdif_capture = 1, }; int adv7511_audio_init(struct device *dev, struct adv7511 *adv7511) diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index a2675b121fe4..c036bbc92ba9 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -2002,8 +2002,10 @@ static int anx7625_audio_get_eld(struct device *dev, void *data, memset(buf, 0, len); } else { dev_dbg(dev, "audio copy eld\n"); + mutex_lock(&ctx->connector->eld_mutex); memcpy(buf, ctx->connector->eld, min(sizeof(ctx->connector->eld), len)); + mutex_unlock(&ctx->connector->eld_mutex); } return 0; diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index 6886db2d9e00..8e889a38fad0 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -64,10 +64,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str adev->id = ret; adev->name = "dp_hpd_bridge"; adev->dev.parent = parent; - adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_hpd_bridge_release; adev->dev.platform_data = of_node_get(np); + device_set_of_node_from_dev(&adev->dev, parent); + ret = auxiliary_device_init(adev); if (ret) { of_node_put(adev->dev.platform_data); diff --git a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c index 7457d38622b0..89eed0668bfb 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c @@ -568,15 +568,18 @@ static int cdns_dsi_check_conf(struct cdns_dsi *dsi, struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; unsigned long dsi_hss_hsa_hse_hbp; unsigned int nlanes = output->dev->lanes; + int mode_clock = (mode_valid_check ? mode->clock : mode->crtc_clock); int ret; ret = cdns_dsi_mode2cfg(dsi, mode, dsi_cfg, mode_valid_check); if (ret) return ret; - phy_mipi_dphy_get_default_config(mode->crtc_clock * 1000, - mipi_dsi_pixel_format_to_bpp(output->dev->format), - nlanes, phy_cfg); + ret = phy_mipi_dphy_get_default_config(mode_clock * 1000, + mipi_dsi_pixel_format_to_bpp(output->dev->format), + nlanes, phy_cfg); + if (ret) + return ret; ret = cdns_dsi_adjust_phy_config(dsi, dsi_cfg, phy_cfg, mode, mode_valid_check); if (ret) @@ -680,6 +683,11 @@ static void cdns_dsi_bridge_post_disable(struct drm_bridge *bridge) struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); + dsi->phy_initialized = false; + dsi->link_initialized = false; + phy_power_off(dsi->dphy); + phy_exit(dsi->dphy); + pm_runtime_put(dsi->base.dev); } @@ -761,7 +769,7 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; unsigned long tx_byte_period; struct cdns_dsi_cfg dsi_cfg; - u32 tmp, reg_wakeup, div; + u32 tmp, reg_wakeup, div, status; int nlanes; if (WARN_ON(pm_runtime_get_sync(dsi->base.dev) < 0)) @@ -778,6 +786,19 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) cdns_dsi_hs_init(dsi); cdns_dsi_init_link(dsi); + /* + * Now that the DSI Link and DSI Phy are initialized, + * wait for the CLK and Data Lanes to be ready. + */ + tmp = CLK_LANE_RDY; + for (int i = 0; i < nlanes; i++) + tmp |= DATA_LANE_RDY(i); + + if (readl_poll_timeout(dsi->regs + MCTL_MAIN_STS, status, + (tmp == (status & tmp)), 100, 500000)) + dev_err(dsi->base.dev, + "Timed Out: DSI-DPhy Clock and Data Lanes not ready.\n"); + writel(HBP_LEN(dsi_cfg.hbp) | HSA_LEN(dsi_cfg.hsa), dsi->regs + VID_HSIZE1); writel(HFP_LEN(dsi_cfg.hfp) | HACT_LEN(dsi_cfg.hact), @@ -952,7 +973,7 @@ static int cdns_dsi_attach(struct mipi_dsi_host *host, bridge = drm_panel_bridge_add_typed(panel, DRM_MODE_CONNECTOR_DSI); } else { - bridge = of_drm_find_bridge(dev->dev.of_node); + bridge = of_drm_find_bridge(np); if (!bridge) bridge = ERR_PTR(-EINVAL); } @@ -1152,7 +1173,6 @@ static int __maybe_unused cdns_dsi_suspend(struct device *dev) clk_disable_unprepare(dsi->dsi_sys_clk); clk_disable_unprepare(dsi->dsi_p_clk); reset_control_assert(dsi->dsi_p_rst); - dsi->link_initialized = false; return 0; } diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index 41f72d458487..9ba2a667a1f3 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -2463,9 +2463,9 @@ static int cdns_mhdp_probe(struct platform_device *pdev) if (!mhdp) return -ENOMEM; - clk = devm_clk_get(dev, NULL); + clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(clk)) { - dev_err(dev, "couldn't get clk: %ld\n", PTR_ERR(clk)); + dev_err(dev, "couldn't get and enable clk: %ld\n", PTR_ERR(clk)); return PTR_ERR(clk); } @@ -2504,14 +2504,12 @@ static int cdns_mhdp_probe(struct platform_device *pdev) mhdp->info = of_device_get_match_data(dev); - clk_prepare_enable(clk); - pm_runtime_enable(dev); ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "pm_runtime_resume_and_get failed\n"); pm_runtime_disable(dev); - goto clk_disable; + return ret; } if (mhdp->info && mhdp->info->ops && mhdp->info->ops->init) { @@ -2590,8 +2588,6 @@ plat_fini: runtime_put: pm_runtime_put_sync(dev); pm_runtime_disable(dev); -clk_disable: - clk_disable_unprepare(mhdp->clk); return ret; } @@ -2632,8 +2628,6 @@ static void cdns_mhdp_remove(struct platform_device *pdev) cancel_work_sync(&mhdp->modeset_retry_work); flush_work(&mhdp->hpd_work); /* Ignoring mhdp->hdcp.check_work and mhdp->hdcp.prop_work here. */ - - clk_disable_unprepare(mhdp->clk); } static const struct of_device_id mhdp_ids[] = { diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index 008d86cc562a..967aa24b7c53 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -296,11 +296,11 @@ #define MAX_LANE_COUNT 4 #define MAX_LINK_RATE HBR #define AUTO_TRAIN_RETRY 3 -#define MAX_HDCP_DOWN_STREAM_COUNT 10 +#define MAX_HDCP_DOWN_STREAM_COUNT 127 #define MAX_CR_LEVEL 0x03 #define MAX_EQ_LEVEL 0x03 #define AUX_WAIT_TIMEOUT_MS 15 -#define AUX_FIFO_MAX_SIZE 32 +#define AUX_FIFO_MAX_SIZE 16 #define PIXEL_CLK_DELAY 1 #define PIXEL_CLK_INVERSE 0 #define ADJUST_PHASE_THRESHOLD 80000 @@ -2023,7 +2023,7 @@ static bool it6505_hdcp_part2_ksvlist_check(struct it6505 *it6505) { struct device *dev = it6505->dev; u8 av[5][4], bv[5][4]; - int i, err; + int i, err, retry; i = it6505_setup_sha1_input(it6505, it6505->sha1_input); if (i <= 0) { @@ -2032,22 +2032,29 @@ static bool it6505_hdcp_part2_ksvlist_check(struct it6505 *it6505) } it6505_sha1_digest(it6505, it6505->sha1_input, i, (u8 *)av); + /*1B-05 V' must retry 3 times */ + for (retry = 0; retry < 3; retry++) { + err = it6505_get_dpcd(it6505, DP_AUX_HDCP_V_PRIME(0), (u8 *)bv, + sizeof(bv)); - err = it6505_get_dpcd(it6505, DP_AUX_HDCP_V_PRIME(0), (u8 *)bv, - sizeof(bv)); + if (err < 0) { + dev_err(dev, "Read V' value Fail %d", retry); + continue; + } - if (err < 0) { - dev_err(dev, "Read V' value Fail"); - return false; - } + for (i = 0; i < 5; i++) + if (bv[i][3] != av[i][0] || bv[i][2] != av[i][1] || + bv[i][1] != av[i][2] || bv[i][0] != av[i][3]) + break; - for (i = 0; i < 5; i++) - if (bv[i][3] != av[i][0] || bv[i][2] != av[i][1] || - bv[i][1] != av[i][2] || bv[i][0] != av[i][3]) - return false; + if (i == 5) { + DRM_DEV_DEBUG_DRIVER(dev, "V' all match!! %d", retry); + return true; + } + } - DRM_DEV_DEBUG_DRIVER(dev, "V' all match!!"); - return true; + DRM_DEV_DEBUG_DRIVER(dev, "V' NOT match!! %d", retry); + return false; } static void it6505_hdcp_wait_ksv_list(struct work_struct *work) @@ -2055,12 +2062,13 @@ static void it6505_hdcp_wait_ksv_list(struct work_struct *work) struct it6505 *it6505 = container_of(work, struct it6505, hdcp_wait_ksv_list); struct device *dev = it6505->dev; - unsigned int timeout = 5000; - u8 bstatus = 0; + u8 bstatus; bool ksv_list_check; + /* 1B-04 wait ksv list for 5s */ + unsigned long timeout = jiffies + + msecs_to_jiffies(5000) + 1; - timeout /= 20; - while (timeout > 0) { + for (;;) { if (!it6505_get_sink_hpd_status(it6505)) return; @@ -2069,27 +2077,23 @@ static void it6505_hdcp_wait_ksv_list(struct work_struct *work) if (bstatus & DP_BSTATUS_READY) break; - msleep(20); - timeout--; - } + if (time_after(jiffies, timeout)) { + DRM_DEV_DEBUG_DRIVER(dev, "KSV list wait timeout"); + goto timeout; + } - if (timeout == 0) { - DRM_DEV_DEBUG_DRIVER(dev, "timeout and ksv list wait failed"); - goto timeout; + msleep(20); } ksv_list_check = it6505_hdcp_part2_ksvlist_check(it6505); DRM_DEV_DEBUG_DRIVER(dev, "ksv list ready, ksv list check %s", ksv_list_check ? "pass" : "fail"); - if (ksv_list_check) { - it6505_set_bits(it6505, REG_HDCP_TRIGGER, - HDCP_TRIGGER_KSV_DONE, HDCP_TRIGGER_KSV_DONE); + + if (ksv_list_check) return; - } + timeout: - it6505_set_bits(it6505, REG_HDCP_TRIGGER, - HDCP_TRIGGER_KSV_DONE | HDCP_TRIGGER_KSV_FAIL, - HDCP_TRIGGER_KSV_DONE | HDCP_TRIGGER_KSV_FAIL); + it6505_start_hdcp(it6505); } static void it6505_hdcp_work(struct work_struct *work) @@ -2312,14 +2316,20 @@ static int it6505_process_hpd_irq(struct it6505 *it6505) DRM_DEV_DEBUG_DRIVER(dev, "dp_irq_vector = 0x%02x", dp_irq_vector); if (dp_irq_vector & DP_CP_IRQ) { - it6505_set_bits(it6505, REG_HDCP_TRIGGER, HDCP_TRIGGER_CPIRQ, - HDCP_TRIGGER_CPIRQ); - bstatus = it6505_dpcd_read(it6505, DP_AUX_HDCP_BSTATUS); if (bstatus < 0) return bstatus; DRM_DEV_DEBUG_DRIVER(dev, "Bstatus = 0x%02x", bstatus); + + /*Check BSTATUS when recive CP_IRQ */ + if (bstatus & DP_BSTATUS_R0_PRIME_READY && + it6505->hdcp_status == HDCP_AUTH_GOING) + it6505_set_bits(it6505, REG_HDCP_TRIGGER, HDCP_TRIGGER_CPIRQ, + HDCP_TRIGGER_CPIRQ); + else if (bstatus & (DP_BSTATUS_REAUTH_REQ | DP_BSTATUS_LINK_FAILURE) && + it6505->hdcp_status == HDCP_AUTH_DONE) + it6505_start_hdcp(it6505); } ret = drm_dp_dpcd_read_link_status(&it6505->aux, link_status); @@ -2456,7 +2466,11 @@ static void it6505_irq_hdcp_ksv_check(struct it6505 *it6505) { struct device *dev = it6505->dev; - DRM_DEV_DEBUG_DRIVER(dev, "HDCP event Interrupt"); + DRM_DEV_DEBUG_DRIVER(dev, "HDCP repeater R0 event Interrupt"); + /* 1B01 HDCP encription should start when R0 is ready*/ + it6505_set_bits(it6505, REG_HDCP_TRIGGER, + HDCP_TRIGGER_KSV_DONE, HDCP_TRIGGER_KSV_DONE); + schedule_work(&it6505->hdcp_wait_ksv_list); } diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 925e42f46cd8..0f8d3ab30daa 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -1452,8 +1452,10 @@ static int it66121_audio_get_eld(struct device *dev, void *data, dev_dbg(dev, "No connector present, passing empty EDID data"); memset(buf, 0, len); } else { + mutex_lock(&ctx->connector->eld_mutex); memcpy(buf, ctx->connector->eld, min(sizeof(ctx->connector->eld), len)); + mutex_unlock(&ctx->connector->eld_mutex); } mutex_unlock(&ctx->lock); diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index 4d1d40e1f1b4..748bed8acd2d 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -879,7 +879,11 @@ retry: } } - return lt9611uxc_audio_init(dev, lt9611uxc); + ret = lt9611uxc_audio_init(dev, lt9611uxc); + if (ret) + goto err_remove_bridge; + + return 0; err_remove_bridge: free_irq(client->irq, lt9611uxc); diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 582cf4f73a74..4d17d1e1c38b 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -331,12 +331,18 @@ static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata) * 200 ms. We'll assume that the panel driver will have the hardcoded * delay in its prepare and always disable HPD. * - * If HPD somehow makes sense on some future panel we'll have to - * change this to be conditional on someone specifying that HPD should - * be used. + * For DisplayPort bridge type, we need HPD. So we use the bridge type + * to conditionally disable HPD. + * NOTE: The bridge type is set in ti_sn_bridge_probe() but enable_comms() + * can be called before. So for DisplayPort, HPD will be enabled once + * bridge type is set. We are using bridge type instead of "no-hpd" + * property because it is not used properly in devicetree description + * and hence is unreliable. */ - regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, - HPD_DISABLE); + + if (pdata->bridge.type != DRM_MODE_CONNECTOR_DisplayPort) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, + HPD_DISABLE); pdata->comms_enabled = true; @@ -424,36 +430,8 @@ static int status_show(struct seq_file *s, void *data) return 0; } - DEFINE_SHOW_ATTRIBUTE(status); -static void ti_sn65dsi86_debugfs_remove(void *data) -{ - debugfs_remove_recursive(data); -} - -static void ti_sn65dsi86_debugfs_init(struct ti_sn65dsi86 *pdata) -{ - struct device *dev = pdata->dev; - struct dentry *debugfs; - int ret; - - debugfs = debugfs_create_dir(dev_name(dev), NULL); - - /* - * We might get an error back if debugfs wasn't enabled in the kernel - * so let's just silently return upon failure. - */ - if (IS_ERR_OR_NULL(debugfs)) - return; - - ret = devm_add_action_or_reset(dev, ti_sn65dsi86_debugfs_remove, debugfs); - if (ret) - return; - - debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); -} - /* ----------------------------------------------------------------------------- * Auxiliary Devices (*not* AUX) */ @@ -480,6 +458,7 @@ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, const char *name) { struct device *dev = pdata->dev; + const struct i2c_client *client = to_i2c_client(dev); struct auxiliary_device *aux; int ret; @@ -488,6 +467,7 @@ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, return -ENOMEM; aux->name = name; + aux->id = (client->adapter->nr << 10) | client->addr; aux->dev.parent = dev; aux->dev.release = ti_sn65dsi86_aux_device_release; device_set_of_node_from_dev(&aux->dev, dev); @@ -1199,9 +1179,14 @@ static enum drm_connector_status ti_sn_bridge_detect(struct drm_bridge *bridge) struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); int val = 0; - pm_runtime_get_sync(pdata->dev); + /* + * Runtime reference is grabbed in ti_sn_bridge_hpd_enable() + * as the chip won't report HPD just after being powered on. + * HPD_DEBOUNCED_STATE reflects correct state only after the + * debounce time (~100-400 ms). + */ + regmap_read(pdata->regmap, SN_HPD_DISABLE_REG, &val); - pm_runtime_put_autosuspend(pdata->dev); return val & HPD_DEBOUNCED_STATE ? connector_status_connected : connector_status_disconnected; @@ -1215,6 +1200,35 @@ static const struct drm_edid *ti_sn_bridge_edid_read(struct drm_bridge *bridge, return drm_edid_read_ddc(connector, &pdata->aux.ddc); } +static void ti_sn65dsi86_debugfs_init(struct drm_bridge *bridge, struct dentry *root) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + struct dentry *debugfs; + + debugfs = debugfs_create_dir(dev_name(pdata->dev), root); + debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); +} + +static void ti_sn_bridge_hpd_enable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + /* + * Device needs to be powered on before reading the HPD state + * for reliable hpd detection in ti_sn_bridge_detect() due to + * the high debounce time. + */ + + pm_runtime_get_sync(pdata->dev); +} + +static void ti_sn_bridge_hpd_disable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + pm_runtime_put_autosuspend(pdata->dev); +} + static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .attach = ti_sn_bridge_attach, .detach = ti_sn_bridge_detach, @@ -1228,6 +1242,9 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .atomic_reset = drm_atomic_helper_bridge_reset, .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .debugfs_init = ti_sn65dsi86_debugfs_init, + .hpd_enable = ti_sn_bridge_hpd_enable, + .hpd_disable = ti_sn_bridge_hpd_disable, }; static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, @@ -1316,8 +1333,26 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, pdata->bridge.type = pdata->next_bridge->type == DRM_MODE_CONNECTOR_DisplayPort ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; - if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | + DRM_BRIDGE_OP_HPD; + /* + * If comms were already enabled they would have been enabled + * with the wrong value of HPD_DISABLE. Update it now. Comms + * could be enabled if anyone is holding a pm_runtime reference + * (like if a GPIO is in use). Note that in most cases nobody + * is doing AUX channel xfers before the bridge is added so + * HPD doesn't _really_ matter then. The only exception is in + * the eDP case where the panel wants to read the EDID before + * the bridge is added. We always consistently have HPD disabled + * for eDP. + */ + mutex_lock(&pdata->comms_mutex); + if (pdata->comms_enabled) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, + HPD_DISABLE, 0); + mutex_unlock(&pdata->comms_mutex); + } drm_bridge_add(&pdata->bridge); @@ -1936,8 +1971,6 @@ static int ti_sn65dsi86_probe(struct i2c_client *client) if (ret) return ret; - ti_sn65dsi86_debugfs_init(pdata); - /* * Break ourselves up into a collection of aux devices. The only real * motiviation here is to solve the chicken-and-egg problem of probe diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c index 007ceb281d00..56a4965e518c 100644 --- a/drivers/gpu/drm/display/drm_dp_cec.c +++ b/drivers/gpu/drm/display/drm_dp_cec.c @@ -311,16 +311,6 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) if (!aux->transfer) return; -#ifndef CONFIG_MEDIA_CEC_RC - /* - * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by - * cec_allocate_adapter() if CONFIG_MEDIA_CEC_RC is undefined. - * - * Do this here as well to ensure the tests against cec_caps are - * correct. - */ - cec_caps &= ~CEC_CAP_RC; -#endif cancel_delayed_work_sync(&aux->cec.unregister_work); mutex_lock(&aux->cec.lock); @@ -337,7 +327,9 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) num_las = CEC_MAX_LOG_ADDRS; if (aux->cec.adap) { - if (aux->cec.adap->capabilities == cec_caps && + /* Check if the adapter properties have changed */ + if ((aux->cec.adap->capabilities & CEC_CAP_MONITOR_ALL) == + (cec_caps & CEC_CAP_MONITOR_ALL) && aux->cec.adap->available_log_addrs == num_las) { /* Unchanged, so just set the phys addr */ cec_s_phys_addr(aux->cec.adap, source_physical_address, false); diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 6ee51003de3c..9fa13da513d2 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2421,7 +2421,7 @@ u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) { u8 bpp_increment_dpcd = dsc_dpcd[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT]; - switch (bpp_increment_dpcd) { + switch (bpp_increment_dpcd & DP_DSC_BITS_PER_PIXEL_MASK) { case DP_DSC_BITS_PER_PIXEL_1_16: return 16; case DP_DSC_BITS_PER_PIXEL_1_8: diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index f0c6d50d8c33..3e5f721d7540 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -179,13 +179,13 @@ static int drm_dp_mst_rad_to_str(const u8 rad[8], u8 lct, char *out, size_t len) { int i; - u8 unpacked_rad[16]; + u8 unpacked_rad[16] = {}; - for (i = 0; i < lct; i++) { + for (i = 1; i < lct; i++) { if (i % 2) - unpacked_rad[i] = rad[i / 2] >> 4; + unpacked_rad[i] = rad[(i - 1) / 2] >> 4; else - unpacked_rad[i] = rad[i / 2] & BIT_MASK(4); + unpacked_rad[i] = rad[(i - 1) / 2] & 0xF; } /* TODO: Eventually add something to printk so we can format the rad @@ -4034,6 +4034,22 @@ out: return 0; } +static bool primary_mstb_probing_is_done(struct drm_dp_mst_topology_mgr *mgr) +{ + bool probing_done = false; + + mutex_lock(&mgr->lock); + + if (mgr->mst_primary && drm_dp_mst_topology_try_get_mstb(mgr->mst_primary)) { + probing_done = mgr->mst_primary->link_address_sent; + drm_dp_mst_topology_put_mstb(mgr->mst_primary); + } + + mutex_unlock(&mgr->lock); + + return probing_done; +} + static inline bool drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_pending_up_req *up_req) @@ -4064,8 +4080,12 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, /* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY events */ if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) { - dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); - hotplug = true; + if (!primary_mstb_probing_is_done(mgr)) { + drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it.\n"); + } else { + dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); + hotplug = true; + } } drm_dp_mst_topology_put_mstb(mstb); @@ -4144,10 +4164,11 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type, false); + drm_dp_mst_topology_put_mstb(mst_primary); + if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { const struct drm_dp_connection_status_notify *conn_stat = &up_req->msg.u.conn_stat; - bool handle_csn; drm_dbg_kms(mgr->dev, "Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", conn_stat->port_number, @@ -4156,16 +4177,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) conn_stat->message_capability_status, conn_stat->input_port, conn_stat->peer_device_type); - - mutex_lock(&mgr->probe_lock); - handle_csn = mst_primary->link_address_sent; - mutex_unlock(&mgr->probe_lock); - - if (!handle_csn) { - drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it."); - kfree(up_req); - goto out_put_primary; - } } else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { const struct drm_dp_resource_status_notify *res_stat = &up_req->msg.u.resource_stat; @@ -4180,9 +4191,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) list_add_tail(&up_req->next, &mgr->up_req_list); mutex_unlock(&mgr->up_req_lock); queue_work(system_long_wq, &mgr->up_req_work); - -out_put_primary: - drm_dp_mst_topology_put_mstb(mst_primary); out_clear_reply: memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); return 0; diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index feb7a3a75981..936a8f95d80f 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -347,6 +347,8 @@ static int hdmi_generate_avi_infoframe(const struct drm_connector *connector, is_limited_range ? HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL; int ret; + infoframe->set = false; + ret = drm_hdmi_avi_infoframe_from_display_mode(frame, connector, mode); if (ret) return ret; @@ -376,6 +378,8 @@ static int hdmi_generate_spd_infoframe(const struct drm_connector *connector, &infoframe->data.spd; int ret; + infoframe->set = false; + ret = hdmi_spd_infoframe_init(frame, connector->hdmi.vendor, connector->hdmi.product); @@ -398,6 +402,8 @@ static int hdmi_generate_hdr_infoframe(const struct drm_connector *connector, &infoframe->data.drm; int ret; + infoframe->set = false; + if (connector->max_bpc < 10) return 0; @@ -425,6 +431,8 @@ static int hdmi_generate_hdmi_vendor_infoframe(const struct drm_connector *conne &infoframe->data.vendor.hdmi; int ret; + infoframe->set = false; + if (!info->has_hdmi_infoframe) return 0; diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 5186d2114a50..40e4e1b6c911 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -574,6 +574,30 @@ mode_valid(struct drm_atomic_state *state) return 0; } +static int drm_atomic_check_valid_clones(struct drm_atomic_state *state, + struct drm_crtc *crtc) +{ + struct drm_encoder *drm_enc; + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, + crtc); + + drm_for_each_encoder_mask(drm_enc, crtc->dev, crtc_state->encoder_mask) { + if (!drm_enc->possible_clones) { + DRM_DEBUG("enc%d possible_clones is 0\n", drm_enc->base.id); + continue; + } + + if ((crtc_state->encoder_mask & drm_enc->possible_clones) != + crtc_state->encoder_mask) { + DRM_DEBUG("crtc%d failed valid clone check for mask 0x%x\n", + crtc->base.id, crtc_state->encoder_mask); + return -EINVAL; + } + } + + return 0; +} + /** * drm_atomic_helper_check_modeset - validate state object for modeset changes * @dev: DRM device @@ -745,6 +769,10 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, ret = drm_atomic_add_affected_planes(state, crtc); if (ret != 0) return ret; + + ret = drm_atomic_check_valid_clones(state, crtc); + if (ret != 0) + return ret; } /* @@ -1376,7 +1404,7 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state) mode = &new_crtc_state->mode; adjusted_mode = &new_crtc_state->adjusted_mode; - if (!new_crtc_state->mode_changed) + if (!new_crtc_state->mode_changed && !new_crtc_state->connectors_changed) continue; drm_dbg_atomic(dev, "modeset on [ENCODER:%d:%s]\n", diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 370dc676e3aa..fd36b8fd54e9 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -956,6 +956,10 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; + + if (connector->dpms == mode) + goto out; + connector->dpms = mode; crtc = connector->state->crtc; diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 103c185bb1c8..16dea3f2fb11 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -324,7 +324,7 @@ EXPORT_SYMBOL(drm_buddy_init); */ void drm_buddy_fini(struct drm_buddy *mm) { - u64 root_size, size; + u64 root_size, size, start; unsigned int order; int i; @@ -332,7 +332,8 @@ void drm_buddy_fini(struct drm_buddy *mm) for (i = 0; i < mm->n_roots; ++i) { order = ilog2(size) - ilog2(mm->chunk_size); - __force_merge(mm, 0, size, order); + start = drm_buddy_block_offset(mm->roots[i]); + __force_merge(mm, start, start + size, order); WARN_ON(!drm_buddy_block_is_free(mm->roots[i])); drm_block_free(mm, mm->roots[i]); @@ -400,6 +401,49 @@ drm_get_buddy(struct drm_buddy_block *block) EXPORT_SYMBOL(drm_get_buddy); /** + * drm_buddy_reset_clear - reset blocks clear state + * + * @mm: DRM buddy manager + * @is_clear: blocks clear state + * + * Reset the clear state based on @is_clear value for each block + * in the freelist. + */ +void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear) +{ + u64 root_size, size, start; + unsigned int order; + int i; + + size = mm->size; + for (i = 0; i < mm->n_roots; ++i) { + order = ilog2(size) - ilog2(mm->chunk_size); + start = drm_buddy_block_offset(mm->roots[i]); + __force_merge(mm, start, start + size, order); + + root_size = mm->chunk_size << order; + size -= root_size; + } + + for (i = 0; i <= mm->max_order; ++i) { + struct drm_buddy_block *block; + + list_for_each_entry_reverse(block, &mm->free_list[i], link) { + if (is_clear != drm_buddy_block_is_clear(block)) { + if (is_clear) { + mark_cleared(block); + mm->clear_avail += drm_buddy_block_size(mm, block); + } else { + clear_reset(block); + mm->clear_avail -= drm_buddy_block_size(mm, block); + } + } + } + } +} +EXPORT_SYMBOL(drm_buddy_reset_clear); + +/** * drm_buddy_free_block - free a block * * @mm: DRM buddy manager diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index cee5eafbfb81..fd620f7db0dd 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -741,6 +741,15 @@ retry: if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; + for (i = 0; i < count; i++) { + struct drm_connector *connector = connectors[i]; + + if (connector->has_tile) + drm_client_get_tile_offsets(dev, connectors, connector_count, + modes, offsets, i, + connector->tile_h_loc, connector->tile_v_loc); + } + /* * If the BIOS didn't enable everything it could, fall back to have the * same user experiencing of lighting up as much as possible like the diff --git a/drivers/gpu/drm/drm_client_setup.c b/drivers/gpu/drm/drm_client_setup.c new file mode 100644 index 000000000000..5969c4ffe31b --- /dev/null +++ b/drivers/gpu/drm/drm_client_setup.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: MIT + +#include <drm/drm_client_setup.h> +#include <drm/drm_device.h> +#include <drm/drm_fbdev_client.h> +#include <drm/drm_fourcc.h> +#include <drm/drm_print.h> + +/** + * drm_client_setup() - Setup in-kernel DRM clients + * @dev: DRM device + * @format: Preferred pixel format for the device. Use NULL, unless + * there is clearly a driver-preferred format. + * + * This function sets up the in-kernel DRM clients. Restore, hotplug + * events and teardown are all taken care of. + * + * Drivers should call drm_client_setup() after registering the new + * DRM device with drm_dev_register(). This function is safe to call + * even when there are no connectors present. Setup will be retried + * on the next hotplug event. + * + * The clients are destroyed by drm_dev_unregister(). + */ +void drm_client_setup(struct drm_device *dev, const struct drm_format_info *format) +{ + int ret; + + ret = drm_fbdev_client_setup(dev, format); + if (ret) + drm_warn(dev, "Failed to set up DRM client; error %d\n", ret); +} +EXPORT_SYMBOL(drm_client_setup); + +/** + * drm_client_setup_with_fourcc() - Setup in-kernel DRM clients for color mode + * @dev: DRM device + * @fourcc: Preferred pixel format as 4CC code for the device + * + * This function sets up the in-kernel DRM clients. It is equivalent + * to drm_client_setup(), but expects a 4CC code as second argument. + */ +void drm_client_setup_with_fourcc(struct drm_device *dev, u32 fourcc) +{ + drm_client_setup(dev, drm_format_info(fourcc)); +} +EXPORT_SYMBOL(drm_client_setup_with_fourcc); + +/** + * drm_client_setup_with_color_mode() - Setup in-kernel DRM clients for color mode + * @dev: DRM device + * @color_mode: Preferred color mode for the device + * + * This function sets up the in-kernel DRM clients. It is equivalent + * to drm_client_setup(), but expects a color mode as second argument. + * + * Do not use this function in new drivers. Prefer drm_client_setup() with a + * format of NULL. + */ +void drm_client_setup_with_color_mode(struct drm_device *dev, unsigned int color_mode) +{ + u32 fourcc = drm_driver_color_mode_format(dev, color_mode); + + drm_client_setup_with_fourcc(dev, fourcc); +} +EXPORT_SYMBOL(drm_client_setup_with_color_mode); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index fc35f47e2849..994afa5a0ffb 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -277,6 +277,7 @@ static int __drm_connector_init(struct drm_device *dev, INIT_LIST_HEAD(&connector->probed_modes); INIT_LIST_HEAD(&connector->modes); mutex_init(&connector->mutex); + mutex_init(&connector->eld_mutex); mutex_init(&connector->edid_override_mutex); mutex_init(&connector->hdmi.infoframes.lock); connector->edid_blob_ptr = NULL; @@ -507,6 +508,9 @@ int drmm_connector_hdmi_init(struct drm_device *dev, if (!supported_formats || !(supported_formats & BIT(HDMI_COLORSPACE_RGB))) return -EINVAL; + if (connector->ycbcr_420_allowed != !!(supported_formats & BIT(HDMI_COLORSPACE_YUV420))) + return -EINVAL; + if (!(max_bpc == 8 || max_bpc == 10 || max_bpc == 12)) return -EINVAL; @@ -1304,6 +1308,10 @@ EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name); * callback. For atomic drivers the remapping to the "ACTIVE" property is * implemented in the DRM core. * + * On atomic drivers any DPMS setproperty ioctl where the value does not + * change is completely skipped, otherwise a full atomic commit will occur. + * On legacy drivers the exact behavior is driver specific. + * * Note that this property cannot be set through the MODE_ATOMIC ioctl, * userspace must use "ACTIVE" on the CRTC instead. * diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 9d3e6dd68810..98a37dc3324e 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -743,7 +743,7 @@ static int bridges_show(struct seq_file *m, void *data) unsigned int idx = 0; drm_for_each_bridge_in_chain(encoder, bridge) { - drm_printf(&p, "bridge[%d]: %ps\n", idx++, bridge->funcs); + drm_printf(&p, "bridge[%u]: %ps\n", idx++, bridge->funcs); drm_printf(&p, "\ttype: [%d] %s\n", bridge->type, drm_get_connector_type_name(bridge->type)); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 855beafb76ff..9edb3247c767 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5605,7 +5605,9 @@ EXPORT_SYMBOL(drm_edid_get_monitor_name); static void clear_eld(struct drm_connector *connector) { + mutex_lock(&connector->eld_mutex); memset(connector->eld, 0, sizeof(connector->eld)); + mutex_unlock(&connector->eld_mutex); connector->latency_present[0] = false; connector->latency_present[1] = false; @@ -5657,6 +5659,8 @@ static void drm_edid_to_eld(struct drm_connector *connector, if (!drm_edid) return; + mutex_lock(&connector->eld_mutex); + mnl = get_monitor_name(drm_edid, &eld[DRM_ELD_MONITOR_NAME_STRING]); drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] ELD monitor %s\n", connector->base.id, connector->name, @@ -5717,6 +5721,8 @@ static void drm_edid_to_eld(struct drm_connector *connector, drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] ELD size %d, SAD count %d\n", connector->base.id, connector->name, drm_eld_size(eld), total_sad_count); + + mutex_unlock(&connector->eld_mutex); } static int _drm_edid_to_sad(const struct drm_edid *drm_edid, @@ -6590,6 +6596,7 @@ static void drm_reset_display_info(struct drm_connector *connector) info->has_hdmi_infoframe = false; info->rgb_quant_range_selectable = false; memset(&info->hdmi, 0, sizeof(info->hdmi)); + memset(&connector->hdr_sink_metadata, 0, sizeof(connector->hdr_sink_metadata)); info->edid_hdmi_rgb444_dc_modes = 0; info->edid_hdmi_ycbcr444_dc_modes = 0; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 29c53f9f449c..b15ddbd65e7b 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -492,8 +492,8 @@ EXPORT_SYMBOL(drm_fb_helper_init); * @fb_helper: driver-allocated fbdev helper * * A helper to alloc fb_info and the member cmap. Called by the driver - * within the fb_probe fb_helper callback function. Drivers do not - * need to release the allocated fb_info structure themselves, this is + * within the struct &drm_driver.fbdev_probe callback function. Drivers do + * not need to release the allocated fb_info structure themselves, this is * automatically done when calling drm_fb_helper_fini(). * * RETURNS: @@ -1348,14 +1348,14 @@ int drm_fb_helper_set_par(struct fb_info *info) } EXPORT_SYMBOL(drm_fb_helper_set_par); -static void pan_set(struct drm_fb_helper *fb_helper, int x, int y) +static void pan_set(struct drm_fb_helper *fb_helper, int dx, int dy) { struct drm_mode_set *mode_set; mutex_lock(&fb_helper->client.modeset_mutex); drm_client_for_each_modeset(mode_set, &fb_helper->client) { - mode_set->x = x; - mode_set->y = y; + mode_set->x += dx; + mode_set->y += dy; } mutex_unlock(&fb_helper->client.modeset_mutex); } @@ -1364,16 +1364,18 @@ static int pan_display_atomic(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; - int ret; + int ret, dx, dy; - pan_set(fb_helper, var->xoffset, var->yoffset); + dx = var->xoffset - info->var.xoffset; + dy = var->yoffset - info->var.yoffset; + pan_set(fb_helper, dx, dy); ret = drm_client_modeset_commit_locked(&fb_helper->client); if (!ret) { info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; } else - pan_set(fb_helper, info->var.xoffset, info->var.yoffset); + pan_set(fb_helper, -dx, -dy); return ret; } @@ -1441,67 +1443,27 @@ unlock: EXPORT_SYMBOL(drm_fb_helper_pan_display); static uint32_t drm_fb_helper_find_format(struct drm_fb_helper *fb_helper, const uint32_t *formats, - size_t format_count, uint32_t bpp, uint32_t depth) + size_t format_count, unsigned int color_mode) { struct drm_device *dev = fb_helper->dev; uint32_t format; size_t i; - /* - * Do not consider YUV or other complicated formats - * for framebuffers. This means only legacy formats - * are supported (fmt->depth is a legacy field), but - * the framebuffer emulation can only deal with such - * formats, specifically RGB/BGA formats. - */ - format = drm_mode_legacy_fb_format(bpp, depth); - if (!format) - goto err; + format = drm_driver_color_mode_format(dev, color_mode); + if (!format) { + drm_info(dev, "unsupported color mode of %d\n", color_mode); + return DRM_FORMAT_INVALID; + } for (i = 0; i < format_count; ++i) { if (formats[i] == format) return format; } - -err: - /* We found nothing. */ - drm_warn(dev, "bpp/depth value of %u/%u not supported\n", bpp, depth); + drm_warn(dev, "format %p4cc not supported\n", &format); return DRM_FORMAT_INVALID; } -static uint32_t drm_fb_helper_find_color_mode_format(struct drm_fb_helper *fb_helper, - const uint32_t *formats, size_t format_count, - unsigned int color_mode) -{ - struct drm_device *dev = fb_helper->dev; - uint32_t bpp, depth; - - switch (color_mode) { - case 1: - case 2: - case 4: - case 8: - case 16: - case 24: - bpp = depth = color_mode; - break; - case 15: - bpp = 16; - depth = 15; - break; - case 32: - bpp = 32; - depth = 24; - break; - default: - drm_info(dev, "unsupported color mode of %d\n", color_mode); - return DRM_FORMAT_INVALID; - } - - return drm_fb_helper_find_format(fb_helper, formats, format_count, bpp, depth); -} - static int __drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { @@ -1531,10 +1493,10 @@ static int __drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, if (!cmdline_mode->bpp_specified) continue; - surface_format = drm_fb_helper_find_color_mode_format(fb_helper, - plane->format_types, - plane->format_count, - cmdline_mode->bpp); + surface_format = drm_fb_helper_find_format(fb_helper, + plane->format_types, + plane->format_count, + cmdline_mode->bpp); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ } @@ -1544,10 +1506,10 @@ static int __drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, break; /* found supported format */ /* try preferred color mode */ - surface_format = drm_fb_helper_find_color_mode_format(fb_helper, - plane->format_types, - plane->format_count, - fb_helper->preferred_bpp); + surface_format = drm_fb_helper_find_format(fb_helper, + plane->format_types, + plane->format_count, + fb_helper->preferred_bpp); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ } @@ -1648,7 +1610,7 @@ static int drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, /* * Allocates the backing storage and sets up the fbdev info structure through - * the ->fb_probe callback. + * the ->fbdev_probe callback. */ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper) { @@ -1666,7 +1628,10 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper) } /* push down into drivers */ - ret = (*fb_helper->funcs->fb_probe)(fb_helper, &sizes); + if (dev->driver->fbdev_probe) + ret = dev->driver->fbdev_probe(fb_helper, &sizes); + else if (fb_helper->funcs) + ret = fb_helper->funcs->fb_probe(fb_helper, &sizes); if (ret < 0) return ret; @@ -1738,7 +1703,7 @@ static void drm_fb_helper_fill_var(struct fb_info *info, * instance and the drm framebuffer allocated in &drm_fb_helper.fb. * * Drivers should call this (or their equivalent setup code) from their - * &drm_fb_helper_funcs.fb_probe callback after having allocated the fbdev + * &drm_driver.fbdev_probe callback after having allocated the fbdev * backing storage framebuffer. */ void drm_fb_helper_fill_info(struct fb_info *info, @@ -1894,7 +1859,7 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper) * Note that this also registers the fbdev and so allows userspace to call into * the driver through the fbdev interfaces. * - * This function will call down into the &drm_fb_helper_funcs.fb_probe callback + * This function will call down into the &drm_driver.fbdev_probe callback * to let the driver allocate and initialize the fbdev info structure and the * drm framebuffer used to back the fbdev. drm_fb_helper_fill_info() is provided * as a helper to setup simple default values for the fbdev info structure. diff --git a/drivers/gpu/drm/drm_fbdev_client.c b/drivers/gpu/drm/drm_fbdev_client.c new file mode 100644 index 000000000000..a09382afe2fb --- /dev/null +++ b/drivers/gpu/drm/drm_fbdev_client.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: MIT + +#include <drm/drm_client.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_drv.h> +#include <drm/drm_fbdev_client.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_fourcc.h> +#include <drm/drm_print.h> + +/* + * struct drm_client_funcs + */ + +static void drm_fbdev_client_unregister(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + + if (fb_helper->info) { + drm_fb_helper_unregister_info(fb_helper); + } else { + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); + } +} + +static int drm_fbdev_client_restore(struct drm_client_dev *client) +{ + drm_fb_helper_lastclose(client->dev); + + return 0; +} + +static int drm_fbdev_client_hotplug(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + struct drm_device *dev = client->dev; + int ret; + + if (dev->fb_helper) + return drm_fb_helper_hotplug_event(dev->fb_helper); + + ret = drm_fb_helper_init(dev, fb_helper); + if (ret) + goto err_drm_err; + + if (!drm_drv_uses_atomic_modeset(dev)) + drm_helper_disable_unused_functions(dev); + + ret = drm_fb_helper_initial_config(fb_helper); + if (ret) + goto err_drm_fb_helper_fini; + + return 0; + +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); +err_drm_err: + drm_err(dev, "fbdev: Failed to setup emulation (ret=%d)\n", ret); + return ret; +} + +static const struct drm_client_funcs drm_fbdev_client_funcs = { + .owner = THIS_MODULE, + .unregister = drm_fbdev_client_unregister, + .restore = drm_fbdev_client_restore, + .hotplug = drm_fbdev_client_hotplug, +}; + +/** + * drm_fbdev_client_setup() - Setup fbdev emulation + * @dev: DRM device + * @format: Preferred color format for the device. DRM_FORMAT_XRGB8888 + * is used if this is zero. + * + * This function sets up fbdev emulation. Restore, hotplug events and + * teardown are all taken care of. Drivers that do suspend/resume need + * to call drm_fb_helper_set_suspend_unlocked() themselves. Simple + * drivers might use drm_mode_config_helper_suspend(). + * + * This function is safe to call even when there are no connectors present. + * Setup will be retried on the next hotplug event. + * + * The fbdev client is destroyed by drm_dev_unregister(). + * + * Returns: + * 0 on success, or a negative errno code otherwise. + */ +int drm_fbdev_client_setup(struct drm_device *dev, const struct drm_format_info *format) +{ + struct drm_fb_helper *fb_helper; + unsigned int color_mode; + int ret; + + /* TODO: Use format info throughout DRM */ + if (format) { + unsigned int bpp = drm_format_info_bpp(format, 0); + + switch (bpp) { + case 16: + color_mode = format->depth; // could also be 15 + break; + default: + color_mode = bpp; + } + } else { + switch (dev->mode_config.preferred_depth) { + case 0: + case 24: + color_mode = 32; + break; + default: + color_mode = dev->mode_config.preferred_depth; + } + } + + drm_WARN(dev, !dev->registered, "Device has not been registered.\n"); + drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n"); + + fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); + if (!fb_helper) + return -ENOMEM; + drm_fb_helper_prepare(dev, fb_helper, color_mode, NULL); + + ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs); + if (ret) { + drm_err(dev, "Failed to register client: %d\n", ret); + goto err_drm_client_init; + } + + drm_client_register(&fb_helper->client); + + return 0; + +err_drm_client_init: + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); + return ret; +} +EXPORT_SYMBOL(drm_fbdev_client_setup); diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index 51c2d742d199..6fcf2a8bf676 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: MIT #include <linux/fb.h> +#include <linux/vmalloc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> @@ -72,45 +73,214 @@ static const struct fb_ops drm_fbdev_dma_fb_ops = { .fb_destroy = drm_fbdev_dma_fb_destroy, }; -FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma, +FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma_shadowed, drm_fb_helper_damage_range, drm_fb_helper_damage_area); -static int drm_fbdev_dma_deferred_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) +static void drm_fbdev_dma_shadowed_fb_destroy(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; - struct drm_framebuffer *fb = fb_helper->fb; - struct drm_gem_dma_object *dma = drm_fb_dma_get_gem_obj(fb, 0); + void *shadow = info->screen_buffer; - if (!dma->map_noncoherent) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (!fb_helper->dev) + return; + + if (info->fbdefio) + fb_deferred_io_cleanup(info); + drm_fb_helper_fini(fb_helper); + vfree(shadow); - return fb_deferred_io_mmap(info, vma); + drm_client_buffer_vunmap(fb_helper->buffer); + drm_client_framebuffer_delete(fb_helper->buffer); + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); } -static const struct fb_ops drm_fbdev_dma_deferred_fb_ops = { +static const struct fb_ops drm_fbdev_dma_shadowed_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_dma_fb_open, .fb_release = drm_fbdev_dma_fb_release, - __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_dma), + FB_DEFAULT_DEFERRED_OPS(drm_fbdev_dma_shadowed), DRM_FB_HELPER_DEFAULT_OPS, - __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_dma), - .fb_mmap = drm_fbdev_dma_deferred_fb_mmap, - .fb_destroy = drm_fbdev_dma_fb_destroy, + .fb_destroy = drm_fbdev_dma_shadowed_fb_destroy, }; /* * struct drm_fb_helper */ +static void drm_fbdev_dma_damage_blit_real(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip, + struct iosys_map *dst) +{ + struct drm_framebuffer *fb = fb_helper->fb; + size_t offset = clip->y1 * fb->pitches[0]; + size_t len = clip->x2 - clip->x1; + unsigned int y; + void *src; + + switch (drm_format_info_bpp(fb->format, 0)) { + case 1: + offset += clip->x1 / 8; + len = DIV_ROUND_UP(len + clip->x1 % 8, 8); + break; + case 2: + offset += clip->x1 / 4; + len = DIV_ROUND_UP(len + clip->x1 % 4, 4); + break; + case 4: + offset += clip->x1 / 2; + len = DIV_ROUND_UP(len + clip->x1 % 2, 2); + break; + default: + offset += clip->x1 * fb->format->cpp[0]; + len *= fb->format->cpp[0]; + break; + } + + src = fb_helper->info->screen_buffer + offset; + iosys_map_incr(dst, offset); /* go to first pixel within clip rect */ + + for (y = clip->y1; y < clip->y2; y++) { + iosys_map_memcpy_to(dst, 0, src, len); + iosys_map_incr(dst, fb->pitches[0]); + src += fb->pitches[0]; + } +} + +static int drm_fbdev_dma_damage_blit(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct iosys_map dst; + + /* + * For fbdev emulation, we only have to protect against fbdev modeset + * operations. Nothing else will involve the client buffer's BO. So it + * is sufficient to acquire struct drm_fb_helper.lock here. + */ + mutex_lock(&fb_helper->lock); + + dst = buffer->map; + drm_fbdev_dma_damage_blit_real(fb_helper, clip, &dst); + + mutex_unlock(&fb_helper->lock); + + return 0; +} + static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { + return drm_fbdev_dma_driver_fbdev_probe(fb_helper, sizes); +} +static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, + struct drm_clip_rect *clip) +{ + struct drm_device *dev = helper->dev; + int ret; + + /* Call damage handlers only if necessary */ + if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) + return 0; + + if (helper->fb->funcs->dirty) { + ret = drm_fbdev_dma_damage_blit(helper, clip); + if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret)) + return ret; + + ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); + if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) + return ret; + } + + return 0; +} + +static const struct drm_fb_helper_funcs drm_fbdev_dma_helper_funcs = { + .fb_probe = drm_fbdev_dma_helper_fb_probe, + .fb_dirty = drm_fbdev_dma_helper_fb_dirty, +}; + +/* + * struct drm_fb_helper + */ + +static int drm_fbdev_dma_driver_fbdev_probe_tail(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_device *dev = fb_helper->dev; + struct drm_client_buffer *buffer = fb_helper->buffer; + struct drm_gem_dma_object *dma_obj = to_drm_gem_dma_obj(buffer->gem); + struct drm_framebuffer *fb = fb_helper->fb; + struct fb_info *info = fb_helper->info; + struct iosys_map map = buffer->map; + + info->fbops = &drm_fbdev_dma_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + if (dma_obj->map_noncoherent) + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_size = sizes->surface_height * fb->pitches[0]; + info->screen_buffer = map.vaddr; + if (!(info->flags & FBINFO_HIDE_SMEM_START)) { + if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) + info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + } + info->fix.smem_len = info->screen_size; + + return 0; +} + +static int drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct fb_info *info = fb_helper->info; + size_t screen_size = buffer->gem->size; + void *screen_buffer; + int ret; + + /* + * Deferred I/O requires struct page for framebuffer memory, + * which is not guaranteed for all DMA ranges. We thus create + * a shadow buffer in system memory. + */ + screen_buffer = vzalloc(screen_size); + if (!screen_buffer) + return -ENOMEM; + + info->fbops = &drm_fbdev_dma_shadowed_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_buffer = screen_buffer; + info->fix.smem_len = screen_size; + + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_vfree; + + return 0; + +err_vfree: + vfree(screen_buffer); + return ret; +} + +int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; - bool use_deferred_io = false; struct drm_client_buffer *buffer; - struct drm_gem_dma_object *dma_obj; struct drm_framebuffer *fb; struct fb_info *info; u32 format; @@ -127,19 +297,9 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, sizes->surface_height, format); if (IS_ERR(buffer)) return PTR_ERR(buffer); - dma_obj = to_drm_gem_dma_obj(buffer->gem); fb = buffer->fb; - /* - * Deferred I/O requires struct page for framebuffer memory, - * which is not guaranteed for all DMA ranges. We thus only - * install deferred I/O if we have a framebuffer that requires - * it. - */ - if (fb->funcs->dirty) - use_deferred_io = true; - ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; @@ -148,6 +308,7 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, goto err_drm_client_buffer_delete; } + fb_helper->funcs = &drm_fbdev_dma_helper_funcs; fb_helper->buffer = buffer; fb_helper->fb = fb; @@ -159,45 +320,12 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(info, fb_helper, sizes); - if (use_deferred_io) - info->fbops = &drm_fbdev_dma_deferred_fb_ops; + if (fb->funcs->dirty) + ret = drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(fb_helper, sizes); else - info->fbops = &drm_fbdev_dma_fb_ops; - - /* screen */ - info->flags |= FBINFO_VIRTFB; /* system memory */ - if (dma_obj->map_noncoherent) - info->flags |= FBINFO_READS_FAST; /* signal caching */ - info->screen_size = sizes->surface_height * fb->pitches[0]; - info->screen_buffer = map.vaddr; - if (!(info->flags & FBINFO_HIDE_SMEM_START)) { - if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) - info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); - } - info->fix.smem_len = info->screen_size; - - /* - * Only set up deferred I/O if the screen buffer supports - * it. If this disagrees with the previous test for ->dirty, - * mmap on the /dev/fb file might not work correctly. - */ - if (!is_vmalloc_addr(info->screen_buffer) && info->fix.smem_start) { - unsigned long pfn = info->fix.smem_start >> PAGE_SHIFT; - - if (drm_WARN_ON(dev, !pfn_to_page(pfn))) - use_deferred_io = false; - } - - /* deferred I/O */ - if (use_deferred_io) { - fb_helper->fbdefio.delay = HZ / 20; - fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; - - info->fbdefio = &fb_helper->fbdefio; - ret = fb_deferred_io_init(info); - if (ret) - goto err_drm_fb_helper_release_info; - } + ret = drm_fbdev_dma_driver_fbdev_probe_tail(fb_helper, sizes); + if (ret) + goto err_drm_fb_helper_release_info; return 0; @@ -211,30 +339,7 @@ err_drm_client_buffer_delete: drm_client_framebuffer_delete(buffer); return ret; } - -static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, - struct drm_clip_rect *clip) -{ - struct drm_device *dev = helper->dev; - int ret; - - /* Call damage handlers only if necessary */ - if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) - return 0; - - if (helper->fb->funcs->dirty) { - ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); - if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) - return ret; - } - - return 0; -} - -static const struct drm_fb_helper_funcs drm_fbdev_dma_helper_funcs = { - .fb_probe = drm_fbdev_dma_helper_fb_probe, - .fb_dirty = drm_fbdev_dma_helper_fb_dirty, -}; +EXPORT_SYMBOL(drm_fbdev_dma_driver_fbdev_probe); /* * struct drm_client_funcs diff --git a/drivers/gpu/drm/drm_fbdev_ttm.c b/drivers/gpu/drm/drm_fbdev_ttm.c index 119ffb28aaf9..d799cbe944cd 100644 --- a/drivers/gpu/drm/drm_fbdev_ttm.c +++ b/drivers/gpu/drm/drm_fbdev_ttm.c @@ -71,71 +71,7 @@ static const struct fb_ops drm_fbdev_ttm_fb_ops = { static int drm_fbdev_ttm_helper_fb_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { - struct drm_client_dev *client = &fb_helper->client; - struct drm_device *dev = fb_helper->dev; - struct drm_client_buffer *buffer; - struct fb_info *info; - size_t screen_size; - void *screen_buffer; - u32 format; - int ret; - - drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", - sizes->surface_width, sizes->surface_height, - sizes->surface_bpp); - - format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, - sizes->surface_depth); - buffer = drm_client_framebuffer_create(client, sizes->surface_width, - sizes->surface_height, format); - if (IS_ERR(buffer)) - return PTR_ERR(buffer); - - fb_helper->buffer = buffer; - fb_helper->fb = buffer->fb; - - screen_size = buffer->gem->size; - screen_buffer = vzalloc(screen_size); - if (!screen_buffer) { - ret = -ENOMEM; - goto err_drm_client_framebuffer_delete; - } - - info = drm_fb_helper_alloc_info(fb_helper); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto err_vfree; - } - - drm_fb_helper_fill_info(info, fb_helper, sizes); - - info->fbops = &drm_fbdev_ttm_fb_ops; - - /* screen */ - info->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; - info->screen_buffer = screen_buffer; - info->fix.smem_len = screen_size; - - /* deferred I/O */ - fb_helper->fbdefio.delay = HZ / 20; - fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; - - info->fbdefio = &fb_helper->fbdefio; - ret = fb_deferred_io_init(info); - if (ret) - goto err_drm_fb_helper_release_info; - - return 0; - -err_drm_fb_helper_release_info: - drm_fb_helper_release_info(fb_helper); -err_vfree: - vfree(screen_buffer); -err_drm_client_framebuffer_delete: - fb_helper->fb = NULL; - fb_helper->buffer = NULL; - drm_client_framebuffer_delete(buffer); - return ret; + return drm_fbdev_ttm_driver_fbdev_probe(fb_helper, sizes); } static void drm_fbdev_ttm_damage_blit_real(struct drm_fb_helper *fb_helper, @@ -240,6 +176,82 @@ static const struct drm_fb_helper_funcs drm_fbdev_ttm_helper_funcs = { .fb_dirty = drm_fbdev_ttm_helper_fb_dirty, }; +/* + * struct drm_driver + */ + +int drm_fbdev_ttm_driver_fbdev_probe(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_client_dev *client = &fb_helper->client; + struct drm_device *dev = fb_helper->dev; + struct drm_client_buffer *buffer; + struct fb_info *info; + size_t screen_size; + void *screen_buffer; + u32 format; + int ret; + + drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", + sizes->surface_width, sizes->surface_height, + sizes->surface_bpp); + + format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, + sizes->surface_depth); + buffer = drm_client_framebuffer_create(client, sizes->surface_width, + sizes->surface_height, format); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + + fb_helper->funcs = &drm_fbdev_ttm_helper_funcs; + fb_helper->buffer = buffer; + fb_helper->fb = buffer->fb; + + screen_size = buffer->gem->size; + screen_buffer = vzalloc(screen_size); + if (!screen_buffer) { + ret = -ENOMEM; + goto err_drm_client_framebuffer_delete; + } + + info = drm_fb_helper_alloc_info(fb_helper); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto err_vfree; + } + + drm_fb_helper_fill_info(info, fb_helper, sizes); + + info->fbops = &drm_fbdev_ttm_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; + info->screen_buffer = screen_buffer; + info->fix.smem_len = screen_size; + + /* deferred I/O */ + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_drm_fb_helper_release_info; + + return 0; + +err_drm_fb_helper_release_info: + drm_fb_helper_release_info(fb_helper); +err_vfree: + vfree(screen_buffer); +err_drm_client_framebuffer_delete: + fb_helper->fb = NULL; + fb_helper->buffer = NULL; + drm_client_framebuffer_delete(buffer); + return ret; +} +EXPORT_SYMBOL(drm_fbdev_ttm_driver_fbdev_probe); + static void drm_fbdev_ttm_client_unregister(struct drm_client_dev *client) { struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index ce82c9451dfe..e2cf118ff01d 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -938,6 +938,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -952,6 +956,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 193cf8ed7912..3a94ca211f9c 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -36,7 +36,6 @@ * @depth: bit depth per pixel * * Computes a drm fourcc pixel format code for the given @bpp/@depth values. - * Useful in fbdev emulation code, since that deals in those values. */ uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth) { @@ -140,6 +139,35 @@ uint32_t drm_driver_legacy_fb_format(struct drm_device *dev, } EXPORT_SYMBOL(drm_driver_legacy_fb_format); +/** + * drm_driver_color_mode_format - Compute DRM 4CC code from color mode + * @dev: DRM device + * @color_mode: command-line color mode + * + * Computes a DRM 4CC pixel format code for the given color mode using + * drm_driver_color_mode(). The color mode is in the format used and the + * kernel command line. It specifies the number of bits per pixel + * and color depth in a single value. + * + * Useful in fbdev emulation code, since that deals in those values. The + * helper does not consider YUV or other complicated formats. This means + * only legacy formats are supported (fmt->depth is a legacy field), but + * the framebuffer emulation can only deal with such formats, specifically + * RGB/BGA formats. + */ +uint32_t drm_driver_color_mode_format(struct drm_device *dev, unsigned int color_mode) +{ + switch (color_mode) { + case 15: + return drm_driver_legacy_fb_format(dev, 16, 15); + case 32: + return drm_driver_legacy_fb_format(dev, 32, 24); + default: + return drm_driver_legacy_fb_format(dev, color_mode, color_mode); + } +} +EXPORT_SYMBOL(drm_driver_color_mode_format); + /* * Internal function to query information for a given format. See * drm_format_info() for the public API. diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 888aadb6a4ac..d6550b54fac1 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -860,11 +860,23 @@ void drm_framebuffer_free(struct kref *kref) int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) { + unsigned int i; int ret; + bool exists; if (WARN_ON_ONCE(fb->dev != dev || !fb->format)) return -EINVAL; + for (i = 0; i < fb->format->num_planes; i++) { + if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))) + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + if (fb->obj[i]) { + exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]); + if (exists) + fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } + INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; @@ -873,7 +885,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); if (ret) - goto out; + goto err; mutex_lock(&dev->mode_config.fb_lock); dev->mode_config.num_fb++; @@ -881,7 +893,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, mutex_unlock(&dev->mode_config.fb_lock); drm_mode_object_register(dev, &fb->base); -out: + + return 0; + +err: + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) { + drm_gem_object_handle_put_unlocked(fb->obj[i]); + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } return ret; } EXPORT_SYMBOL(drm_framebuffer_init); @@ -958,6 +979,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private); void drm_framebuffer_cleanup(struct drm_framebuffer *fb) { struct drm_device *dev = fb->dev; + unsigned int i; + + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) + drm_gem_object_handle_put_unlocked(fb->obj[i]); + } mutex_lock(&dev->mode_config.fb_lock); list_del(&fb->head); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 149b8e25da5b..9e8a4da313a0 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -186,6 +186,46 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj) } EXPORT_SYMBOL(drm_gem_private_object_fini); +static void drm_gem_object_handle_get(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock)); + + if (obj->handle_count++ == 0) + drm_gem_object_get(obj); +} + +/** + * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any + * @obj: GEM object + * + * Acquires a reference on the GEM buffer object's handle. Required to keep + * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked() + * to release the reference. Does nothing if the buffer object has no handle. + * + * Returns: + * True if a handle exists, or false otherwise + */ +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + guard(mutex)(&dev->object_name_lock); + + /* + * First ref taken during GEM object creation, if any. Some + * drivers set up internal framebuffers with GEM objects that + * do not have a GEM handle. Hence, this counter can be zero. + */ + if (!obj->handle_count) + return false; + + drm_gem_object_handle_get(obj); + + return true; +} + /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -216,20 +256,26 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } } -static void -drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) +/** + * drm_gem_object_handle_put_unlocked - releases reference on user-space handle + * @obj: GEM object + * + * Releases a reference on the GEM buffer object's handle. Possibly releases + * the GEM buffer object and associated dma-buf objects. + */ +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; - if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) + if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0)) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ + * Must bump handle count first as this may be the last + * ref, in which case the object would disappear before + * we checked for a name. + */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { @@ -253,6 +299,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; + if (drm_WARN_ON(obj->dev, !data)) + return 0; + if (obj->funcs->close) obj->funcs->close(obj, file_priv); @@ -322,7 +371,7 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, return -ENOENT; /* Don't allow imported objects to be mapped */ - if (obj->import_attach) { + if (drm_gem_is_imported(obj)) { ret = -EINVAL; goto out; } @@ -363,8 +412,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); - if (obj->handle_count++ == 0) - drm_gem_object_get(obj); + + drm_gem_object_handle_get(obj); /* * Get the user-visible handle using idr. Preload and perform @@ -373,7 +422,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); @@ -394,6 +443,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, goto err_revoke; } + /* mirrors drm_gem_handle_delete to avoid races */ + spin_lock(&file_priv->table_lock); + obj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(obj != NULL); + spin_unlock(&file_priv->table_lock); *handlep = handle; return 0; @@ -1152,7 +1206,7 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent, drm_vma_node_start(&obj->vma_node)); drm_printf_indent(p, indent, "size=%zu\n", obj->size); drm_printf_indent(p, indent, "imported=%s\n", - str_yes_no(obj->import_attach)); + str_yes_no(drm_gem_is_imported(obj))); if (obj->funcs->print_info) obj->funcs->print_info(p, indent, obj); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 1705bfc90b1e..98b73c581c42 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -153,6 +153,8 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj); +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 34bca7567576..3ea9f23b4f67 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -404,12 +404,16 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) u16 height = drm->mode_config.min_height; u16 width = drm->mode_config.min_width; struct mipi_dbi *dbi = &dbidev->dbi; - size_t len = width * height * 2; + const struct drm_format_info *dst_format; + size_t len; int idx; if (!drm_dev_enter(drm, &idx)) return; + dst_format = drm_format_info(dbidev->pixel_format); + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + memset(dbidev->tx_buf, 0, len); mipi_dbi_set_window_address(dbidev, 0, width - 1, 0, height - 1); diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index 19ab0a794add..fd8fa2e0ef6f 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -49,7 +49,7 @@ static LIST_HEAD(panel_list); * @dev: parent device of the panel * @funcs: panel operations * @connector_type: the connector type (DRM_MODE_CONNECTOR_*) corresponding to - * the panel interface + * the panel interface (must NOT be DRM_MODE_CONNECTOR_Unknown) * * Initialize the panel structure for subsequent registration with * drm_panel_add(). @@ -57,6 +57,9 @@ static LIST_HEAD(panel_list); void drm_panel_init(struct drm_panel *panel, struct device *dev, const struct drm_panel_funcs *funcs, int connector_type) { + if (connector_type == DRM_MODE_CONNECTOR_Unknown) + DRM_WARN("%s: %s: a valid connector type is required!\n", __func__, dev_name(dev)); + INIT_LIST_HEAD(&panel->list); INIT_LIST_HEAD(&panel->followers); mutex_init(&panel->follower_lock); diff --git a/drivers/gpu/drm/drm_panel_backlight_quirks.c b/drivers/gpu/drm/drm_panel_backlight_quirks.c new file mode 100644 index 000000000000..c477d98ade2b --- /dev/null +++ b/drivers/gpu/drm/drm_panel_backlight_quirks.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/array_size.h> +#include <linux/dmi.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <drm/drm_edid.h> +#include <drm/drm_utils.h> + +struct drm_panel_min_backlight_quirk { + struct { + enum dmi_field field; + const char * const value; + } dmi_match; + struct drm_edid_ident ident; + u8 min_brightness; +}; + +static const struct drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks[] = { + /* 13 inch matte panel */ + { + .dmi_match.field = DMI_BOARD_VENDOR, + .dmi_match.value = "Framework", + .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0bca), + .ident.name = "NE135FBM-N41", + .min_brightness = 0, + }, + /* 13 inch glossy panel */ + { + .dmi_match.field = DMI_BOARD_VENDOR, + .dmi_match.value = "Framework", + .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x095f), + .ident.name = "NE135FBM-N41", + .min_brightness = 0, + }, + /* 13 inch 2.8k panel */ + { + .dmi_match.field = DMI_BOARD_VENDOR, + .dmi_match.value = "Framework", + .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0cb4), + .ident.name = "NE135A1M-NY1", + .min_brightness = 0, + }, +}; + +static bool drm_panel_min_backlight_quirk_matches(const struct drm_panel_min_backlight_quirk *quirk, + const struct drm_edid *edid) +{ + if (!dmi_match(quirk->dmi_match.field, quirk->dmi_match.value)) + return false; + + if (!drm_edid_match(edid, &quirk->ident)) + return false; + + return true; +} + +/** + * drm_get_panel_min_brightness_quirk - Get minimum supported brightness level for a panel. + * @edid: EDID of the panel to check + * + * This function checks for platform specific (e.g. DMI based) quirks + * providing info on the minimum backlight brightness for systems where this + * cannot be probed correctly from the hard-/firm-ware. + * + * Returns: + * A negative error value or + * an override value in the range [0, 255] representing 0-100% to be scaled to + * the drivers target range. + */ +int drm_get_panel_min_brightness_quirk(const struct drm_edid *edid) +{ + const struct drm_panel_min_backlight_quirk *quirk; + size_t i; + + if (!IS_ENABLED(CONFIG_DMI)) + return -ENODATA; + + if (!edid) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(drm_panel_min_backlight_quirks); i++) { + quirk = &drm_panel_min_backlight_quirks[i]; + + if (drm_panel_min_backlight_quirk_matches(quirk, edid)) + return quirk->min_brightness; + } + + return -ENODATA; +} +EXPORT_SYMBOL(drm_get_panel_min_brightness_quirk); + +MODULE_DESCRIPTION("Quirks for panel backlight overrides"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 4a73821b81f6..c554ad8f246b 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -93,6 +93,12 @@ static const struct drm_dmi_panel_orientation_data onegx1_pro = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd640x960_leftside_up = { + .width = 640, + .height = 960, + .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, +}; + static const struct drm_dmi_panel_orientation_data lcd720x1280_rightside_up = { .width = 720, .height = 1280, @@ -123,6 +129,12 @@ static const struct drm_dmi_panel_orientation_data lcd1080x1920_rightside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1200x1920_leftside_up = { + .width = 1200, + .height = 1920, + .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, +}; + static const struct drm_dmi_panel_orientation_data lcd1200x1920_rightside_up = { .width = 1200, .height = 1920, @@ -184,10 +196,10 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), }, .driver_data = (void *)&lcd800x1280_rightside_up, - }, { /* AYA NEO AYANEO 2 */ + }, { /* AYA NEO AYANEO 2/2S */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYANEO"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYANEO 2"), + DMI_MATCH(DMI_PRODUCT_NAME, "AYANEO 2"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* AYA NEO 2021 */ @@ -202,6 +214,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_PRODUCT_NAME, "AIR"), }, .driver_data = (void *)&lcd1080x1920_leftside_up, + }, { /* AYA NEO Flip DS Bottom Screen */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYANEO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "FLIP DS"), + }, + .driver_data = (void *)&lcd640x960_leftside_up, + }, { /* AYA NEO Flip KB/DS Top Screen */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYANEO"), + DMI_MATCH(DMI_PRODUCT_NAME, "FLIP"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* AYA NEO Founder */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYA NEO"), @@ -226,6 +250,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYA NEO SLIDE */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYANEO"), + DMI_MATCH(DMI_PRODUCT_NAME, "SLIDE"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* AYN Loki Max */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), @@ -315,6 +345,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), }, .driver_data = (void *)&gpd_win2, + }, { /* GPD Win 2 (correct DMI strings) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "WIN2") + }, + .driver_data = (void *)&lcd720x1280_rightside_up, }, { /* GPD Win 3 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), @@ -443,6 +479,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), }, .driver_data = (void *)&lcd1600x2560_leftside_up, + }, { /* OneXPlayer Mini (Intel) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), + }, + .driver_data = (void *)&lcd1200x1920_leftside_up, }, { /* OrangePi Neo */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "OrangePi"), diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 447740d79d3d..6903e2010cb9 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -209,12 +209,9 @@ const FORMAT_INFOS_QR_L: [u16; 8] = [ impl Version { /// Returns the smallest QR version than can hold these segments. fn from_segments(segments: &[&Segment<'_>]) -> Option<Version> { - for v in (1..=40).map(|k| Version(k)) { - if v.max_data() * 8 >= segments.iter().map(|s| s.total_size_bits(v)).sum() { - return Some(v); - } - } - None + (1..=40) + .map(Version) + .find(|&v| v.max_data() * 8 >= segments.iter().map(|s| s.total_size_bits(v)).sum()) } fn width(&self) -> u8 { @@ -242,7 +239,7 @@ impl Version { } fn alignment_pattern(&self) -> &'static [u8] { - &ALIGNMENT_PATTERNS[self.0 - 1] + ALIGNMENT_PATTERNS[self.0 - 1] } fn poly(&self) -> &'static [u8] { @@ -479,7 +476,7 @@ struct EncodedMsg<'a> { /// Data to be put in the QR code, with correct segment encoding, padding, and /// Error Code Correction. impl EncodedMsg<'_> { - fn new<'a, 'b>(segments: &[&Segment<'b>], data: &'a mut [u8]) -> Option<EncodedMsg<'a>> { + fn new<'a>(segments: &[&Segment<'_>], data: &'a mut [u8]) -> Option<EncodedMsg<'a>> { let version = Version::from_segments(segments)?; let ec_size = version.ec_size(); let g1_blocks = version.g1_blocks(); @@ -492,7 +489,7 @@ impl EncodedMsg<'_> { data.fill(0); let mut em = EncodedMsg { - data: data, + data, ec_size, g1_blocks, g2_blocks, @@ -548,7 +545,7 @@ impl EncodedMsg<'_> { } self.push(&mut offset, (MODE_STOP, 4)); - let pad_offset = (offset + 7) / 8; + let pad_offset = offset.div_ceil(8); for i in pad_offset..self.version.max_data() { self.data[i] = PADDING[(i & 1) ^ (pad_offset & 1)]; } @@ -662,7 +659,7 @@ struct QrImage<'a> { impl QrImage<'_> { fn new<'a, 'b>(em: &'b EncodedMsg<'b>, qrdata: &'a mut [u8]) -> QrImage<'a> { let width = em.version.width(); - let stride = (width + 7) / 8; + let stride = width.div_ceil(8); let data = qrdata; let mut qr_image = QrImage { @@ -722,7 +719,10 @@ impl QrImage<'_> { fn is_finder(&self, x: u8, y: u8) -> bool { let end = self.width - 8; - (x < 8 && y < 8) || (x < 8 && y >= end) || (x >= end && y < 8) + #[expect(clippy::nonminimal_bool)] + { + (x < 8 && y < 8) || (x < 8 && y >= end) || (x >= end && y < 8) + } } // Alignment pattern: 5x5 squares in a grid. @@ -911,16 +911,16 @@ impl QrImage<'_> { /// /// * `url`: The base URL of the QR code. It will be encoded as Binary segment. /// * `data`: A pointer to the binary data, to be encoded. if URL is NULL, it -/// will be encoded as binary segment, otherwise it will be encoded -/// efficiently as a numeric segment, and appended to the URL. +/// will be encoded as binary segment, otherwise it will be encoded +/// efficiently as a numeric segment, and appended to the URL. /// * `data_len`: Length of the data, that needs to be encoded, must be less -/// than data_size. +/// than data_size. /// * `data_size`: Size of data buffer, it should be at least 4071 bytes to hold -/// a V40 QR code. It will then be overwritten with the QR code image. +/// a V40 QR code. It will then be overwritten with the QR code image. /// * `tmp`: A temporary buffer that the QR code encoder will use, to write the -/// segments and ECC. +/// segments and ECC. /// * `tmp_size`: Size of the temporary buffer, it must be at least 3706 bytes -/// long for V40. +/// long for V40. /// /// # Safety /// @@ -931,7 +931,7 @@ impl QrImage<'_> { /// They must remain valid for the duration of the function call. #[no_mangle] pub unsafe extern "C" fn drm_panic_qr_generate( - url: *const i8, + url: *const kernel::ffi::c_char, data: *mut u8, data_len: usize, data_size: usize, @@ -978,10 +978,11 @@ pub unsafe extern "C" fn drm_panic_qr_generate( /// * `url_len`: Length of the URL. /// /// * If `url_len` > 0, remove the 2 segments header/length and also count the -/// conversion to numeric segments. +/// conversion to numeric segments. /// * If `url_len` = 0, only removes 3 bytes for 1 binary segment. #[no_mangle] pub extern "C" fn drm_panic_qr_max_data_size(version: u8, url_len: usize) -> usize { + #[expect(clippy::manual_range_contains)] if version < 1 || version > 40 { return 0; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 5c0c9d4e3be1..d3f6df047f5a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -342,6 +342,7 @@ void *etnaviv_gem_vmap(struct drm_gem_object *obj) static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj) { struct page **pages; + pgprot_t prot; lockdep_assert_held(&obj->lock); @@ -349,8 +350,19 @@ static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj) if (IS_ERR(pages)) return NULL; - return vmap(pages, obj->base.size >> PAGE_SHIFT, - VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + switch (obj->flags & ETNA_BO_CACHE_MASK) { + case ETNA_BO_CACHED: + prot = PAGE_KERNEL; + break; + case ETNA_BO_UNCACHED: + prot = pgprot_noncached(PAGE_KERNEL); + break; + case ETNA_BO_WC: + default: + prot = pgprot_writecombine(PAGE_KERNEL); + } + + return vmap(pages, obj->base.size >> PAGE_SHIFT, VM_MAP, prot); } static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index ab9ca4824b62..e60288af3502 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -34,6 +34,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) { struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job); + struct drm_gpu_scheduler *sched = sched_job->sched; struct etnaviv_gpu *gpu = submit->gpu; u32 dma_addr; int change; @@ -76,7 +77,9 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job return DRM_GPU_SCHED_STAT_NOMINAL; out_no_timeout: - list_add(&sched_job->list, &sched_job->sched->pending_list); + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 0d185c0564b9..9eeba254cf45 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -601,6 +601,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) if (!ctx->drm_dev) goto out; + /* check if crtc and vblank have been initialized properly */ + if (!drm_dev_has_vblank(ctx->drm_dev)) + goto out; + if (!ctx->i80_if) { drm_crtc_handle_vblank(&ctx->crtc->base); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index f57df8c48139..05e4a5a63f5d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -187,6 +187,7 @@ struct fimd_context { u32 i80ifcon; bool i80_if; bool suspended; + bool dp_clk_enabled; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; atomic_t win_updated; @@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable) struct fimd_context *ctx = container_of(clk, struct fimd_context, dp_clk); u32 val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; + + if (enable == ctx->dp_clk_enabled) + return; + + if (enable) + pm_runtime_resume_and_get(ctx->dev); + + ctx->dp_clk_enabled = enable; writel(val, ctx->regs + DP_MIE_CLKCON); + + if (!enable) + pm_runtime_put(ctx->dev); } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 1e26cd4f8347..52059cfff4f0 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1643,7 +1643,9 @@ static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, struct hdmi_context *hdata = dev_get_drvdata(dev); struct drm_connector *connector = &hdata->connector; + mutex_lock(&connector->eld_mutex); memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); + mutex_unlock(&connector->eld_mutex); return 0; } diff --git a/drivers/gpu/drm/gma500/mid_bios.c b/drivers/gpu/drm/gma500/mid_bios.c index 7e76790c6a81..cba97d7db131 100644 --- a/drivers/gpu/drm/gma500/mid_bios.c +++ b/drivers/gpu/drm/gma500/mid_bios.c @@ -279,6 +279,11 @@ static void mid_get_vbt_data(struct drm_psb_private *dev_priv) 0, PCI_DEVFN(2, 0)); int ret = -1; + if (pci_gfx_root == NULL) { + WARN_ON(1); + return; + } + /* Get the address of the platform config vbt */ pci_read_config_dword(pci_gfx_root, 0xFC, &addr); pci_dev_put(pci_gfx_root); diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index ff93e08d5036..5f02a5a39ab4 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -154,6 +154,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, return 0; err_free_mmio: + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); err_vmbus_close: vmbus_close(hdev->channel); @@ -172,6 +173,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); } diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 46f23bdb4c17..b89a364a3924 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -683,7 +683,7 @@ static bool assert_hdmi_port_valid(struct drm_i915_private *i915, enum port port "Platform does not support HDMI %c\n", port_name(port)); } -void g4x_hdmi_init(struct drm_i915_private *dev_priv, +bool g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port) { struct intel_display *display = &dev_priv->display; @@ -693,10 +693,10 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, struct intel_connector *intel_connector; if (!assert_port_valid(dev_priv, port)) - return; + return false; if (!assert_hdmi_port_valid(dev_priv, port)) - return; + return false; devdata = intel_bios_encoder_data_lookup(display, port); @@ -707,15 +707,13 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); if (!dig_port) - return; + return false; dig_port->aux_ch = AUX_CH_NONE; intel_connector = intel_connector_alloc(); - if (!intel_connector) { - kfree(dig_port); - return; - } + if (!intel_connector) + goto err_connector_alloc; intel_encoder = &dig_port->base; @@ -723,9 +721,10 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, mutex_init(&dig_port->hdcp_mutex); - drm_encoder_init(&dev_priv->drm, &intel_encoder->base, - &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, - "HDMI %c", port_name(port)); + if (drm_encoder_init(&dev_priv->drm, &intel_encoder->base, + &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, + "HDMI %c", port_name(port))) + goto err_encoder_init; intel_encoder->hotplug = intel_hdmi_hotplug; intel_encoder->compute_config = g4x_hdmi_compute_config; @@ -788,5 +787,17 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, intel_infoframe_init(dig_port); - intel_hdmi_init_connector(dig_port, intel_connector); + if (!intel_hdmi_init_connector(dig_port, intel_connector)) + goto err_init_connector; + + return true; + +err_init_connector: + drm_encoder_cleanup(&intel_encoder->base); +err_encoder_init: + kfree(intel_connector); +err_connector_alloc: + kfree(dig_port); + + return false; } diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.h b/drivers/gpu/drm/i915/display/g4x_hdmi.h index 817f55c7a3a1..a52e8986ec7a 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.h +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.h @@ -16,14 +16,15 @@ struct drm_connector; struct drm_i915_private; #ifdef I915 -void g4x_hdmi_init(struct drm_i915_private *dev_priv, +bool g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port); int g4x_hdmi_connector_atomic_check(struct drm_connector *connector, struct drm_atomic_state *state); #else -static inline void g4x_hdmi_init(struct drm_i915_private *dev_priv, +static inline bool g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, int port) { + return false; } static inline int g4x_hdmi_connector_atomic_check(struct drm_connector *connector, struct drm_atomic_state *state) diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 9447f7229b60..17a1e3801a85 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -416,7 +416,8 @@ static int i9xx_plane_min_cdclk(const struct intel_crtc_state *crtc_state, return DIV_ROUND_UP(pixel_rate * num, den); } -static void i9xx_plane_update_noarm(struct intel_plane *plane, +static void i9xx_plane_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -444,7 +445,8 @@ static void i9xx_plane_update_noarm(struct intel_plane *plane, } } -static void i9xx_plane_update_arm(struct intel_plane *plane, +static void i9xx_plane_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -507,7 +509,8 @@ static void i9xx_plane_update_arm(struct intel_plane *plane, intel_plane_ggtt_offset(plane_state) + dspaddr_offset); } -static void i830_plane_update_arm(struct intel_plane *plane, +static void i830_plane_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -517,11 +520,12 @@ static void i830_plane_update_arm(struct intel_plane *plane, * Additional breakage on i830 causes register reads to return * the last latched value instead of the last written value [ALM026]. */ - i9xx_plane_update_noarm(plane, crtc_state, plane_state); - i9xx_plane_update_arm(plane, crtc_state, plane_state); + i9xx_plane_update_noarm(dsb, plane, crtc_state, plane_state); + i9xx_plane_update_arm(dsb, plane, crtc_state, plane_state); } -static void i9xx_plane_disable_arm(struct intel_plane *plane, +static void i9xx_plane_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); @@ -549,7 +553,8 @@ static void i9xx_plane_disable_arm(struct intel_plane *plane, } static void -g4x_primary_async_flip(struct intel_plane *plane, +g4x_primary_async_flip(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, bool async_flip) @@ -569,7 +574,8 @@ g4x_primary_async_flip(struct intel_plane *plane, } static void -vlv_primary_async_flip(struct intel_plane *plane, +vlv_primary_async_flip(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, bool async_flip) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 293efc1f841d..4e95b8eda23f 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -50,38 +50,38 @@ #include "skl_scaler.h" #include "skl_universal_plane.h" -static int header_credits_available(struct drm_i915_private *dev_priv, +static int header_credits_available(struct intel_display *display, enum transcoder dsi_trans) { - return (intel_de_read(dev_priv, DSI_CMD_TXCTL(dsi_trans)) & FREE_HEADER_CREDIT_MASK) + return (intel_de_read(display, DSI_CMD_TXCTL(dsi_trans)) & FREE_HEADER_CREDIT_MASK) >> FREE_HEADER_CREDIT_SHIFT; } -static int payload_credits_available(struct drm_i915_private *dev_priv, +static int payload_credits_available(struct intel_display *display, enum transcoder dsi_trans) { - return (intel_de_read(dev_priv, DSI_CMD_TXCTL(dsi_trans)) & FREE_PLOAD_CREDIT_MASK) + return (intel_de_read(display, DSI_CMD_TXCTL(dsi_trans)) & FREE_PLOAD_CREDIT_MASK) >> FREE_PLOAD_CREDIT_SHIFT; } -static bool wait_for_header_credits(struct drm_i915_private *dev_priv, +static bool wait_for_header_credits(struct intel_display *display, enum transcoder dsi_trans, int hdr_credit) { - if (wait_for_us(header_credits_available(dev_priv, dsi_trans) >= + if (wait_for_us(header_credits_available(display, dsi_trans) >= hdr_credit, 100)) { - drm_err(&dev_priv->drm, "DSI header credits not released\n"); + drm_err(display->drm, "DSI header credits not released\n"); return false; } return true; } -static bool wait_for_payload_credits(struct drm_i915_private *dev_priv, +static bool wait_for_payload_credits(struct intel_display *display, enum transcoder dsi_trans, int payld_credit) { - if (wait_for_us(payload_credits_available(dev_priv, dsi_trans) >= + if (wait_for_us(payload_credits_available(display, dsi_trans) >= payld_credit, 100)) { - drm_err(&dev_priv->drm, "DSI payload credits not released\n"); + drm_err(display->drm, "DSI payload credits not released\n"); return false; } @@ -98,7 +98,7 @@ static enum transcoder dsi_port_to_transcoder(enum port port) static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi; enum port port; @@ -108,8 +108,8 @@ static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) /* wait for header/payload credits to be released */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - wait_for_header_credits(dev_priv, dsi_trans, MAX_HEADER_CREDIT); - wait_for_payload_credits(dev_priv, dsi_trans, MAX_PLOAD_CREDIT); + wait_for_header_credits(display, dsi_trans, MAX_HEADER_CREDIT); + wait_for_payload_credits(display, dsi_trans, MAX_PLOAD_CREDIT); } /* send nop DCS command */ @@ -119,22 +119,22 @@ static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) dsi->channel = 0; ret = mipi_dsi_dcs_nop(dsi); if (ret < 0) - drm_err(&dev_priv->drm, + drm_err(display->drm, "error sending DCS NOP command\n"); } /* wait for header credits to be released */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - wait_for_header_credits(dev_priv, dsi_trans, MAX_HEADER_CREDIT); + wait_for_header_credits(display, dsi_trans, MAX_HEADER_CREDIT); } /* wait for LP TX in progress bit to be cleared */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - if (wait_for_us(!(intel_de_read(dev_priv, DSI_LP_MSG(dsi_trans)) & + if (wait_for_us(!(intel_de_read(display, DSI_LP_MSG(dsi_trans)) & LPTX_IN_PROGRESS), 20)) - drm_err(&dev_priv->drm, "LPTX bit not cleared\n"); + drm_err(display->drm, "LPTX bit not cleared\n"); } } @@ -142,7 +142,7 @@ static int dsi_send_pkt_payld(struct intel_dsi_host *host, const struct mipi_dsi_packet *packet) { struct intel_dsi *intel_dsi = host->intel_dsi; - struct drm_i915_private *i915 = to_i915(intel_dsi->base.base.dev); + struct intel_display *display = to_intel_display(&intel_dsi->base); enum transcoder dsi_trans = dsi_port_to_transcoder(host->port); const u8 *data = packet->payload; u32 len = packet->payload_length; @@ -150,20 +150,20 @@ static int dsi_send_pkt_payld(struct intel_dsi_host *host, /* payload queue can accept *256 bytes*, check limit */ if (len > MAX_PLOAD_CREDIT * 4) { - drm_err(&i915->drm, "payload size exceeds max queue limit\n"); + drm_err(display->drm, "payload size exceeds max queue limit\n"); return -EINVAL; } for (i = 0; i < len; i += 4) { u32 tmp = 0; - if (!wait_for_payload_credits(i915, dsi_trans, 1)) + if (!wait_for_payload_credits(display, dsi_trans, 1)) return -EBUSY; for (j = 0; j < min_t(u32, len - i, 4); j++) tmp |= *data++ << 8 * j; - intel_de_write(i915, DSI_CMD_TXPYLD(dsi_trans), tmp); + intel_de_write(display, DSI_CMD_TXPYLD(dsi_trans), tmp); } return 0; @@ -174,14 +174,14 @@ static int dsi_send_pkt_hdr(struct intel_dsi_host *host, bool enable_lpdt) { struct intel_dsi *intel_dsi = host->intel_dsi; - struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); + struct intel_display *display = to_intel_display(&intel_dsi->base); enum transcoder dsi_trans = dsi_port_to_transcoder(host->port); u32 tmp; - if (!wait_for_header_credits(dev_priv, dsi_trans, 1)) + if (!wait_for_header_credits(display, dsi_trans, 1)) return -EBUSY; - tmp = intel_de_read(dev_priv, DSI_CMD_TXHDR(dsi_trans)); + tmp = intel_de_read(display, DSI_CMD_TXHDR(dsi_trans)); if (packet->payload) tmp |= PAYLOAD_PRESENT; @@ -200,15 +200,14 @@ static int dsi_send_pkt_hdr(struct intel_dsi_host *host, tmp |= ((packet->header[0] & DT_MASK) << DT_SHIFT); tmp |= (packet->header[1] << PARAM_WC_LOWER_SHIFT); tmp |= (packet->header[2] << PARAM_WC_UPPER_SHIFT); - intel_de_write(dev_priv, DSI_CMD_TXHDR(dsi_trans), tmp); + intel_de_write(display, DSI_CMD_TXHDR(dsi_trans), tmp); return 0; } void icl_dsi_frame_update(struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); u32 mode_flags; enum port port; @@ -226,12 +225,13 @@ void icl_dsi_frame_update(struct intel_crtc_state *crtc_state) else return; - intel_de_rmw(dev_priv, DSI_CMD_FRMCTL(port), 0, DSI_FRAME_UPDATE_REQUEST); + intel_de_rmw(display, DSI_CMD_FRMCTL(port), 0, + DSI_FRAME_UPDATE_REQUEST); } static void dsi_program_swing_and_deemphasis(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum phy phy; u32 tmp, mask, val; @@ -245,31 +245,31 @@ static void dsi_program_swing_and_deemphasis(struct intel_encoder *encoder) mask = SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK; val = SCALING_MODE_SEL(0x2) | TAP2_DISABLE | TAP3_DISABLE | RTERM_SELECT(0x6); - tmp = intel_de_read(dev_priv, ICL_PORT_TX_DW5_LN(0, phy)); + tmp = intel_de_read(display, ICL_PORT_TX_DW5_LN(0, phy)); tmp &= ~mask; tmp |= val; - intel_de_write(dev_priv, ICL_PORT_TX_DW5_GRP(phy), tmp); - intel_de_rmw(dev_priv, ICL_PORT_TX_DW5_AUX(phy), mask, val); + intel_de_write(display, ICL_PORT_TX_DW5_GRP(phy), tmp); + intel_de_rmw(display, ICL_PORT_TX_DW5_AUX(phy), mask, val); mask = SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK; val = SWING_SEL_UPPER(0x2) | SWING_SEL_LOWER(0x2) | RCOMP_SCALAR(0x98); - tmp = intel_de_read(dev_priv, ICL_PORT_TX_DW2_LN(0, phy)); + tmp = intel_de_read(display, ICL_PORT_TX_DW2_LN(0, phy)); tmp &= ~mask; tmp |= val; - intel_de_write(dev_priv, ICL_PORT_TX_DW2_GRP(phy), tmp); - intel_de_rmw(dev_priv, ICL_PORT_TX_DW2_AUX(phy), mask, val); + intel_de_write(display, ICL_PORT_TX_DW2_GRP(phy), tmp); + intel_de_rmw(display, ICL_PORT_TX_DW2_AUX(phy), mask, val); mask = POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | CURSOR_COEFF_MASK; val = POST_CURSOR_1(0x0) | POST_CURSOR_2(0x0) | CURSOR_COEFF(0x3f); - intel_de_rmw(dev_priv, ICL_PORT_TX_DW4_AUX(phy), mask, val); + intel_de_rmw(display, ICL_PORT_TX_DW4_AUX(phy), mask, val); /* Bspec: must not use GRP register for write */ for (lane = 0; lane <= 3; lane++) - intel_de_rmw(dev_priv, ICL_PORT_TX_DW4_LN(lane, phy), + intel_de_rmw(display, ICL_PORT_TX_DW4_LN(lane, phy), mask, val); } } @@ -277,13 +277,13 @@ static void dsi_program_swing_and_deemphasis(struct intel_encoder *encoder) static void configure_dual_link_mode(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); i915_reg_t dss_ctl1_reg, dss_ctl2_reg; u32 dss_ctl1; /* FIXME: Move all DSS handling to intel_vdsc.c */ - if (DISPLAY_VER(dev_priv) >= 12) { + if (DISPLAY_VER(display) >= 12) { struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); dss_ctl1_reg = ICL_PIPE_DSS_CTL1(crtc->pipe); @@ -293,7 +293,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, dss_ctl2_reg = DSS_CTL2; } - dss_ctl1 = intel_de_read(dev_priv, dss_ctl1_reg); + dss_ctl1 = intel_de_read(display, dss_ctl1_reg); dss_ctl1 |= SPLITTER_ENABLE; dss_ctl1 &= ~OVERLAP_PIXELS_MASK; dss_ctl1 |= OVERLAP_PIXELS(intel_dsi->pixel_overlap); @@ -308,19 +308,19 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, dl_buffer_depth = hactive / 2 + intel_dsi->pixel_overlap; if (dl_buffer_depth > MAX_DL_BUFFER_TARGET_DEPTH) - drm_err(&dev_priv->drm, + drm_err(display->drm, "DL buffer depth exceed max value\n"); dss_ctl1 &= ~LEFT_DL_BUF_TARGET_DEPTH_MASK; dss_ctl1 |= LEFT_DL_BUF_TARGET_DEPTH(dl_buffer_depth); - intel_de_rmw(dev_priv, dss_ctl2_reg, RIGHT_DL_BUF_TARGET_DEPTH_MASK, + intel_de_rmw(display, dss_ctl2_reg, RIGHT_DL_BUF_TARGET_DEPTH_MASK, RIGHT_DL_BUF_TARGET_DEPTH(dl_buffer_depth)); } else { /* Interleave */ dss_ctl1 |= DUAL_LINK_MODE_INTERLEAVE; } - intel_de_write(dev_priv, dss_ctl1_reg, dss_ctl1); + intel_de_write(display, dss_ctl1_reg, dss_ctl1); } /* aka DSI 8X clock */ @@ -341,6 +341,7 @@ static int afe_clk(struct intel_encoder *encoder, static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; @@ -360,33 +361,34 @@ static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder, } for_each_dsi_port(port, intel_dsi->ports) { - intel_de_write(dev_priv, ICL_DSI_ESC_CLK_DIV(port), + intel_de_write(display, ICL_DSI_ESC_CLK_DIV(port), esc_clk_div_m & ICL_ESC_CLK_DIV_MASK); - intel_de_posting_read(dev_priv, ICL_DSI_ESC_CLK_DIV(port)); + intel_de_posting_read(display, ICL_DSI_ESC_CLK_DIV(port)); } for_each_dsi_port(port, intel_dsi->ports) { - intel_de_write(dev_priv, ICL_DPHY_ESC_CLK_DIV(port), + intel_de_write(display, ICL_DPHY_ESC_CLK_DIV(port), esc_clk_div_m & ICL_ESC_CLK_DIV_MASK); - intel_de_posting_read(dev_priv, ICL_DPHY_ESC_CLK_DIV(port)); + intel_de_posting_read(display, ICL_DPHY_ESC_CLK_DIV(port)); } if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) { for_each_dsi_port(port, intel_dsi->ports) { - intel_de_write(dev_priv, ADL_MIPIO_DW(port, 8), + intel_de_write(display, ADL_MIPIO_DW(port, 8), esc_clk_div_m_phy & TX_ESC_CLK_DIV_PHY); - intel_de_posting_read(dev_priv, ADL_MIPIO_DW(port, 8)); + intel_de_posting_read(display, ADL_MIPIO_DW(port, 8)); } } } -static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv, - struct intel_dsi *intel_dsi) +static void get_dsi_io_power_domains(struct intel_dsi *intel_dsi) { + struct intel_display *display = to_intel_display(&intel_dsi->base); + struct drm_i915_private *dev_priv = to_i915(display->drm); enum port port; for_each_dsi_port(port, intel_dsi->ports) { - drm_WARN_ON(&dev_priv->drm, intel_dsi->io_wakeref[port]); + drm_WARN_ON(display->drm, intel_dsi->io_wakeref[port]); intel_dsi->io_wakeref[port] = intel_display_power_get(dev_priv, port == PORT_A ? @@ -397,15 +399,15 @@ static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv, static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; for_each_dsi_port(port, intel_dsi->ports) - intel_de_rmw(dev_priv, ICL_DSI_IO_MODECTL(port), + intel_de_rmw(display, ICL_DSI_IO_MODECTL(port), 0, COMBO_PHY_MODE_DSI); - get_dsi_io_power_domains(dev_priv, intel_dsi); + get_dsi_io_power_domains(intel_dsi); } static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) @@ -421,6 +423,7 @@ static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum phy phy; @@ -429,32 +432,33 @@ static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) /* Step 4b(i) set loadgen select for transmit and aux lanes */ for_each_dsi_phy(phy, intel_dsi->phys) { - intel_de_rmw(dev_priv, ICL_PORT_TX_DW4_AUX(phy), LOADGEN_SELECT, 0); + intel_de_rmw(display, ICL_PORT_TX_DW4_AUX(phy), + LOADGEN_SELECT, 0); for (lane = 0; lane <= 3; lane++) - intel_de_rmw(dev_priv, ICL_PORT_TX_DW4_LN(lane, phy), + intel_de_rmw(display, ICL_PORT_TX_DW4_LN(lane, phy), LOADGEN_SELECT, lane != 2 ? LOADGEN_SELECT : 0); } /* Step 4b(ii) set latency optimization for transmit and aux lanes */ for_each_dsi_phy(phy, intel_dsi->phys) { - intel_de_rmw(dev_priv, ICL_PORT_TX_DW2_AUX(phy), + intel_de_rmw(display, ICL_PORT_TX_DW2_AUX(phy), FRC_LATENCY_OPTIM_MASK, FRC_LATENCY_OPTIM_VAL(0x5)); - tmp = intel_de_read(dev_priv, ICL_PORT_TX_DW2_LN(0, phy)); + tmp = intel_de_read(display, ICL_PORT_TX_DW2_LN(0, phy)); tmp &= ~FRC_LATENCY_OPTIM_MASK; tmp |= FRC_LATENCY_OPTIM_VAL(0x5); - intel_de_write(dev_priv, ICL_PORT_TX_DW2_GRP(phy), tmp); + intel_de_write(display, ICL_PORT_TX_DW2_GRP(phy), tmp); /* For EHL, TGL, set latency optimization for PCS_DW1 lanes */ if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv) || - (DISPLAY_VER(dev_priv) >= 12)) { - intel_de_rmw(dev_priv, ICL_PORT_PCS_DW1_AUX(phy), + (DISPLAY_VER(display) >= 12)) { + intel_de_rmw(display, ICL_PORT_PCS_DW1_AUX(phy), LATENCY_OPTIM_MASK, LATENCY_OPTIM_VAL(0)); - tmp = intel_de_read(dev_priv, + tmp = intel_de_read(display, ICL_PORT_PCS_DW1_LN(0, phy)); tmp &= ~LATENCY_OPTIM_MASK; tmp |= LATENCY_OPTIM_VAL(0x1); - intel_de_write(dev_priv, ICL_PORT_PCS_DW1_GRP(phy), + intel_de_write(display, ICL_PORT_PCS_DW1_GRP(phy), tmp); } } @@ -463,17 +467,17 @@ static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum phy phy; /* clear common keeper enable bit */ for_each_dsi_phy(phy, intel_dsi->phys) { - tmp = intel_de_read(dev_priv, ICL_PORT_PCS_DW1_LN(0, phy)); + tmp = intel_de_read(display, ICL_PORT_PCS_DW1_LN(0, phy)); tmp &= ~COMMON_KEEPER_EN; - intel_de_write(dev_priv, ICL_PORT_PCS_DW1_GRP(phy), tmp); - intel_de_rmw(dev_priv, ICL_PORT_PCS_DW1_AUX(phy), COMMON_KEEPER_EN, 0); + intel_de_write(display, ICL_PORT_PCS_DW1_GRP(phy), tmp); + intel_de_rmw(display, ICL_PORT_PCS_DW1_AUX(phy), COMMON_KEEPER_EN, 0); } /* @@ -482,14 +486,15 @@ static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder) * as part of lane phy sequence configuration */ for_each_dsi_phy(phy, intel_dsi->phys) - intel_de_rmw(dev_priv, ICL_PORT_CL_DW5(phy), 0, SUS_CLOCK_CONFIG); + intel_de_rmw(display, ICL_PORT_CL_DW5(phy), 0, + SUS_CLOCK_CONFIG); /* Clear training enable to change swing values */ for_each_dsi_phy(phy, intel_dsi->phys) { - tmp = intel_de_read(dev_priv, ICL_PORT_TX_DW5_LN(0, phy)); + tmp = intel_de_read(display, ICL_PORT_TX_DW5_LN(0, phy)); tmp &= ~TX_TRAINING_EN; - intel_de_write(dev_priv, ICL_PORT_TX_DW5_GRP(phy), tmp); - intel_de_rmw(dev_priv, ICL_PORT_TX_DW5_AUX(phy), TX_TRAINING_EN, 0); + intel_de_write(display, ICL_PORT_TX_DW5_GRP(phy), tmp); + intel_de_rmw(display, ICL_PORT_TX_DW5_AUX(phy), TX_TRAINING_EN, 0); } /* Program swing and de-emphasis */ @@ -497,26 +502,26 @@ static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder) /* Set training enable to trigger update */ for_each_dsi_phy(phy, intel_dsi->phys) { - tmp = intel_de_read(dev_priv, ICL_PORT_TX_DW5_LN(0, phy)); + tmp = intel_de_read(display, ICL_PORT_TX_DW5_LN(0, phy)); tmp |= TX_TRAINING_EN; - intel_de_write(dev_priv, ICL_PORT_TX_DW5_GRP(phy), tmp); - intel_de_rmw(dev_priv, ICL_PORT_TX_DW5_AUX(phy), 0, TX_TRAINING_EN); + intel_de_write(display, ICL_PORT_TX_DW5_GRP(phy), tmp); + intel_de_rmw(display, ICL_PORT_TX_DW5_AUX(phy), 0, TX_TRAINING_EN); } } static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; for_each_dsi_port(port, intel_dsi->ports) { - intel_de_rmw(dev_priv, DDI_BUF_CTL(port), 0, DDI_BUF_CTL_ENABLE); + intel_de_rmw(display, DDI_BUF_CTL(port), 0, DDI_BUF_CTL_ENABLE); - if (wait_for_us(!(intel_de_read(dev_priv, DDI_BUF_CTL(port)) & + if (wait_for_us(!(intel_de_read(display, DDI_BUF_CTL(port)) & DDI_BUF_IS_IDLE), 500)) - drm_err(&dev_priv->drm, "DDI port:%c buffer idle\n", + drm_err(display->drm, "DDI port:%c buffer idle\n", port_name(port)); } } @@ -525,6 +530,7 @@ static void gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; @@ -532,12 +538,12 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder, /* Program DPHY clock lanes timings */ for_each_dsi_port(port, intel_dsi->ports) - intel_de_write(dev_priv, DPHY_CLK_TIMING_PARAM(port), + intel_de_write(display, DPHY_CLK_TIMING_PARAM(port), intel_dsi->dphy_reg); /* Program DPHY data lanes timings */ for_each_dsi_port(port, intel_dsi->ports) - intel_de_write(dev_priv, DPHY_DATA_TIMING_PARAM(port), + intel_de_write(display, DPHY_DATA_TIMING_PARAM(port), intel_dsi->dphy_data_lane_reg); /* @@ -546,10 +552,10 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder, * a value '0' inside TA_PARAM_REGISTERS otherwise * leave all fields at HW default values. */ - if (DISPLAY_VER(dev_priv) == 11) { + if (DISPLAY_VER(display) == 11) { if (afe_clk(encoder, crtc_state) <= 800000) { for_each_dsi_port(port, intel_dsi->ports) - intel_de_rmw(dev_priv, DPHY_TA_TIMING_PARAM(port), + intel_de_rmw(display, DPHY_TA_TIMING_PARAM(port), TA_SURE_MASK, TA_SURE_OVERRIDE | TA_SURE(0)); } @@ -557,7 +563,7 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder, if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) { for_each_dsi_phy(phy, intel_dsi->phys) - intel_de_rmw(dev_priv, ICL_DPHY_CHKN(phy), + intel_de_rmw(display, ICL_DPHY_CHKN(phy), 0, ICL_DPHY_CHKN_AFE_OVER_PPI_STRAP); } } @@ -566,30 +572,30 @@ static void gen11_dsi_setup_timings(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; /* Program T-INIT master registers */ for_each_dsi_port(port, intel_dsi->ports) - intel_de_rmw(dev_priv, ICL_DSI_T_INIT_MASTER(port), + intel_de_rmw(display, ICL_DSI_T_INIT_MASTER(port), DSI_T_INIT_MASTER_MASK, intel_dsi->init_count); /* shadow register inside display core */ for_each_dsi_port(port, intel_dsi->ports) - intel_de_write(dev_priv, DSI_CLK_TIMING_PARAM(port), + intel_de_write(display, DSI_CLK_TIMING_PARAM(port), intel_dsi->dphy_reg); /* shadow register inside display core */ for_each_dsi_port(port, intel_dsi->ports) - intel_de_write(dev_priv, DSI_DATA_TIMING_PARAM(port), + intel_de_write(display, DSI_DATA_TIMING_PARAM(port), intel_dsi->dphy_data_lane_reg); /* shadow register inside display core */ - if (DISPLAY_VER(dev_priv) == 11) { + if (DISPLAY_VER(display) == 11) { if (afe_clk(encoder, crtc_state) <= 800000) { for_each_dsi_port(port, intel_dsi->ports) { - intel_de_rmw(dev_priv, DSI_TA_TIMING_PARAM(port), + intel_de_rmw(display, DSI_TA_TIMING_PARAM(port), TA_SURE_MASK, TA_SURE_OVERRIDE | TA_SURE(0)); } @@ -599,45 +605,45 @@ gen11_dsi_setup_timings(struct intel_encoder *encoder, static void gen11_dsi_gate_clocks(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum phy phy; - mutex_lock(&dev_priv->display.dpll.lock); - tmp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0); + mutex_lock(&display->dpll.lock); + tmp = intel_de_read(display, ICL_DPCLKA_CFGCR0); for_each_dsi_phy(phy, intel_dsi->phys) tmp |= ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); - intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, tmp); - mutex_unlock(&dev_priv->display.dpll.lock); + intel_de_write(display, ICL_DPCLKA_CFGCR0, tmp); + mutex_unlock(&display->dpll.lock); } static void gen11_dsi_ungate_clocks(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum phy phy; - mutex_lock(&dev_priv->display.dpll.lock); - tmp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0); + mutex_lock(&display->dpll.lock); + tmp = intel_de_read(display, ICL_DPCLKA_CFGCR0); for_each_dsi_phy(phy, intel_dsi->phys) tmp &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); - intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, tmp); - mutex_unlock(&dev_priv->display.dpll.lock); + intel_de_write(display, ICL_DPCLKA_CFGCR0, tmp); + mutex_unlock(&display->dpll.lock); } static bool gen11_dsi_is_clock_enabled(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); bool clock_enabled = false; enum phy phy; u32 tmp; - tmp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0); + tmp = intel_de_read(display, ICL_DPCLKA_CFGCR0); for_each_dsi_phy(phy, intel_dsi->phys) { if (!(tmp & ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy))) @@ -650,36 +656,36 @@ static bool gen11_dsi_is_clock_enabled(struct intel_encoder *encoder) static void gen11_dsi_map_pll(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct intel_shared_dpll *pll = crtc_state->shared_dpll; enum phy phy; u32 val; - mutex_lock(&dev_priv->display.dpll.lock); + mutex_lock(&display->dpll.lock); - val = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0); + val = intel_de_read(display, ICL_DPCLKA_CFGCR0); for_each_dsi_phy(phy, intel_dsi->phys) { val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); val |= ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); } - intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val); + intel_de_write(display, ICL_DPCLKA_CFGCR0, val); for_each_dsi_phy(phy, intel_dsi->phys) { val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); } - intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val); + intel_de_write(display, ICL_DPCLKA_CFGCR0, val); - intel_de_posting_read(dev_priv, ICL_DPCLKA_CFGCR0); + intel_de_posting_read(display, ICL_DPCLKA_CFGCR0); - mutex_unlock(&dev_priv->display.dpll.lock); + mutex_unlock(&display->dpll.lock); } static void gen11_dsi_configure_transcoder(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); enum pipe pipe = crtc->pipe; @@ -689,7 +695,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - tmp = intel_de_read(dev_priv, DSI_TRANS_FUNC_CONF(dsi_trans)); + tmp = intel_de_read(display, DSI_TRANS_FUNC_CONF(dsi_trans)); if (intel_dsi->eotp_pkt) tmp &= ~EOTP_DISABLED; @@ -745,7 +751,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, } } - if (DISPLAY_VER(dev_priv) >= 12) { + if (DISPLAY_VER(display) >= 12) { if (is_vid_mode(intel_dsi)) tmp |= BLANKING_PACKET_ENABLE; } @@ -778,15 +784,15 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, tmp |= TE_SOURCE_GPIO; } - intel_de_write(dev_priv, DSI_TRANS_FUNC_CONF(dsi_trans), tmp); + intel_de_write(display, DSI_TRANS_FUNC_CONF(dsi_trans), tmp); } /* enable port sync mode if dual link */ if (intel_dsi->dual_link) { for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_rmw(dev_priv, - TRANS_DDI_FUNC_CTL2(dev_priv, dsi_trans), + intel_de_rmw(display, + TRANS_DDI_FUNC_CTL2(display, dsi_trans), 0, PORT_SYNC_MODE_ENABLE); } @@ -798,10 +804,10 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, dsi_trans = dsi_port_to_transcoder(port); /* select data lane width */ - tmp = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(dev_priv, dsi_trans)); - tmp &= ~DDI_PORT_WIDTH_MASK; - tmp |= DDI_PORT_WIDTH(intel_dsi->lane_count); + tmp = intel_de_read(display, + TRANS_DDI_FUNC_CTL(display, dsi_trans)); + tmp &= ~TRANS_DDI_PORT_WIDTH_MASK; + tmp |= TRANS_DDI_PORT_WIDTH(intel_dsi->lane_count); /* select input pipe */ tmp &= ~TRANS_DDI_EDP_INPUT_MASK; @@ -825,16 +831,16 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, /* enable DDI buffer */ tmp |= TRANS_DDI_FUNC_ENABLE; - intel_de_write(dev_priv, - TRANS_DDI_FUNC_CTL(dev_priv, dsi_trans), tmp); + intel_de_write(display, + TRANS_DDI_FUNC_CTL(display, dsi_trans), tmp); } /* wait for link ready */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - if (wait_for_us((intel_de_read(dev_priv, DSI_TRANS_FUNC_CONF(dsi_trans)) & + if (wait_for_us((intel_de_read(display, DSI_TRANS_FUNC_CONF(dsi_trans)) & LINK_READY), 2500)) - drm_err(&dev_priv->drm, "DSI link not ready\n"); + drm_err(display->drm, "DSI link not ready\n"); } } @@ -842,7 +848,7 @@ static void gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -909,17 +915,17 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, /* minimum hactive as per bspec: 256 pixels */ if (adjusted_mode->crtc_hdisplay < 256) - drm_err(&dev_priv->drm, "hactive is less then 256 pixels\n"); + drm_err(display->drm, "hactive is less then 256 pixels\n"); /* if RGB666 format, then hactive must be multiple of 4 pixels */ if (intel_dsi->pixel_format == MIPI_DSI_FMT_RGB666 && hactive % 4 != 0) - drm_err(&dev_priv->drm, + drm_err(display->drm, "hactive pixels are not multiple of 4\n"); /* program TRANS_HTOTAL register */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_write(dev_priv, TRANS_HTOTAL(dev_priv, dsi_trans), + intel_de_write(display, TRANS_HTOTAL(display, dsi_trans), HACTIVE(hactive - 1) | HTOTAL(htotal - 1)); } @@ -928,12 +934,12 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, if (intel_dsi->video_mode == NON_BURST_SYNC_PULSE) { /* BSPEC: hsync size should be atleast 16 pixels */ if (hsync_size < 16) - drm_err(&dev_priv->drm, + drm_err(display->drm, "hsync size < 16 pixels\n"); } if (hback_porch < 16) - drm_err(&dev_priv->drm, "hback porch < 16 pixels\n"); + drm_err(display->drm, "hback porch < 16 pixels\n"); if (intel_dsi->dual_link) { hsync_start /= 2; @@ -942,8 +948,8 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_write(dev_priv, - TRANS_HSYNC(dev_priv, dsi_trans), + intel_de_write(display, + TRANS_HSYNC(display, dsi_trans), HSYNC_START(hsync_start - 1) | HSYNC_END(hsync_end - 1)); } } @@ -957,22 +963,22 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, * struct drm_display_mode. * For interlace mode: program required pixel minus 2 */ - intel_de_write(dev_priv, TRANS_VTOTAL(dev_priv, dsi_trans), + intel_de_write(display, TRANS_VTOTAL(display, dsi_trans), VACTIVE(vactive - 1) | VTOTAL(vtotal - 1)); } if (vsync_end < vsync_start || vsync_end > vtotal) - drm_err(&dev_priv->drm, "Invalid vsync_end value\n"); + drm_err(display->drm, "Invalid vsync_end value\n"); if (vsync_start < vactive) - drm_err(&dev_priv->drm, "vsync_start less than vactive\n"); + drm_err(display->drm, "vsync_start less than vactive\n"); /* program TRANS_VSYNC register for video mode only */ if (is_vid_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_write(dev_priv, - TRANS_VSYNC(dev_priv, dsi_trans), + intel_de_write(display, + TRANS_VSYNC(display, dsi_trans), VSYNC_START(vsync_start - 1) | VSYNC_END(vsync_end - 1)); } } @@ -986,8 +992,8 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, if (is_vid_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_write(dev_priv, - TRANS_VSYNCSHIFT(dev_priv, dsi_trans), + intel_de_write(display, + TRANS_VSYNCSHIFT(display, dsi_trans), vsync_shift); } } @@ -998,11 +1004,11 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, * FIXME get rid of these local hacks and do it right, * this will not handle eg. delayed vblank correctly. */ - if (DISPLAY_VER(dev_priv) >= 12) { + if (DISPLAY_VER(display) >= 12) { for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_write(dev_priv, - TRANS_VBLANK(dev_priv, dsi_trans), + intel_de_write(display, + TRANS_VBLANK(display, dsi_trans), VBLANK_START(vactive - 1) | VBLANK_END(vtotal - 1)); } } @@ -1010,20 +1016,20 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_rmw(dev_priv, TRANSCONF(dev_priv, dsi_trans), 0, + intel_de_rmw(display, TRANSCONF(display, dsi_trans), 0, TRANSCONF_ENABLE); /* wait for transcoder to be enabled */ - if (intel_de_wait_for_set(dev_priv, TRANSCONF(dev_priv, dsi_trans), + if (intel_de_wait_for_set(display, TRANSCONF(display, dsi_trans), TRANSCONF_STATE_ENABLE, 10)) - drm_err(&dev_priv->drm, + drm_err(display->drm, "DSI transcoder not enabled\n"); } } @@ -1031,7 +1037,7 @@ static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder) static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; @@ -1055,21 +1061,21 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder, dsi_trans = dsi_port_to_transcoder(port); /* program hst_tx_timeout */ - intel_de_rmw(dev_priv, DSI_HSTX_TO(dsi_trans), + intel_de_rmw(display, DSI_HSTX_TO(dsi_trans), HSTX_TIMEOUT_VALUE_MASK, HSTX_TIMEOUT_VALUE(hs_tx_timeout)); /* FIXME: DSI_CALIB_TO */ /* program lp_rx_host timeout */ - intel_de_rmw(dev_priv, DSI_LPRX_HOST_TO(dsi_trans), + intel_de_rmw(display, DSI_LPRX_HOST_TO(dsi_trans), LPRX_TIMEOUT_VALUE_MASK, LPRX_TIMEOUT_VALUE(lp_rx_timeout)); /* FIXME: DSI_PWAIT_TO */ /* program turn around timeout */ - intel_de_rmw(dev_priv, DSI_TA_TO(dsi_trans), + intel_de_rmw(display, DSI_TA_TO(dsi_trans), TA_TIMEOUT_VALUE_MASK, TA_TIMEOUT_VALUE(ta_timeout)); } @@ -1078,7 +1084,7 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder, static void gen11_dsi_config_util_pin(struct intel_encoder *encoder, bool enable) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; @@ -1090,7 +1096,7 @@ static void gen11_dsi_config_util_pin(struct intel_encoder *encoder, if (is_vid_mode(intel_dsi) || (intel_dsi->ports & BIT(PORT_B))) return; - tmp = intel_de_read(dev_priv, UTIL_PIN_CTL); + tmp = intel_de_read(display, UTIL_PIN_CTL); if (enable) { tmp |= UTIL_PIN_DIRECTION_INPUT; @@ -1098,7 +1104,7 @@ static void gen11_dsi_config_util_pin(struct intel_encoder *encoder, } else { tmp &= ~UTIL_PIN_ENABLE; } - intel_de_write(dev_priv, UTIL_PIN_CTL, tmp); + intel_de_write(display, UTIL_PIN_CTL, tmp); } static void @@ -1136,7 +1142,7 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder, static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi; enum port port; @@ -1152,14 +1158,14 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) * FIXME: This uses the number of DW's currently in the payload * receive queue. This is probably not what we want here. */ - tmp = intel_de_read(dev_priv, DSI_CMD_RXCTL(dsi_trans)); + tmp = intel_de_read(display, DSI_CMD_RXCTL(dsi_trans)); tmp &= NUMBER_RX_PLOAD_DW_MASK; /* multiply "Number Rx Payload DW" by 4 to get max value */ tmp = tmp * 4; dsi = intel_dsi->dsi_hosts[port]->device; ret = mipi_dsi_set_maximum_return_packet_size(dsi, tmp); if (ret < 0) - drm_err(&dev_priv->drm, + drm_err(display->drm, "error setting max return pkt size%d\n", tmp); } @@ -1219,10 +1225,10 @@ static void gen11_dsi_pre_enable(struct intel_atomic_state *state, static void icl_apply_kvmr_pipe_a_wa(struct intel_encoder *encoder, enum pipe pipe, bool enable) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); - if (DISPLAY_VER(dev_priv) == 11 && pipe == PIPE_B) - intel_de_rmw(dev_priv, CHICKEN_PAR1_1, + if (DISPLAY_VER(display) == 11 && pipe == PIPE_B) + intel_de_rmw(display, CHICKEN_PAR1_1, IGNORE_KVMR_PIPE_A, enable ? IGNORE_KVMR_PIPE_A : 0); } @@ -1235,13 +1241,13 @@ static void icl_apply_kvmr_pipe_a_wa(struct intel_encoder *encoder, */ static void adlp_set_lp_hs_wakeup_gb(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; - if (DISPLAY_VER(i915) == 13) { + if (DISPLAY_VER(display) == 13) { for_each_dsi_port(port, intel_dsi->ports) - intel_de_rmw(i915, TGL_DSI_CHKN_REG(port), + intel_de_rmw(display, TGL_DSI_CHKN_REG(port), TGL_DSI_CHKN_LSHS_GB_MASK, TGL_DSI_CHKN_LSHS_GB(4)); } @@ -1275,7 +1281,7 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; @@ -1284,13 +1290,13 @@ static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder) dsi_trans = dsi_port_to_transcoder(port); /* disable transcoder */ - intel_de_rmw(dev_priv, TRANSCONF(dev_priv, dsi_trans), + intel_de_rmw(display, TRANSCONF(display, dsi_trans), TRANSCONF_ENABLE, 0); /* wait for transcoder to be disabled */ - if (intel_de_wait_for_clear(dev_priv, TRANSCONF(dev_priv, dsi_trans), + if (intel_de_wait_for_clear(display, TRANSCONF(display, dsi_trans), TRANSCONF_STATE_ENABLE, 50)) - drm_err(&dev_priv->drm, + drm_err(display->drm, "DSI trancoder not disabled\n"); } } @@ -1307,7 +1313,7 @@ static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder) static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; @@ -1316,29 +1322,29 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) /* disable periodic update mode */ if (is_cmd_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) - intel_de_rmw(dev_priv, DSI_CMD_FRMCTL(port), + intel_de_rmw(display, DSI_CMD_FRMCTL(port), DSI_PERIODIC_FRAME_UPDATE_ENABLE, 0); } /* put dsi link in ULPS */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - tmp = intel_de_read(dev_priv, DSI_LP_MSG(dsi_trans)); + tmp = intel_de_read(display, DSI_LP_MSG(dsi_trans)); tmp |= LINK_ENTER_ULPS; tmp &= ~LINK_ULPS_TYPE_LP11; - intel_de_write(dev_priv, DSI_LP_MSG(dsi_trans), tmp); + intel_de_write(display, DSI_LP_MSG(dsi_trans), tmp); - if (wait_for_us((intel_de_read(dev_priv, DSI_LP_MSG(dsi_trans)) & + if (wait_for_us((intel_de_read(display, DSI_LP_MSG(dsi_trans)) & LINK_IN_ULPS), 10)) - drm_err(&dev_priv->drm, "DSI link not in ULPS\n"); + drm_err(display->drm, "DSI link not in ULPS\n"); } /* disable ddi function */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_rmw(dev_priv, - TRANS_DDI_FUNC_CTL(dev_priv, dsi_trans), + intel_de_rmw(display, + TRANS_DDI_FUNC_CTL(display, dsi_trans), TRANS_DDI_FUNC_ENABLE, 0); } @@ -1346,8 +1352,8 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) if (intel_dsi->dual_link) { for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - intel_de_rmw(dev_priv, - TRANS_DDI_FUNC_CTL2(dev_priv, dsi_trans), + intel_de_rmw(display, + TRANS_DDI_FUNC_CTL2(display, dsi_trans), PORT_SYNC_MODE_ENABLE, 0); } } @@ -1355,18 +1361,18 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) static void gen11_dsi_disable_port(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; gen11_dsi_ungate_clocks(encoder); for_each_dsi_port(port, intel_dsi->ports) { - intel_de_rmw(dev_priv, DDI_BUF_CTL(port), DDI_BUF_CTL_ENABLE, 0); + intel_de_rmw(display, DDI_BUF_CTL(port), DDI_BUF_CTL_ENABLE, 0); - if (wait_for_us((intel_de_read(dev_priv, DDI_BUF_CTL(port)) & + if (wait_for_us((intel_de_read(display, DDI_BUF_CTL(port)) & DDI_BUF_IS_IDLE), 8)) - drm_err(&dev_priv->drm, + drm_err(display->drm, "DDI port:%c buffer not idle\n", port_name(port)); } @@ -1375,6 +1381,7 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder) static void gen11_dsi_disable_io_power(struct intel_encoder *encoder) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; @@ -1392,7 +1399,7 @@ static void gen11_dsi_disable_io_power(struct intel_encoder *encoder) /* set mode to DDI */ for_each_dsi_port(port, intel_dsi->ports) - intel_de_rmw(dev_priv, ICL_DSI_IO_MODECTL(port), + intel_de_rmw(display, ICL_DSI_IO_MODECTL(port), COMBO_PHY_MODE_DSI, 0); } @@ -1504,8 +1511,7 @@ static void gen11_dsi_get_timings(struct intel_encoder *encoder, static bool gen11_dsi_is_periodic_cmd_mode(struct intel_dsi *intel_dsi) { - struct drm_device *dev = intel_dsi->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(&intel_dsi->base); enum transcoder dsi_trans; u32 val; @@ -1514,7 +1520,7 @@ static bool gen11_dsi_is_periodic_cmd_mode(struct intel_dsi *intel_dsi) else dsi_trans = TRANSCODER_DSI_0; - val = intel_de_read(dev_priv, DSI_TRANS_FUNC_CONF(dsi_trans)); + val = intel_de_read(display, DSI_TRANS_FUNC_CONF(dsi_trans)); return (val & DSI_PERIODIC_FRAME_UPDATE_ENABLE); } @@ -1557,7 +1563,7 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder, static void gen11_dsi_sync_state(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_crtc *intel_crtc; enum pipe pipe; @@ -1568,9 +1574,9 @@ static void gen11_dsi_sync_state(struct intel_encoder *encoder, pipe = intel_crtc->pipe; /* wa verify 1409054076:icl,jsl,ehl */ - if (DISPLAY_VER(dev_priv) == 11 && pipe == PIPE_B && - !(intel_de_read(dev_priv, CHICKEN_PAR1_1) & IGNORE_KVMR_PIPE_A)) - drm_dbg_kms(&dev_priv->drm, + if (DISPLAY_VER(display) == 11 && pipe == PIPE_B && + !(intel_de_read(display, CHICKEN_PAR1_1) & IGNORE_KVMR_PIPE_A)) + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] BIOS left IGNORE_KVMR_PIPE_A cleared with pipe B enabled\n", encoder->base.base.id, encoder->base.name); @@ -1579,9 +1585,9 @@ static void gen11_dsi_sync_state(struct intel_encoder *encoder, static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; - int dsc_max_bpc = DISPLAY_VER(dev_priv) >= 12 ? 12 : 10; + int dsc_max_bpc = DISPLAY_VER(display) >= 12 ? 12 : 10; bool use_dsc; int ret; @@ -1606,12 +1612,12 @@ static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, return ret; /* DSI specific sanity checks on the common code */ - drm_WARN_ON(&dev_priv->drm, vdsc_cfg->vbr_enable); - drm_WARN_ON(&dev_priv->drm, vdsc_cfg->simple_422); - drm_WARN_ON(&dev_priv->drm, + drm_WARN_ON(display->drm, vdsc_cfg->vbr_enable); + drm_WARN_ON(display->drm, vdsc_cfg->simple_422); + drm_WARN_ON(display->drm, vdsc_cfg->pic_width % vdsc_cfg->slice_width); - drm_WARN_ON(&dev_priv->drm, vdsc_cfg->slice_height < 8); - drm_WARN_ON(&dev_priv->drm, + drm_WARN_ON(display->drm, vdsc_cfg->slice_height < 8); + drm_WARN_ON(display->drm, vdsc_cfg->pic_height % vdsc_cfg->slice_height); ret = drm_dsc_compute_rc_parameters(vdsc_cfg); @@ -1627,7 +1633,7 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct intel_connector *intel_connector = intel_dsi->attached_connector; struct drm_display_mode *adjusted_mode = @@ -1661,7 +1667,7 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, pipe_config->clock_set = true; if (gen11_dsi_dsc_compute_config(encoder, pipe_config)) - drm_dbg_kms(&i915->drm, "Attempting to use DSC failed\n"); + drm_dbg_kms(display->drm, "Attempting to use DSC failed\n"); pipe_config->port_clock = afe_clk(encoder, pipe_config) / 5; @@ -1679,15 +1685,13 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - - get_dsi_io_power_domains(i915, - enc_to_intel_dsi(encoder)); + get_dsi_io_power_domains(enc_to_intel_dsi(encoder)); } static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum transcoder dsi_trans; @@ -1703,8 +1707,8 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - tmp = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(dev_priv, dsi_trans)); + tmp = intel_de_read(display, + TRANS_DDI_FUNC_CTL(display, dsi_trans)); switch (tmp & TRANS_DDI_EDP_INPUT_MASK) { case TRANS_DDI_EDP_INPUT_A_ON: *pipe = PIPE_A; @@ -1719,11 +1723,11 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, *pipe = PIPE_D; break; default: - drm_err(&dev_priv->drm, "Invalid PIPE input\n"); + drm_err(display->drm, "Invalid PIPE input\n"); goto out; } - tmp = intel_de_read(dev_priv, TRANSCONF(dev_priv, dsi_trans)); + tmp = intel_de_read(display, TRANSCONF(display, dsi_trans)); ret = tmp & TRANSCONF_ENABLE; } out: @@ -1833,8 +1837,7 @@ static const struct mipi_dsi_host_ops gen11_dsi_host_ops = { static void icl_dphy_param_init(struct intel_dsi *intel_dsi) { - struct drm_device *dev = intel_dsi->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(&intel_dsi->base); struct intel_connector *connector = intel_dsi->attached_connector; struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; u32 tlpx_ns; @@ -1858,7 +1861,7 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) */ prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * 4, tlpx_ns); if (prepare_cnt > ICL_PREPARE_CNT_MAX) { - drm_dbg_kms(&dev_priv->drm, "prepare_cnt out of range (%d)\n", + drm_dbg_kms(display->drm, "prepare_cnt out of range (%d)\n", prepare_cnt); prepare_cnt = ICL_PREPARE_CNT_MAX; } @@ -1867,7 +1870,7 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) clk_zero_cnt = DIV_ROUND_UP(mipi_config->tclk_prepare_clkzero - ths_prepare_ns, tlpx_ns); if (clk_zero_cnt > ICL_CLK_ZERO_CNT_MAX) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "clk_zero_cnt out of range (%d)\n", clk_zero_cnt); clk_zero_cnt = ICL_CLK_ZERO_CNT_MAX; } @@ -1875,7 +1878,7 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) /* trail cnt in escape clocks*/ trail_cnt = DIV_ROUND_UP(tclk_trail_ns, tlpx_ns); if (trail_cnt > ICL_TRAIL_CNT_MAX) { - drm_dbg_kms(&dev_priv->drm, "trail_cnt out of range (%d)\n", + drm_dbg_kms(display->drm, "trail_cnt out of range (%d)\n", trail_cnt); trail_cnt = ICL_TRAIL_CNT_MAX; } @@ -1883,7 +1886,7 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) /* tclk pre count in escape clocks */ tclk_pre_cnt = DIV_ROUND_UP(mipi_config->tclk_pre, tlpx_ns); if (tclk_pre_cnt > ICL_TCLK_PRE_CNT_MAX) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "tclk_pre_cnt out of range (%d)\n", tclk_pre_cnt); tclk_pre_cnt = ICL_TCLK_PRE_CNT_MAX; } @@ -1892,7 +1895,7 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) hs_zero_cnt = DIV_ROUND_UP(mipi_config->ths_prepare_hszero - ths_prepare_ns, tlpx_ns); if (hs_zero_cnt > ICL_HS_ZERO_CNT_MAX) { - drm_dbg_kms(&dev_priv->drm, "hs_zero_cnt out of range (%d)\n", + drm_dbg_kms(display->drm, "hs_zero_cnt out of range (%d)\n", hs_zero_cnt); hs_zero_cnt = ICL_HS_ZERO_CNT_MAX; } @@ -1900,7 +1903,7 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) /* hs exit zero cnt in escape clocks */ exit_zero_cnt = DIV_ROUND_UP(mipi_config->ths_exit, tlpx_ns); if (exit_zero_cnt > ICL_EXIT_ZERO_CNT_MAX) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "exit_zero_cnt out of range (%d)\n", exit_zero_cnt); exit_zero_cnt = ICL_EXIT_ZERO_CNT_MAX; @@ -1942,10 +1945,9 @@ static void icl_dsi_add_properties(struct intel_connector *connector) fixed_mode->vdisplay); } -void icl_dsi_init(struct drm_i915_private *dev_priv, +void icl_dsi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata) { - struct intel_display *display = &dev_priv->display; struct intel_dsi *intel_dsi; struct intel_encoder *encoder; struct intel_connector *intel_connector; @@ -1973,7 +1975,8 @@ void icl_dsi_init(struct drm_i915_private *dev_priv, encoder->devdata = devdata; /* register DSI encoder with DRM subsystem */ - drm_encoder_init(&dev_priv->drm, &encoder->base, &gen11_dsi_encoder_funcs, + drm_encoder_init(display->drm, &encoder->base, + &gen11_dsi_encoder_funcs, DRM_MODE_ENCODER_DSI, "DSI %c", port_name(port)); encoder->pre_pll_enable = gen11_dsi_pre_pll_enable; @@ -1998,7 +2001,8 @@ void icl_dsi_init(struct drm_i915_private *dev_priv, encoder->shutdown = intel_dsi_shutdown; /* register DSI connector with DRM subsystem */ - drm_connector_init(&dev_priv->drm, connector, &gen11_dsi_connector_funcs, + drm_connector_init(display->drm, connector, + &gen11_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); drm_connector_helper_add(connector, &gen11_dsi_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; @@ -2011,12 +2015,12 @@ void icl_dsi_init(struct drm_i915_private *dev_priv, intel_bios_init_panel_late(display, &intel_connector->panel, encoder->devdata, NULL); - mutex_lock(&dev_priv->drm.mode_config.mutex); + mutex_lock(&display->drm->mode_config.mutex); intel_panel_add_vbt_lfp_fixed_mode(intel_connector); - mutex_unlock(&dev_priv->drm.mode_config.mutex); + mutex_unlock(&display->drm->mode_config.mutex); if (!intel_panel_preferred_fixed_mode(intel_connector)) { - drm_err(&dev_priv->drm, "DSI fixed mode info missing\n"); + drm_err(display->drm, "DSI fixed mode info missing\n"); goto err; } @@ -2029,10 +2033,10 @@ void icl_dsi_init(struct drm_i915_private *dev_priv, else intel_dsi->ports = BIT(port); - if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) + if (drm_WARN_ON(display->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports; - if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) + if (drm_WARN_ON(display->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports; for_each_dsi_port(port, intel_dsi->ports) { @@ -2046,7 +2050,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv, } if (!intel_dsi_vbt_init(intel_dsi, MIPI_DSI_GENERIC_PANEL_ID)) { - drm_dbg_kms(&dev_priv->drm, "no device found\n"); + drm_dbg_kms(display->drm, "no device found\n"); goto err; } diff --git a/drivers/gpu/drm/i915/display/icl_dsi.h b/drivers/gpu/drm/i915/display/icl_dsi.h index 43fa7d72eeb1..099fc50e35b4 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.h +++ b/drivers/gpu/drm/i915/display/icl_dsi.h @@ -6,11 +6,11 @@ #ifndef __ICL_DSI_H__ #define __ICL_DSI_H__ -struct drm_i915_private; struct intel_bios_encoder_data; struct intel_crtc_state; +struct intel_display; -void icl_dsi_init(struct drm_i915_private *dev_priv, +void icl_dsi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata); void icl_dsi_frame_update(struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index e979786aa5cf..5c2a7987cccb 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -790,7 +790,8 @@ skl_next_plane_to_commit(struct intel_atomic_state *state, return NULL; } -void intel_plane_update_noarm(struct intel_plane *plane, +void intel_plane_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -799,10 +800,11 @@ void intel_plane_update_noarm(struct intel_plane *plane, trace_intel_plane_update_noarm(plane, crtc); if (plane->update_noarm) - plane->update_noarm(plane, crtc_state, plane_state); + plane->update_noarm(dsb, plane, crtc_state, plane_state); } -void intel_plane_async_flip(struct intel_plane *plane, +void intel_plane_async_flip(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, bool async_flip) @@ -810,34 +812,37 @@ void intel_plane_async_flip(struct intel_plane *plane, struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); trace_intel_plane_async_flip(plane, crtc, async_flip); - plane->async_flip(plane, crtc_state, plane_state, async_flip); + plane->async_flip(dsb, plane, crtc_state, plane_state, async_flip); } -void intel_plane_update_arm(struct intel_plane *plane, +void intel_plane_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); if (crtc_state->do_async_flip && plane->async_flip) { - intel_plane_async_flip(plane, crtc_state, plane_state, true); + intel_plane_async_flip(dsb, plane, crtc_state, plane_state, true); return; } trace_intel_plane_update_arm(plane, crtc); - plane->update_arm(plane, crtc_state, plane_state); + plane->update_arm(dsb, plane, crtc_state, plane_state); } -void intel_plane_disable_arm(struct intel_plane *plane, +void intel_plane_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); trace_intel_plane_disable_arm(plane, crtc); - plane->disable_arm(plane, crtc_state); + plane->disable_arm(dsb, plane, crtc_state); } -void intel_crtc_planes_update_noarm(struct intel_atomic_state *state, +void intel_crtc_planes_update_noarm(struct intel_dsb *dsb, + struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_crtc_state *new_crtc_state = @@ -862,11 +867,13 @@ void intel_crtc_planes_update_noarm(struct intel_atomic_state *state, /* TODO: for mailbox updates this should be skipped */ if (new_plane_state->uapi.visible || new_plane_state->planar_slave) - intel_plane_update_noarm(plane, new_crtc_state, new_plane_state); + intel_plane_update_noarm(dsb, plane, + new_crtc_state, new_plane_state); } } -static void skl_crtc_planes_update_arm(struct intel_atomic_state *state, +static void skl_crtc_planes_update_arm(struct intel_dsb *dsb, + struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_crtc_state *old_crtc_state = @@ -893,13 +900,14 @@ static void skl_crtc_planes_update_arm(struct intel_atomic_state *state, */ if (new_plane_state->uapi.visible || new_plane_state->planar_slave) - intel_plane_update_arm(plane, new_crtc_state, new_plane_state); + intel_plane_update_arm(dsb, plane, new_crtc_state, new_plane_state); else - intel_plane_disable_arm(plane, new_crtc_state); + intel_plane_disable_arm(dsb, plane, new_crtc_state); } } -static void i9xx_crtc_planes_update_arm(struct intel_atomic_state *state, +static void i9xx_crtc_planes_update_arm(struct intel_dsb *dsb, + struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_crtc_state *new_crtc_state = @@ -919,21 +927,22 @@ static void i9xx_crtc_planes_update_arm(struct intel_atomic_state *state, * would have to be called here as well. */ if (new_plane_state->uapi.visible) - intel_plane_update_arm(plane, new_crtc_state, new_plane_state); + intel_plane_update_arm(dsb, plane, new_crtc_state, new_plane_state); else - intel_plane_disable_arm(plane, new_crtc_state); + intel_plane_disable_arm(dsb, plane, new_crtc_state); } } -void intel_crtc_planes_update_arm(struct intel_atomic_state *state, +void intel_crtc_planes_update_arm(struct intel_dsb *dsb, + struct intel_atomic_state *state, struct intel_crtc *crtc) { struct drm_i915_private *i915 = to_i915(state->base.dev); if (DISPLAY_VER(i915) >= 9) - skl_crtc_planes_update_arm(state, crtc); + skl_crtc_planes_update_arm(dsb, state, crtc); else - i9xx_crtc_planes_update_arm(state, crtc); + i9xx_crtc_planes_update_arm(dsb, state, crtc); } int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state, diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index 6c4fe3596465..0f982f452ff3 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -14,6 +14,7 @@ struct drm_rect; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; +struct intel_dsb; struct intel_plane; struct intel_plane_state; enum plane_id; @@ -32,26 +33,32 @@ void intel_plane_copy_uapi_to_hw_state(struct intel_plane_state *plane_state, struct intel_crtc *crtc); void intel_plane_copy_hw_state(struct intel_plane_state *plane_state, const struct intel_plane_state *from_plane_state); -void intel_plane_async_flip(struct intel_plane *plane, +void intel_plane_async_flip(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, bool async_flip); -void intel_plane_update_noarm(struct intel_plane *plane, +void intel_plane_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); -void intel_plane_update_arm(struct intel_plane *plane, +void intel_plane_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); -void intel_plane_disable_arm(struct intel_plane *plane, +void intel_plane_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state); struct intel_plane *intel_plane_alloc(void); void intel_plane_free(struct intel_plane *plane); struct drm_plane_state *intel_plane_duplicate_state(struct drm_plane *plane); void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state); -void intel_crtc_planes_update_noarm(struct intel_atomic_state *state, +void intel_crtc_planes_update_noarm(struct intel_dsb *dsb, + struct intel_atomic_state *state, struct intel_crtc *crtc); -void intel_crtc_planes_update_arm(struct intel_atomic_state *state, +void intel_crtc_planes_update_arm(struct intel_dsb *dsbx, + struct intel_atomic_state *state, struct intel_crtc *crtc); int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index ec55cb651d44..1fbe3cd452c1 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1912,6 +1912,23 @@ void intel_color_post_update(const struct intel_crtc_state *crtc_state) i915->display.funcs.color->color_post_update(crtc_state); } +void intel_color_modeset(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + intel_color_load_luts(crtc_state); + intel_color_commit_noarm(crtc_state); + intel_color_commit_arm(crtc_state); + + if (DISPLAY_VER(display) < 9) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + + /* update DSPCNTR to configure gamma/csc for pipe bottom color */ + plane->disable_arm(NULL, plane, crtc_state); + } +} + void intel_color_prepare_commit(struct intel_atomic_state *state, struct intel_crtc *crtc) { diff --git a/drivers/gpu/drm/i915/display/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h index 79f230a1709a..ab3aaec06a2a 100644 --- a/drivers/gpu/drm/i915/display/intel_color.h +++ b/drivers/gpu/drm/i915/display/intel_color.h @@ -28,6 +28,7 @@ void intel_color_commit_noarm(const struct intel_crtc_state *crtc_state); void intel_color_commit_arm(const struct intel_crtc_state *crtc_state); void intel_color_post_update(const struct intel_crtc_state *crtc_state); void intel_color_load_luts(const struct intel_crtc_state *crtc_state); +void intel_color_modeset(const struct intel_crtc_state *crtc_state); void intel_color_get_config(struct intel_crtc_state *crtc_state); bool intel_color_lut_equal(const struct intel_crtc_state *crtc_state, const struct drm_property_blob *blob1, diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 9ad53e1cbbd0..aeadb834d332 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -275,7 +275,8 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state, } /* TODO: split into noarm+arm pair */ -static void i845_cursor_update_arm(struct intel_plane *plane, +static void i845_cursor_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -315,10 +316,11 @@ static void i845_cursor_update_arm(struct intel_plane *plane, } } -static void i845_cursor_disable_arm(struct intel_plane *plane, +static void i845_cursor_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - i845_cursor_update_arm(plane, crtc_state, NULL); + i845_cursor_update_arm(dsb, plane, crtc_state, NULL); } static bool i845_cursor_get_hw_state(struct intel_plane *plane, @@ -527,22 +529,25 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, return 0; } -static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane, +static void i9xx_cursor_disable_sel_fetch_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; if (!crtc_state->enable_psr2_sel_fetch) return; - intel_de_write_fw(dev_priv, SEL_FETCH_CUR_CTL(pipe), 0); + intel_de_write_dsb(display, dsb, SEL_FETCH_CUR_CTL(pipe), 0); } -static void wa_16021440873(struct intel_plane *plane, +static void wa_16021440873(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); u32 ctl = plane_state->ctl; int et_y_position = drm_rect_height(&crtc_state->pipe_src) + 1; @@ -551,16 +556,18 @@ static void wa_16021440873(struct intel_plane *plane, ctl &= ~MCURSOR_MODE_MASK; ctl |= MCURSOR_MODE_64_2B; - intel_de_write_fw(dev_priv, SEL_FETCH_CUR_CTL(pipe), ctl); + intel_de_write_dsb(display, dsb, SEL_FETCH_CUR_CTL(pipe), ctl); - intel_de_write(dev_priv, CURPOS_ERLY_TPT(dev_priv, pipe), - CURSOR_POS_Y(et_y_position)); + intel_de_write_dsb(display, dsb, CURPOS_ERLY_TPT(dev_priv, pipe), + CURSOR_POS_Y(et_y_position)); } -static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane, +static void i9xx_cursor_update_sel_fetch_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; @@ -571,19 +578,17 @@ static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane, if (crtc_state->enable_psr2_su_region_et) { u32 val = intel_cursor_position(crtc_state, plane_state, true); - intel_de_write_fw(dev_priv, - CURPOS_ERLY_TPT(dev_priv, pipe), - val); + + intel_de_write_dsb(display, dsb, CURPOS_ERLY_TPT(dev_priv, pipe), val); } - intel_de_write_fw(dev_priv, SEL_FETCH_CUR_CTL(pipe), - plane_state->ctl); + intel_de_write_dsb(display, dsb, SEL_FETCH_CUR_CTL(pipe), plane_state->ctl); } else { /* Wa_16021440873 */ if (crtc_state->enable_psr2_su_region_et) - wa_16021440873(plane, crtc_state, plane_state); + wa_16021440873(dsb, plane, crtc_state, plane_state); else - i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state); + i9xx_cursor_disable_sel_fetch_arm(dsb, plane, crtc_state); } } @@ -610,9 +615,11 @@ static u32 skl_cursor_wm_reg_val(const struct skl_wm_level *level) return val; } -static void skl_write_cursor_wm(struct intel_plane *plane, +static void skl_write_cursor_wm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *i915 = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; @@ -622,30 +629,32 @@ static void skl_write_cursor_wm(struct intel_plane *plane, int level; for (level = 0; level < i915->display.wm.num_levels; level++) - intel_de_write_fw(i915, CUR_WM(pipe, level), - skl_cursor_wm_reg_val(skl_plane_wm_level(pipe_wm, plane_id, level))); + intel_de_write_dsb(display, dsb, CUR_WM(pipe, level), + skl_cursor_wm_reg_val(skl_plane_wm_level(pipe_wm, plane_id, level))); - intel_de_write_fw(i915, CUR_WM_TRANS(pipe), - skl_cursor_wm_reg_val(skl_plane_trans_wm(pipe_wm, plane_id))); + intel_de_write_dsb(display, dsb, CUR_WM_TRANS(pipe), + skl_cursor_wm_reg_val(skl_plane_trans_wm(pipe_wm, plane_id))); if (HAS_HW_SAGV_WM(i915)) { const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; - intel_de_write_fw(i915, CUR_WM_SAGV(pipe), - skl_cursor_wm_reg_val(&wm->sagv.wm0)); - intel_de_write_fw(i915, CUR_WM_SAGV_TRANS(pipe), - skl_cursor_wm_reg_val(&wm->sagv.trans_wm)); + intel_de_write_dsb(display, dsb, CUR_WM_SAGV(pipe), + skl_cursor_wm_reg_val(&wm->sagv.wm0)); + intel_de_write_dsb(display, dsb, CUR_WM_SAGV_TRANS(pipe), + skl_cursor_wm_reg_val(&wm->sagv.trans_wm)); } - intel_de_write_fw(i915, CUR_BUF_CFG(pipe), - skl_cursor_ddb_reg_val(ddb)); + intel_de_write_dsb(display, dsb, CUR_BUF_CFG(pipe), + skl_cursor_ddb_reg_val(ddb)); } /* TODO: split into noarm+arm pair */ -static void i9xx_cursor_update_arm(struct intel_plane *plane, +static void i9xx_cursor_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; u32 cntl = 0, base = 0, pos = 0, fbc_ctl = 0; @@ -685,38 +694,36 @@ static void i9xx_cursor_update_arm(struct intel_plane *plane, */ if (DISPLAY_VER(dev_priv) >= 9) - skl_write_cursor_wm(plane, crtc_state); + skl_write_cursor_wm(dsb, plane, crtc_state); if (plane_state) - i9xx_cursor_update_sel_fetch_arm(plane, crtc_state, - plane_state); + i9xx_cursor_update_sel_fetch_arm(dsb, plane, crtc_state, plane_state); else - i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state); + i9xx_cursor_disable_sel_fetch_arm(dsb, plane, crtc_state); if (plane->cursor.base != base || plane->cursor.size != fbc_ctl || plane->cursor.cntl != cntl) { if (HAS_CUR_FBC(dev_priv)) - intel_de_write_fw(dev_priv, - CUR_FBC_CTL(dev_priv, pipe), - fbc_ctl); - intel_de_write_fw(dev_priv, CURCNTR(dev_priv, pipe), cntl); - intel_de_write_fw(dev_priv, CURPOS(dev_priv, pipe), pos); - intel_de_write_fw(dev_priv, CURBASE(dev_priv, pipe), base); + intel_de_write_dsb(display, dsb, CUR_FBC_CTL(dev_priv, pipe), fbc_ctl); + intel_de_write_dsb(display, dsb, CURCNTR(dev_priv, pipe), cntl); + intel_de_write_dsb(display, dsb, CURPOS(dev_priv, pipe), pos); + intel_de_write_dsb(display, dsb, CURBASE(dev_priv, pipe), base); plane->cursor.base = base; plane->cursor.size = fbc_ctl; plane->cursor.cntl = cntl; } else { - intel_de_write_fw(dev_priv, CURPOS(dev_priv, pipe), pos); - intel_de_write_fw(dev_priv, CURBASE(dev_priv, pipe), base); + intel_de_write_dsb(display, dsb, CURPOS(dev_priv, pipe), pos); + intel_de_write_dsb(display, dsb, CURBASE(dev_priv, pipe), base); } } -static void i9xx_cursor_disable_arm(struct intel_plane *plane, +static void i9xx_cursor_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - i9xx_cursor_update_arm(plane, crtc_state, NULL); + i9xx_cursor_update_arm(dsb, plane, crtc_state, NULL); } static bool i9xx_cursor_get_hw_state(struct intel_plane *plane, @@ -905,10 +912,10 @@ intel_legacy_cursor_update(struct drm_plane *_plane, } if (new_plane_state->uapi.visible) { - intel_plane_update_noarm(plane, crtc_state, new_plane_state); - intel_plane_update_arm(plane, crtc_state, new_plane_state); + intel_plane_update_noarm(NULL, plane, crtc_state, new_plane_state); + intel_plane_update_arm(NULL, plane, crtc_state, new_plane_state); } else { - intel_plane_disable_arm(plane, crtc_state); + intel_plane_disable_arm(NULL, plane, crtc_state); } local_irq_enable(); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b1c294236cc8..5b24460c0134 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3363,7 +3363,7 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, intel_de_rmw(dev_priv, XELPDP_PORT_BUF_CTL1(dev_priv, port), XELPDP_PORT_WIDTH_MASK | XELPDP_PORT_REVERSAL, port_buf); - buf_ctl |= DDI_PORT_WIDTH(lane_count); + buf_ctl |= DDI_PORT_WIDTH(crtc_state->lane_count); if (DISPLAY_VER(dev_priv) >= 20) buf_ctl |= XE2LPD_DDI_BUF_D2D_LINK_ENABLE; @@ -4413,8 +4413,7 @@ static const struct drm_encoder_funcs intel_ddi_funcs = { .late_register = intel_ddi_encoder_late_register, }; -static struct intel_connector * -intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) +static int intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); struct intel_connector *connector; @@ -4422,7 +4421,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) connector = intel_connector_alloc(); if (!connector) - return NULL; + return -ENOMEM; dig_port->dp.output_reg = DDI_BUF_CTL(port); if (DISPLAY_VER(i915) >= 14) @@ -4437,7 +4436,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) if (!intel_dp_init_connector(dig_port, connector)) { kfree(connector); - return NULL; + return -EINVAL; } if (dig_port->base.type == INTEL_OUTPUT_EDP) { @@ -4453,7 +4452,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) } } - return connector; + return 0; } static int intel_hdmi_reset_link(struct intel_encoder *encoder, @@ -4623,20 +4622,28 @@ static bool bdw_digital_port_connected(struct intel_encoder *encoder) return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit; } -static struct intel_connector * -intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) +static int intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) { struct intel_connector *connector; enum port port = dig_port->base.port; connector = intel_connector_alloc(); if (!connector) - return NULL; + return -ENOMEM; dig_port->hdmi.hdmi_reg = DDI_BUF_CTL(port); - intel_hdmi_init_connector(dig_port, connector); - return connector; + if (!intel_hdmi_init_connector(dig_port, connector)) { + /* + * HDMI connector init failures may just mean conflicting DDC + * pins or not having enough lanes. Handle them gracefully, but + * don't fail the entire DDI init. + */ + dig_port->hdmi.hdmi_reg = INVALID_MMIO_REG; + kfree(connector); + } + + return 0; } static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) @@ -4791,8 +4798,10 @@ static void intel_ddi_tc_encoder_suspend_complete(struct intel_encoder *encoder) static void intel_ddi_encoder_shutdown(struct intel_encoder *encoder) { - intel_dp_encoder_shutdown(encoder); - intel_hdmi_encoder_shutdown(encoder); + if (intel_encoder_is_dp(encoder)) + intel_dp_encoder_shutdown(encoder); + if (intel_encoder_is_hdmi(encoder)) + intel_hdmi_encoder_shutdown(encoder); } static void intel_ddi_tc_encoder_shutdown_complete(struct intel_encoder *encoder) @@ -4888,7 +4897,7 @@ void intel_ddi_init(struct intel_display *display, if (!assert_has_icl_dsi(dev_priv)) return; - icl_dsi_init(dev_priv, devdata); + icl_dsi_init(display, devdata); return; } @@ -5185,7 +5194,7 @@ void intel_ddi_init(struct intel_display *display, intel_infoframe_init(dig_port); if (init_dp) { - if (!intel_ddi_init_dp_connector(dig_port)) + if (intel_ddi_init_dp_connector(dig_port)) goto err; dig_port->hpd_pulse = intel_dp_hpd_pulse; @@ -5199,7 +5208,7 @@ void intel_ddi_init(struct intel_display *display, * but leave it just in case we have some really bad VBTs... */ if (encoder->type != INTEL_OUTPUT_EDP && init_hdmi) { - if (!intel_ddi_init_hdmi_connector(dig_port)) + if (intel_ddi_init_hdmi_connector(dig_port)) goto err; } diff --git a/drivers/gpu/drm/i915/display/intel_de.h b/drivers/gpu/drm/i915/display/intel_de.h index e881bfeafb47..e017cd4a8168 100644 --- a/drivers/gpu/drm/i915/display/intel_de.h +++ b/drivers/gpu/drm/i915/display/intel_de.h @@ -8,6 +8,7 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "intel_dsb.h" #include "intel_uncore.h" static inline struct intel_uncore *__to_uncore(struct intel_display *display) @@ -233,4 +234,14 @@ __intel_de_write_notrace(struct intel_display *display, i915_reg_t reg, } #define intel_de_write_notrace(p,...) __intel_de_write_notrace(__to_intel_display(p), __VA_ARGS__) +static __always_inline void +intel_de_write_dsb(struct intel_display *display, struct intel_dsb *dsb, + i915_reg_t reg, u32 val) +{ + if (dsb) + intel_dsb_reg_write(dsb, reg, val); + else + intel_de_write_fw(display, reg, val); +} + #endif /* __INTEL_DE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b4ef4d59da1a..4f8899cd125d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -135,7 +135,8 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_state); static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state); static void hsw_set_transconf(const struct intel_crtc_state *crtc_state); -static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state); +static void bdw_set_pipe_misc(struct intel_dsb *dsb, + const struct intel_crtc_state *crtc_state); /* returns HPLL frequency in kHz */ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) @@ -715,7 +716,7 @@ void intel_plane_disable_noatomic(struct intel_crtc *crtc, if (DISPLAY_VER(dev_priv) == 2 && !crtc_state->active_planes) intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); - intel_plane_disable_arm(plane, crtc_state); + intel_plane_disable_arm(NULL, plane, crtc_state); intel_crtc_wait_for_next_vblank(crtc); } @@ -1005,7 +1006,9 @@ static bool vrr_params_changed(const struct intel_crtc_state *old_crtc_state, old_crtc_state->vrr.vmin != new_crtc_state->vrr.vmin || old_crtc_state->vrr.vmax != new_crtc_state->vrr.vmax || old_crtc_state->vrr.guardband != new_crtc_state->vrr.guardband || - old_crtc_state->vrr.pipeline_full != new_crtc_state->vrr.pipeline_full; + old_crtc_state->vrr.pipeline_full != new_crtc_state->vrr.pipeline_full || + old_crtc_state->vrr.vsync_start != new_crtc_state->vrr.vsync_start || + old_crtc_state->vrr.vsync_end != new_crtc_state->vrr.vsync_end; } static bool cmrr_params_changed(const struct intel_crtc_state *old_crtc_state, @@ -1172,8 +1175,8 @@ static void intel_crtc_async_flip_disable_wa(struct intel_atomic_state *state, * Apart from the async flip bit we want to * preserve the old state for the plane. */ - intel_plane_async_flip(plane, old_crtc_state, - old_plane_state, false); + intel_plane_async_flip(NULL, plane, + old_crtc_state, old_plane_state, false); need_vbl_wait = true; } } @@ -1315,7 +1318,7 @@ static void intel_crtc_disable_planes(struct intel_atomic_state *state, !(update_mask & BIT(plane->id))) continue; - intel_plane_disable_arm(plane, new_crtc_state); + intel_plane_disable_arm(NULL, plane, new_crtc_state); if (old_plane_state->uapi.visible) fb_bits |= plane->frontbuffer_bit; @@ -1502,14 +1505,6 @@ static void intel_encoders_update_pipe(struct intel_atomic_state *state, } } -static void intel_disable_primary_plane(const struct intel_crtc_state *crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_plane *plane = to_intel_plane(crtc->base.primary); - - plane->disable_arm(plane, crtc_state); -} - static void ilk_configure_cpu_transcoder(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -1575,11 +1570,7 @@ static void ilk_crtc_enable(struct intel_atomic_state *state, * On ILK+ LUT must be loaded before the pipe is running but with * clocks enabled */ - intel_color_load_luts(new_crtc_state); - intel_color_commit_noarm(new_crtc_state); - intel_color_commit_arm(new_crtc_state); - /* update DSPCNTR to configure gamma for pipe bottom color */ - intel_disable_primary_plane(new_crtc_state); + intel_color_modeset(new_crtc_state); intel_initial_watermarks(state, crtc); intel_enable_transcoder(new_crtc_state); @@ -1716,7 +1707,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, intel_set_pipe_src_size(pipe_crtc_state); if (DISPLAY_VER(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - bdw_set_pipe_misc(pipe_crtc_state); + bdw_set_pipe_misc(NULL, pipe_crtc_state); } if (!transcoder_is_dsi(cpu_transcoder)) @@ -1741,12 +1732,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, * On ILK+ LUT must be loaded before the pipe is running but with * clocks enabled */ - intel_color_load_luts(pipe_crtc_state); - intel_color_commit_noarm(pipe_crtc_state); - intel_color_commit_arm(pipe_crtc_state); - /* update DSPCNTR to configure gamma/csc for pipe bottom color */ - if (DISPLAY_VER(dev_priv) < 9) - intel_disable_primary_plane(pipe_crtc_state); + intel_color_modeset(pipe_crtc_state); hsw_set_linetime_wm(pipe_crtc_state); @@ -2147,11 +2133,7 @@ static void valleyview_crtc_enable(struct intel_atomic_state *state, i9xx_pfit_enable(new_crtc_state); - intel_color_load_luts(new_crtc_state); - intel_color_commit_noarm(new_crtc_state); - intel_color_commit_arm(new_crtc_state); - /* update DSPCNTR to configure gamma for pipe bottom color */ - intel_disable_primary_plane(new_crtc_state); + intel_color_modeset(new_crtc_state); intel_initial_watermarks(state, crtc); intel_enable_transcoder(new_crtc_state); @@ -2187,11 +2169,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state *state, i9xx_pfit_enable(new_crtc_state); - intel_color_load_luts(new_crtc_state); - intel_color_commit_noarm(new_crtc_state); - intel_color_commit_arm(new_crtc_state); - /* update DSPCNTR to configure gamma for pipe bottom color */ - intel_disable_primary_plane(new_crtc_state); + intel_color_modeset(new_crtc_state); if (!intel_initial_watermarks(state, crtc)) intel_update_watermarks(dev_priv); @@ -3246,9 +3224,11 @@ static void hsw_set_transconf(const struct intel_crtc_state *crtc_state) intel_de_posting_read(dev_priv, TRANSCONF(dev_priv, cpu_transcoder)); } -static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state) +static void bdw_set_pipe_misc(struct intel_dsb *dsb, + const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_display *display = to_intel_display(crtc->base.dev); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 val = 0; @@ -3293,7 +3273,7 @@ static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state) if (IS_BROADWELL(dev_priv)) val |= PIPE_MISC_PSR_MASK_SPRITE_ENABLE; - intel_de_write(dev_priv, PIPE_MISC(crtc->pipe), val); + intel_de_write_dsb(display, dsb, PIPE_MISC(crtc->pipe), val); } int bdw_get_pipe_misc_bpp(struct intel_crtc *crtc) @@ -6369,12 +6349,30 @@ static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct in static int intel_joiner_add_affected_crtcs(struct intel_atomic_state *state) { struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_plane_state *plane_state; struct intel_crtc_state *crtc_state; + struct intel_plane *plane; struct intel_crtc *crtc; u8 affected_pipes = 0; u8 modeset_pipes = 0; int i; + /* + * Any plane which is in use by the joiner needs its crtc. + * Pull those in first as this will not have happened yet + * if the plane remains disabled according to uapi. + */ + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + crtc = to_intel_crtc(plane_state->hw.crtc); + if (!crtc) + continue; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + /* Now pull in all joined crtcs */ for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { affected_pipes |= crtc_state->joiner_pipes; if (intel_crtc_needs_modeset(crtc_state)) @@ -6828,7 +6826,7 @@ static void commit_pipe_pre_planes(struct intel_atomic_state *state, intel_color_commit_arm(new_crtc_state); if (DISPLAY_VER(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - bdw_set_pipe_misc(new_crtc_state); + bdw_set_pipe_misc(NULL, new_crtc_state); if (intel_crtc_needs_fastset(new_crtc_state)) intel_pipe_fastset(old_crtc_state, new_crtc_state); @@ -6928,7 +6926,7 @@ static void intel_pre_update_crtc(struct intel_atomic_state *state, intel_crtc_needs_color_update(new_crtc_state)) intel_color_commit_noarm(new_crtc_state); - intel_crtc_planes_update_noarm(state, crtc); + intel_crtc_planes_update_noarm(NULL, state, crtc); } static void intel_update_crtc(struct intel_atomic_state *state, @@ -6944,7 +6942,7 @@ static void intel_update_crtc(struct intel_atomic_state *state, commit_pipe_pre_planes(state, crtc); - intel_crtc_planes_update_arm(state, crtc); + intel_crtc_planes_update_arm(NULL, state, crtc); commit_pipe_post_planes(state, crtc); @@ -7442,9 +7440,6 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) /* Now enable the clocks, plane, pipe, and connectors that we set up. */ dev_priv->display.funcs.display->commit_modeset_enables(state); - if (state->modeset) - intel_set_cdclk_post_plane_update(state); - intel_wait_for_vblank_workers(state); /* FIXME: We should call drm_atomic_helper_commit_hw_done() here @@ -7525,6 +7520,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_verify_planes(state); intel_sagv_post_plane_update(state); + if (state->modeset) + intel_set_cdclk_post_plane_update(state); intel_pmdemand_post_plane_update(state); drm_atomic_helper_commit_hw_done(&state->base); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index f29e5dc3db91..9812191e7ef2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1036,6 +1036,10 @@ struct intel_csc_matrix { u16 postoff[3]; }; +void intel_io_mmio_fw_write(void *ctx, i915_reg_t reg, u32 val); + +typedef void (*intel_io_reg_write)(void *ctx, i915_reg_t reg, u32 val); + struct intel_crtc_state { /* * uapi (drm) state. This is the software state shown to userspace. @@ -1578,22 +1582,26 @@ struct intel_plane { u32 pixel_format, u64 modifier, unsigned int rotation); /* Write all non-self arming plane registers */ - void (*update_noarm)(struct intel_plane *plane, + void (*update_noarm)(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); /* Write all self-arming plane registers */ - void (*update_arm)(struct intel_plane *plane, + void (*update_arm)(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); /* Disable the plane, must arm */ - void (*disable_arm)(struct intel_plane *plane, + void (*disable_arm)(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state); bool (*get_hw_state)(struct intel_plane *plane, enum pipe *pipe); int (*check_plane)(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); int (*min_cdclk)(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); - void (*async_flip)(struct intel_plane *plane, + void (*async_flip)(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, bool async_flip); @@ -2067,6 +2075,19 @@ static inline bool intel_encoder_is_dp(struct intel_encoder *encoder) } } +static inline bool intel_encoder_is_hdmi(struct intel_encoder *encoder) +{ + switch (encoder->type) { + case INTEL_OUTPUT_HDMI: + return true; + case INTEL_OUTPUT_DDI: + /* See if the HDMI encoder is valid. */ + return i915_mmio_reg_valid(enc_to_intel_hdmi(encoder)->hdmi_reg); + default: + return false; + } +} + static inline struct intel_lspcon * enc_to_intel_lspcon(struct intel_encoder *encoder) { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 90fa73575feb..af80f1ac8880 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1506,6 +1506,12 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, u8 *link_bw, u8 *rate_select) { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + /* FIXME g4x can't generate an exact 2.7GHz with the 96MHz non-SSC refclk */ + if (IS_G4X(i915) && port_clock == 268800) + port_clock = 270000; + /* eDP 1.4 rate select method. */ if (intel_dp->use_rate_select) { *link_bw = 0; @@ -2022,11 +2028,10 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) { - if (valid_dsc_bpp[i] < dsc_min_bpp) + for (i = ARRAY_SIZE(valid_dsc_bpp) - 1; i >= 0; i--) { + if (valid_dsc_bpp[i] < dsc_min_bpp || + valid_dsc_bpp[i] > dsc_max_bpp) continue; - if (valid_dsc_bpp[i] > dsc_max_bpp) - break; ret = dsc_compute_link_config(intel_dp, pipe_config, @@ -2738,7 +2743,6 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC); - /* Currently only DP_AS_SDP_AVT_FIXED_VTOTAL mode supported */ as_sdp->sdp_type = DP_SDP_ADAPTIVE_SYNC; as_sdp->length = 0x9; as_sdp->duration_incr_ms = 0; @@ -2750,7 +2754,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, as_sdp->target_rr = drm_mode_vrefresh(adjusted_mode); as_sdp->target_rr_divider = true; } else { - as_sdp->mode = DP_AS_SDP_AVT_FIXED_VTOTAL; + as_sdp->mode = DP_AS_SDP_AVT_DYNAMIC_VTOTAL; as_sdp->vtotal = adjusted_mode->vtotal; as_sdp->target_rr = 0; } @@ -4302,6 +4306,24 @@ intel_dp_mst_disconnect(struct intel_dp *intel_dp) static bool intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *esi) { + struct intel_display *display = to_intel_display(intel_dp); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + /* + * Display WA for HSD #13013007775: mtl/arl/lnl + * Read the sink count and link service IRQ registers in separate + * transactions to prevent disconnecting the sink on a TBT link + * inadvertently. + */ + if (IS_DISPLAY_VER(display, 14, 20) && !IS_BATTLEMAGE(i915)) { + if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT_ESI, esi, 3) != 3) + return false; + + /* DP_SINK_COUNT_ESI + 3 == DP_LINK_SERVICE_IRQ_VECTOR_ESI0 */ + return drm_dp_dpcd_readb(&intel_dp->aux, DP_LINK_SERVICE_IRQ_VECTOR_ESI0, + &esi[3]) == 1; + } + return drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT_ESI, esi, 4) == 4; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 40bedc31d6bf..5d8f93d4cdc6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1561,7 +1561,7 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) { lt_err(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clear\n"); - return false; + goto out; } if (intel_dp_128b132b_lane_eq(intel_dp, crtc_state) && @@ -1573,6 +1573,19 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, passed ? "passed" : "failed", crtc_state->port_clock, crtc_state->lane_count); +out: + /* + * Ensure that the training pattern does get set to TPS2 even in case + * of a failure, as is the case at the end of a passing link training + * and what is expected by the transcoder. Leaving TPS1 set (and + * disabling the link train mode in DP_TP_CTL later from TPS1 directly) + * would result in a stuck transcoder HW state and flip-done timeouts + * later in the modeset sequence. + */ + if (!passed) + intel_dp_program_link_training_pattern(intel_dp, crtc_state, + DP_PHY_DPRX, DP_TRAINING_PATTERN_2); + return passed; } diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 35557d98d7a7..3d16e9406dc6 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1613,7 +1613,7 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * * arithmetic related to alignment and offset calculation. */ if (is_gen12_ccs_cc_plane(&fb->base, i)) { - if (IS_ALIGNED(fb->base.offsets[i], PAGE_SIZE)) + if (IS_ALIGNED(fb->base.offsets[i], 64)) continue; else return -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index cd9ee171e0df..c5b2fbaeff89 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3015,7 +3015,7 @@ void intel_infoframe_init(struct intel_digital_port *dig_port) } } -void intel_hdmi_init_connector(struct intel_digital_port *dig_port, +bool intel_hdmi_init_connector(struct intel_digital_port *dig_port, struct intel_connector *intel_connector) { struct intel_display *display = to_intel_display(dig_port); @@ -3033,17 +3033,17 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, intel_encoder->base.base.id, intel_encoder->base.name); if (DISPLAY_VER(display) < 12 && drm_WARN_ON(dev, port == PORT_A)) - return; + return false; if (drm_WARN(dev, dig_port->max_lanes < 4, "Not enough lanes (%d) for HDMI on [ENCODER:%d:%s]\n", dig_port->max_lanes, intel_encoder->base.base.id, intel_encoder->base.name)) - return; + return false; ddc_pin = intel_hdmi_ddc_pin(intel_encoder); if (!ddc_pin) - return; + return false; drm_connector_init_with_ddc(dev, connector, &intel_hdmi_connector_funcs, @@ -3088,6 +3088,8 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, &conn_info); if (!intel_hdmi->cec_notifier) drm_dbg_kms(display->drm, "CEC notifier get failed\n"); + + return true; } /* diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index 9b97623665c5..fc64a3affc71 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -22,7 +22,7 @@ struct intel_encoder; struct intel_hdmi; union hdmi_infoframe; -void intel_hdmi_init_connector(struct intel_digital_port *dig_port, +bool intel_hdmi_init_connector(struct intel_digital_port *dig_port, struct intel_connector *intel_connector); bool intel_hdmi_compute_has_hdmi_sink(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_psr_regs.h b/drivers/gpu/drm/i915/display/intel_psr_regs.h index 642bb15fb547..25c0424e34db 100644 --- a/drivers/gpu/drm/i915/display/intel_psr_regs.h +++ b/drivers/gpu/drm/i915/display/intel_psr_regs.h @@ -314,8 +314,8 @@ #define PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(20, 16) #define PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) #define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(12, 8) -#define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) +#define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION_MASK, val) #define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(4, 0) -#define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) +#define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK, val) #endif /* __INTEL_PSR_REGS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index e657b09ede99..e6fadcef58e0 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -378,7 +378,8 @@ static void vlv_sprite_update_gamma(const struct intel_plane_state *plane_state) } static void -vlv_sprite_update_noarm(struct intel_plane *plane, +vlv_sprite_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -399,7 +400,8 @@ vlv_sprite_update_noarm(struct intel_plane *plane, } static void -vlv_sprite_update_arm(struct intel_plane *plane, +vlv_sprite_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -449,7 +451,8 @@ vlv_sprite_update_arm(struct intel_plane *plane, } static void -vlv_sprite_disable_arm(struct intel_plane *plane, +vlv_sprite_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(plane->base.dev); @@ -795,7 +798,8 @@ static void ivb_sprite_update_gamma(const struct intel_plane_state *plane_state) } static void -ivb_sprite_update_noarm(struct intel_plane *plane, +ivb_sprite_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -826,7 +830,8 @@ ivb_sprite_update_noarm(struct intel_plane *plane, } static void -ivb_sprite_update_arm(struct intel_plane *plane, +ivb_sprite_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -874,7 +879,8 @@ ivb_sprite_update_arm(struct intel_plane *plane, } static void -ivb_sprite_disable_arm(struct intel_plane *plane, +ivb_sprite_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(plane->base.dev); @@ -1133,7 +1139,8 @@ static void ilk_sprite_update_gamma(const struct intel_plane_state *plane_state) } static void -g4x_sprite_update_noarm(struct intel_plane *plane, +g4x_sprite_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -1162,7 +1169,8 @@ g4x_sprite_update_noarm(struct intel_plane *plane, } static void -g4x_sprite_update_arm(struct intel_plane *plane, +g4x_sprite_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -1206,7 +1214,8 @@ g4x_sprite_update_arm(struct intel_plane *plane, } static void -g4x_sprite_disable_arm(struct intel_plane *plane, +g4x_sprite_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(plane->base.dev); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index c8720d31d101..7f77a76309bd 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -105,8 +105,6 @@ static const u32 icl_sdr_y_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_sdr_uv_plane_formats[] = { @@ -133,8 +131,6 @@ static const u32 icl_sdr_uv_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_hdr_plane_formats[] = { @@ -593,11 +589,11 @@ static u32 skl_plane_min_alignment(struct intel_plane *plane, * in full-range YCbCr. */ static void -icl_program_input_csc(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, +icl_program_input_csc(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; @@ -641,31 +637,31 @@ icl_program_input_csc(struct intel_plane *plane, }; const u16 *csc = input_csc_matrix[plane_state->hw.color_encoding]; - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 0), - ROFF(csc[0]) | GOFF(csc[1])); - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 1), - BOFF(csc[2])); - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 2), - ROFF(csc[3]) | GOFF(csc[4])); - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 3), - BOFF(csc[5])); - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 4), - ROFF(csc[6]) | GOFF(csc[7])); - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 5), - BOFF(csc[8])); - - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 0), - PREOFF_YUV_TO_RGB_HI); - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), - PREOFF_YUV_TO_RGB_ME); - intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 2), - PREOFF_YUV_TO_RGB_LO); - intel_de_write_fw(dev_priv, - PLANE_INPUT_CSC_POSTOFF(pipe, plane_id, 0), 0x0); - intel_de_write_fw(dev_priv, - PLANE_INPUT_CSC_POSTOFF(pipe, plane_id, 1), 0x0); - intel_de_write_fw(dev_priv, - PLANE_INPUT_CSC_POSTOFF(pipe, plane_id, 2), 0x0); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 0), + ROFF(csc[0]) | GOFF(csc[1])); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 1), + BOFF(csc[2])); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 2), + ROFF(csc[3]) | GOFF(csc[4])); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 3), + BOFF(csc[5])); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 4), + ROFF(csc[6]) | GOFF(csc[7])); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 5), + BOFF(csc[8])); + + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 0), + PREOFF_YUV_TO_RGB_HI); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), + PREOFF_YUV_TO_RGB_ME); + intel_de_write_dsb(display, dsb, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 2), + PREOFF_YUV_TO_RGB_LO); + intel_de_write_dsb(display, dsb, + PLANE_INPUT_CSC_POSTOFF(pipe, plane_id, 0), 0x0); + intel_de_write_dsb(display, dsb, + PLANE_INPUT_CSC_POSTOFF(pipe, plane_id, 1), 0x0); + intel_de_write_dsb(display, dsb, + PLANE_INPUT_CSC_POSTOFF(pipe, plane_id, 2), 0x0); } static unsigned int skl_plane_stride_mult(const struct drm_framebuffer *fb, @@ -719,9 +715,11 @@ static u32 skl_plane_wm_reg_val(const struct skl_wm_level *level) return val; } -static void skl_write_plane_wm(struct intel_plane *plane, +static void skl_write_plane_wm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *i915 = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; @@ -733,71 +731,75 @@ static void skl_write_plane_wm(struct intel_plane *plane, int level; for (level = 0; level < i915->display.wm.num_levels; level++) - intel_de_write_fw(i915, PLANE_WM(pipe, plane_id, level), - skl_plane_wm_reg_val(skl_plane_wm_level(pipe_wm, plane_id, level))); + intel_de_write_dsb(display, dsb, PLANE_WM(pipe, plane_id, level), + skl_plane_wm_reg_val(skl_plane_wm_level(pipe_wm, plane_id, level))); - intel_de_write_fw(i915, PLANE_WM_TRANS(pipe, plane_id), - skl_plane_wm_reg_val(skl_plane_trans_wm(pipe_wm, plane_id))); + intel_de_write_dsb(display, dsb, PLANE_WM_TRANS(pipe, plane_id), + skl_plane_wm_reg_val(skl_plane_trans_wm(pipe_wm, plane_id))); if (HAS_HW_SAGV_WM(i915)) { const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; - intel_de_write_fw(i915, PLANE_WM_SAGV(pipe, plane_id), - skl_plane_wm_reg_val(&wm->sagv.wm0)); - intel_de_write_fw(i915, PLANE_WM_SAGV_TRANS(pipe, plane_id), - skl_plane_wm_reg_val(&wm->sagv.trans_wm)); + intel_de_write_dsb(display, dsb, PLANE_WM_SAGV(pipe, plane_id), + skl_plane_wm_reg_val(&wm->sagv.wm0)); + intel_de_write_dsb(display, dsb, PLANE_WM_SAGV_TRANS(pipe, plane_id), + skl_plane_wm_reg_val(&wm->sagv.trans_wm)); } - intel_de_write_fw(i915, PLANE_BUF_CFG(pipe, plane_id), - skl_plane_ddb_reg_val(ddb)); + intel_de_write_dsb(display, dsb, PLANE_BUF_CFG(pipe, plane_id), + skl_plane_ddb_reg_val(ddb)); if (DISPLAY_VER(i915) < 11) - intel_de_write_fw(i915, PLANE_NV12_BUF_CFG(pipe, plane_id), - skl_plane_ddb_reg_val(ddb_y)); + intel_de_write_dsb(display, dsb, PLANE_NV12_BUF_CFG(pipe, plane_id), + skl_plane_ddb_reg_val(ddb_y)); } static void -skl_plane_disable_arm(struct intel_plane *plane, +skl_plane_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - skl_write_plane_wm(plane, crtc_state); + skl_write_plane_wm(dsb, plane, crtc_state); - intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), 0); - intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0); + intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), 0); + intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), 0); } -static void icl_plane_disable_sel_fetch_arm(struct intel_plane *plane, +static void icl_plane_disable_sel_fetch_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; if (!crtc_state->enable_psr2_sel_fetch) return; - intel_de_write_fw(i915, SEL_FETCH_PLANE_CTL(pipe, plane->id), 0); + intel_de_write_dsb(display, dsb, SEL_FETCH_PLANE_CTL(pipe, plane->id), 0); } static void -icl_plane_disable_arm(struct intel_plane *plane, +icl_plane_disable_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; if (icl_is_hdr_plane(dev_priv, plane_id)) - intel_de_write_fw(dev_priv, PLANE_CUS_CTL(pipe, plane_id), 0); + intel_de_write_dsb(display, dsb, PLANE_CUS_CTL(pipe, plane_id), 0); - skl_write_plane_wm(plane, crtc_state); + skl_write_plane_wm(dsb, plane, crtc_state); - icl_plane_disable_sel_fetch_arm(plane, crtc_state); - intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), 0); - intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0); + icl_plane_disable_sel_fetch_arm(dsb, plane, crtc_state); + intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), 0); + intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), 0); } static bool @@ -1234,28 +1236,30 @@ static u32 skl_plane_keymsk(const struct intel_plane_state *plane_state) return keymsk; } -static void icl_plane_csc_load_black(struct intel_plane *plane) +static void icl_plane_csc_load_black(struct intel_dsb *dsb, + struct intel_plane *plane, + const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - intel_de_write_fw(i915, PLANE_CSC_COEFF(pipe, plane_id, 0), 0); - intel_de_write_fw(i915, PLANE_CSC_COEFF(pipe, plane_id, 1), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_COEFF(pipe, plane_id, 0), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_COEFF(pipe, plane_id, 1), 0); - intel_de_write_fw(i915, PLANE_CSC_COEFF(pipe, plane_id, 2), 0); - intel_de_write_fw(i915, PLANE_CSC_COEFF(pipe, plane_id, 3), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_COEFF(pipe, plane_id, 2), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_COEFF(pipe, plane_id, 3), 0); - intel_de_write_fw(i915, PLANE_CSC_COEFF(pipe, plane_id, 4), 0); - intel_de_write_fw(i915, PLANE_CSC_COEFF(pipe, plane_id, 5), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_COEFF(pipe, plane_id, 4), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_COEFF(pipe, plane_id, 5), 0); - intel_de_write_fw(i915, PLANE_CSC_PREOFF(pipe, plane_id, 0), 0); - intel_de_write_fw(i915, PLANE_CSC_PREOFF(pipe, plane_id, 1), 0); - intel_de_write_fw(i915, PLANE_CSC_PREOFF(pipe, plane_id, 2), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_PREOFF(pipe, plane_id, 0), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_PREOFF(pipe, plane_id, 1), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_PREOFF(pipe, plane_id, 2), 0); - intel_de_write_fw(i915, PLANE_CSC_POSTOFF(pipe, plane_id, 0), 0); - intel_de_write_fw(i915, PLANE_CSC_POSTOFF(pipe, plane_id, 1), 0); - intel_de_write_fw(i915, PLANE_CSC_POSTOFF(pipe, plane_id, 2), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_POSTOFF(pipe, plane_id, 0), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_POSTOFF(pipe, plane_id, 1), 0); + intel_de_write_dsb(display, dsb, PLANE_CSC_POSTOFF(pipe, plane_id, 2), 0); } static int icl_plane_color_plane(const struct intel_plane_state *plane_state) @@ -1268,11 +1272,12 @@ static int icl_plane_color_plane(const struct intel_plane_state *plane_state) } static void -skl_plane_update_noarm(struct intel_plane *plane, +skl_plane_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; u32 stride = skl_plane_stride(plane_state, 0); @@ -1287,21 +1292,23 @@ skl_plane_update_noarm(struct intel_plane *plane, crtc_y = 0; } - intel_de_write_fw(dev_priv, PLANE_STRIDE(pipe, plane_id), - PLANE_STRIDE_(stride)); - intel_de_write_fw(dev_priv, PLANE_POS(pipe, plane_id), - PLANE_POS_Y(crtc_y) | PLANE_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, PLANE_SIZE(pipe, plane_id), - PLANE_HEIGHT(src_h - 1) | PLANE_WIDTH(src_w - 1)); + intel_de_write_dsb(display, dsb, PLANE_STRIDE(pipe, plane_id), + PLANE_STRIDE_(stride)); + intel_de_write_dsb(display, dsb, PLANE_POS(pipe, plane_id), + PLANE_POS_Y(crtc_y) | PLANE_POS_X(crtc_x)); + intel_de_write_dsb(display, dsb, PLANE_SIZE(pipe, plane_id), + PLANE_HEIGHT(src_h - 1) | PLANE_WIDTH(src_w - 1)); - skl_write_plane_wm(plane, crtc_state); + skl_write_plane_wm(dsb, plane, crtc_state); } static void -skl_plane_update_arm(struct intel_plane *plane, +skl_plane_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; @@ -1321,22 +1328,26 @@ skl_plane_update_arm(struct intel_plane *plane, plane_color_ctl = plane_state->color_ctl | glk_plane_color_ctl_crtc(crtc_state); - intel_de_write_fw(dev_priv, PLANE_KEYVAL(pipe, plane_id), skl_plane_keyval(plane_state)); - intel_de_write_fw(dev_priv, PLANE_KEYMSK(pipe, plane_id), skl_plane_keymsk(plane_state)); - intel_de_write_fw(dev_priv, PLANE_KEYMAX(pipe, plane_id), skl_plane_keymax(plane_state)); + intel_de_write_dsb(display, dsb, PLANE_KEYVAL(pipe, plane_id), + skl_plane_keyval(plane_state)); + intel_de_write_dsb(display, dsb, PLANE_KEYMSK(pipe, plane_id), + skl_plane_keymsk(plane_state)); + intel_de_write_dsb(display, dsb, PLANE_KEYMAX(pipe, plane_id), + skl_plane_keymax(plane_state)); - intel_de_write_fw(dev_priv, PLANE_OFFSET(pipe, plane_id), - PLANE_OFFSET_Y(y) | PLANE_OFFSET_X(x)); + intel_de_write_dsb(display, dsb, PLANE_OFFSET(pipe, plane_id), + PLANE_OFFSET_Y(y) | PLANE_OFFSET_X(x)); - intel_de_write_fw(dev_priv, PLANE_AUX_DIST(pipe, plane_id), - skl_plane_aux_dist(plane_state, 0)); + intel_de_write_dsb(display, dsb, PLANE_AUX_DIST(pipe, plane_id), + skl_plane_aux_dist(plane_state, 0)); - intel_de_write_fw(dev_priv, PLANE_AUX_OFFSET(pipe, plane_id), - PLANE_OFFSET_Y(plane_state->view.color_plane[1].y) | - PLANE_OFFSET_X(plane_state->view.color_plane[1].x)); + intel_de_write_dsb(display, dsb, PLANE_AUX_OFFSET(pipe, plane_id), + PLANE_OFFSET_Y(plane_state->view.color_plane[1].y) | + PLANE_OFFSET_X(plane_state->view.color_plane[1].x)); if (DISPLAY_VER(dev_priv) >= 10) - intel_de_write_fw(dev_priv, PLANE_COLOR_CTL(pipe, plane_id), plane_color_ctl); + intel_de_write_dsb(display, dsb, PLANE_COLOR_CTL(pipe, plane_id), + plane_color_ctl); /* * Enable the scaler before the plane so that we don't @@ -1353,17 +1364,19 @@ skl_plane_update_arm(struct intel_plane *plane, * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), plane_ctl); - intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), - skl_plane_surf(plane_state, 0)); + intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), + plane_ctl); + intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), + skl_plane_surf(plane_state, 0)); } -static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane, +static void icl_plane_update_sel_fetch_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, int color_plane) { - struct drm_i915_private *i915 = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; const struct drm_rect *clip; u32 val; @@ -1380,7 +1393,7 @@ static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane, y = (clip->y1 + plane_state->uapi.dst.y1); val = y << 16; val |= plane_state->uapi.dst.x1; - intel_de_write_fw(i915, SEL_FETCH_PLANE_POS(pipe, plane->id), val); + intel_de_write_dsb(display, dsb, SEL_FETCH_PLANE_POS(pipe, plane->id), val); x = plane_state->view.color_plane[color_plane].x; @@ -1395,20 +1408,21 @@ static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane, val = y << 16 | x; - intel_de_write_fw(i915, SEL_FETCH_PLANE_OFFSET(pipe, plane->id), - val); + intel_de_write_dsb(display, dsb, SEL_FETCH_PLANE_OFFSET(pipe, plane->id), val); /* Sizes are 0 based */ val = (drm_rect_height(clip) - 1) << 16; val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1; - intel_de_write_fw(i915, SEL_FETCH_PLANE_SIZE(pipe, plane->id), val); + intel_de_write_dsb(display, dsb, SEL_FETCH_PLANE_SIZE(pipe, plane->id), val); } static void -icl_plane_update_noarm(struct intel_plane *plane, +icl_plane_update_noarm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; @@ -1432,76 +1446,82 @@ icl_plane_update_noarm(struct intel_plane *plane, crtc_y = 0; } - intel_de_write_fw(dev_priv, PLANE_STRIDE(pipe, plane_id), - PLANE_STRIDE_(stride)); - intel_de_write_fw(dev_priv, PLANE_POS(pipe, plane_id), - PLANE_POS_Y(crtc_y) | PLANE_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, PLANE_SIZE(pipe, plane_id), - PLANE_HEIGHT(src_h - 1) | PLANE_WIDTH(src_w - 1)); + intel_de_write_dsb(display, dsb, PLANE_STRIDE(pipe, plane_id), + PLANE_STRIDE_(stride)); + intel_de_write_dsb(display, dsb, PLANE_POS(pipe, plane_id), + PLANE_POS_Y(crtc_y) | PLANE_POS_X(crtc_x)); + intel_de_write_dsb(display, dsb, PLANE_SIZE(pipe, plane_id), + PLANE_HEIGHT(src_h - 1) | PLANE_WIDTH(src_w - 1)); - intel_de_write_fw(dev_priv, PLANE_KEYVAL(pipe, plane_id), skl_plane_keyval(plane_state)); - intel_de_write_fw(dev_priv, PLANE_KEYMSK(pipe, plane_id), skl_plane_keymsk(plane_state)); - intel_de_write_fw(dev_priv, PLANE_KEYMAX(pipe, plane_id), skl_plane_keymax(plane_state)); + intel_de_write_dsb(display, dsb, PLANE_KEYVAL(pipe, plane_id), + skl_plane_keyval(plane_state)); + intel_de_write_dsb(display, dsb, PLANE_KEYMSK(pipe, plane_id), + skl_plane_keymsk(plane_state)); + intel_de_write_dsb(display, dsb, PLANE_KEYMAX(pipe, plane_id), + skl_plane_keymax(plane_state)); - intel_de_write_fw(dev_priv, PLANE_OFFSET(pipe, plane_id), - PLANE_OFFSET_Y(y) | PLANE_OFFSET_X(x)); + intel_de_write_dsb(display, dsb, PLANE_OFFSET(pipe, plane_id), + PLANE_OFFSET_Y(y) | PLANE_OFFSET_X(x)); if (intel_fb_is_rc_ccs_cc_modifier(fb->modifier)) { - intel_de_write_fw(dev_priv, PLANE_CC_VAL(pipe, plane_id, 0), - lower_32_bits(plane_state->ccval)); - intel_de_write_fw(dev_priv, PLANE_CC_VAL(pipe, plane_id, 1), - upper_32_bits(plane_state->ccval)); + intel_de_write_dsb(display, dsb, PLANE_CC_VAL(pipe, plane_id, 0), + lower_32_bits(plane_state->ccval)); + intel_de_write_dsb(display, dsb, PLANE_CC_VAL(pipe, plane_id, 1), + upper_32_bits(plane_state->ccval)); } /* FLAT CCS doesn't need to program AUX_DIST */ if (!HAS_FLAT_CCS(dev_priv) && DISPLAY_VER(dev_priv) < 20) - intel_de_write_fw(dev_priv, PLANE_AUX_DIST(pipe, plane_id), - skl_plane_aux_dist(plane_state, color_plane)); + intel_de_write_dsb(display, dsb, PLANE_AUX_DIST(pipe, plane_id), + skl_plane_aux_dist(plane_state, color_plane)); if (icl_is_hdr_plane(dev_priv, plane_id)) - intel_de_write_fw(dev_priv, PLANE_CUS_CTL(pipe, plane_id), - plane_state->cus_ctl); + intel_de_write_dsb(display, dsb, PLANE_CUS_CTL(pipe, plane_id), + plane_state->cus_ctl); - intel_de_write_fw(dev_priv, PLANE_COLOR_CTL(pipe, plane_id), plane_color_ctl); + intel_de_write_dsb(display, dsb, PLANE_COLOR_CTL(pipe, plane_id), + plane_color_ctl); if (fb->format->is_yuv && icl_is_hdr_plane(dev_priv, plane_id)) - icl_program_input_csc(plane, crtc_state, plane_state); + icl_program_input_csc(dsb, plane, plane_state); - skl_write_plane_wm(plane, crtc_state); + skl_write_plane_wm(dsb, plane, crtc_state); /* * FIXME: pxp session invalidation can hit any time even at time of commit * or after the commit, display content will be garbage. */ if (plane_state->force_black) - icl_plane_csc_load_black(plane); + icl_plane_csc_load_black(dsb, plane, crtc_state); - icl_plane_update_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane); + icl_plane_update_sel_fetch_noarm(dsb, plane, crtc_state, plane_state, color_plane); } -static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane, +static void icl_plane_update_sel_fetch_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; if (!crtc_state->enable_psr2_sel_fetch) return; if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0) - intel_de_write_fw(i915, SEL_FETCH_PLANE_CTL(pipe, plane->id), - SEL_FETCH_PLANE_CTL_ENABLE); + intel_de_write_dsb(display, dsb, SEL_FETCH_PLANE_CTL(pipe, plane->id), + SEL_FETCH_PLANE_CTL_ENABLE); else - icl_plane_disable_sel_fetch_arm(plane, crtc_state); + icl_plane_disable_sel_fetch_arm(dsb, plane, crtc_state); } static void -icl_plane_update_arm(struct intel_plane *plane, +icl_plane_update_arm(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; int color_plane = icl_plane_color_plane(plane_state); @@ -1520,25 +1540,27 @@ icl_plane_update_arm(struct intel_plane *plane, if (plane_state->scaler_id >= 0) skl_program_plane_scaler(plane, crtc_state, plane_state); - icl_plane_update_sel_fetch_arm(plane, crtc_state, plane_state); + icl_plane_update_sel_fetch_arm(dsb, plane, crtc_state, plane_state); /* * The control register self-arms if the plane was previously * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), plane_ctl); - intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), - skl_plane_surf(plane_state, color_plane)); + intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), + plane_ctl); + intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), + skl_plane_surf(plane_state, color_plane)); } static void -skl_plane_async_flip(struct intel_plane *plane, +skl_plane_async_flip(struct intel_dsb *dsb, + struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, bool async_flip) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; u32 plane_ctl = plane_state->ctl; @@ -1548,9 +1570,10 @@ skl_plane_async_flip(struct intel_plane *plane, if (async_flip) plane_ctl |= PLANE_CTL_ASYNC_FLIP; - intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), plane_ctl); - intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), - skl_plane_surf(plane_state, 0)); + intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), + plane_ctl); + intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), + skl_plane_surf(plane_state, 0)); } static bool intel_format_is_p01x(u32 format) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index d21f3fb39706..3c7789ca6207 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1059,7 +1059,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, BXT_MIPI_TRANS_VACTIVE(port)); adjusted_mode->crtc_vtotal = intel_de_read(display, - BXT_MIPI_TRANS_VTOTAL(port)); + BXT_MIPI_TRANS_VTOTAL(port)) + 1; hactive = adjusted_mode->crtc_hdisplay; hfp = intel_de_read(display, MIPI_HFP_COUNT(display, port)); @@ -1264,7 +1264,7 @@ static void set_dsi_timings(struct intel_encoder *encoder, intel_de_write(display, BXT_MIPI_TRANS_VACTIVE(port), adjusted_mode->crtc_vdisplay); intel_de_write(display, BXT_MIPI_TRANS_VTOTAL(port), - adjusted_mode->crtc_vtotal); + adjusted_mode->crtc_vtotal - 1); } intel_de_write(display, MIPI_HACTIVE_AREA_COUNT(display, port), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 21274aa9bddd..c3dabb857960 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -164,6 +164,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) * 4 - Support multiple fault handlers per object depending on object's * backing storage (a.k.a. MMAP_OFFSET). * + * 5 - Support multiple partial mmaps(mmap part of BO + unmap a offset, multiple + * times with different size and offset). + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -191,7 +194,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 4; + return 5; } static inline struct i915_gtt_view diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index fe69f2c8527d..ae3343c81a64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -209,8 +209,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct address_space *mapping = obj->base.filp->f_mapping; unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; - struct sgt_iter sgt_iter; - struct page *page; int ret; /* @@ -239,9 +237,7 @@ rebuild_st: * for PAGE_SIZE chunks instead may be helpful. */ if (max_segment > PAGE_SIZE) { - for_each_sgt_page(page, sgt_iter, st) - put_page(page); - sg_free_table(st); + shmem_sg_free_table(st, mapping, false, false); kfree(st); max_segment = PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 1e925c75fb08..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9378d5901c49..9ca42589da4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,21 +117,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - /* - * BSpec 52698 - Render powergating must be off. - * FIXME BSpec is outdated, disabling powergating for MTL is just - * temporary wa and should be removed after fixing real cause - * of forcewake timeouts. - */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - pg_enable = - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; - else - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 72277bc8322e..f84fa09cdb33 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -575,7 +575,6 @@ static int ring_context_alloc(struct intel_context *ce) /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -588,6 +587,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ee12ee0ed418..b48373b16677 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, atomic_inc(&guc->outstanding_submission_g2h); ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - if (ret) + if (ret && g2h_len_dw) atomic_dec(&guc->outstanding_submission_g2h); return ret; @@ -3422,18 +3422,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) * GuC is active, lets destroy this context, but at this point we can still be racing * with suspend, so we undo everything if the H2G fails in deregister_context so * that GuC reset will find this context during clean up. + * + * There is a race condition where the reset code could have altered + * this context's state and done a wakeref put before we try to + * deregister it here. So check if the context is still set to be + * destroyed before undoing earlier changes, to avoid two wakeref puts + * on the same context. */ ret = deregister_context(ce, ce->guc_id.id); if (ret) { - spin_lock(&ce->guc_state.lock); - set_context_registered(ce); - clr_context_destroyed(ce); - spin_unlock(&ce->guc_state.lock); + bool pending_destroyed; + spin_lock_irqsave(&ce->guc_state.lock, flags); + pending_destroyed = context_destroyed(ce); + if (pending_destroyed) { + set_context_registered(ce); + clr_context_destroyed(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. */ - intel_wakeref_put_async(>->wakeref); + if (pending_destroyed) + intel_wakeref_put_async(>->wakeref); } return ret; @@ -5511,12 +5522,20 @@ static inline void guc_log_context(struct drm_printer *p, { drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); - drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", - ce->ring->head, - ce->lrc_reg_state[CTX_RING_HEAD]); - drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", - ce->ring->tail, - ce->lrc_reg_state[CTX_RING_TAIL]); + if (intel_context_pin_if_active(ce)) { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + intel_context_unpin(ce); + } else { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n", + ce->ring->head); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n", + ce->ring->tail); + } drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 2d9152eb7282..24fdce844d9e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc) } } +void intel_huc_fini_late(struct intel_huc *huc) +{ + delayed_huc_load_fini(huc); +} + #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") static int check_huc_loading_mode(struct intel_huc *huc) { @@ -414,12 +419,6 @@ out: void intel_huc_fini(struct intel_huc *huc) { - /* - * the fence is initialized in init_early, so we need to clean it up - * even if HuC loading is off. - */ - delayed_huc_load_fini(huc); - if (huc->heci_pkt) i915_vma_unpin_and_release(&huc->heci_pkt, 0); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index ba5cb08e9e7b..09aff3148f7d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -55,6 +55,7 @@ struct intel_huc { int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); +void intel_huc_fini_late(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); void intel_huc_suspend(struct intel_huc *huc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 5b8080ec5315..4f751ce74214 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc) void intel_uc_driver_late_release(struct intel_uc *uc) { + intel_huc_fini_late(&uc->huc); } /** diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 908f910420c2..4ef45520e199 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -222,7 +222,6 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) u8 *buf; struct opregion_header *header; struct vbt v; - const char opregion_signature[16] = OPREGION_SIGNATURE; gvt_dbg_core("init vgpu%d opregion\n", vgpu->id); vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | @@ -236,8 +235,10 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) /* emulated opregion with VBT mailbox only */ buf = (u8 *)vgpu_opregion(vgpu)->va; header = (struct opregion_header *)buf; - memcpy(header->signature, opregion_signature, - sizeof(opregion_signature)); + + static_assert(sizeof(header->signature) == sizeof(OPREGION_SIGNATURE) - 1); + memcpy(header->signature, OPREGION_SIGNATURE, sizeof(header->signature)); + header->size = 0x8; header->opregion_ver = 0x02000000; header->mboxes = MBOX_VBT; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 21eb0c5b320d..5cc302ad13e1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -111,11 +111,11 @@ static unsigned int config_bit(const u64 config) return other_bit(config); } -static u32 config_mask(const u64 config) +static __always_inline u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); - if (__builtin_constant_p(config)) + if (__builtin_constant_p(bit)) BUILD_BUG_ON(bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); @@ -124,7 +124,7 @@ static u32 config_mask(const u64 config) BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); - return BIT(config_bit(config)); + return BIT(bit); } static bool is_engine_event(struct perf_event *event) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 41f4350a7c6c..b7f521a9b337 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3869,7 +3869,7 @@ enum skl_power_gate { #define DDI_BUF_IS_IDLE (1 << 7) #define DDI_BUF_CTL_TC_PHY_OWNERSHIP REG_BIT(6) #define DDI_A_4_LANES (1 << 4) -#define DDI_PORT_WIDTH(width) (((width) - 1) << 1) +#define DDI_PORT_WIDTH(width) (((width) == 3 ? 4 : ((width) - 1)) << 1) #define DDI_PORT_WIDTH_MASK (7 << 1) #define DDI_PORT_WIDTH_SHIFT 1 #define DDI_INIT_DISPLAY_DETECTED (1 << 0) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da3..4969d3de2bac 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } -#endif +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); +#endif #endif /*__INTEL_PXP_GSCCS_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5c397a2df70e..5d27e1c733c5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -168,7 +168,7 @@ static int igt_ppgtt_alloc(void *arg) return PTR_ERR(ppgtt); if (!ppgtt->vm.allocate_va_range) - goto err_ppgtt_cleanup; + goto ppgtt_vm_put; /* * While we only allocate the page tables here and so we could @@ -236,7 +236,7 @@ err_ppgtt_cleanup: goto retry; } i915_gem_ww_ctx_fini(&ww); - +ppgtt_vm_put: i915_vm_put(&ppgtt->vm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index acae30a04a94..0122719ee921 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -73,8 +73,8 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! */ request = mock_request(rcs0(i915)->kernel_context, HZ / 10); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_add(request); @@ -91,8 +91,8 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_get(request); @@ -160,8 +160,8 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); @@ -219,8 +219,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); intel_context_put(ce); - if (!request) { - err = -ENOMEM; + if (IS_ERR(request)) { + err = PTR_ERR(request); goto err_context_0; } @@ -237,8 +237,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); intel_context_put(ce); - if (!vip) { - err = -ENOMEM; + if (IS_ERR(vip)) { + err = PTR_ERR(vip); goto err_context_1; } diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index fee76c1d2f45..889281819c5b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -23,7 +23,9 @@ #include <linux/random.h> +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_regs.h" #include "gt/uc/intel_gsc_fw.h" #include "i915_driver.h" @@ -253,11 +255,27 @@ int i915_mock_selftests(void) int i915_live_selftests(struct pci_dev *pdev) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct intel_uncore *uncore = &i915->uncore; int err; + u32 pg_enable; + intel_wakeref_t wakeref; if (!i915_selftest.live) return 0; + /* + * FIXME Disable render powergating, this is temporary wa and should be removed + * after fixing real cause of forcewake timeouts. + */ + with_intel_runtime_pm(uncore->rpm, wakeref) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 00), IP_VER(12, 74))) { + pg_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + if (pg_enable & GEN9_RENDER_PG_ENABLE) + intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, + pg_enable & ~GEN9_RENDER_PG_ENABLE); + } + } + __wait_gsc_proxy_completed(i915); __wait_gsc_huc_load_completed(i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 09f747228dff..1b0cf073e964 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay) /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = intel_context_create_request(ce); if (IS_ERR(request)) - return NULL; + return request; request->mock.delay = delay; return request; diff --git a/drivers/gpu/drm/imagination/pvr_fw.c b/drivers/gpu/drm/imagination/pvr_fw.c index 3debc9870a82..d09c4c684116 100644 --- a/drivers/gpu/drm/imagination/pvr_fw.c +++ b/drivers/gpu/drm/imagination/pvr_fw.c @@ -732,7 +732,7 @@ pvr_fw_process(struct pvr_device *pvr_dev) fw_mem->core_data, fw_mem->core_code_alloc_size); if (err) - goto err_free_fw_core_data_obj; + goto err_free_kdata; memcpy(fw_code_ptr, fw_mem->code, fw_mem->code_alloc_size); memcpy(fw_data_ptr, fw_mem->data, fw_mem->data_alloc_size); @@ -742,10 +742,14 @@ pvr_fw_process(struct pvr_device *pvr_dev) memcpy(fw_core_data_ptr, fw_mem->core_data, fw_mem->core_data_alloc_size); /* We're finished with the firmware section memory on the CPU, unmap. */ - if (fw_core_data_ptr) + if (fw_core_data_ptr) { pvr_fw_object_vunmap(fw_mem->core_data_obj); - if (fw_core_code_ptr) + fw_core_data_ptr = NULL; + } + if (fw_core_code_ptr) { pvr_fw_object_vunmap(fw_mem->core_code_obj); + fw_core_code_ptr = NULL; + } pvr_fw_object_vunmap(fw_mem->data_obj); fw_data_ptr = NULL; pvr_fw_object_vunmap(fw_mem->code_obj); @@ -753,7 +757,7 @@ pvr_fw_process(struct pvr_device *pvr_dev) err = pvr_fw_create_fwif_connection_ctl(pvr_dev); if (err) - goto err_free_fw_core_data_obj; + goto err_free_kdata; return 0; @@ -763,13 +767,16 @@ err_free_kdata: kfree(fw_mem->data); kfree(fw_mem->code); -err_free_fw_core_data_obj: if (fw_core_data_ptr) - pvr_fw_object_unmap_and_destroy(fw_mem->core_data_obj); + pvr_fw_object_vunmap(fw_mem->core_data_obj); + if (fw_mem->core_data_obj) + pvr_fw_object_destroy(fw_mem->core_data_obj); err_free_fw_core_code_obj: if (fw_core_code_ptr) - pvr_fw_object_unmap_and_destroy(fw_mem->core_code_obj); + pvr_fw_object_vunmap(fw_mem->core_code_obj); + if (fw_mem->core_code_obj) + pvr_fw_object_destroy(fw_mem->core_code_obj); err_free_fw_data_obj: if (fw_data_ptr) @@ -836,6 +843,12 @@ pvr_fw_cleanup(struct pvr_device *pvr_dev) struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem; pvr_fw_fini_fwif_connection_ctl(pvr_dev); + + kfree(fw_mem->core_data); + kfree(fw_mem->core_code); + kfree(fw_mem->data); + kfree(fw_mem->code); + if (fw_mem->core_code_obj) pvr_fw_object_destroy(fw_mem->core_code_obj); if (fw_mem->core_data_obj) diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.c b/drivers/gpu/drm/imagination/pvr_fw_meta.c index c39beb70c317..6d13864851fc 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_meta.c +++ b/drivers/gpu/drm/imagination/pvr_fw_meta.c @@ -527,8 +527,10 @@ pvr_meta_vm_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) static void pvr_meta_vm_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) { - pvr_vm_unmap(pvr_dev->kernel_vm_ctx, fw_obj->fw_mm_node.start, - fw_obj->fw_mm_node.size); + struct pvr_gem_object *pvr_obj = fw_obj->gem; + + pvr_vm_unmap_obj(pvr_dev->kernel_vm_ctx, pvr_obj, + fw_obj->fw_mm_node.start, fw_obj->fw_mm_node.size); } static bool diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c index 73707daa4e52..5dbb636d7d4f 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_trace.c +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -333,8 +333,8 @@ static int fw_trace_seq_show(struct seq_file *s, void *v) if (sf_id == ROGUE_FW_SF_LAST) return -EINVAL; - timestamp = read_fw_trace(trace_seq_data, 1) | - ((u64)read_fw_trace(trace_seq_data, 2) << 32); + timestamp = ((u64)read_fw_trace(trace_seq_data, 1) << 32) | + read_fw_trace(trace_seq_data, 2); timestamp = (timestamp & ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK) >> ROGUE_FWT_TIMESTAMP_TIME_SHIFT; diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c index 78c2f3c6dce0..6a15c1d2d871 100644 --- a/drivers/gpu/drm/imagination/pvr_job.c +++ b/drivers/gpu/drm/imagination/pvr_job.c @@ -684,6 +684,13 @@ pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count) geom_job->paired_job = frag_job; frag_job->paired_job = geom_job; + /* The geometry job pvr_job structure is used when the fragment + * job is being prepared by the GPU scheduler. Have the fragment + * job hold a reference on the geometry job to prevent it being + * freed until the fragment job has finished with it. + */ + pvr_job_get(geom_job); + /* Skip the fragment job we just paired to the geometry job. */ i++; } diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index ba7816fd28ec..d97613c6a0a9 100644 --- a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -317,6 +317,63 @@ pvr_power_device_idle(struct device *dev) return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY; } +static int +pvr_power_clear_error(struct pvr_device *pvr_dev) +{ + struct device *dev = from_pvr_device(pvr_dev)->dev; + int err; + + /* Ensure the device state is known and nothing is happening past this point */ + pm_runtime_disable(dev); + + /* Attempt to clear the runtime PM error by setting the current state again */ + if (pm_runtime_status_suspended(dev)) + err = pm_runtime_set_suspended(dev); + else + err = pm_runtime_set_active(dev); + + if (err) { + drm_err(from_pvr_device(pvr_dev), + "%s: Failed to clear runtime PM error (new error %d)\n", + __func__, err); + } + + pm_runtime_enable(dev); + + return err; +} + +/** + * pvr_power_get_clear() - Acquire a power reference, correcting any errors + * @pvr_dev: Device pointer + * + * Attempt to acquire a power reference on the device. If the runtime PM + * is in error state, attempt to clear the error and retry. + * + * Returns: + * * 0 on success, or + * * Any error code returned by pvr_power_get() or the runtime PM API. + */ +static int +pvr_power_get_clear(struct pvr_device *pvr_dev) +{ + int err; + + err = pvr_power_get(pvr_dev); + if (err == 0) + return err; + + drm_warn(from_pvr_device(pvr_dev), + "%s: pvr_power_get returned error %d, attempting recovery\n", + __func__, err); + + err = pvr_power_clear_error(pvr_dev); + if (err) + return err; + + return pvr_power_get(pvr_dev); +} + /** * pvr_power_reset() - Reset the GPU * @pvr_dev: Device pointer @@ -341,7 +398,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) * Take a power reference during the reset. This should prevent any interference with the * power state during reset. */ - WARN_ON(pvr_power_get(pvr_dev)); + WARN_ON(pvr_power_get_clear(pvr_dev)); down_write(&pvr_dev->reset_sem); @@ -363,13 +420,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) if (!err) { if (hard_reset) { pvr_dev->fw_dev.booted = false; - WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev)); + WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev)); err = pvr_fw_hard_reset(pvr_dev); if (err) goto err_device_lost; - err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev); + err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev); pvr_dev->fw_dev.booted = true; if (err) goto err_device_lost; diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index 20cb46012082..130473cfdfc9 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -109,12 +109,20 @@ pvr_queue_fence_get_driver_name(struct dma_fence *f) return PVR_DRIVER_NAME; } +static void pvr_queue_fence_release_work(struct work_struct *w) +{ + struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work); + + pvr_context_put(fence->queue->ctx); + dma_fence_free(&fence->base); +} + static void pvr_queue_fence_release(struct dma_fence *f) { struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); + struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev; - pvr_context_put(fence->queue->ctx); - dma_fence_free(f); + queue_work(pvr_dev->sched_wq, &fence->release_work); } static const char * @@ -268,6 +276,7 @@ pvr_queue_fence_init(struct dma_fence *f, pvr_context_get(queue->ctx); fence->queue = queue; + INIT_WORK(&fence->release_work, pvr_queue_fence_release_work); dma_fence_init(&fence->base, fence_ops, &fence_ctx->lock, fence_ctx->id, atomic_inc_return(&fence_ctx->seqno)); @@ -304,8 +313,9 @@ pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue) static void pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue) { - pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, - &queue->job_fence_ctx); + if (!fence->ops) + pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, + &queue->job_fence_ctx); } /** @@ -856,6 +866,10 @@ static void pvr_queue_free_job(struct drm_sched_job *sched_job) struct pvr_job *job = container_of(sched_job, struct pvr_job, base); drm_sched_job_cleanup(sched_job); + + if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) + pvr_job_put(job->paired_job); + job->paired_job = NULL; pvr_job_put(job); } diff --git a/drivers/gpu/drm/imagination/pvr_queue.h b/drivers/gpu/drm/imagination/pvr_queue.h index e06ced69302f..93fe9ac9f58c 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.h +++ b/drivers/gpu/drm/imagination/pvr_queue.h @@ -5,6 +5,7 @@ #define PVR_QUEUE_H #include <drm/gpu_scheduler.h> +#include <linux/workqueue.h> #include "pvr_cccb.h" #include "pvr_device.h" @@ -63,6 +64,9 @@ struct pvr_queue_fence { /** @queue: Queue that created this fence. */ struct pvr_queue *queue; + + /** @release_work: Fence release work structure. */ + struct work_struct release_work; }; /** diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 363f885a7098..2896fa7501b1 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -293,8 +293,9 @@ err_bind_op_fini: static int pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, - struct pvr_vm_context *vm_ctx, u64 device_addr, - u64 size) + struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { int err; @@ -318,6 +319,7 @@ pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, goto err_bind_op_fini; } + bind_op->pvr_obj = pvr_obj; bind_op->vm_ctx = vm_ctx; bind_op->device_addr = device_addr; bind_op->size = size; @@ -598,20 +600,6 @@ err_free: } /** - * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. - * @vm_ctx: Target VM context. - * - * This function ensures that no mappings are left dangling by unmapping them - * all in order of ascending device-virtual address. - */ -void -pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) -{ - WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, - vm_ctx->gpuvm_mgr.mm_range)); -} - -/** * pvr_vm_context_release() - Teardown a VM context. * @ref_count: Pointer to reference counter of the VM context. * @@ -703,11 +691,7 @@ pvr_vm_lock_extra(struct drm_gpuvm_exec *vm_exec) struct pvr_vm_bind_op *bind_op = vm_exec->extra.priv; struct pvr_gem_object *pvr_obj = bind_op->pvr_obj; - /* Unmap operations don't have an object to lock. */ - if (!pvr_obj) - return 0; - - /* Acquire lock on the GEM being mapped. */ + /* Acquire lock on the GEM object being mapped/unmapped. */ return drm_exec_lock_obj(&vm_exec->exec, gem_from_pvr_gem(pvr_obj)); } @@ -772,8 +756,10 @@ err_cleanup: } /** - * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * pvr_vm_unmap_obj_locked() - Unmap an already mapped section of device-virtual + * memory. * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. * @device_addr: Virtual device address at the start of the target mapping. * @size: Size of the target mapping. * @@ -784,9 +770,13 @@ err_cleanup: * * Any error encountered while performing internal operations required to * destroy the mapping (returned from pvr_vm_gpuva_unmap or * pvr_vm_gpuva_remap). + * + * The vm_ctx->lock must be held when calling this function. */ -int -pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +static int +pvr_vm_unmap_obj_locked(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { struct pvr_vm_bind_op bind_op = {0}; struct drm_gpuvm_exec vm_exec = { @@ -799,11 +789,13 @@ pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) }, }; - int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr, - size); + int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, pvr_obj, + device_addr, size); if (err) return err; + pvr_gem_object_get(pvr_obj); + err = drm_gpuvm_exec_lock(&vm_exec); if (err) goto err_cleanup; @@ -818,6 +810,96 @@ err_cleanup: return err; } +/** + * pvr_vm_unmap_obj() - Unmap an already mapped section of device-virtual + * memory. + * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by pvr_vm_unmap_obj_locked. + */ +int +pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) +{ + int err; + + mutex_lock(&vm_ctx->lock); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, device_addr, size); + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * @vm_ctx: Target VM context. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by drm_gpuva_find, + * * Any error encountered by pvr_vm_unmap_obj_locked. + */ +int +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +{ + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + int err; + + mutex_lock(&vm_ctx->lock); + + va = drm_gpuva_find(&vm_ctx->gpuvm_mgr, device_addr, size); + if (va) { + pvr_obj = gem_to_pvr_gem(va->gem.obj); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range); + } else { + err = -ENOENT; + } + + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. + * @vm_ctx: Target VM context. + * + * This function ensures that no mappings are left dangling by unmapping them + * all in order of ascending device-virtual address. + */ +void +pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) +{ + mutex_lock(&vm_ctx->lock); + + for (;;) { + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + + va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, + vm_ctx->gpuvm_mgr.mm_start, + vm_ctx->gpuvm_mgr.mm_range); + if (!va) + break; + + pvr_obj = gem_to_pvr_gem(va->gem.obj); + + WARN_ON(pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range)); + } + + mutex_unlock(&vm_ctx->lock); +} + /* Static data areas are determined by firmware. */ static const struct drm_pvr_static_data_area static_data_areas[] = { { diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h index 79406243617c..b0528dffa7f1 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.h +++ b/drivers/gpu/drm/imagination/pvr_vm.h @@ -38,6 +38,9 @@ struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev, int pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, u64 device_addr, u64 size); +int pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size); int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size); void pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx); diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index 5674f5707cca..bc7527542fdc 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -620,13 +620,16 @@ static void mtk_crtc_update_config(struct mtk_crtc *mtk_crtc, bool needs_vblank) mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); + goto update_config_out; } -#else +#endif spin_lock_irqsave(&mtk_crtc->config_lock, flags); mtk_crtc->config_updating = false; spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); -#endif +#if IS_REACHABLE(CONFIG_MTK_CMDQ) +update_config_out: +#endif mutex_unlock(&mtk_crtc->hw_lock); } @@ -716,6 +719,39 @@ int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, return 0; } +void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane) +{ +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + struct mtk_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_plane_state *plane_state = to_mtk_plane_state(plane->state); + int i; + + /* no need to wait for disabling the plane by CPU */ + if (!mtk_crtc->cmdq_client.chan) + return; + + if (!mtk_crtc->enabled) + return; + + /* set pending plane state to disabled */ + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *mtk_plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(mtk_plane->state); + + if (mtk_plane->index == plane->index) { + memcpy(mtk_plane_state, plane_state, sizeof(*plane_state)); + break; + } + } + mtk_crtc_update_config(mtk_crtc, false); + + /* wait for planes to be disabled by CMDQ */ + wait_event_timeout(mtk_crtc->cb_blocking_queue, + mtk_crtc->cmdq_vblank_cnt == 0, + msecs_to_jiffies(500)); +#endif +} + void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *state) { @@ -927,7 +963,8 @@ static int mtk_crtc_init_comp_planes(struct drm_device *drm_dev, mtk_ddp_comp_supported_rotations(comp), mtk_ddp_comp_get_blend_modes(comp), mtk_ddp_comp_get_formats(comp), - mtk_ddp_comp_get_num_formats(comp), i); + mtk_ddp_comp_get_num_formats(comp), + mtk_ddp_comp_is_afbc_supported(comp), i); if (ret) return ret; diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.h b/drivers/gpu/drm/mediatek/mtk_crtc.h index 388e900b6f4d..828f109b83e7 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_crtc.h @@ -21,6 +21,7 @@ int mtk_crtc_create(struct drm_device *drm_dev, const unsigned int *path, unsigned int num_conn_routes); int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, struct mtk_plane_state *state); +void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane); void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *plane_state); struct device *mtk_crtc_dma_dev_get(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c index edc6417639e6..ac6620e10262 100644 --- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c @@ -366,6 +366,7 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = { .get_blend_modes = mtk_ovl_get_blend_modes, .get_formats = mtk_ovl_get_formats, .get_num_formats = mtk_ovl_get_num_formats, + .is_afbc_supported = mtk_ovl_is_afbc_supported, }; static const struct mtk_ddp_comp_funcs ddp_postmask = { diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h index 39720b27f4e9..7289b3dcf22f 100644 --- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h @@ -83,6 +83,7 @@ struct mtk_ddp_comp_funcs { u32 (*get_blend_modes)(struct device *dev); const u32 *(*get_formats)(struct device *dev); size_t (*get_num_formats)(struct device *dev); + bool (*is_afbc_supported)(struct device *dev); void (*connect)(struct device *dev, struct device *mmsys_dev, unsigned int next); void (*disconnect)(struct device *dev, struct device *mmsys_dev, unsigned int next); void (*add)(struct device *dev, struct mtk_mutex *mutex); @@ -294,6 +295,14 @@ size_t mtk_ddp_comp_get_num_formats(struct mtk_ddp_comp *comp) return 0; } +static inline bool mtk_ddp_comp_is_afbc_supported(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->is_afbc_supported) + return comp->funcs->is_afbc_supported(comp->dev); + + return false; +} + static inline bool mtk_ddp_comp_add(struct mtk_ddp_comp *comp, struct mtk_mutex *mutex) { if (comp->funcs && comp->funcs->add) { diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index 04154db9085c..c0f7f77e0574 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -106,6 +106,7 @@ void mtk_ovl_disable_vblank(struct device *dev); u32 mtk_ovl_get_blend_modes(struct device *dev); const u32 *mtk_ovl_get_formats(struct device *dev); size_t mtk_ovl_get_num_formats(struct device *dev); +bool mtk_ovl_is_afbc_supported(struct device *dev); void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex); void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 19b0d5083981..ca4a9a60b890 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -236,6 +236,13 @@ size_t mtk_ovl_get_num_formats(struct device *dev) return ovl->data->num_formats; } +bool mtk_ovl_is_afbc_supported(struct device *dev) +{ + struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); + + return ovl->data->supports_afbc; +} + int mtk_ovl_clk_enable(struct device *dev) { struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index cad65ea851ed..4979d49ae25a 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -1746,7 +1746,7 @@ static int mtk_dp_parse_capabilities(struct mtk_dp *mtk_dp) ret = drm_dp_dpcd_readb(&mtk_dp->aux, DP_MSTM_CAP, &val); if (ret < 1) { - drm_err(mtk_dp->drm_dev, "Read mstm cap failed\n"); + dev_err(mtk_dp->dev, "Read mstm cap failed: %zd\n", ret); return ret == 0 ? -EIO : ret; } @@ -1756,7 +1756,7 @@ static int mtk_dp_parse_capabilities(struct mtk_dp *mtk_dp) DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0, &val); if (ret < 1) { - drm_err(mtk_dp->drm_dev, "Read irq vector failed\n"); + dev_err(mtk_dp->dev, "Read irq vector failed: %zd\n", ret); return ret == 0 ? -EIO : ret; } @@ -2039,7 +2039,7 @@ static int mtk_dp_wait_hpd_asserted(struct drm_dp_aux *mtk_aux, unsigned long wa ret = mtk_dp_parse_capabilities(mtk_dp); if (ret) { - drm_err(mtk_dp->drm_dev, "Can't parse capabilities\n"); + dev_err(mtk_dp->dev, "Can't parse capabilities: %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index a08d20654954..20a50180d4d4 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -127,14 +127,14 @@ struct mtk_dpi_yc_limit { * @is_ck_de_pol: Support CK/DE polarity. * @swap_input_support: Support input swap function. * @support_direct_pin: IP supports direct connection to dpi panels. - * @input_2pixel: Input pixel of dp_intf is 2 pixel per round, so enable this - * config to enable this feature. * @dimension_mask: Mask used for HWIDTH, HPORCH, VSYNC_WIDTH and VSYNC_PORCH * (no shift). * @hvsize_mask: Mask of HSIZE and VSIZE mask (no shift). * @channel_swap_shift: Shift value of channel swap. * @yuv422_en_bit: Enable bit of yuv422. * @csc_enable_bit: Enable bit of CSC. + * @input_2p_en_bit: Enable bit for input two pixel per round feature. + * If present, implies that the feature must be enabled. * @pixels_per_iter: Quantity of transferred pixels per iteration. * @edge_cfg_in_mmsys: If the edge configuration for DPI's output needs to be set in MMSYS. */ @@ -148,12 +148,12 @@ struct mtk_dpi_conf { bool is_ck_de_pol; bool swap_input_support; bool support_direct_pin; - bool input_2pixel; u32 dimension_mask; u32 hvsize_mask; u32 channel_swap_shift; u32 yuv422_en_bit; u32 csc_enable_bit; + u32 input_2p_en_bit; u32 pixels_per_iter; bool edge_cfg_in_mmsys; }; @@ -410,12 +410,13 @@ static void mtk_dpi_config_swap_input(struct mtk_dpi *dpi, bool enable) static void mtk_dpi_config_2n_h_fre(struct mtk_dpi *dpi) { - mtk_dpi_mask(dpi, dpi->conf->reg_h_fre_con, H_FRE_2N, H_FRE_2N); + if (dpi->conf->reg_h_fre_con) + mtk_dpi_mask(dpi, dpi->conf->reg_h_fre_con, H_FRE_2N, H_FRE_2N); } static void mtk_dpi_config_disable_edge(struct mtk_dpi *dpi) { - if (dpi->conf->edge_sel_en) + if (dpi->conf->edge_sel_en && dpi->conf->reg_h_fre_con) mtk_dpi_mask(dpi, dpi->conf->reg_h_fre_con, 0, EDGE_SEL_EN); } @@ -471,6 +472,7 @@ static void mtk_dpi_power_off(struct mtk_dpi *dpi) mtk_dpi_disable(dpi); clk_disable_unprepare(dpi->pixel_clk); + clk_disable_unprepare(dpi->tvd_clk); clk_disable_unprepare(dpi->engine_clk); } @@ -487,6 +489,12 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) goto err_refcount; } + ret = clk_prepare_enable(dpi->tvd_clk); + if (ret) { + dev_err(dpi->dev, "Failed to enable tvd pll: %d\n", ret); + goto err_engine; + } + ret = clk_prepare_enable(dpi->pixel_clk); if (ret) { dev_err(dpi->dev, "Failed to enable pixel clock: %d\n", ret); @@ -496,6 +504,8 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) return 0; err_pixel: + clk_disable_unprepare(dpi->tvd_clk); +err_engine: clk_disable_unprepare(dpi->engine_clk); err_refcount: dpi->refcount--; @@ -610,9 +620,9 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, mtk_dpi_dual_edge(dpi); mtk_dpi_config_disable_edge(dpi); } - if (dpi->conf->input_2pixel) { - mtk_dpi_mask(dpi, DPI_CON, DPINTF_INPUT_2P_EN, - DPINTF_INPUT_2P_EN); + if (dpi->conf->input_2p_en_bit) { + mtk_dpi_mask(dpi, DPI_CON, dpi->conf->input_2p_en_bit, + dpi->conf->input_2p_en_bit); } mtk_dpi_sw_reset(dpi, false); @@ -992,12 +1002,12 @@ static const struct mtk_dpi_conf mt8195_dpintf_conf = { .output_fmts = mt8195_output_fmts, .num_output_fmts = ARRAY_SIZE(mt8195_output_fmts), .pixels_per_iter = 4, - .input_2pixel = true, .dimension_mask = DPINTF_HPW_MASK, .hvsize_mask = DPINTF_HSIZE_MASK, .channel_swap_shift = DPINTF_CH_SWAP, .yuv422_en_bit = DPINTF_YUV422_EN, .csc_enable_bit = DPINTF_CSC_ENABLE, + .input_2p_en_bit = DPINTF_INPUT_2P_EN, }; static int mtk_dpi_probe(struct platform_device *pdev) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 4e93fd075e03..42e62b040961 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -463,7 +463,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) ret = drmm_mode_config_init(drm); if (ret) - goto put_mutex_dev; + return ret; drm->mode_config.min_width = 64; drm->mode_config.min_height = 64; @@ -481,8 +481,11 @@ static int mtk_drm_kms_init(struct drm_device *drm) for (i = 0; i < private->data->mmsys_dev_num; i++) { drm->dev_private = private->all_drm_private[i]; ret = component_bind_all(private->all_drm_private[i]->dev, drm); - if (ret) - goto put_mutex_dev; + if (ret) { + while (--i >= 0) + component_unbind_all(private->all_drm_private[i]->dev, drm); + return ret; + } } /* @@ -575,9 +578,6 @@ static int mtk_drm_kms_init(struct drm_device *drm) err_component_unbind: for (i = 0; i < private->data->mmsys_dev_num; i++) component_unbind_all(private->all_drm_private[i]->dev, drm); -put_mutex_dev: - for (i = 0; i < private->data->mmsys_dev_num; i++) - put_device(private->all_drm_private[i]->mutex_dev); return ret; } @@ -648,8 +648,10 @@ static int mtk_drm_bind(struct device *dev) return 0; drm = drm_dev_alloc(&mtk_drm_driver, dev); - if (IS_ERR(drm)) - return PTR_ERR(drm); + if (IS_ERR(drm)) { + ret = PTR_ERR(drm); + goto err_put_dev; + } private->drm_master = true; drm->dev_private = private; @@ -675,18 +677,31 @@ err_free: drm_dev_put(drm); for (i = 0; i < private->data->mmsys_dev_num; i++) private->all_drm_private[i]->drm = NULL; +err_put_dev: + for (i = 0; i < private->data->mmsys_dev_num; i++) { + /* For device_find_child in mtk_drm_get_all_priv() */ + put_device(private->all_drm_private[i]->dev); + } + put_device(private->mutex_dev); return ret; } static void mtk_drm_unbind(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); + int i; /* for multi mmsys dev, unregister drm dev in mmsys master */ if (private->drm_master) { drm_dev_unregister(private->drm); mtk_drm_kms_deinit(private->drm); drm_dev_put(private->drm); + + for (i = 0; i < private->data->mmsys_dev_num; i++) { + /* For device_find_child in mtk_drm_get_all_priv() */ + put_device(private->all_drm_private[i]->dev); + } + put_device(private->mutex_dev); } private->mtk_drm_bound = false; private->drm_master = false; diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index b9b7fd08b7d7..88f3dfeb4731 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1108,12 +1108,12 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, const struct mipi_dsi_msg *msg) { struct mtk_dsi *dsi = host_to_dsi(host); - u32 recv_cnt, i; + ssize_t recv_cnt; u8 read_data[16]; void *src_addr; u8 irq_flag = CMD_DONE_INT_FLAG; u32 dsi_mode; - int ret; + int ret, i; dsi_mode = readl(dsi->regs + DSI_MODE_CTRL); if (dsi_mode & MODE) { @@ -1162,7 +1162,7 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, if (recv_cnt) memcpy(msg->rx_buf, src_addr, recv_cnt); - DRM_INFO("dsi get %d byte data from the panel address(0x%x)\n", + DRM_INFO("dsi get %zd byte data from the panel address(0x%x)\n", recv_cnt, *((u8 *)(msg->tx_buf))); restore_dsi_mode: diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 7687f673964e..1aad8e6cf52e 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -137,7 +137,7 @@ enum hdmi_aud_channel_swap_type { struct hdmi_audio_param { enum hdmi_audio_coding_type aud_codec; - enum hdmi_audio_sample_size aud_sampe_size; + enum hdmi_audio_sample_size aud_sample_size; enum hdmi_aud_input_type aud_input_type; enum hdmi_aud_i2s_fmt aud_i2s_fmt; enum hdmi_aud_mclk aud_mclk; @@ -173,6 +173,7 @@ struct mtk_hdmi { unsigned int sys_offset; void __iomem *regs; enum hdmi_colorspace csp; + struct platform_device *audio_pdev; struct hdmi_audio_param aud_param; bool audio_enable; bool powered; @@ -1074,7 +1075,7 @@ static int mtk_hdmi_output_init(struct mtk_hdmi *hdmi) hdmi->csp = HDMI_COLORSPACE_RGB; aud_param->aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - aud_param->aud_sampe_size = HDMI_AUDIO_SAMPLE_SIZE_16; + aud_param->aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; aud_param->aud_input_type = HDMI_AUD_INPUT_I2S; aud_param->aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; aud_param->aud_mclk = HDMI_AUD_MCLK_128FS; @@ -1572,14 +1573,14 @@ static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, switch (daifmt->fmt) { case HDMI_I2S: hdmi_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - hdmi_params.aud_sampe_size = HDMI_AUDIO_SAMPLE_SIZE_16; + hdmi_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; hdmi_params.aud_input_type = HDMI_AUD_INPUT_I2S; hdmi_params.aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; hdmi_params.aud_mclk = HDMI_AUD_MCLK_128FS; break; case HDMI_SPDIF: hdmi_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - hdmi_params.aud_sampe_size = HDMI_AUDIO_SAMPLE_SIZE_16; + hdmi_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; hdmi_params.aud_input_type = HDMI_AUD_INPUT_SPDIF; break; default: @@ -1663,6 +1664,11 @@ static const struct hdmi_codec_ops mtk_hdmi_audio_codec_ops = { .no_capture_mute = 1, }; +static void mtk_hdmi_unregister_audio_driver(void *data) +{ + platform_device_unregister(data); +} + static int mtk_hdmi_register_audio_driver(struct device *dev) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); @@ -1672,13 +1678,20 @@ static int mtk_hdmi_register_audio_driver(struct device *dev) .i2s = 1, .data = hdmi, }; - struct platform_device *pdev; + int ret; - pdev = platform_device_register_data(dev, HDMI_CODEC_DRV_NAME, - PLATFORM_DEVID_AUTO, &codec_data, - sizeof(codec_data)); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + hdmi->audio_pdev = platform_device_register_data(dev, + HDMI_CODEC_DRV_NAME, + PLATFORM_DEVID_AUTO, + &codec_data, + sizeof(codec_data)); + if (IS_ERR(hdmi->audio_pdev)) + return PTR_ERR(hdmi->audio_pdev); + + ret = devm_add_action_or_reset(dev, mtk_hdmi_unregister_audio_driver, + hdmi->audio_pdev); + if (ret) + return ret; DRM_INFO("%s driver bound to HDMI\n", HDMI_CODEC_DRV_NAME); return 0; diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index 8a48b3b0a956..74c2704efb66 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -285,9 +285,14 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state); + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + mtk_plane_state->pending.enable = false; wmb(); /* Make sure the above parameter is set before update */ mtk_plane_state->pending.dirty = true; + + mtk_crtc_plane_disable(old_state->crtc, plane); } static void mtk_plane_atomic_update(struct drm_plane *plane, @@ -321,7 +326,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type, unsigned int supported_rotations, const u32 blend_modes, - const u32 *formats, size_t num_formats, unsigned int plane_idx) + const u32 *formats, size_t num_formats, + bool supports_afbc, unsigned int plane_idx) { int err; @@ -332,7 +338,9 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, err = drm_universal_plane_init(dev, plane, possible_crtcs, &mtk_plane_funcs, formats, - num_formats, modifiers, type, NULL); + num_formats, + supports_afbc ? modifiers : NULL, + type, NULL); if (err) { DRM_ERROR("failed to initialize plane\n"); return err; diff --git a/drivers/gpu/drm/mediatek/mtk_plane.h b/drivers/gpu/drm/mediatek/mtk_plane.h index 3b13b89989c7..95c5fa5295d8 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_plane.h @@ -49,5 +49,6 @@ to_mtk_plane_state(struct drm_plane_state *state) int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type, unsigned int supported_rotations, const u32 blend_modes, - const u32 *formats, size_t num_formats, unsigned int plane_idx); + const u32 *formats, size_t num_formats, + bool supports_afbc, unsigned int plane_idx); #endif diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 4bd0baa2a4f5..f59452e8fa6f 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -168,7 +168,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = { /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */ { .limits = { - .max_hdmi_phy_freq = 1650000, + .max_hdmi_phy_freq = 1650000000, }, .attrs = (const struct soc_device_attribute []) { { .soc_id = "GXL (S805*)", }, diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 3f9345c14f31..be4b0e4df6e1 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -37,7 +37,7 @@ struct meson_drm_match_data { }; struct meson_drm_soc_limits { - unsigned int max_hdmi_phy_freq; + unsigned long long max_hdmi_phy_freq; }; struct meson_drm { diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 0593a1cde906..2ad8383fcaed 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, { struct meson_drm *priv = encoder_hdmi->priv; int vic = drm_match_cea_mode(mode); - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; - vclk_freq = mode->clock; + vclk_freq = mode->clock * 1000ULL; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n", + dev_dbg(priv->dev, + "phy:%lluHz vclk=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); @@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long clock = mode->clock * 1000ULL; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; int vic = drm_match_cea_mode(mode); enum drm_mode_status status; @@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (status != MODE_OK) return status; - return meson_vclk_dmt_supported_freq(priv, mode->clock); + return meson_vclk_dmt_supported_freq(priv, clock); /* Check against supported VIC modes */ } else if (!meson_venc_hdmi_supported_vic(vic)) return MODE_BAD; - vclk_freq = mode->clock; + vclk_freq = clock; /* For 420, pixel clock is half unlike venc clock */ if (drm_mode_is_420_only(display_info, mode) || @@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n", + dev_dbg(priv->dev, + "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n", __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq); return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq); diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc..dfe0c28a0f05 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,7 +110,7 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001) +#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) /* VID PLL Dividers */ enum { @@ -360,11 +360,11 @@ enum { }; struct meson_vclk_params { - unsigned int pll_freq; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int pixel_freq; + unsigned long long pll_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long pixel_freq; unsigned int pll_od1; unsigned int pll_od2; unsigned int pll_od3; @@ -372,11 +372,11 @@ struct meson_vclk_params { unsigned int vclk_div; } params[] = { [MESON_VCLK_HDMI_ENCI_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 54000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 54000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -384,11 +384,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 27000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 27000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -396,11 +396,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_148500] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 74250000, .pll_od1 = 4, .pll_od2 = 1, .pll_od3 = 1, @@ -408,11 +408,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_74250] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 74250, - .venc_freq = 74250, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 74250000, + .venc_freq = 74250000, + .pixel_freq = 74250000, .pll_od1 = 2, .pll_od2 = 2, .pll_od3 = 2, @@ -420,11 +420,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_148500] = { - .pll_freq = 2970000, - .phy_freq = 1485000, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 148500, + .pll_freq = 2970000000, + .phy_freq = 1485000000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 148500000, .pll_od1 = 1, .pll_od2 = 2, .pll_od3 = 2, @@ -432,11 +432,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_297000] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 297000, - .vclk_freq = 297000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 297000000, + .vclk_freq = 297000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -444,11 +444,11 @@ struct meson_vclk_params { .vclk_div = 2, }, [MESON_VCLK_HDMI_594000] = { - .pll_freq = 5940000, - .phy_freq = 5940000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 594000, + .pll_freq = 5940000000, + .phy_freq = 5940000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 594000000, .pll_od1 = 1, .pll_od2 = 1, .pll_od3 = 2, @@ -456,11 +456,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_594000_YUV420] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -617,16 +617,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m, 3 << 20, pll_od_to_reg(od3) << 20); } -#define XTAL_FREQ 24000 +#define XTAL_FREQ (24 * 1000 * 1000) static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { /* The GXBB PLL has a /2 pre-multiplier */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) - pll_freq /= 2; + pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2); - return pll_freq / XTAL_FREQ; + return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ); } #define HDMI_FRAC_MAX_GXBB 4096 @@ -635,12 +635,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, unsigned int m, - unsigned int pll_freq) + unsigned long long pll_freq) { - unsigned int parent_freq = XTAL_FREQ; + unsigned long long parent_freq = XTAL_FREQ; unsigned int frac_max = HDMI_FRAC_MAX_GXL; unsigned int frac_m; unsigned int frac; + u32 remainder; /* The GXBB PLL has a /2 pre-multiplier and a larger FRAC width */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -652,11 +653,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, frac_max = HDMI_FRAC_MAX_G12A; /* We can have a perfect match !*/ - if (pll_freq / m == parent_freq && - pll_freq % m == 0) + if (div_u64_rem(pll_freq, m, &remainder) == parent_freq && + remainder == 0) return 0; - frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq); + frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq); frac_m = m * frac_max; if (frac_m > frac) return frac_max; @@ -666,7 +667,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, } static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, - unsigned int m, + unsigned long long m, unsigned int frac) { if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -694,7 +695,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, } static bool meson_hdmi_pll_find_params(struct meson_drm *priv, - unsigned int freq, + unsigned long long freq, unsigned int *m, unsigned int *frac, unsigned int *od) @@ -706,7 +707,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, continue; *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od); - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n", freq, *m, *frac, *od); if (meson_hdmi_pll_validate_params(priv, *m, *frac)) @@ -718,7 +719,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, /* pll_freq is the frequency after the OD dividers */ enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq) +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq) { unsigned int od, m, frac; @@ -741,7 +742,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq); /* pll_freq is the frequency after the OD dividers */ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { unsigned int od, m, frac, od1, od2, od3; @@ -756,7 +757,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, od1 = od / od2; } - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n", pll_freq, m, frac, od1, od2, od3); meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); @@ -764,17 +765,48 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, return; } - DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n", + DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n", pll_freq); } +static bool meson_vclk_freqs_are_matching_param(unsigned int idx, + unsigned long long phy_freq, + unsigned long long vclk_freq) +{ + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + idx, params[idx].vclk_freq, + FREQ_1000_1001(params[idx].vclk_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", + idx, params[idx].phy_freq, + FREQ_1000_1001(params[idx].phy_freq)); + + /* Match strict frequency */ + if (phy_freq == params[idx].phy_freq && + vclk_freq == params[idx].vclk_freq) + return true; + + /* Match 1000/1001 variant: vclk deviation has to be less than 1kHz + * (drm EDID is defined in 1kHz steps, so everything smaller must be + * rounding error) and the PHY freq deviation has to be less than + * 10kHz (as the TMDS clock is 10 times the pixel clock, so anything + * smaller must be rounding error as well). + */ + if (abs(vclk_freq - FREQ_1000_1001(params[idx].vclk_freq)) < 1000 && + abs(phy_freq - FREQ_1000_1001(params[idx].phy_freq)) < 10000) + return true; + + /* no match */ + return false; +} + enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq) +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq) { int i; - DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n", + DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n", phy_freq, vclk_freq); /* Check against soc revision/package limits */ @@ -785,19 +817,7 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n", - i, params[i].pixel_freq, - FREQ_1000_1001(params[i].pixel_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", - i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); - /* Match strict frequency */ - if (phy_freq == params[i].phy_freq && - vclk_freq == params[i].vclk_freq) - return MODE_OK; - /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && - vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) + if (meson_vclk_freqs_are_matching_param(i, phy_freq, vclk_freq)) return MODE_OK; } @@ -805,8 +825,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq); -static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, - unsigned int od1, unsigned int od2, unsigned int od3, +static void meson_vclk_set(struct meson_drm *priv, + unsigned long long pll_base_freq, unsigned int od1, + unsigned int od2, unsigned int od3, unsigned int vid_pll_div, unsigned int vclk_div, unsigned int hdmi_tx_div, unsigned int venc_div, bool hdmi_use_enci, bool vic_alternate_clock) @@ -826,15 +847,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_generic_set(priv, pll_base_freq); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x3d; frac = vic_alternate_clock ? 0xd02 : 0xe00; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0x59 : 0x5a; frac = vic_alternate_clock ? 0xe8f : 0; break; - case 5940000: + case 5940000000: m = 0x7b; frac = vic_alternate_clock ? 0xa05 : 0xc00; break; @@ -844,15 +865,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x281 : 0x300; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x347 : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x102 : 0x200; break; @@ -861,15 +882,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x140b4 : 0x18000; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x1a3ee : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x8148 : 0x10000; break; @@ -1025,14 +1046,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci) { bool vic_alternate_clock = false; - unsigned int freq; - unsigned int hdmi_tx_div; - unsigned int venc_div; + unsigned long long freq; + unsigned long long hdmi_tx_div; + unsigned long long venc_div; if (target == MESON_VCLK_TARGET_CVBS) { meson_venci_cvbs_clock_config(priv); @@ -1052,27 +1073,25 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, return; } - hdmi_tx_div = vclk_freq / dac_freq; + hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq); if (hdmi_tx_div == 0) { - pr_err("Fatal Error, invalid HDMI-TX freq %d\n", + pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n", dac_freq); return; } - venc_div = vclk_freq / venc_freq; + venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq); if (venc_div == 0) { - pr_err("Fatal Error, invalid HDMI venc freq %d\n", + pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n", venc_freq); return; } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { - if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && - (vclk_freq == params[freq].vclk_freq || - vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { + if (meson_vclk_freqs_are_matching_param(freq, phy_freq, + vclk_freq)) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else @@ -1098,7 +1117,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } if (!params[freq].pixel_freq) { - pr_err("Fatal Error, invalid HDMI vclk freq %d\n", vclk_freq); + pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n", + vclk_freq); return; } diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h index 60617aaf18dd..7ac55744e574 100644 --- a/drivers/gpu/drm/meson/meson_vclk.h +++ b/drivers/gpu/drm/meson/meson_vclk.h @@ -20,17 +20,18 @@ enum { }; /* 27MHz is the CVBS Pixel Clock */ -#define MESON_VCLK_CVBS 27000 +#define MESON_VCLK_CVBS (27 * 1000 * 1000) enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq); +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq); enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq); +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq); void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci); #endif /* __MESON_VCLK_H */ diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index fb71658c3117..6067d08aeee3 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -223,7 +223,7 @@ void mgag200_set_mode_regs(struct mga_device *mdev, const struct drm_display_mod vsyncstr = mode->crtc_vsync_start - 1; vsyncend = mode->crtc_vsync_end - 1; vtotal = mode->crtc_vtotal - 2; - vblkstr = mode->crtc_vblank_start; + vblkstr = mode->crtc_vblank_start - 1; vblkend = vtotal + 1; linecomp = vdispend; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 14db7376c712..67fa528f546d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1126,49 +1126,50 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; u32 val; + int ret; /* - * The GMU may still be in slumber unless the GPU started so check and - * skip putting it back into slumber if so + * GMU firmware's internal power state gets messed up if we send "prepare_slumber" hfi when + * oob_gpu handshake wasn't done after the last wake up. So do a dummy handshake here when + * required */ - val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE); + if (adreno_gpu->base.needs_hw_init) { + if (a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET)) + goto force_off; - if (val != 0xf) { - int ret = a6xx_gmu_wait_for_idle(gmu); + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + } - /* If the GMU isn't responding assume it is hung */ - if (ret) { - a6xx_gmu_force_off(gmu); - return; - } + ret = a6xx_gmu_wait_for_idle(gmu); - a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung); + /* If the GMU isn't responding assume it is hung */ + if (ret) + goto force_off; - /* tell the GMU we want to slumber */ - ret = a6xx_gmu_notify_slumber(gmu); - if (ret) { - a6xx_gmu_force_off(gmu); - return; - } + a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung); - ret = gmu_poll_timeout(gmu, - REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val, - !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB), - 100, 10000); + /* tell the GMU we want to slumber */ + ret = a6xx_gmu_notify_slumber(gmu); + if (ret) + goto force_off; - /* - * Let the user know we failed to slumber but don't worry too - * much because we are powering down anyway - */ + ret = gmu_poll_timeout(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val, + !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB), + 100, 10000); - if (ret) - DRM_DEV_ERROR(gmu->dev, - "Unable to slumber GMU: status = 0%x/0%x\n", - gmu_read(gmu, - REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS), - gmu_read(gmu, - REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2)); - } + /* + * Let the user know we failed to slumber but don't worry too + * much because we are powering down anyway + */ + + if (ret) + DRM_DEV_ERROR(gmu->dev, + "Unable to slumber GMU: status = 0%x/0%x\n", + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS), + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2)); /* Turn off HFI */ a6xx_hfi_stop(gmu); @@ -1178,6 +1179,11 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) /* Tell RPMh to power off the GPU */ a6xx_rpmh_stop(gmu); + + return; + +force_off: + a6xx_gmu_force_off(gmu); } @@ -1603,7 +1609,9 @@ int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; pm_runtime_enable(gmu->dev); @@ -1668,7 +1676,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; /* Fow now, don't do anything fancy until we get our feet under us */ gmu->idle_level = GMU_IDLE_STATE_ACTIVE; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 702b8d4b3497..80c78aff9643 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -123,6 +123,20 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno - 1); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BOTH); + + /* Reset state used to synchronize BR and BV */ + OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); + OUT_RING(ring, + CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | + CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | + CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | + CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BR); } if (!sysprof) { @@ -233,10 +247,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -319,10 +333,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -554,7 +568,6 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) if (adreno_is_7c3(gpu)) { gpu->ubwc_config.highest_bank_bit = 14; gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; gpu->ubwc_config.uavflagprd_inv = 2; gpu->ubwc_config.macrotile_mode = 1; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 0fcae53c0b14..159665cb6b14 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -1507,6 +1507,8 @@ static void a6xx_get_indexed_registers(struct msm_gpu *gpu, /* Restore the size in the hardware */ gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); + + a6xx_state->nr_indexed_regs = count; } static void a7xx_get_indexed_registers(struct msm_gpu *gpu, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h index eb5dfff2ec4f..e187e7b1cef1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h @@ -160,6 +160,7 @@ static const struct dpu_lm_cfg sm8650_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x400, @@ -167,6 +168,7 @@ static const struct dpu_lm_cfg sm8650_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x400, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h index cbbdaebe357e..daef07924886 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h @@ -65,6 +65,54 @@ static const struct dpu_sspp_cfg sdm670_sspp[] = { }, }; +static const struct dpu_lm_cfg sdm670_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_1, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + }, { + .name = "lm_1", .id = LM_1, + .base = 0x45000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_0, + .pingpong = PINGPONG_1, + .dspp = DSPP_1, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_5, + .pingpong = PINGPONG_2, + }, { + .name = "lm_5", .id = LM_5, + .base = 0x49000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_2, + .pingpong = PINGPONG_3, + }, +}; + +static const struct dpu_dspp_cfg sdm670_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_1", .id = DSPP_1, + .base = 0x56000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, +}; + static const struct dpu_dsc_cfg sdm670_dsc[] = { { .name = "dsc_0", .id = DSC_0, @@ -88,8 +136,10 @@ const struct dpu_mdss_cfg dpu_sdm670_cfg = { .ctl = sdm845_ctl, .sspp_count = ARRAY_SIZE(sdm670_sspp), .sspp = sdm670_sspp, - .mixer_count = ARRAY_SIZE(sdm845_lm), - .mixer = sdm845_lm, + .mixer_count = ARRAY_SIZE(sdm670_lm), + .mixer = sdm670_lm, + .dspp_count = ARRAY_SIZE(sdm670_dspp), + .dspp = sdm670_dspp, .pingpong_count = ARRAY_SIZE(sdm845_pp), .pingpong = sdm845_pp, .dsc_count = ARRAY_SIZE(sdm670_dsc), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 6ccfde82fecd..d8d5a91c00ec 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -108,7 +108,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -116,7 +116,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -124,7 +124,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -132,7 +132,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, @@ -164,6 +164,7 @@ static const struct dpu_lm_cfg sm8150_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -171,6 +172,7 @@ static const struct dpu_lm_cfg sm8150_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, @@ -295,7 +297,7 @@ static const struct dpu_wb_cfg sm8150_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index bab19ddd1d4f..67f0694a2f10 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, @@ -163,6 +163,7 @@ static const struct dpu_lm_cfg sc8180x_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -170,6 +171,7 @@ static const struct dpu_lm_cfg sc8180x_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, @@ -302,7 +304,7 @@ static const struct dpu_wb_cfg sc8180x_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, @@ -381,6 +383,7 @@ static const struct dpu_perf_cfg sc8180x_perf_data = { .min_core_ib = 2400000, .min_llcc_ib = 800000, .min_dram_ib = 800000, + .min_prefill_lines = 24, .danger_lut_tbl = {0xf, 0xffff, 0x0}, .safe_lut_tbl = {0xfff0, 0xf000, 0xffff}, .qos_lut_tbl = { diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h index d039b96beb97..76f60a2df7a8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h @@ -144,7 +144,7 @@ static const struct dpu_wb_cfg sm6125_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index a57d50b1f028..e8916ae826a6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -162,6 +162,7 @@ static const struct dpu_lm_cfg sm8250_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -169,6 +170,7 @@ static const struct dpu_lm_cfg sm8250_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index aced16e350da..f7c08e89c882 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -162,6 +162,7 @@ static const struct dpu_lm_cfg sm8350_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -169,6 +170,7 @@ static const struct dpu_lm_cfg sm8350_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index ad48defa154f..a1dbbf5c652f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -160,6 +160,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -167,6 +168,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h index a3e60ac70689..4f110be6b750 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h @@ -159,6 +159,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -166,6 +167,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, @@ -389,8 +391,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = { .type = INTF_DP, .controller_id = MSM_DP_CONTROLLER_2, .prog_fetch_lines_worst_case = 24, - .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), - .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), }, { .name = "intf_7", .id = INTF_7, .base = 0x3b000, .len = 0x280, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index db6c57900781..ecd595215a6b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1191,10 +1191,6 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc, DRM_DEBUG_ATOMIC("%s: check\n", dpu_crtc->name); - /* force a full mode set if active state changed */ - if (crtc_state->active_changed) - crtc_state->mode_changed = true; - if (cstate->num_mixers) { rc = _dpu_crtc_check_and_setup_lm_bounds(crtc, crtc_state); if (rc) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index bd3698bf0cf7..47b514c89ce6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -718,12 +718,11 @@ static int dpu_encoder_virt_atomic_check( crtc_state->mode_changed = true; /* * Release and Allocate resources on every modeset - * Dont allocate when active is false. */ if (drm_atomic_crtc_needs_modeset(crtc_state)) { dpu_rm_release(global_state, drm_enc); - if (!crtc_state->active_changed || crtc_state->enable) + if (crtc_state->enable) ret = dpu_rm_reserve(&dpu_kms->rm, global_state, drm_enc, crtc_state, topology); if (!ret) @@ -2125,6 +2124,9 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } } + if (phys_enc->hw_pp && phys_enc->hw_pp->ops.setup_dither) + phys_enc->hw_pp->ops.setup_dither(phys_enc->hw_pp, NULL); + /* reset the merge 3D HW block */ if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index d8a2edebfe8c..b7699ca89dcc 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -94,17 +94,21 @@ static void drm_mode_to_intf_timing_params( timing->vsync_polarity = 0; } - /* for DP/EDP, Shift timings to align it to bottom right */ - if (phys_enc->hw_intf->cap->type == INTF_DP) { + timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); + timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); + + /* + * For DP/EDP, Shift timings to align it to bottom right. + * wide_bus_en is set for everything excluding SDM845 & + * porch changes cause DisplayPort failure and HDMI tearing. + */ + if (phys_enc->hw_intf->cap->type == INTF_DP && timing->wide_bus_en) { timing->h_back_porch += timing->h_front_porch; timing->h_front_porch = 0; timing->v_back_porch += timing->v_front_porch; timing->v_front_porch = 0; } - timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); - timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); - /* * for DP, divide the horizonal parameters by 2 when * widebus is enabled diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index 5e9aad1b2aa2..d1e0fb213976 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -52,6 +52,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, u32 slice_last_group_size; u32 det_thresh_flatness; bool is_cmd_mode = !(mode & DSC_MODE_VIDEO); + bool input_10_bits = dsc->bits_per_component == 10; DPU_REG_WRITE(c, DSC_COMMON_MODE, mode); @@ -68,7 +69,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, data |= (dsc->line_buf_depth << 3); data |= (dsc->simple_422 << 2); data |= (dsc->convert_rgb << 1); - data |= dsc->bits_per_component; + data |= input_10_bits; DPU_REG_WRITE(c, DSC_ENC, data); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c index 0f40eea7f5e2..2040bee8d512 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c @@ -272,7 +272,7 @@ static void _setup_mdp_ops(struct dpu_hw_mdp_ops *ops, if (cap & BIT(DPU_MDP_VSYNC_SEL)) ops->setup_vsync_source = dpu_hw_setup_vsync_sel; - else + else if (!(cap & BIT(DPU_MDP_PERIPH_0_REMOVED))) ops->setup_vsync_source = dpu_hw_setup_wd_timer; ops->get_safe_status = dpu_hw_get_safe_status; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 16f144cbc0c9..8ff496082902 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -42,9 +42,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, if (!conn_state || !conn_state->connector) { DPU_ERROR("invalid connector state\n"); return -EINVAL; - } else if (conn_state->connector->status != connector_status_connected) { - DPU_ERROR("connector not connected %d\n", conn_state->connector->status); - return -EINVAL; } crtc = conn_state->crtc; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 576995ddce37..8bbc7fb881d5 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -389,7 +389,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, /* TODO: different regulators in other cases? */ mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v"; - mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v"; + mdp4_lcdc_encoder->regs[1].supply = "lvds-pll-vdda"; mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda"; ret = devm_regulator_bulk_get(dev->dev, diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index a599fc5d63c5..f4e01da5c55b 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -329,10 +329,10 @@ static void dp_audio_safe_to_exit_level(struct dp_audio_private *audio) safe_to_exit_level = 5; break; default: + safe_to_exit_level = 14; drm_dbg_dp(audio->drm_dev, "setting the default safe_to_exit_level = %u\n", safe_to_exit_level); - safe_to_exit_level = 14; break; } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index e1228fb093ee..a5c1534eafdb 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -928,16 +928,17 @@ enum drm_mode_status dp_bridge_mode_valid(struct drm_bridge *bridge, return -EINVAL; } - if (mode->clock > DP_MAX_PIXEL_CLK_KHZ) - return MODE_CLOCK_HIGH; - dp_display = container_of(dp, struct dp_display_private, dp_display); link_info = &dp_display->panel->link_info; - if (drm_mode_is_420_only(&dp->connector->display_info, mode) && - dp_display->panel->vsc_sdp_supported) + if ((drm_mode_is_420_only(&dp->connector->display_info, mode) && + dp_display->panel->vsc_sdp_supported) || + msm_dp_wide_bus_available(dp)) mode_pclk_khz /= 2; + if (mode_pclk_khz > DP_MAX_PIXEL_CLK_KHZ) + return MODE_CLOCK_HIGH; + mode_bpp = dp->connector->display_info.bpc * num_components; if (!mode_bpp) mode_bpp = default_bpp; diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index 1b9be5bd97f1..da0176eae3fe 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -257,7 +257,10 @@ static enum drm_mode_status edp_bridge_mode_valid(struct drm_bridge *bridge, return -EINVAL; } - if (mode->clock > DP_MAX_PIXEL_CLK_KHZ) + if (msm_dp_wide_bus_available(dp)) + mode_pclk_khz /= 2; + + if (mode_pclk_khz > DP_MAX_PIXEL_CLK_KHZ) return MODE_CLOCK_HIGH; /* diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index a98d24b7cb00..d22e01751f5e 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -846,7 +846,7 @@ static void dsi_ctrl_enable(struct msm_dsi_host *msm_host, dsi_write(msm_host, REG_DSI_CPHY_MODE_CTRL, BIT(0)); } -static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mode, u32 hdisplay) +static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mode) { struct drm_dsc_config *dsc = msm_host->dsc; u32 reg, reg_ctrl, reg_ctrl2; @@ -858,7 +858,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod /* first calculate dsc parameters and then program * compress mode registers */ - slice_per_intf = msm_dsc_get_slices_per_intf(dsc, hdisplay); + slice_per_intf = dsc->slice_count; total_bytes_per_intf = dsc->slice_chunk_size * slice_per_intf; bytes_per_pkt = dsc->slice_chunk_size; /* * slice_per_pkt; */ @@ -991,7 +991,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) if (msm_host->mode_flags & MIPI_DSI_MODE_VIDEO) { if (msm_host->dsc) - dsi_update_dsc_timing(msm_host, false, mode->hdisplay); + dsi_update_dsc_timing(msm_host, false); dsi_write(msm_host, REG_DSI_ACTIVE_H, DSI_ACTIVE_H_START(ha_start) | @@ -1012,7 +1012,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) DSI_ACTIVE_VSYNC_VPOS_END(vs_end)); } else { /* command mode */ if (msm_host->dsc) - dsi_update_dsc_timing(msm_host, true, mode->hdisplay); + dsi_update_dsc_timing(msm_host, true); /* image data and 1 byte write_memory_start cmd */ if (!msm_host->dsc) @@ -1827,8 +1827,15 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host) __func__, ret); goto err; } - if (!ret) + if (!ret) { msm_dsi->te_source = devm_kstrdup(dev, te_source, GFP_KERNEL); + if (!msm_dsi->te_source) { + DRM_DEV_ERROR(dev, "%s: failed to allocate te_source\n", + __func__); + ret = -ENOMEM; + goto err; + } + } ret = 0; if (of_property_read_bool(np, "syscon-sfpb")) { diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index a210b7c9e5ca..4fabb01345aa 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -74,17 +74,35 @@ static int dsi_mgr_setup_components(int id) int ret; if (!IS_BONDED_DSI()) { + /* + * Set the usecase before calling msm_dsi_host_register(), which would + * already program the PLL source mux based on a default usecase. + */ + msm_dsi_phy_set_usecase(msm_dsi->phy, MSM_DSI_PHY_STANDALONE); + msm_dsi_host_set_phy_mode(msm_dsi->host, msm_dsi->phy); + ret = msm_dsi_host_register(msm_dsi->host); if (ret) return ret; - - msm_dsi_phy_set_usecase(msm_dsi->phy, MSM_DSI_PHY_STANDALONE); - msm_dsi_host_set_phy_mode(msm_dsi->host, msm_dsi->phy); } else if (other_dsi) { struct msm_dsi *master_link_dsi = IS_MASTER_DSI_LINK(id) ? msm_dsi : other_dsi; struct msm_dsi *slave_link_dsi = IS_MASTER_DSI_LINK(id) ? other_dsi : msm_dsi; + + /* + * PLL0 is to drive both DSI link clocks in bonded DSI mode. + * + * Set the usecase before calling msm_dsi_host_register(), which would + * already program the PLL source mux based on a default usecase. + */ + msm_dsi_phy_set_usecase(clk_master_dsi->phy, + MSM_DSI_PHY_MASTER); + msm_dsi_phy_set_usecase(clk_slave_dsi->phy, + MSM_DSI_PHY_SLAVE); + msm_dsi_host_set_phy_mode(msm_dsi->host, msm_dsi->phy); + msm_dsi_host_set_phy_mode(other_dsi->host, other_dsi->phy); + /* Register slave host first, so that slave DSI device * has a chance to probe, and do not block the master * DSI device's probe. @@ -98,14 +116,6 @@ static int dsi_mgr_setup_components(int id) ret = msm_dsi_host_register(master_link_dsi->host); if (ret) return ret; - - /* PLL0 is to drive both 2 DSI link clocks in bonded DSI mode. */ - msm_dsi_phy_set_usecase(clk_master_dsi->phy, - MSM_DSI_PHY_MASTER); - msm_dsi_phy_set_usecase(clk_slave_dsi->phy, - MSM_DSI_PHY_SLAVE); - msm_dsi_host_set_phy_mode(msm_dsi->host, msm_dsi->phy); - msm_dsi_host_set_phy_mode(other_dsi->host, other_dsi->phy); } return 0; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index 677c62571811..28cc550e22a8 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -703,6 +703,13 @@ static int dsi_pll_10nm_init(struct msm_dsi_phy *phy) /* TODO: Remove this when we have proper display handover support */ msm_dsi_phy_pll_save_state(phy); + /* + * Store also proper vco_current_rate, because its value will be used in + * dsi_10nm_pll_restore_state(). + */ + if (!dsi_pll_10nm_vco_recalc_rate(&pll_10nm->clk_hw, VCO_REF_CLK_RATE)) + pll_10nm->vco_current_rate = pll_10nm->phy->cfg->min_pll_rate; + return 0; } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 031446c87dae..a2c87c84aa05 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -83,6 +83,9 @@ struct dsi_pll_7nm { /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG0 register */ spinlock_t postdiv_lock; + /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG1 register */ + spinlock_t pclk_mux_lock; + struct pll_7nm_cached_state cached_state; struct dsi_pll_7nm *slave; @@ -302,7 +305,7 @@ static void dsi_pll_commit(struct dsi_pll_7nm *pll, struct dsi_pll_config *confi writel(pll->phy->cphy_mode ? 0x00 : 0x10, base + REG_DSI_7nm_PHY_PLL_CMODE_1); writel(config->pll_clock_inverters, - base + REG_DSI_7nm_PHY_PLL_CLOCK_INVERTERS); + base + REG_DSI_7nm_PHY_PLL_CLOCK_INVERTERS_1); } static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate, @@ -372,22 +375,41 @@ static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) ndelay(250); } -static void dsi_pll_disable_global_clk(struct dsi_pll_7nm *pll) +static void dsi_pll_cmn_clk_cfg0_write(struct dsi_pll_7nm *pll, u32 val) { + unsigned long flags; + + spin_lock_irqsave(&pll->postdiv_lock, flags); + writel(val, pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG0); + spin_unlock_irqrestore(&pll->postdiv_lock, flags); +} + +static void dsi_pll_cmn_clk_cfg1_update(struct dsi_pll_7nm *pll, u32 mask, + u32 val) +{ + unsigned long flags; u32 data; + spin_lock_irqsave(&pll->pclk_mux_lock, flags); data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - writel(data & ~BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + data &= ~mask; + data |= val & mask; + + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + spin_unlock_irqrestore(&pll->pclk_mux_lock, flags); +} + +static void dsi_pll_disable_global_clk(struct dsi_pll_7nm *pll) +{ + dsi_pll_cmn_clk_cfg1_update(pll, DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN, 0); } static void dsi_pll_enable_global_clk(struct dsi_pll_7nm *pll) { - u32 data; + u32 cfg_1 = DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN | DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN_SEL; writel(0x04, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_3); - - data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - writel(data | BIT(5) | BIT(4), pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg1_update(pll, cfg_1, cfg_1); } static void dsi_pll_phy_dig_reset(struct dsi_pll_7nm *pll) @@ -565,7 +587,6 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(phy->vco_hw); struct pll_7nm_cached_state *cached = &pll_7nm->cached_state; - void __iomem *phy_base = pll_7nm->phy->base; u32 val; int ret; @@ -574,13 +595,10 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) val |= cached->pll_out_div; writel(val, pll_7nm->phy->pll_base + REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE); - writel(cached->bit_clk_div | (cached->pix_clk_div << 4), - phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG0); - - val = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - val &= ~0x3; - val |= cached->pll_mux; - writel(val, phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg0_write(pll_7nm, + DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0(cached->bit_clk_div) | + DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4(cached->pix_clk_div)); + dsi_pll_cmn_clk_cfg1_update(pll_7nm, 0x3, cached->pll_mux); ret = dsi_pll_7nm_vco_set_rate(phy->vco_hw, pll_7nm->vco_current_rate, @@ -599,7 +617,6 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(phy->vco_hw); - void __iomem *base = phy->base; u32 data = 0x0; /* internal PLL */ DBG("DSI PLL%d", pll_7nm->phy->id); @@ -618,7 +635,8 @@ static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) } /* set PLL src */ - writel(data << 2, base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg1_update(pll_7nm, DSI_7nm_PHY_CMN_CLK_CFG1_BITCLK_SEL__MASK, + DSI_7nm_PHY_CMN_CLK_CFG1_BITCLK_SEL(data)); return 0; } @@ -733,7 +751,7 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide pll_by_2_bit, }), 2, 0, pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1, - 0, 1, 0, NULL); + 0, 1, 0, &pll_7nm->pclk_mux_lock); if (IS_ERR(hw)) { ret = PTR_ERR(hw); goto fail; @@ -778,6 +796,7 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) pll_7nm_list[phy->id] = pll_7nm; spin_lock_init(&pll_7nm->postdiv_lock); + spin_lock_init(&pll_7nm->pclk_mux_lock); pll_7nm->phy = phy; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c index e6ffaf92d26d..1c4211cfa2a4 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c @@ -137,7 +137,7 @@ static inline u32 pll_get_integloop_gain(u64 frac_start, u64 bclk, u32 ref_clk, base <<= (digclk_divsel == 2 ? 1 : 0); - return (base <= 2046 ? base : 2046); + return base; } static inline u32 pll_get_pll_cmp(u64 fdata, unsigned long ref_clk) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 8c13b08708d2..197d8d9a421d 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -550,6 +550,7 @@ static int msm_ioctl_gem_info_set_metadata(struct drm_gem_object *obj, u32 metadata_size) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + void *new_metadata; void *buf; int ret; @@ -567,8 +568,14 @@ static int msm_ioctl_gem_info_set_metadata(struct drm_gem_object *obj, if (ret) goto out; - msm_obj->metadata = + new_metadata = krealloc(msm_obj->metadata, metadata_size, GFP_KERNEL); + if (!new_metadata) { + ret = -ENOMEM; + goto out; + } + + msm_obj->metadata = new_metadata; msm_obj->metadata_size = metadata_size; memcpy(msm_obj->metadata, buf, metadata_size); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 2e28a1344636..9526b22038ab 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -543,15 +543,12 @@ static inline int align_pitch(int width, int bpp) static inline unsigned long timeout_to_jiffies(const ktime_t *timeout) { ktime_t now = ktime_get(); - s64 remaining_jiffies; - if (ktime_compare(*timeout, now) < 0) { - remaining_jiffies = 0; - } else { - ktime_t rem = ktime_sub(*timeout, now); - remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); - } + if (ktime_compare(*timeout, now) <= 0) + return 0; + ktime_t rem = ktime_sub(*timeout, now); + s64 remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); return clamp(remaining_jiffies, 1LL, (s64)INT_MAX); } diff --git a/drivers/gpu/drm/msm/msm_dsc_helper.h b/drivers/gpu/drm/msm/msm_dsc_helper.h index b9049fe1e279..63f95523b2cb 100644 --- a/drivers/gpu/drm/msm/msm_dsc_helper.h +++ b/drivers/gpu/drm/msm/msm_dsc_helper.h @@ -13,17 +13,6 @@ #include <drm/display/drm_dsc_helper.h> /** - * msm_dsc_get_slices_per_intf() - calculate number of slices per interface - * @dsc: Pointer to drm dsc config struct - * @intf_width: interface width in pixels - * Returns: Integer representing the number of slices for the given interface - */ -static inline u32 msm_dsc_get_slices_per_intf(const struct drm_dsc_config *dsc, u32 intf_width) -{ - return DIV_ROUND_UP(intf_width, dsc->slice_width); -} - -/** * msm_dsc_get_bytes_per_line() - calculate bytes per line * @dsc: Pointer to drm dsc config struct * Returns: Integer value representing bytes per line. DSI and DP need diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index ebc9ba66efb8..eeb3b65dd4d1 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -963,7 +963,8 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, uint64_t off = drm_vma_node_start(&obj->vma_node); const char *madv; - msm_gem_lock(obj); + if (!msm_gem_trylock(obj)) + return; stats->all.count++; stats->all.size += obj->size; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 85f0257e83da..748053f70ca7 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -188,6 +188,12 @@ msm_gem_lock(struct drm_gem_object *obj) dma_resv_lock(obj->resv, NULL); } +static inline bool __must_check +msm_gem_trylock(struct drm_gem_object *obj) +{ + return dma_resv_trylock(obj->resv); +} + static inline int msm_gem_lock_interruptible(struct drm_gem_object *obj) { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index fba78193127d..4b3a8ee8e278 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -85,6 +85,15 @@ void __msm_gem_submit_destroy(struct kref *kref) container_of(kref, struct msm_gem_submit, ref); unsigned i; + /* + * In error paths, we could unref the submit without calling + * drm_sched_entity_push_job(), so msm_job_free() will never + * get called. Since drm_sched_job_cleanup() will NULL out + * s_fence, we can use that to detect this case. + */ + if (submit->base.s_fence) + drm_sched_job_cleanup(&submit->base); + if (submit->fence_id) { spin_lock(&submit->queue->idr_lock); idr_remove(&submit->queue->fence_idr, submit->fence_id); @@ -658,6 +667,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_ringbuffer *ring; struct msm_submit_post_dep *post_deps = NULL; struct drm_syncobj **syncobjs_to_reset = NULL; + struct sync_file *sync_file = NULL; int out_fence_fd = -1; unsigned i; int ret; @@ -787,8 +797,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; if (!submit->cmd[i].size || - ((submit->cmd[i].size + submit->cmd[i].offset) > - obj->size / 4)) { + (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) { SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4); ret = -EINVAL; goto out; @@ -869,7 +878,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, } if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - struct sync_file *sync_file = sync_file_create(submit->user_fence); + sync_file = sync_file_create(submit->user_fence); if (!sync_file) { ret = -ENOMEM; } else { @@ -903,8 +912,11 @@ out: out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) + if (ret && (out_fence_fd >= 0)) { put_unused_fd(out_fence_fd); + if (sync_file) + fput(sync_file->file); + } if (!IS_ERR_OR_NULL(submit)) { msm_gem_submit_put(submit); diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 6970b0f7f457..2e1d5c343272 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -156,6 +156,7 @@ void msm_devfreq_init(struct msm_gpu *gpu) priv->gpu_devfreq_config.downdifferential = 10; mutex_init(&df->lock); + df->suspended = true; ret = dev_pm_qos_add_request(&gpu->pdev->dev, &df->boost_freq, DEV_PM_QOS_MIN_FREQUENCY, 0); diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index af6a6fcb1173..6749f0fbca96 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) ret = priv->kms_init(ddev); if (ret) { DRM_DEV_ERROR(dev, "failed to load kms\n"); - priv->kms = NULL; return ret; } diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index cab01af55d22..2ca0ad6efc96 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2260,7 +2260,15 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <reg32 offset="0" name="0"> <bitfield name="CLEAR_ON_CHIP_TS" pos="0" type="boolean"/> <bitfield name="CLEAR_RESOURCE_TABLE" pos="1" type="boolean"/> - <bitfield name="CLEAR_GLOBAL_LOCAL_TS" pos="2" type="boolean"/> + <bitfield name="CLEAR_BV_BR_COUNTER" pos="2" type="boolean"/> + <bitfield name="RESET_GLOBAL_LOCAL_TS" pos="3" type="boolean"/> + </reg32> +</domain> + +<domain name="CP_INDIRECT_BUFFER" width="32" varset="chip" prefix="chip" variants="A5XX-"> + <reg64 offset="0" name="IB_BASE" type="address"/> + <reg32 offset="2" name="2"> + <bitfield name="IB_SIZE" low="0" high="19"/> </reg32> </domain> diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index d54b72f92449..35f7f40e405b 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -9,8 +9,15 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x00004" name="REVISION_ID1"/> <reg32 offset="0x00008" name="REVISION_ID2"/> <reg32 offset="0x0000c" name="REVISION_ID3"/> - <reg32 offset="0x00010" name="CLK_CFG0"/> - <reg32 offset="0x00014" name="CLK_CFG1"/> + <reg32 offset="0x00010" name="CLK_CFG0"> + <bitfield name="DIV_CTRL_3_0" low="0" high="3" type="uint"/> + <bitfield name="DIV_CTRL_7_4" low="4" high="7" type="uint"/> + </reg32> + <reg32 offset="0x00014" name="CLK_CFG1"> + <bitfield name="CLK_EN" pos="5" type="boolean"/> + <bitfield name="CLK_EN_SEL" pos="4" type="boolean"/> + <bitfield name="BITCLK_SEL" low="2" high="3" type="uint"/> + </reg32> <reg32 offset="0x00018" name="GLBL_CTRL"/> <reg32 offset="0x0001c" name="RBUF_CTRL"/> <reg32 offset="0x00020" name="VREG_CTRL_0"/> diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index ceef470c9fbf..1050a4617fc1 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -4,6 +4,8 @@ config DRM_NOUVEAU depends on DRM && PCI && MMU select IOMMU_API select FW_LOADER + select FW_CACHE if PM_SLEEP + select DRM_CLIENT_SELECTION select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HELPER diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index d47442125fa1..9aae26eb7d8f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -42,7 +42,7 @@ #include "nouveau_acpi.h" static struct ida bl_ida; -#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0' +#define BL_NAME_SIZE 24 // 12 for name + 11 for digits + 1 for '\0' static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index db961eade225..2016c1e7242f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -144,6 +144,9 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) nouveau_bo_del_io_reserve_lru(bo); nv10_bo_put_tile_region(dev, nvbo->tile, NULL); + if (bo->base.import_attach) + drm_prime_gem_destroy(&bo->base, bo->sg); + /* * If nouveau_bo_new() allocated this buffer, the GEM object was never * initialized, so don't attempt to release it. diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index b06aa473102b..5ab4201c981e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -776,7 +776,6 @@ nouveau_connector_force(struct drm_connector *connector) if (!nv_encoder) { NV_ERROR(drm, "can't find encoder to force %s on!\n", connector->name); - connector->status = connector_status_disconnected; return; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 34985771b2a2..6e5adab03471 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -31,6 +31,7 @@ #include <linux/dynamic_debug.h> #include <drm/drm_aperture.h> +#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem_ttm_helper.h> @@ -836,6 +837,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev, { struct nvkm_device *device; struct nouveau_drm *drm; + const struct drm_format_info *format; int ret; if (vga_switcheroo_client_probe_defer(pdev)) @@ -873,9 +875,11 @@ static int nouveau_drm_probe(struct pci_dev *pdev, goto fail_pci; if (drm->client.device.info.ram_size <= 32 * 1024 * 1024) - drm_fbdev_ttm_setup(drm->dev, 8); + format = drm_format_info(DRM_FORMAT_C8); else - drm_fbdev_ttm_setup(drm->dev, 32); + format = NULL; + + drm_client_setup(drm->dev, format); quirk_broken_nv_runpm(pdev); return 0; @@ -1318,6 +1322,8 @@ driver_stub = { .dumb_create = nouveau_display_dumb_create, .dumb_map_offset = drm_gem_ttm_dumb_map_offset, + DRM_FBDEV_TTM_DRIVER_OPS, + .name = DRIVER_NAME, .desc = DRIVER_DESC, #ifdef GIT_REVISION diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 09686d038d60..edddfc036c6d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); - if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) @@ -387,11 +387,13 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, if (f) { struct nouveau_channel *prev; bool must_wait = true; + bool local; rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || - fctx->sync(f, prev, chan) == 0)) + local = prev && prev->cli->drm == chan->cli->drm; + if (local && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); if (!must_wait) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9ae2cee1c7c5..67e3c99de73a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -87,9 +87,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem) return; } - if (gem->import_attach) - drm_prime_gem_destroy(gem, nvbo->bo.sg); - ttm_bo_put(&nvbo->bo); pm_runtime_mark_last_busy(dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index b4da82ddbb6b..8ea98f06d39a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -590,6 +590,7 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); struct mm_struct *mm = svmm->notifier.mm; + struct folio *folio; struct page *page; unsigned long start = args->p.addr; unsigned long notifier_seq; @@ -616,12 +617,16 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, ret = -EINVAL; goto out; } + folio = page_folio(page); mutex_lock(&svmm->mutex); if (!mmu_interval_read_retry(¬ifier->notifier, notifier_seq)) break; mutex_unlock(&svmm->mutex); + + folio_unlock(folio); + folio_put(folio); } /* Map the page on the GPU. */ @@ -637,8 +642,8 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); mutex_unlock(&svmm->mutex); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); out: mmu_interval_notifier_remove(¬ifier->notifier); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c index 841e3b69fcaf..5a0c9b8a79f3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c @@ -31,6 +31,7 @@ mcp77_sor = { .state = g94_sor_state, .power = nv50_sor_power, .clock = nv50_sor_clock, + .bl = &nv50_sor_bl, .hdmi = &g84_sor_hdmi, .dp = &g94_sor_dp, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index d586aea30898..3ad4f6e9a8ac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -58,7 +58,7 @@ #include <linux/parser.h> #define GSP_MSG_MIN_SIZE GSP_PAGE_SIZE -#define GSP_MSG_MAX_SIZE GSP_PAGE_MIN_SIZE * 16 +#define GSP_MSG_MAX_SIZE (GSP_MSG_MIN_SIZE * 16) struct r535_gsp_msg { u8 auth_tag_buffer[16]; @@ -121,6 +121,8 @@ r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime) return mqe->data; } + size = ALIGN(repc + GSP_MSG_HDR_SIZE, GSP_PAGE_SIZE); + msg = kvmalloc(repc, GFP_KERNEL); if (!msg) return ERR_PTR(-ENOMEM); @@ -129,19 +131,15 @@ r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime) len = min_t(u32, repc, len); memcpy(msg, mqe->data, len); - rptr += DIV_ROUND_UP(len, GSP_PAGE_SIZE); - if (rptr == gsp->msgq.cnt) - rptr = 0; - repc -= len; if (repc) { mqe = (void *)((u8 *)gsp->shm.msgq.ptr + 0x1000 + 0 * 0x1000); memcpy(msg + len, mqe, repc); - - rptr += DIV_ROUND_UP(repc, GSP_PAGE_SIZE); } + rptr = (rptr + DIV_ROUND_UP(size, GSP_PAGE_SIZE)) % gsp->msgq.cnt; + mb(); (*gsp->msgq.rptr) = rptr; return msg; @@ -163,7 +161,7 @@ r535_gsp_cmdq_push(struct nvkm_gsp *gsp, void *argv) u64 *end; u64 csum = 0; int free, time = 1000000; - u32 wptr, size; + u32 wptr, size, step; u32 off = 0; argc = ALIGN(GSP_MSG_HDR_SIZE + argc, GSP_PAGE_SIZE); @@ -197,7 +195,9 @@ r535_gsp_cmdq_push(struct nvkm_gsp *gsp, void *argv) } cqe = (void *)((u8 *)gsp->shm.cmdq.ptr + 0x1000 + wptr * 0x1000); - size = min_t(u32, argc, (gsp->cmdq.cnt - wptr) * GSP_PAGE_SIZE); + step = min_t(u32, free, (gsp->cmdq.cnt - wptr)); + size = min_t(u32, argc, step * GSP_PAGE_SIZE); + memcpy(cqe, (u8 *)cmd + off, size); wptr += DIV_ROUND_UP(size, 0x1000); @@ -1454,7 +1454,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = 4, - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; caps->status = 0xffff; @@ -1462,17 +1461,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a))) return; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; caps->status = 0; caps->optimusCaps = *(u32 *)obj->buffer.pointer; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -1489,24 +1493,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = sizeof(caps), - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; jt->status = 0xffff; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; jt->status = 0; jt->jtCaps = *(u32 *)obj->buffer.pointer; jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20; jt->bSBIOSCaps = 0; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index a6f410ba60bc..d393bc540f86 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -75,7 +75,7 @@ gp10b_pmu_acr = { .bootstrap_multiple_falcons = gp10b_pmu_acr_bootstrap_multiple_falcons, }; -#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) +#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC) MODULE_FIRMWARE("nvidia/gp10b/pmu/desc.bin"); MODULE_FIRMWARE("nvidia/gp10b/pmu/image.bin"); MODULE_FIRMWARE("nvidia/gp10b/pmu/sig.bin"); diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 767e47a2b0c1..663af985d1b3 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1983,6 +1983,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('S', 'H', 'P', 0x153a, &delay_200_500_e50, "LQ140T1JH01"), EDP_PANEL_ENTRY('S', 'H', 'P', 0x154c, &delay_200_500_p2e100, "LQ116M1JW10"), + EDP_PANEL_ENTRY('S', 'T', 'A', 0x0004, &delay_200_500_e200, "116KHD024006"), EDP_PANEL_ENTRY('S', 'T', 'A', 0x0100, &delay_100_500_e200, "2081116HHD028001-51D"), { /* sentinal */ } diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c index 266a087fe14c..3c24a63b6be8 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c @@ -607,7 +607,7 @@ static int ili9882t_add(struct ili9882t *ili) ili->enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(ili->enable_gpio)) { - dev_err(dev, "cannot get reset-gpios %ld\n", + dev_err(dev, "cannot get enable-gpios %ld\n", PTR_ERR(ili->enable_gpio)); return PTR_ERR(ili->enable_gpio); } diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 45d09e6fa667..eb0f8373258c 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -109,13 +109,13 @@ static int jadard_prepare(struct drm_panel *panel) if (jadard->desc->lp11_to_reset_delay_ms) msleep(jadard->desc->lp11_to_reset_delay_ms); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(5); - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); msleep(10); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(130); ret = jadard->desc->init(jadard); @@ -129,11 +129,11 @@ static int jadard_unprepare(struct drm_panel *panel) { struct jadard *jadard = panel_to_jadard(panel); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(120); if (jadard->desc->reset_before_power_off_vcioo) { - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); usleep_range(1000, 2000); } @@ -1130,7 +1130,7 @@ static int jadard_dsi_probe(struct mipi_dsi_device *dsi) dsi->format = desc->format; dsi->lanes = desc->lanes; - jadard->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + jadard->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(jadard->reset)) { DRM_DEV_ERROR(&dsi->dev, "failed to get our reset GPIO\n"); return PTR_ERR(jadard->reset); diff --git a/drivers/gpu/drm/panel/panel-samsung-sofef00.c b/drivers/gpu/drm/panel/panel-samsung-sofef00.c index 04ce925b3d9d..49cfa84b34f0 100644 --- a/drivers/gpu/drm/panel/panel-samsung-sofef00.c +++ b/drivers/gpu/drm/panel/panel-samsung-sofef00.c @@ -22,7 +22,6 @@ struct sofef00_panel { struct mipi_dsi_device *dsi; struct regulator *supply; struct gpio_desc *reset_gpio; - const struct drm_display_mode *mode; }; static inline @@ -159,26 +158,11 @@ static const struct drm_display_mode enchilada_panel_mode = { .height_mm = 145, }; -static const struct drm_display_mode fajita_panel_mode = { - .clock = (1080 + 72 + 16 + 36) * (2340 + 32 + 4 + 18) * 60 / 1000, - .hdisplay = 1080, - .hsync_start = 1080 + 72, - .hsync_end = 1080 + 72 + 16, - .htotal = 1080 + 72 + 16 + 36, - .vdisplay = 2340, - .vsync_start = 2340 + 32, - .vsync_end = 2340 + 32 + 4, - .vtotal = 2340 + 32 + 4 + 18, - .width_mm = 68, - .height_mm = 145, -}; - static int sofef00_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector) { struct drm_display_mode *mode; - struct sofef00_panel *ctx = to_sofef00_panel(panel); - mode = drm_mode_duplicate(connector->dev, ctx->mode); + mode = drm_mode_duplicate(connector->dev, &enchilada_panel_mode); if (!mode) return -ENOMEM; @@ -239,13 +223,6 @@ static int sofef00_panel_probe(struct mipi_dsi_device *dsi) if (!ctx) return -ENOMEM; - ctx->mode = of_device_get_match_data(dev); - - if (!ctx->mode) { - dev_err(dev, "Missing device mode\n"); - return -ENODEV; - } - ctx->supply = devm_regulator_get(dev, "vddio"); if (IS_ERR(ctx->supply)) return dev_err_probe(dev, PTR_ERR(ctx->supply), @@ -295,14 +272,7 @@ static void sofef00_panel_remove(struct mipi_dsi_device *dsi) } static const struct of_device_id sofef00_panel_of_match[] = { - { // OnePlus 6 / enchilada - .compatible = "samsung,sofef00", - .data = &enchilada_panel_mode, - }, - { // OnePlus 6T / fajita - .compatible = "samsung,s6e3fc2x01", - .data = &fajita_panel_mode, - }, + { .compatible = "samsung,sofef00" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sofef00_panel_of_match); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 06381c628209..82db3daf4f81 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1027,27 +1027,28 @@ static const struct panel_desc auo_g070vvn01 = { }, }; -static const struct drm_display_mode auo_g101evn010_mode = { - .clock = 68930, - .hdisplay = 1280, - .hsync_start = 1280 + 82, - .hsync_end = 1280 + 82 + 2, - .htotal = 1280 + 82 + 2 + 84, - .vdisplay = 800, - .vsync_start = 800 + 8, - .vsync_end = 800 + 8 + 2, - .vtotal = 800 + 8 + 2 + 6, +static const struct display_timing auo_g101evn010_timing = { + .pixelclock = { 64000000, 68930000, 85000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 8, 64, 256 }, + .hback_porch = { 8, 64, 256 }, + .hsync_len = { 40, 168, 767 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 4, 8, 100 }, + .vback_porch = { 4, 8, 100 }, + .vsync_len = { 8, 16, 223 }, }; static const struct panel_desc auo_g101evn010 = { - .modes = &auo_g101evn010_mode, - .num_modes = 1, + .timings = &auo_g101evn010_timing, + .num_timings = 1, .bpc = 6, .size = { .width = 216, .height = 135, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; @@ -2140,13 +2141,14 @@ static const struct display_timing evervision_vgg644804_timing = { static const struct panel_desc evervision_vgg644804 = { .timings = &evervision_vgg644804_timing, .num_timings = 1, - .bpc = 8, + .bpc = 6, .size = { .width = 115, .height = 86, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, - .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, }; static const struct display_timing evervision_vgg804821_timing = { diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index 3385fd3ef41a..5d0dce10336b 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -29,7 +29,7 @@ static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfr static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { - struct panfrost_device *ptdev = dev_get_drvdata(dev); + struct panfrost_device *pfdev = dev_get_drvdata(dev); struct dev_pm_opp *opp; int err; @@ -40,7 +40,7 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, err = dev_pm_opp_set_rate(dev, *freq); if (!err) - ptdev->pfdevfreq.current_frequency = *freq; + pfdev->pfdevfreq.current_frequency = *freq; return err; } diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 6fbff516c1c1..01dff89bed4e 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -445,8 +445,8 @@ int panthor_device_resume(struct device *dev) drm_dev_enter(&ptdev->base, &cookie)) { panthor_gpu_resume(ptdev); panthor_mmu_resume(ptdev); - ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); - if (!ret) { + ret = panthor_fw_resume(ptdev); + if (!drm_WARN_ON(&ptdev->base, ret)) { panthor_sched_resume(ptdev); } else { panthor_mmu_suspend(ptdev); diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h index 22448abde992..6598d96c6d2a 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.h +++ b/drivers/gpu/drm/panthor/panthor_fw.h @@ -102,9 +102,9 @@ struct panthor_fw_cs_output_iface { #define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 #define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 #define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 -#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 -#define CS_STATUS_BLOCKED_REASON_RES 6 -#define CS_STATUS_BLOCKED_REASON_FLUSH 7 +#define CS_STATUS_BLOCKED_REASON_DEFERRED 4 +#define CS_STATUS_BLOCKED_REASON_RESOURCE 5 +#define CS_STATUS_BLOCKED_REASON_FLUSH 6 #define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) u32 status_blocked_reason; u32 status_wait_sync_value_hi; diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 0e6f94df690d..b57824abeb9e 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -780,6 +780,7 @@ out_enable_as: if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); + ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as); gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); } diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h index b7b3b3add166..a7a323dc5cf9 100644 --- a/drivers/gpu/drm/panthor/panthor_regs.h +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -133,8 +133,8 @@ #define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) #define GPU_COHERENCY_PROTOCOL 0x304 -#define GPU_COHERENCY_ACE 0 -#define GPU_COHERENCY_ACE_LITE 1 +#define GPU_COHERENCY_ACE_LITE 0 +#define GPU_COHERENCY_ACE 1 #define GPU_COHERENCY_NONE 31 #define MCU_CONTROL 0x700 diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 05c13102a8cb..d22889fbfa9c 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -359,7 +359,8 @@ int r300_mc_wait_for_idle(struct radeon_device *rdev) return -1; } -static void r300_gpu_init(struct radeon_device *rdev) +/* rs400_gpu_init also calls this! */ +void r300_gpu_init(struct radeon_device *rdev) { uint32_t gb_tile_config, tmp; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 1e00f6b99f94..8f5e07834fcc 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -165,6 +165,7 @@ void r200_set_safe_registers(struct radeon_device *rdev); */ extern int r300_init(struct radeon_device *rdev); extern void r300_fini(struct radeon_device *rdev); +extern void r300_gpu_init(struct radeon_device *rdev); extern int r300_suspend(struct radeon_device *rdev); extern int r300_resume(struct radeon_device *rdev); extern int r300_asic_reset(struct radeon_device *rdev, bool hard); diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 5b69cc8011b4..8d64ba18572e 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -775,8 +775,10 @@ static int radeon_audio_component_get_eld(struct device *kdev, int port, if (!dig->pin || dig->pin->id != port) continue; *enabled = true; + mutex_lock(&connector->eld_mutex); ret = drm_eld_size(connector->eld); memcpy(buf, connector->eld, min(max_bytes, ret)); + mutex_unlock(&connector->eld_mutex); break; } diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index d1871af967d4..2355a78e1b69 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -557,7 +557,7 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p) { int session_idx = -1; bool destroyed = false, created = false, allocated = false; - uint32_t tmp, handle = 0; + uint32_t tmp = 0, handle = 0; uint32_t *size = &tmp; int i, r = 0; diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d6c18fd740ec..13cd0a688a65 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -256,8 +256,22 @@ int rs400_mc_wait_for_idle(struct radeon_device *rdev) static void rs400_gpu_init(struct radeon_device *rdev) { - /* FIXME: is this correct ? */ - r420_pipes_init(rdev); + /* Earlier code was calling r420_pipes_init and then + * rs400_mc_wait_for_idle(rdev). The problem is that + * at least on my Mobility Radeon Xpress 200M RC410 card + * that ends up in this code path ends up num_gb_pipes == 3 + * while the card seems to have only one pipe. With the + * r420 pipe initialization method. + * + * Problems shown up as HyperZ glitches, see: + * https://bugs.freedesktop.org/show_bug.cgi?id=110897 + * + * Delegating initialization to r300 code seems to work + * and results in proper pipe numbers. The rs400 cards + * are said to be not r400, but r300 kind of cards. + */ + r300_gpu_init(rdev); + if (rs400_mc_wait_for_idle(rdev)) { pr_warn("rs400: Failed to wait MC idle while programming pipes. Bad things might happen. %08x\n", RREG32(RADEON_MC_STATUS)); diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c index 70d8ad065bfa..4c8fe83dd610 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c @@ -705,7 +705,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) ret = of_parse_phandle_with_fixed_args(np, vsps_prop_name, cells, i, &args); if (ret < 0) - goto error; + goto done; /* * Add the VSP to the list or update the corresponding existing @@ -743,13 +743,11 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) vsp->dev = rcdu; ret = rcar_du_vsp_init(vsp, vsps[i].np, vsps[i].crtcs_mask); - if (ret < 0) - goto error; + if (ret) + goto done; } - return 0; - -error: +done: for (i = 0; i < ARRAY_SIZE(vsps); ++i) of_node_put(vsps[i].np); diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c index 2dba7c5ffd2c..92f4261305bd 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c @@ -587,7 +587,7 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi, for (timeout = 10; timeout > 0; --timeout) { if ((rcar_mipi_dsi_read(dsi, PPICLSR) & PPICLSR_STPST) && (rcar_mipi_dsi_read(dsi, PPIDLSR) & PPIDLSR_STPST) && - (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK)) + (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK_PHY)) break; usleep_range(1000, 2000); diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h index f8114d11f2d1..a6b276f1d6ee 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h @@ -142,7 +142,6 @@ #define CLOCKSET1 0x101c #define CLOCKSET1_LOCK_PHY (1 << 17) -#define CLOCKSET1_LOCK (1 << 16) #define CLOCKSET1_CLKSEL (1 << 8) #define CLOCKSET1_CLKINSEL_EXTAL (0 << 2) #define CLOCKSET1_CLKINSEL_DIG (1 << 2) diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c index b99217b4e05d..90c6269ccd29 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c @@ -311,11 +311,11 @@ int rzg2l_du_modeset_init(struct rzg2l_du_device *rcdu) dev->mode_config.helper_private = &rzg2l_du_mode_config_helper; /* - * The RZ DU uses the VSP1 for memory access, and is limited - * to frame sizes of 1920x1080. + * The RZ DU was designed to support a frame size of 1920x1200 (landscape) + * or 1200x1920 (portrait). */ dev->mode_config.max_width = 1920; - dev->mode_config.max_height = 1080; + dev->mode_config.max_height = 1920; rcdu->num_crtcs = hweight8(rcdu->info->channels_mask); diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c index 10febea473cd..6cec796dd463 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c @@ -585,6 +585,9 @@ rzg2l_mipi_dsi_bridge_mode_valid(struct drm_bridge *bridge, if (mode->clock > 148500) return MODE_CLOCK_HIGH; + if (mode->clock < 5803) + return MODE_CLOCK_LOW; + return MODE_OK; } diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index b04538907f95..f576b1aa86d1 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -947,9 +947,6 @@ static void cdn_dp_pd_event_work(struct work_struct *work) { struct cdn_dp_device *dp = container_of(work, struct cdn_dp_device, event_work); - struct drm_connector *connector = &dp->connector; - enum drm_connector_status old_status; - int ret; mutex_lock(&dp->lock); @@ -1009,11 +1006,7 @@ static void cdn_dp_pd_event_work(struct work_struct *work) out: mutex_unlock(&dp->lock); - - old_status = connector->status; - connector->status = connector->funcs->detect(connector, false); - if (old_status != connector->status) - drm_kms_helper_hotplug_event(dp->drm_dev); + drm_connector_helper_hpd_irq_event(&dp->connector); } static int cdn_dp_pd_event(struct notifier_block *nb, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index cfe8b793d344..69ab8d4f289c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -52,16 +52,9 @@ rockchip_fb_create(struct drm_device *dev, struct drm_file *file, } if (drm_is_afbc(mode_cmd->modifier[0])) { - int ret, i; - ret = drm_gem_fb_afbc_init(dev, mode_cmd, afbc_fb); if (ret) { - struct drm_gem_object **obj = afbc_fb->base.obj; - - for (i = 0; i < info->num_planes; ++i) - drm_gem_object_put(obj[i]); - - kfree(afbc_fb); + drm_framebuffer_put(&afbc_fb->base); return ERR_PTR(ret); } } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 9873172e3fd3..5d7df4c3b08c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -33,7 +33,6 @@ #include <uapi/linux/videodev2.h> #include <dt-bindings/soc/rockchip,vop2.h> -#include "rockchip_drm_drv.h" #include "rockchip_drm_gem.h" #include "rockchip_drm_vop2.h" #include "rockchip_rgb.h" @@ -158,6 +157,7 @@ struct vop2_video_port { struct drm_crtc crtc; struct vop2 *vop2; struct clk *dclk; + struct clk *dclk_src; unsigned int id; const struct vop2_video_port_data *data; @@ -212,6 +212,7 @@ struct vop2 { struct clk *hclk; struct clk *aclk; struct clk *pclk; + struct clk *pll_hdmiphy0; /* optional internal rgb encoder */ struct rockchip_rgb *rgb; @@ -220,6 +221,8 @@ struct vop2 { struct vop2_win win[]; }; +#define VOP2_MAX_DCLK_RATE 600000000 + #define vop2_output_if_is_hdmi(x) ((x) == ROCKCHIP_VOP2_EP_HDMI0 || \ (x) == ROCKCHIP_VOP2_EP_HDMI1) @@ -550,6 +553,25 @@ static bool rockchip_vop2_mod_supported(struct drm_plane *plane, u32 format, if (modifier == DRM_FORMAT_MOD_INVALID) return false; + if (vop2->data->soc_id == 3568 || vop2->data->soc_id == 3566) { + if (vop2_cluster_window(win)) { + if (modifier == DRM_FORMAT_MOD_LINEAR) { + drm_dbg_kms(vop2->drm, + "Cluster window only supports format with afbc\n"); + return false; + } + } + } + + if (format == DRM_FORMAT_XRGB2101010 || format == DRM_FORMAT_XBGR2101010) { + if (vop2->data->soc_id == 3588) { + if (!rockchip_afbc(plane, modifier)) { + drm_dbg_kms(vop2->drm, "Only support 32 bpp format with afbc\n"); + return false; + } + } + } + if (modifier == DRM_FORMAT_MOD_LINEAR) return true; @@ -1033,6 +1055,9 @@ static void vop2_crtc_atomic_disable(struct drm_crtc *crtc, vop2_crtc_disable_irq(vp, VP_INT_DSP_HOLD_VALID); + if (vp->dclk_src) + clk_set_parent(vp->dclk, vp->dclk_src); + clk_disable_unprepare(vp->dclk); vop2->enable_count--; @@ -1320,6 +1345,12 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, &fb->format->format, afbc_en ? "AFBC" : "", &yrgb_mst); + if (vop2->data->soc_id > 3568) { + vop2_win_write(win, VOP2_WIN_AXI_BUS_ID, win->data->axi_bus_id); + vop2_win_write(win, VOP2_WIN_AXI_YRGB_R_ID, win->data->axi_yrgb_r_id); + vop2_win_write(win, VOP2_WIN_AXI_UV_R_ID, win->data->axi_uv_r_id); + } + if (vop2_cluster_window(win)) vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en); @@ -1408,10 +1439,8 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, rb_swap = vop2_win_rb_swap(fb->format->format); vop2_win_write(win, VOP2_WIN_RB_SWAP, rb_swap); - if (!vop2_cluster_window(win)) { - uv_swap = vop2_win_uv_swap(fb->format->format); - vop2_win_write(win, VOP2_WIN_UV_SWAP, uv_swap); - } + uv_swap = vop2_win_uv_swap(fb->format->format); + vop2_win_write(win, VOP2_WIN_UV_SWAP, uv_swap); if (fb->format->is_yuv) { vop2_win_write(win, VOP2_WIN_UV_VIR, DIV_ROUND_UP(fb->pitches[1], 4)); @@ -1721,9 +1750,9 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, else dclk_out_rate = v_pixclk >> 2; - dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); + dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000000); if (!dclk_rate) { - drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %ld KHZ\n", + drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %ld Hz\n", dclk_out_rate); return 0; } @@ -1738,9 +1767,9 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, * dclk_rate = N * dclk_core_rate N = (1,2,4 ), * we get a little factor here */ - dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); + dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000000); if (!dclk_rate) { - drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld KHZ\n", + drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld Hz\n", dclk_out_rate); return 0; } @@ -2049,6 +2078,27 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, vop2_vp_write(vp, RK3568_VP_MIPI_CTRL, 0); + /* + * Switch to HDMI PHY PLL as DCLK source for display modes up + * to 4K@60Hz, if available, otherwise keep using the system CRU. + */ + if (vop2->pll_hdmiphy0 && clock <= VOP2_MAX_DCLK_RATE) { + drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) { + struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder); + + if (rkencoder->crtc_endpoint_id == ROCKCHIP_VOP2_EP_HDMI0) { + if (!vp->dclk_src) + vp->dclk_src = clk_get_parent(vp->dclk); + + ret = clk_set_parent(vp->dclk, vop2->pll_hdmiphy0); + if (ret < 0) + drm_warn(vop2->drm, + "Could not switch to HDMI0 PHY PLL: %d\n", ret); + break; + } + } + } + clk_set_rate(vp->dclk, clock); vop2_post_config(crtc); @@ -2159,7 +2209,6 @@ static int vop2_find_start_mixer_id_for_vp(struct vop2 *vop2, u8 port_id) static void vop2_setup_cluster_alpha(struct vop2 *vop2, struct vop2_win *main_win) { - u32 offset = (main_win->data->phys_id * 0x10); struct vop2_alpha_config alpha_config; struct vop2_alpha alpha; struct drm_plane_state *bottom_win_pstate; @@ -2167,6 +2216,7 @@ static void vop2_setup_cluster_alpha(struct vop2 *vop2, struct vop2_win *main_wi u16 src_glb_alpha_val, dst_glb_alpha_val; bool premulti_en = false; bool swap = false; + u32 offset = 0; /* At one win mode, win0 is dst/bottom win, and win1 is a all zero src/top win */ bottom_win_pstate = main_win->base.state; @@ -2185,6 +2235,22 @@ static void vop2_setup_cluster_alpha(struct vop2 *vop2, struct vop2_win *main_wi vop2_parse_alpha(&alpha_config, &alpha); alpha.src_color_ctrl.bits.src_dst_swap = swap; + + switch (main_win->data->phys_id) { + case ROCKCHIP_VOP2_CLUSTER0: + offset = 0x0; + break; + case ROCKCHIP_VOP2_CLUSTER1: + offset = 0x10; + break; + case ROCKCHIP_VOP2_CLUSTER2: + offset = 0x20; + break; + case ROCKCHIP_VOP2_CLUSTER3: + offset = 0x30; + break; + } + vop2_writel(vop2, RK3568_CLUSTER0_MIX_SRC_COLOR_CTRL + offset, alpha.src_color_ctrl.val); vop2_writel(vop2, RK3568_CLUSTER0_MIX_DST_COLOR_CTRL + offset, @@ -2232,6 +2298,12 @@ static void vop2_setup_alpha(struct vop2_video_port *vp) struct vop2_win *win = to_vop2_win(plane); int zpos = plane->state->normalized_zpos; + /* + * Need to configure alpha from second layer. + */ + if (zpos == 0) + continue; + if (plane->state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) premulti_en = 1; else @@ -2308,7 +2380,10 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) struct drm_plane *plane; u32 layer_sel = 0; u32 port_sel; - unsigned int nlayer, ofs; + u8 layer_id; + u8 old_layer_id; + u8 layer_sel_id; + unsigned int ofs; u32 ovl_ctrl; int i; struct vop2_video_port *vp0 = &vop2->vps[0]; @@ -2352,9 +2427,30 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) for (i = 0; i < vp->id; i++) ofs += vop2->vps[i].nlayers; - nlayer = 0; drm_atomic_crtc_for_each_plane(plane, &vp->crtc) { struct vop2_win *win = to_vop2_win(plane); + struct vop2_win *old_win; + + layer_id = (u8)(plane->state->normalized_zpos + ofs); + + /* + * Find the layer this win bind in old state. + */ + for (old_layer_id = 0; old_layer_id < vop2->data->win_size; old_layer_id++) { + layer_sel_id = (layer_sel >> (4 * old_layer_id)) & 0xf; + if (layer_sel_id == win->data->layer_sel_id) + break; + } + + /* + * Find the win bind to this layer in old state + */ + for (i = 0; i < vop2->data->win_size; i++) { + old_win = &vop2->win[i]; + layer_sel_id = (layer_sel >> (4 * layer_id)) & 0xf; + if (layer_sel_id == old_win->data->layer_sel_id) + break; + } switch (win->data->phys_id) { case ROCKCHIP_VOP2_CLUSTER0: @@ -2399,17 +2495,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) break; } - layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(plane->state->normalized_zpos + ofs, - 0x7); - layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(plane->state->normalized_zpos + ofs, - win->data->layer_sel_id); - nlayer++; - } - - /* configure unused layers to 0x5 (reserved) */ - for (; nlayer < vp->nlayers; nlayer++) { - layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(nlayer + ofs, 0x7); - layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(nlayer + ofs, 5); + layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(layer_id, 0x7); + layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(layer_id, win->data->layer_sel_id); + /* + * When we bind a window from layerM to layerN, we also need to move the old + * window on layerN to layerM to avoid one window selected by two or more layers. + */ + layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(old_layer_id, 0x7); + layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(old_layer_id, old_win->data->layer_sel_id); } vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel); @@ -2444,9 +2537,11 @@ static void vop2_setup_dly_for_windows(struct vop2 *vop2) sdly |= FIELD_PREP(RK3568_SMART_DLY_NUM__ESMART1, dly); break; case ROCKCHIP_VOP2_SMART0: + case ROCKCHIP_VOP2_ESMART2: sdly |= FIELD_PREP(RK3568_SMART_DLY_NUM__SMART0, dly); break; case ROCKCHIP_VOP2_SMART1: + case ROCKCHIP_VOP2_ESMART3: sdly |= FIELD_PREP(RK3568_SMART_DLY_NUM__SMART1, dly); break; } @@ -2865,6 +2960,10 @@ static struct reg_field vop2_cluster_regs[VOP2_WIN_MAX_REG] = { [VOP2_WIN_Y2R_EN] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL0, 8, 8), [VOP2_WIN_R2Y_EN] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL0, 9, 9), [VOP2_WIN_CSC_MODE] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL0, 10, 11), + [VOP2_WIN_AXI_YRGB_R_ID] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL2, 0, 3), + [VOP2_WIN_AXI_UV_R_ID] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL2, 5, 8), + /* RK3588 only, reserved bit on rk3568*/ + [VOP2_WIN_AXI_BUS_ID] = REG_FIELD(RK3568_CLUSTER_CTRL, 13, 13), /* Scale */ [VOP2_WIN_SCALE_YRGB_X] = REG_FIELD(RK3568_CLUSTER_WIN_SCL_FACTOR_YRGB, 0, 15), @@ -2957,6 +3056,10 @@ static struct reg_field vop2_esmart_regs[VOP2_WIN_MAX_REG] = { [VOP2_WIN_YMIRROR] = REG_FIELD(RK3568_SMART_CTRL1, 31, 31), [VOP2_WIN_COLOR_KEY] = REG_FIELD(RK3568_SMART_COLOR_KEY_CTRL, 0, 29), [VOP2_WIN_COLOR_KEY_EN] = REG_FIELD(RK3568_SMART_COLOR_KEY_CTRL, 31, 31), + [VOP2_WIN_AXI_YRGB_R_ID] = REG_FIELD(RK3568_SMART_CTRL1, 4, 8), + [VOP2_WIN_AXI_UV_R_ID] = REG_FIELD(RK3568_SMART_CTRL1, 12, 16), + /* RK3588 only, reserved register on rk3568 */ + [VOP2_WIN_AXI_BUS_ID] = REG_FIELD(RK3588_SMART_AXI_CTRL, 1, 1), /* Scale */ [VOP2_WIN_SCALE_YRGB_X] = REG_FIELD(RK3568_SMART_REGION0_SCL_FACTOR_YRGB, 0, 15), @@ -3167,6 +3270,12 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(vop2->pclk); } + vop2->pll_hdmiphy0 = devm_clk_get_optional(vop2->dev, "pll_hdmiphy0"); + if (IS_ERR(vop2->pll_hdmiphy0)) { + drm_err(vop2->drm, "failed to get pll_hdmiphy0\n"); + return PTR_ERR(vop2->pll_hdmiphy0); + } + vop2->irq = platform_get_irq(pdev, 0); if (vop2->irq < 0) { drm_err(vop2->drm, "cannot find irq for vop2\n"); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h index 615a16196aff..130aaa40316d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h @@ -9,6 +9,7 @@ #include <linux/regmap.h> #include <drm/drm_modes.h> +#include "rockchip_drm_drv.h" #include "rockchip_drm_vop.h" #define VOP2_VP_FEATURE_OUTPUT_10BIT BIT(0) @@ -78,6 +79,9 @@ enum vop2_win_regs { VOP2_WIN_COLOR_KEY, VOP2_WIN_COLOR_KEY_EN, VOP2_WIN_DITHER_UP, + VOP2_WIN_AXI_BUS_ID, + VOP2_WIN_AXI_YRGB_R_ID, + VOP2_WIN_AXI_UV_R_ID, /* scale regs */ VOP2_WIN_SCALE_YRGB_X, @@ -140,6 +144,10 @@ struct vop2_win_data { unsigned int layer_sel_id; uint64_t feature; + uint8_t axi_bus_id; + uint8_t axi_yrgb_r_id; + uint8_t axi_uv_r_id; + unsigned int max_upscale_factor; unsigned int max_downscale_factor; const u8 dly[VOP2_DLY_MODE_MAX]; @@ -308,6 +316,7 @@ enum dst_factor_mode { #define RK3568_CLUSTER_WIN_CTRL0 0x00 #define RK3568_CLUSTER_WIN_CTRL1 0x04 +#define RK3568_CLUSTER_WIN_CTRL2 0x08 #define RK3568_CLUSTER_WIN_YRGB_MST 0x10 #define RK3568_CLUSTER_WIN_CBR_MST 0x14 #define RK3568_CLUSTER_WIN_VIR 0x18 @@ -330,6 +339,7 @@ enum dst_factor_mode { /* (E)smart register definition, offset relative to window base */ #define RK3568_SMART_CTRL0 0x00 #define RK3568_SMART_CTRL1 0x04 +#define RK3588_SMART_AXI_CTRL 0x08 #define RK3568_SMART_REGION0_CTRL 0x10 #define RK3568_SMART_REGION0_YRGB_MST 0x14 #define RK3568_SMART_REGION0_CBR_MST 0x18 diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 18efb3fe1c00..e473a8f8fd32 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -313,7 +313,7 @@ static const struct vop2_video_port_data rk3588_vop_video_ports[] = { * AXI1 is a read only bus. * * Every window on a AXI bus must assigned two unique - * read id(yrgb_id/uv_id, valid id are 0x1~0xe). + * read id(yrgb_r_id/uv_r_id, valid id are 0x1~0xe). * * AXI0: * Cluster0/1, Esmart0/1, WriteBack @@ -333,6 +333,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 0, .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, + .axi_bus_id = 0, + .axi_yrgb_r_id = 2, + .axi_uv_r_id = 3, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -349,6 +352,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_PRIMARY, + .axi_bus_id = 0, + .axi_yrgb_r_id = 6, + .axi_uv_r_id = 7, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -364,6 +370,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_PRIMARY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 2, + .axi_uv_r_id = 3, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -379,6 +388,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_PRIMARY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 6, + .axi_uv_r_id = 7, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -393,6 +405,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 2, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 0, + .axi_yrgb_r_id = 0x0a, + .axi_uv_r_id = 0x0b, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, @@ -406,6 +421,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 3, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 0, + .axi_yrgb_r_id = 0x0c, + .axi_uv_r_id = 0x01, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, @@ -419,6 +437,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 6, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 0x0a, + .axi_uv_r_id = 0x0b, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, @@ -432,6 +453,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 7, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 0x0c, + .axi_uv_r_id = 0x0d, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index c75302ca3427..f56e77e7f6d0 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -21,7 +21,7 @@ * */ -#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_GPU_SCHED_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _GPU_SCHED_TRACE_H_ #include <linux/stringify.h> @@ -106,7 +106,7 @@ TRACE_EVENT(drm_sched_job_wait_dep, __entry->seqno) ); -#endif +#endif /* _GPU_SCHED_TRACE_H_ */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index a75eede8bf8d..3e75fc1f6607 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -189,6 +189,7 @@ static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); + drm_sched_fence_scheduled(job->s_fence, NULL); drm_sched_fence_finished(job->s_fence, -ESRCH); WARN_ON(job->s_fence->parent); job->sched->ops->free_job(job); @@ -259,9 +260,16 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity) struct drm_sched_fence *s_fence = job->s_fence; dma_fence_get(&s_fence->finished); - if (!prev || dma_fence_add_callback(prev, &job->finish_cb, - drm_sched_entity_kill_jobs_cb)) + if (!prev || + dma_fence_add_callback(prev, &job->finish_cb, + drm_sched_entity_kill_jobs_cb)) { + /* + * Adding callback above failed. + * dma_fence_put() checks for NULL. + */ + dma_fence_put(prev); drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + } prev = &s_fence->finished; } @@ -360,17 +368,6 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity) } EXPORT_SYMBOL(drm_sched_entity_destroy); -/* drm_sched_entity_clear_dep - callback to clear the entities dependency */ -static void drm_sched_entity_clear_dep(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_entity *entity = - container_of(cb, struct drm_sched_entity, cb); - - entity->dependency = NULL; - dma_fence_put(f); -} - /* * drm_sched_entity_clear_dep - callback to clear the entities dependency and * wake up scheduler @@ -381,7 +378,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, struct drm_sched_entity *entity = container_of(cb, struct drm_sched_entity, cb); - drm_sched_entity_clear_dep(f, cb); + entity->dependency = NULL; + dma_fence_put(f); drm_sched_wakeup(entity->rq->sched); } @@ -434,13 +432,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) fence = dma_fence_get(&s_fence->scheduled); dma_fence_put(entity->dependency); entity->dependency = fence; - if (!dma_fence_add_callback(fence, &entity->cb, - drm_sched_entity_clear_dep)) - return true; - - /* Ignore it when it is already scheduled */ - dma_fence_put(fence); - return false; } if (!dma_fence_add_callback(entity->dependency, &entity->cb, diff --git a/drivers/gpu/drm/solomon/ssd130x-spi.c b/drivers/gpu/drm/solomon/ssd130x-spi.c index 84bfde31d172..fd1b858dcb78 100644 --- a/drivers/gpu/drm/solomon/ssd130x-spi.c +++ b/drivers/gpu/drm/solomon/ssd130x-spi.c @@ -151,7 +151,6 @@ static const struct of_device_id ssd130x_of_match[] = { }; MODULE_DEVICE_TABLE(of, ssd130x_of_match); -#if IS_MODULE(CONFIG_DRM_SSD130X_SPI) /* * The SPI core always reports a MODALIAS uevent of the form "spi:<dev>", even * if the device was registered via OF. This means that the module will not be @@ -160,7 +159,7 @@ MODULE_DEVICE_TABLE(of, ssd130x_of_match); * To workaround this issue, add a SPI device ID table. Even when this should * not be needed for this driver to match the registered SPI devices. */ -static const struct spi_device_id ssd130x_spi_table[] = { +static const struct spi_device_id ssd130x_spi_id[] = { /* ssd130x family */ { "sh1106", SH1106_ID }, { "ssd1305", SSD1305_ID }, @@ -175,14 +174,14 @@ static const struct spi_device_id ssd130x_spi_table[] = { { "ssd1331", SSD1331_ID }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(spi, ssd130x_spi_table); -#endif +MODULE_DEVICE_TABLE(spi, ssd130x_spi_id); static struct spi_driver ssd130x_spi_driver = { .driver = { .name = DRIVER_NAME, .of_match_table = ssd130x_of_match, }, + .id_table = ssd130x_spi_id, .probe = ssd130x_spi_probe, .remove = ssd130x_spi_remove, .shutdown = ssd130x_spi_shutdown, diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 6f51bcf774e2..e0fc12d514d7 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -880,7 +880,7 @@ static int ssd132x_update_rect(struct ssd130x_device *ssd130x, u8 n1 = buf[i * width + j]; u8 n2 = buf[i * width + j + 1]; - data_array[array_idx++] = (n2 << 4) | n1; + data_array[array_idx++] = (n2 & 0xf0) | (n1 >> 4); } } @@ -974,7 +974,7 @@ static void ssd130x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) { - unsigned int columns = DIV_ROUND_UP(ssd130x->height, SSD132X_SEGMENT_WIDTH); + unsigned int columns = DIV_ROUND_UP(ssd130x->width, SSD132X_SEGMENT_WIDTH); unsigned int height = ssd130x->height; memset(data_array, 0, columns * height); @@ -1037,7 +1037,7 @@ static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb, struct drm_format_conv_state *fmtcnv_state) { struct ssd130x_device *ssd130x = drm_to_ssd130x(fb->dev); - unsigned int dst_pitch = drm_rect_width(rect); + unsigned int dst_pitch; struct iosys_map dst; int ret = 0; @@ -1046,6 +1046,8 @@ static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb, rect->x2 = min_t(unsigned int, round_up(rect->x2, SSD132X_SEGMENT_WIDTH), ssd130x->width); + dst_pitch = drm_rect_width(rect); + ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) return ret; diff --git a/drivers/gpu/drm/sti/Makefile b/drivers/gpu/drm/sti/Makefile index f203ac5514ae..f778a4eee7c9 100644 --- a/drivers/gpu/drm/sti/Makefile +++ b/drivers/gpu/drm/sti/Makefile @@ -7,8 +7,6 @@ sti-drm-y := \ sti_compositor.o \ sti_crtc.o \ sti_plane.o \ - sti_crtc.o \ - sti_plane.o \ sti_hdmi.o \ sti_hdmi_tx3g4c28phy.o \ sti_dvo.o \ diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index 847470f747c0..3c8f3532c797 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1225,7 +1225,9 @@ static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size struct drm_connector *connector = hdmi->drm_connector; DRM_DEBUG_DRIVER("\n"); + mutex_lock(&connector->eld_mutex); memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); + mutex_unlock(&connector->eld_mutex); return 0; } diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index be61c9d1a4f0..51ca78551b57 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -1320,10 +1320,16 @@ static struct drm_plane *tegra_dc_add_shared_planes(struct drm_device *drm, if (wgrp->dc == dc->pipe) { for (j = 0; j < wgrp->num_windows; j++) { unsigned int index = wgrp->windows[j]; + enum drm_plane_type type; + + if (primary) + type = DRM_PLANE_TYPE_OVERLAY; + else + type = DRM_PLANE_TYPE_PRIMARY; plane = tegra_shared_plane_create(drm, dc, wgrp->index, - index); + index, type); if (IS_ERR(plane)) return plane; @@ -1331,10 +1337,8 @@ static struct drm_plane *tegra_dc_add_shared_planes(struct drm_device *drm, * Choose the first shared plane owned by this * head as the primary plane. */ - if (!primary) { - plane->type = DRM_PLANE_TYPE_PRIMARY; + if (!primary) primary = plane; - } } } } @@ -1388,7 +1392,10 @@ static void tegra_crtc_reset(struct drm_crtc *crtc) if (crtc->state) tegra_crtc_atomic_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &state->base); + if (state) + __drm_atomic_helper_crtc_reset(crtc, &state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static struct drm_crtc_state * diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index e0c2019a591b..3507dd6e9023 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -755,9 +755,9 @@ static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = { struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, struct tegra_dc *dc, unsigned int wgrp, - unsigned int index) + unsigned int index, + enum drm_plane_type type) { - enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY; struct tegra_drm *tegra = drm->dev_private; struct tegra_display_hub *hub = tegra->hub; struct tegra_shared_plane *plane; diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h index 23c4b2115ed1..a66f18c4facc 100644 --- a/drivers/gpu/drm/tegra/hub.h +++ b/drivers/gpu/drm/tegra/hub.h @@ -80,7 +80,8 @@ void tegra_display_hub_cleanup(struct tegra_display_hub *hub); struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, struct tegra_dc *dc, unsigned int wgrp, - unsigned int index); + unsigned int index, + enum drm_plane_type type); int tegra_display_hub_atomic_check(struct drm_device *drm, struct drm_atomic_state *state); diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 4860790666af..14ef61b44f47 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec) if (!client->group) { virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(nvdec->dev, iova); - if (err < 0) - return err; + if (!virt) + return -ENOMEM; } else { virt = tegra_drm_alloc(tegra, size, &iova); if (IS_ERR(virt)) diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 1e8ec50b759e..ff5a749710db 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -200,6 +200,11 @@ static const struct drm_encoder_helper_funcs tegra_rgb_encoder_helper_funcs = { .atomic_check = tegra_rgb_encoder_atomic_check, }; +static void tegra_dc_of_node_put(void *data) +{ + of_node_put(data); +} + int tegra_dc_rgb_probe(struct tegra_dc *dc) { struct device_node *np; @@ -207,7 +212,14 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) int err; np = of_get_child_by_name(dc->dev->of_node, "rgb"); - if (!np || !of_device_is_available(np)) + if (!np) + return -ENODEV; + + err = devm_add_action_or_reset(dc->dev, tegra_dc_of_node_put, np); + if (err < 0) + return err; + + if (!of_device_is_available(np)) return -ENODEV; rgb = devm_kzalloc(dc->dev, sizeof(*rgb), GFP_KERNEL); diff --git a/drivers/gpu/drm/tests/drm_client_modeset_test.c b/drivers/gpu/drm/tests/drm_client_modeset_test.c index 7516f6cb36e4..3e9518d7b8b7 100644 --- a/drivers/gpu/drm/tests/drm_client_modeset_test.c +++ b/drivers/gpu/drm/tests/drm_client_modeset_test.c @@ -95,6 +95,9 @@ static void drm_test_pick_cmdline_res_1920_1080_60(struct kunit *test) expected_mode = drm_mode_find_dmt(priv->drm, 1920, 1080, 60, false); KUNIT_ASSERT_NOT_NULL(test, expected_mode); + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_TRUE(test, drm_mode_parse_command_line_for_connector(cmdline, connector, diff --git a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c index 59c8408c453c..1cfcb597b088 100644 --- a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c +++ b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c @@ -7,6 +7,7 @@ #include <kunit/test.h> #include <drm/drm_connector.h> +#include <drm/drm_kunit_helpers.h> #include <drm/drm_modes.h> static const struct drm_connector no_connector = {}; @@ -955,8 +956,15 @@ struct drm_cmdline_tv_option_test { static void drm_test_cmdline_tv_options(struct kunit *test) { const struct drm_cmdline_tv_option_test *params = test->param_value; - const struct drm_display_mode *expected_mode = params->mode_fn(NULL); + struct drm_display_mode *expected_mode; struct drm_cmdline_mode mode = { }; + int ret; + + expected_mode = params->mode_fn(NULL); + KUNIT_ASSERT_NOT_NULL(test, expected_mode); + + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, drm_mode_parse_command_line_for_connector(params->cmdline, &no_connector, &mode)); diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index fd4215e2f982..925fbc2cda70 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -216,6 +216,9 @@ static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); KUNIT_EXPECT_NULL(test, shmem->sgt); + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); KUNIT_ASSERT_EQ(test, ret, 0); diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index 294773342e71..383fbe128348 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -46,7 +46,7 @@ static struct drm_display_mode *find_preferred_mode(struct drm_connector *connec struct drm_display_mode *mode, *preferred; mutex_lock(&drm->mode_config.mutex); - preferred = list_first_entry(&connector->modes, struct drm_display_mode, head); + preferred = list_first_entry_or_null(&connector->modes, struct drm_display_mode, head); list_for_each_entry(mode, &connector->modes, head) if (mode->type & DRM_MODE_TYPE_PREFERRED) preferred = mode; @@ -70,10 +70,17 @@ static int light_up_connector(struct kunit *test, state = drm_kunit_helper_atomic_state_alloc(test, drm, ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); +retry: conn_state = drm_atomic_get_connector_state(state, connector); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, crtc); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(ctx); + if (!ret) + goto retry; + } KUNIT_EXPECT_EQ(test, ret, 0); crtc_state = drm_atomic_get_crtc_state(state, crtc); @@ -105,9 +112,8 @@ static int set_connector_edid(struct kunit *test, struct drm_connector *connecto mutex_lock(&drm->mode_config.mutex); ret = connector->funcs->fill_modes(connector, 4096, 4096); mutex_unlock(&drm->mode_config.mutex); - KUNIT_ASSERT_GT(test, ret, 0); - return 0; + return ret; } static const struct drm_connector_hdmi_funcs dummy_connector_hdmi_funcs = { @@ -223,7 +229,7 @@ drm_atomic_helper_connector_hdmi_init(struct kunit *test, ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); return priv; } @@ -252,15 +258,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_changed(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; + preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -315,15 +322,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_not_changed(struct kunit *tes 8); KUNIT_ASSERT_NOT_NULL(test, priv); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; + preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -378,18 +386,18 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -444,7 +452,6 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode_vic_1(struct kunit *test) mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -490,18 +497,18 @@ static void drm_test_check_broadcast_rgb_full_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -558,7 +565,6 @@ static void drm_test_check_broadcast_rgb_full_cea_mode_vic_1(struct kunit *test) mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -606,18 +612,18 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -674,7 +680,6 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode_vic_1(struct kunit *te mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -724,20 +729,20 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); - - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + KUNIT_ASSERT_GT(test, ret, 0); preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -798,20 +803,20 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); - - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + KUNIT_ASSERT_GT(test, ret, 0); preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -869,23 +874,23 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_dvi_1080p, ARRAY_SIZE(test_edid_dvi_1080p)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_FALSE(test, info->is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -916,21 +921,21 @@ static void drm_test_check_tmds_char_rate_rgb_8bpc(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); - - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + KUNIT_ASSERT_GT(test, ret, 0); preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -963,21 +968,21 @@ static void drm_test_check_tmds_char_rate_rgb_10bpc(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); - - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + KUNIT_ASSERT_GT(test, ret, 0); preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1010,21 +1015,21 @@ static void drm_test_check_tmds_char_rate_rgb_12bpc(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); - - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + KUNIT_ASSERT_GT(test, ret, 0); preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1061,15 +1066,16 @@ static void drm_test_check_hdmi_funcs_reject_rate(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; + preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1117,19 +1123,18 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); @@ -1140,8 +1145,9 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 10, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1186,19 +1192,18 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); @@ -1212,8 +1217,9 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1254,15 +1260,12 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); @@ -1276,7 +1279,9 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) rate = mode->clock * 1500; KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1310,19 +1315,18 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1341,8 +1345,9 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1377,19 +1382,18 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1408,8 +1412,9 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1443,19 +1448,18 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1466,8 +1470,9 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1503,19 +1508,18 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1526,8 +1530,9 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index 04a6b8cc62ac..922c4b6ed1dc 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -320,8 +320,29 @@ static void kunit_action_drm_mode_destroy(void *ptr) } /** - * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC - for a KUnit test + * drm_kunit_add_mode_destroy_action() - Add a drm_destroy_mode kunit action + * @test: The test context object + * @mode: The drm_display_mode to destroy eventually + * + * Registers a kunit action that will destroy the drm_display_mode at + * the end of the test. + * + * If an error occurs, the drm_display_mode will be destroyed. + * + * Returns: + * 0 on success, an error code otherwise. + */ +int drm_kunit_add_mode_destroy_action(struct kunit *test, + struct drm_display_mode *mode) +{ + return kunit_add_action_or_reset(test, + kunit_action_drm_mode_destroy, + mode); +} +EXPORT_SYMBOL_GPL(drm_kunit_add_mode_destroy_action); + +/** + * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC for a KUnit test * @test: The test context object * @dev: DRM device * @video_code: CEA VIC of the mode diff --git a/drivers/gpu/drm/tests/drm_modes_test.c b/drivers/gpu/drm/tests/drm_modes_test.c index 6ed51f99e133..7ba646d87856 100644 --- a/drivers/gpu/drm/tests/drm_modes_test.c +++ b/drivers/gpu/drm/tests/drm_modes_test.c @@ -40,6 +40,7 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_NTSC, @@ -47,6 +48,9 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 60); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); @@ -70,6 +74,7 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *expected, *mode; + int ret; expected = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_NTSC, @@ -77,9 +82,15 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, expected); + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); + mode = drm_mode_analog_ntsc_480i(priv->drm); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode)); } @@ -87,6 +98,7 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_PAL, @@ -94,6 +106,9 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 50); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); @@ -117,6 +132,7 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *expected, *mode; + int ret; expected = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_PAL, @@ -124,9 +140,15 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, expected); + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); + mode = drm_mode_analog_pal_576i(priv->drm); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode)); } diff --git a/drivers/gpu/drm/tests/drm_probe_helper_test.c b/drivers/gpu/drm/tests/drm_probe_helper_test.c index bc09ff38aca1..db0e4f5df275 100644 --- a/drivers/gpu/drm/tests/drm_probe_helper_test.c +++ b/drivers/gpu/drm/tests/drm_probe_helper_test.c @@ -98,7 +98,7 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) struct drm_connector *connector = &priv->connector; struct drm_cmdline_mode *cmdline = &connector->cmdline_mode; struct drm_display_mode *mode; - const struct drm_display_mode *expected; + struct drm_display_mode *expected; size_t len; int ret; @@ -134,6 +134,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected)); KUNIT_EXPECT_TRUE(test, mode->type & DRM_MODE_TYPE_PREFERRED); + + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); } if (params->num_expected_modes >= 2) { @@ -145,6 +148,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected)); KUNIT_EXPECT_FALSE(test, mode->type & DRM_MODE_TYPE_PREFERRED); + + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); } mutex_unlock(&priv->drm->mode_config.mutex); diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c index 1ad711f8d2a8..45f22ead3e61 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.c +++ b/drivers/gpu/drm/tidss/tidss_dispc.c @@ -700,7 +700,7 @@ void dispc_k2g_set_irqenable(struct dispc_device *dispc, dispc_irq_t mask) { dispc_irq_t old_mask = dispc_k2g_read_irqenable(dispc); - /* clear the irqstatus for newly enabled irqs */ + /* clear the irqstatus for irqs that will be enabled */ dispc_k2g_clear_irqstatus(dispc, (mask ^ old_mask) & mask); dispc_k2g_vp_set_irqenable(dispc, 0, mask); @@ -708,6 +708,9 @@ void dispc_k2g_set_irqenable(struct dispc_device *dispc, dispc_irq_t mask) dispc_write(dispc, DISPC_IRQENABLE_SET, (1 << 0) | (1 << 7)); + /* clear the irqstatus for irqs that were disabled */ + dispc_k2g_clear_irqstatus(dispc, (mask ^ old_mask) & old_mask); + /* flush posted write */ dispc_k2g_read_irqenable(dispc); } @@ -780,24 +783,20 @@ static void dispc_k3_clear_irqstatus(struct dispc_device *dispc, dispc_irq_t clearmask) { unsigned int i; - u32 top_clear = 0; for (i = 0; i < dispc->feat->num_vps; ++i) { - if (clearmask & DSS_IRQ_VP_MASK(i)) { + if (clearmask & DSS_IRQ_VP_MASK(i)) dispc_k3_vp_write_irqstatus(dispc, i, clearmask); - top_clear |= BIT(i); - } } for (i = 0; i < dispc->feat->num_planes; ++i) { - if (clearmask & DSS_IRQ_PLANE_MASK(i)) { + if (clearmask & DSS_IRQ_PLANE_MASK(i)) dispc_k3_vid_write_irqstatus(dispc, i, clearmask); - top_clear |= BIT(4 + i); - } } if (dispc->feat->subrev == DISPC_K2G) return; - dispc_write(dispc, DISPC_IRQSTATUS, top_clear); + /* always clear the top level irqstatus */ + dispc_write(dispc, DISPC_IRQSTATUS, dispc_read(dispc, DISPC_IRQSTATUS)); /* Flush posted writes */ dispc_read(dispc, DISPC_IRQSTATUS); @@ -843,7 +842,7 @@ static void dispc_k3_set_irqenable(struct dispc_device *dispc, old_mask = dispc_k3_read_irqenable(dispc); - /* clear the irqstatus for newly enabled irqs */ + /* clear the irqstatus for irqs that will be enabled */ dispc_k3_clear_irqstatus(dispc, (old_mask ^ mask) & mask); for (i = 0; i < dispc->feat->num_vps; ++i) { @@ -868,6 +867,9 @@ static void dispc_k3_set_irqenable(struct dispc_device *dispc, if (main_disable) dispc_write(dispc, DISPC_IRQENABLE_CLR, main_disable); + /* clear the irqstatus for irqs that were disabled */ + dispc_k3_clear_irqstatus(dispc, (old_mask ^ mask) & old_mask); + /* Flush posted writes */ dispc_read(dispc, DISPC_IRQENABLE_SET); } @@ -2767,8 +2769,12 @@ static void dispc_init_errata(struct dispc_device *dispc) */ static void dispc_softreset_k2g(struct dispc_device *dispc) { + unsigned long flags; + + spin_lock_irqsave(&dispc->tidss->wait_lock, flags); dispc_set_irqenable(dispc, 0); dispc_read_and_clear_irqstatus(dispc); + spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags); for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx) VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0); diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c index 604334ef526a..d053dbb9d28c 100644 --- a/drivers/gpu/drm/tidss/tidss_irq.c +++ b/drivers/gpu/drm/tidss/tidss_irq.c @@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg) unsigned int id; dispc_irq_t irqstatus; + spin_lock(&tidss->wait_lock); irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc); + spin_unlock(&tidss->wait_lock); for (id = 0; id < tidss->num_crtcs; id++) { struct drm_crtc *crtc = tidss->crtcs[id]; diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index f6889f649bc1..ce17143d47a8 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -67,6 +67,7 @@ config DRM_OFDRM config DRM_PANEL_MIPI_DBI tristate "DRM support for MIPI DBI compatible panels" depends on DRM && SPI + select DRM_CLIENT_SELECTION select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_MIPI_DBI diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c index 751326e3d9c3..c7e81f2610f8 100644 --- a/drivers/gpu/drm/tiny/cirrus.c +++ b/drivers/gpu/drm/tiny/cirrus.c @@ -318,7 +318,6 @@ static void cirrus_pitch_set(struct cirrus_device *cirrus, unsigned int pitch) /* Enable extended blanking and pitch bits, and enable full memory */ cr1b = 0x22; cr1b |= (pitch >> 7) & 0x10; - cr1b |= (pitch >> 6) & 0x40; wreg_crt(cirrus, 0x1b, cr1b); cirrus_set_start_address(cirrus, 0); diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c index f753cdffe6f8..ac159e8127d5 100644 --- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c +++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c @@ -15,6 +15,7 @@ #include <linux/spi/spi.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -264,6 +265,7 @@ static const struct drm_driver panel_mipi_dbi_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, .fops = &panel_mipi_dbi_fops, DRM_GEM_DMA_DRIVER_OPS_VMAP, + DRM_FBDEV_DMA_DRIVER_OPS, .debugfs_init = mipi_dbi_debugfs_init, .name = "panel-mipi-dbi", .desc = "MIPI DBI compatible display panel", @@ -388,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_fbdev_dma_setup(drm, 0); + if (bpp == 16) + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565); + else + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888); return 0; } diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 1f78aa3d26bb..768dfea15aec 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -455,7 +455,7 @@ static void repaper_frame_fixed_repeat(struct repaper_epd *epd, u8 fixed_value, enum repaper_stage stage) { u64 start = local_clock(); - u64 end = start + (epd->factored_stage_time * 1000 * 1000); + u64 end = start + ((u64)epd->factored_stage_time * 1000 * 1000); do { repaper_frame_fixed(epd, fixed_value, stage); @@ -466,7 +466,7 @@ static void repaper_frame_data_repeat(struct repaper_epd *epd, const u8 *image, const u8 *mask, enum repaper_stage stage) { u64 start = local_clock(); - u64 end = start + (epd->factored_stage_time * 1000 * 1000); + u64 end = start + ((u64)epd->factored_stage_time * 1000 * 1000); do { repaper_frame_data(epd, image, mask, stage); diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index d19e10289428..07abaf27315f 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -284,7 +284,7 @@ static struct simpledrm_device *simpledrm_device_of_dev(struct drm_device *dev) static void simpledrm_device_release_clocks(void *res) { - struct simpledrm_device *sdev = simpledrm_device_of_dev(res); + struct simpledrm_device *sdev = res; unsigned int i; for (i = 0; i < sdev->clk_count; ++i) { @@ -382,7 +382,7 @@ static int simpledrm_device_init_clocks(struct simpledrm_device *sdev) static void simpledrm_device_release_regulators(void *res) { - struct simpledrm_device *sdev = simpledrm_device_of_dev(res); + struct simpledrm_device *sdev = res; unsigned int i; for (i = 0; i < sdev->regulator_count; ++i) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 3c07f4712d5c..b600be2a5c84 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + dma_resv_unlock(&fbo->base.base._resv); + kfree(fbo); + return ret; + } + if (fbo->base.resource) { ttm_resource_set_bo(fbo->base.resource, &fbo->base); bo->resource = NULL; @@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.bulk_move = NULL; } - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); - if (ret) { - kfree(fbo); - return ret; - } - ttm_bo_get(bo); fbo->bo = bo; diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 8504dbe19c1a..4ae9d33cf485 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -630,7 +630,6 @@ void ttm_pool_fini(struct ttm_pool *pool) } EXPORT_SYMBOL(ttm_pool_fini); -/* As long as pages are available make sure to release at least one */ static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { @@ -638,9 +637,12 @@ static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink, do num_freed += ttm_pool_shrink(); - while (!num_freed && atomic_long_read(&allocated_pages)); + while (num_freed < sc->nr_to_scan && + atomic_long_read(&allocated_pages)); - return num_freed; + sc->nr_scanned = num_freed; + + return num_freed ?: SHRINK_STOP; } /* Return the number of pages available or SHRINK_EMPTY if we have none */ diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 6d764ba88aab..53f258f39ceb 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -501,6 +501,9 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, cond_resched(); } while (!ret); + if (ret && ret != -ENOENT) + return ret; + spin_lock(&man->move_lock); fence = dma_fence_get(man->move); spin_unlock(&man->move_lock); diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 280a09a6e2ad..0f712eb685ba 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -126,9 +126,9 @@ static void udl_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); + drm_dev_unplug(dev); drm_kms_helper_poll_fini(dev); udl_drop_usb(dev); - drm_dev_unplug(dev); } /* diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 19e3ee7ac897..76816f2551c1 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -237,8 +237,8 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) if (v3d->ver >= 40) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, - V3D_SET_FIELD(cycle_count_reg, - V3D_PCTR_S0)); + V3D_SET_FIELD_VER(cycle_count_reg, + V3D_PCTR_S0, v3d->ver)); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1); } else { diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index d7ff1f5fa481..7c17108da7d2 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -286,11 +286,21 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) if (ret) return ret; + v3d->clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(v3d->clk)) + return dev_err_probe(dev, PTR_ERR(v3d->clk), "Failed to get V3D clock\n"); + + ret = clk_prepare_enable(v3d->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the V3D clock\n"); + return ret; + } + mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO); mask = DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH)); ret = dma_set_mask_and_coherent(dev, mask); if (ret) - return ret; + goto clk_disable; v3d->va_width = 30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_VA_WIDTH); @@ -310,28 +320,29 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) ret = PTR_ERR(v3d->reset); if (ret == -EPROBE_DEFER) - return ret; + goto clk_disable; v3d->reset = NULL; ret = map_regs(v3d, &v3d->bridge_regs, "bridge"); if (ret) { dev_err(dev, "Failed to get reset control or bridge regs\n"); - return ret; + goto clk_disable; } } if (v3d->ver < 41) { ret = map_regs(v3d, &v3d->gca_regs, "gca"); if (ret) - return ret; + goto clk_disable; } v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!v3d->mmu_scratch) { dev_err(dev, "Failed to allocate MMU scratch page\n"); - return -ENOMEM; + ret = -ENOMEM; + goto clk_disable; } ret = v3d_gem_init(drm); @@ -360,6 +371,8 @@ gem_destroy: v3d_gem_destroy(drm); dma_free: dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); +clk_disable: + clk_disable_unprepare(v3d->clk); return ret; } @@ -377,6 +390,8 @@ static void v3d_platform_drm_remove(struct platform_device *pdev) dma_free_wc(v3d->drm.dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); + + clk_disable_unprepare(v3d->clk); } static struct platform_driver v3d_platform_driver = { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 75b4725d49c7..d4b0549205c2 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -95,6 +95,12 @@ struct v3d_perfmon { u64 values[] __counted_by(ncounters); }; +enum v3d_irq { + V3D_CORE_IRQ, + V3D_HUB_IRQ, + V3D_MAX_IRQS, +}; + struct v3d_dev { struct drm_device drm; @@ -106,6 +112,8 @@ struct v3d_dev { bool single_irq_line; + int irq[V3D_MAX_IRQS]; + struct v3d_perfmon_info perfmon_info; void __iomem *hub_regs; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index da8faf3b9011..6b6ba7a68fcb 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -118,6 +118,8 @@ v3d_reset(struct v3d_dev *v3d) if (false) v3d_idle_axi(v3d, 0); + v3d_irq_disable(v3d); + v3d_idle_gca(v3d); v3d_reset_v3d(v3d); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 20bf33702c3c..b98e1a4b33c7 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -107,7 +107,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->bin_job->base, V3D_BIN); trace_v3d_bcl_irq(&v3d->drm, fence->seqno); + + v3d->bin_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -117,7 +120,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->render_job->base, V3D_RENDER); trace_v3d_rcl_irq(&v3d->drm, fence->seqno); + + v3d->render_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -127,7 +133,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->csd_job->base, V3D_CSD); trace_v3d_csd_irq(&v3d->drm, fence->seqno); + + v3d->csd_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -164,7 +173,10 @@ v3d_hub_irq(int irq, void *arg) v3d_job_update_stats(&v3d->tfu_job->base, V3D_TFU); trace_v3d_tfu_irq(&v3d->drm, fence->seqno); + + v3d->tfu_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -216,7 +228,7 @@ v3d_hub_irq(int irq, void *arg) int v3d_irq_init(struct v3d_dev *v3d) { - int irq1, ret, core; + int irq, ret, core; INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); @@ -227,17 +239,24 @@ v3d_irq_init(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); - irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1); - if (irq1 == -EPROBE_DEFER) - return irq1; - if (irq1 > 0) { - ret = devm_request_irq(v3d->drm.dev, irq1, + irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1); + if (irq == -EPROBE_DEFER) + return irq; + if (irq > 0) { + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d_core0", v3d); if (ret) goto fail; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_HUB_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ], v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d); if (ret) @@ -245,8 +264,12 @@ v3d_irq_init(struct v3d_dev *v3d) } else { v3d->single_irq_line = true; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d", v3d); if (ret) @@ -287,6 +310,12 @@ v3d_irq_disable(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + /* Finish any interrupt handler still in flight. */ + for (int i = 0; i < V3D_MAX_IRQS; i++) { + if (v3d->irq[i]) + synchronize_irq(v3d->irq[i]); + } + /* Clear any pending interrupts we might have left. */ for (core = 0; core < v3d->cores; core++) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 6ee56cbd3f1b..1abfd738a601 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -240,17 +240,18 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) for (i = 0; i < ncounters; i++) { u32 source = i / 4; - u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0); + u32 channel = V3D_SET_FIELD_VER(perfmon->counters[i], V3D_PCTR_S0, + v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S1); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S1, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S2); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S2, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S3); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S3, v3d->ver); V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel); } @@ -383,6 +384,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, { struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct drm_v3d_perfmon_destroy *req = data; + struct v3d_dev *v3d = v3d_priv->v3d; struct v3d_perfmon *perfmon; mutex_lock(&v3d_priv->perfmon.lock); @@ -392,6 +394,10 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, if (!perfmon) return -EINVAL; + /* If the active perfmon is being destroyed, stop it first */ + if (perfmon == v3d->active_perfmon) + v3d_perfmon_stop(v3d, perfmon, false); + v3d_perfmon_put(perfmon); return 0; diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 1b1a62ad9585..6da3c69082bd 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -15,6 +15,14 @@ fieldval & field##_MASK; \ }) +#define V3D_SET_FIELD_VER(value, field, ver) \ + ({ \ + typeof(ver) _ver = (ver); \ + u32 fieldval = (value) << field##_SHIFT(_ver); \ + WARN_ON((fieldval & ~field##_MASK(_ver)) != 0); \ + fieldval & field##_MASK(_ver); \ + }) + #define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ field##_SHIFT) @@ -354,18 +362,15 @@ #define V3D_V4_PCTR_0_SRC_28_31 0x0067c #define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \ 4 * (x)) -# define V3D_PCTR_S0_MASK V3D_MASK(6, 0) -# define V3D_V7_PCTR_S0_MASK V3D_MASK(7, 0) -# define V3D_PCTR_S0_SHIFT 0 -# define V3D_PCTR_S1_MASK V3D_MASK(14, 8) -# define V3D_V7_PCTR_S1_MASK V3D_MASK(15, 8) -# define V3D_PCTR_S1_SHIFT 8 -# define V3D_PCTR_S2_MASK V3D_MASK(22, 16) -# define V3D_V7_PCTR_S2_MASK V3D_MASK(23, 16) -# define V3D_PCTR_S2_SHIFT 16 -# define V3D_PCTR_S3_MASK V3D_MASK(30, 24) -# define V3D_V7_PCTR_S3_MASK V3D_MASK(31, 24) -# define V3D_PCTR_S3_SHIFT 24 +# define V3D_PCTR_S0_MASK(ver) (((ver) >= 71) ? V3D_MASK(7, 0) : V3D_MASK(6, 0)) +# define V3D_PCTR_S0_SHIFT(ver) 0 +# define V3D_PCTR_S1_MASK(ver) (((ver) >= 71) ? V3D_MASK(15, 8) : V3D_MASK(14, 8)) +# define V3D_PCTR_S1_SHIFT(ver) 8 +# define V3D_PCTR_S2_MASK(ver) (((ver) >= 71) ? V3D_MASK(23, 16) : V3D_MASK(22, 16)) +# define V3D_PCTR_S2_SHIFT(ver) 16 +# define V3D_PCTR_S3_MASK(ver) (((ver) >= 71) ? V3D_MASK(31, 24) : V3D_MASK(30, 24)) +# define V3D_PCTR_S3_SHIFT(ver) 24 + #define V3D_PCTR_CYCLE_COUNT(ver) ((ver >= 71) ? 0 : 32) /* Output values of the counters. */ diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 4f935f1d50a9..c9c88d3ad669 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -191,7 +191,6 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_dev *v3d = job->v3d; struct v3d_file_priv *file = job->file->driver_priv; struct v3d_stats *global_stats = &v3d->queue[queue].stats; - struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); unsigned long flags; @@ -201,7 +200,12 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) else preempt_disable(); - v3d_stats_update(local_stats, now); + /* Don't update the local stats if the file context has already closed */ + if (file) + v3d_stats_update(&file->stats[queue], now); + else + drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n"); + v3d_stats_update(global_stats, now); if (IS_ENABLED(CONFIG_LOCKDEP)) @@ -319,11 +323,15 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) struct drm_device *dev = &v3d->drm; struct dma_fence *fence; + if (unlikely(job->base.base.s_fence->finished.error)) + return NULL; + + v3d->tfu_job = job; + fence = v3d_fence_create(v3d, V3D_TFU); if (IS_ERR(fence)) return NULL; - v3d->tfu_job = job; if (job->base.irq_fence) dma_fence_put(job->base.irq_fence); job->base.irq_fence = dma_fence_get(fence); @@ -361,6 +369,9 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int i, csd_cfg0_reg; + if (unlikely(job->base.base.s_fence->finished.error)) + return NULL; + v3d->csd_job = job; v3d_invalidate_caches(v3d); @@ -403,7 +414,8 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); struct drm_v3d_submit_csd *args = &indirect_csd->job->args; - u32 *wg_counts; + struct v3d_dev *v3d = job->base.v3d; + u32 num_batches, *wg_counts; v3d_get_bo_vaddr(bo); v3d_get_bo_vaddr(indirect); @@ -416,8 +428,17 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; + + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * + (wg_counts[0] * wg_counts[1] * wg_counts[2]); + + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) + args->cfg[4] = num_batches - 1; + else + args->cfg[4] = num_batches; + + WARN_ON(args->cfg[4] == ~0); for (int i = 0; i < 3; i++) { /* 0xffffffff indicates that the uniform rewrite is not needed */ @@ -711,11 +732,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NOMINAL; } -/* If the current address or return address have changed, then the GPU - * has probably made progress and we should delay the reset. This - * could fail if the GPU got in an infinite loop in the CL, but that - * is pretty unlikely outside of an i-g-t testcase. - */ +static void +v3d_sched_skip_reset(struct drm_sched_job *sched_job) +{ + struct drm_gpu_scheduler *sched = sched_job->sched; + + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); +} + static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -725,9 +751,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); + /* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { *timedout_ctca = ctca; *timedout_ctra = ctra; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -767,11 +800,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); - /* If we've made progress, skip reset and let the timer get - * rearmed. + /* If we've made progress, skip reset, add the job to the pending + * list, and let the timer get rearmed. */ if (job->timedout_batches != batches) { job->timedout_batches = batches; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c index e70d7c3076ac..f0ddc223c1f8 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c @@ -75,24 +75,30 @@ int vc4_mock_atomic_add_output(struct kunit *test, int ret; encoder = vc4_find_encoder_by_type(drm, type); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder); + if (!encoder) + return -ENODEV; crtc = vc4_find_crtc_for_encoder(test, drm, encoder); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc); + if (!crtc) + return -ENODEV; output = encoder_to_vc4_dummy_output(encoder); conn = &output->connector; conn_state = drm_atomic_get_connector_state(state, conn); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + if (IS_ERR(conn_state)) + return PTR_ERR(conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, crtc); - KUNIT_EXPECT_EQ(test, ret, 0); + if (ret) + return ret; crtc_state = drm_atomic_get_crtc_state(state, crtc); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); ret = drm_atomic_set_mode_for_crtc(crtc_state, &default_mode); - KUNIT_EXPECT_EQ(test, ret, 0); + if (ret) + return ret; crtc_state->active = true; @@ -113,26 +119,32 @@ int vc4_mock_atomic_del_output(struct kunit *test, int ret; encoder = vc4_find_encoder_by_type(drm, type); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder); + if (!encoder) + return -ENODEV; crtc = vc4_find_crtc_for_encoder(test, drm, encoder); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc); + if (!crtc) + return -ENODEV; crtc_state = drm_atomic_get_crtc_state(state, crtc); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); crtc_state->active = false; ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL); - KUNIT_ASSERT_EQ(test, ret, 0); + if (ret) + return ret; output = encoder_to_vc4_dummy_output(encoder); conn = &output->connector; conn_state = drm_atomic_get_connector_state(state, conn); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + if (IS_ERR(conn_state)) + return PTR_ERR(conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, NULL); - KUNIT_ASSERT_EQ(test, ret, 0); + if (ret) + return ret; return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 7e0a5ea7ab85..6b83d02b5d62 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -2192,9 +2192,9 @@ static int vc4_hdmi_audio_get_eld(struct device *dev, void *data, struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); struct drm_connector *connector = &vc4_hdmi->connector; - mutex_lock(&vc4_hdmi->mutex); + mutex_lock(&connector->eld_mutex); memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); - mutex_unlock(&vc4_hdmi->mutex); + mutex_unlock(&connector->eld_mutex); return 0; } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 64c236169db8..5dc8eeaf7123 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -194,6 +194,13 @@ struct virtio_gpu_framebuffer { #define to_virtio_gpu_framebuffer(x) \ container_of(x, struct virtio_gpu_framebuffer, base) +struct virtio_gpu_plane_state { + struct drm_plane_state base; + struct virtio_gpu_fence *fence; +}; +#define to_virtio_gpu_plane_state(x) \ + container_of(x, struct virtio_gpu_plane_state, base) + struct virtio_gpu_queue { struct virtqueue *vq; spinlock_t qlock; diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index a72a2dbda031..7acd38b962c6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -66,11 +66,28 @@ uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc) return format; } +static struct +drm_plane_state *virtio_gpu_plane_duplicate_state(struct drm_plane *plane) +{ + struct virtio_gpu_plane_state *new; + + if (WARN_ON(!plane->state)) + return NULL; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &new->base); + + return &new->base; +} + static const struct drm_plane_funcs virtio_gpu_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .reset = drm_atomic_helper_plane_reset, - .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_duplicate_state = virtio_gpu_plane_duplicate_state, .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, }; @@ -138,11 +155,13 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane, struct drm_device *dev = plane->dev; struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; struct virtio_gpu_object *bo; vgfb = to_virtio_gpu_framebuffer(plane->state->fb); + vgplane_st = to_virtio_gpu_plane_state(plane->state); bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); - if (vgfb->fence) { + if (vgplane_st->fence) { struct virtio_gpu_object_array *objs; objs = virtio_gpu_array_alloc(1); @@ -151,13 +170,11 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane, virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); virtio_gpu_array_lock_resv(objs); virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, - width, height, objs, vgfb->fence); + width, height, objs, + vgplane_st->fence); virtio_gpu_notify(vgdev); - - dma_fence_wait_timeout(&vgfb->fence->f, true, + dma_fence_wait_timeout(&vgplane_st->fence->f, true, msecs_to_jiffies(50)); - dma_fence_put(&vgfb->fence->f); - vgfb->fence = NULL; } else { virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, width, height, NULL, NULL); @@ -247,20 +264,23 @@ static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, struct drm_device *dev = plane->dev; struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; struct virtio_gpu_object *bo; if (!new_state->fb) return 0; vgfb = to_virtio_gpu_framebuffer(new_state->fb); + vgplane_st = to_virtio_gpu_plane_state(new_state); bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); if (!bo || (plane->type == DRM_PLANE_TYPE_PRIMARY && !bo->guest_blob)) return 0; - if (bo->dumb && (plane->state->fb != new_state->fb)) { - vgfb->fence = virtio_gpu_fence_alloc(vgdev, vgdev->fence_drv.context, + if (bo->dumb) { + vgplane_st->fence = virtio_gpu_fence_alloc(vgdev, + vgdev->fence_drv.context, 0); - if (!vgfb->fence) + if (!vgplane_st->fence) return -ENOMEM; } @@ -270,15 +290,15 @@ static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, static void virtio_gpu_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *state) { - struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; if (!state->fb) return; - vgfb = to_virtio_gpu_framebuffer(state->fb); - if (vgfb->fence) { - dma_fence_put(&vgfb->fence->f); - vgfb->fence = NULL; + vgplane_st = to_virtio_gpu_plane_state(state); + if (vgplane_st->fence) { + dma_fence_put(&vgplane_st->fence->f); + vgplane_st->fence = NULL; } } @@ -291,6 +311,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_output *output = NULL; struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; struct virtio_gpu_object *bo = NULL; uint32_t handle; @@ -303,6 +324,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, if (plane->state->fb) { vgfb = to_virtio_gpu_framebuffer(plane->state->fb); + vgplane_st = to_virtio_gpu_plane_state(plane->state); bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); handle = bo->hw_res_handle; } else { @@ -322,11 +344,9 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, (vgdev, 0, plane->state->crtc_w, plane->state->crtc_h, - 0, 0, objs, vgfb->fence); + 0, 0, objs, vgplane_st->fence); virtio_gpu_notify(vgdev); - dma_fence_wait(&vgfb->fence->f, true); - dma_fence_put(&vgfb->fence->f); - vgfb->fence = NULL; + dma_fence_wait(&vgplane_st->fence->f, true); } if (plane->state->fb != old_state->fb) { diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index e7441b227b3c..3d6785d081f2 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -98,7 +98,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t) s64 delta = drm_fixp_mul(b_fp - a_fp, t); - return drm_fixp2int(a_fp + delta); + return drm_fixp2int_round(a_fp + delta); } static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 40b4d084e3ce..91b589a497d0 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -198,7 +198,7 @@ static int vkms_crtc_atomic_check(struct drm_crtc *crtc, i++; } - vkms_state->active_planes = kcalloc(i, sizeof(plane), GFP_KERNEL); + vkms_state->active_planes = kcalloc(i, sizeof(*vkms_state->active_planes), GFP_KERNEL); if (!vkms_state->active_planes) return -ENOMEM; vkms_state->num_active_planes = i; diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 0c1a713b7b7b..be642ee739c4 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -245,17 +245,19 @@ static int __init vkms_init(void) if (!config) return -ENOMEM; - default_config = config; - config->cursor = enable_cursor; config->writeback = enable_writeback; config->overlay = enable_overlay; ret = vkms_create(config); - if (ret) + if (ret) { kfree(config); + return ret; + } - return ret; + default_config = config; + + return 0; } static void vkms_destroy(struct vkms_config *config) @@ -279,9 +281,10 @@ static void vkms_destroy(struct vkms_config *config) static void __exit vkms_exit(void) { - if (default_config->dev) - vkms_destroy(default_config); + if (!default_config) + return; + vkms_destroy(default_config); kfree(default_config); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index a0e433fbcba6..e8e49f13cfa2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -51,11 +51,13 @@ static void vmw_bo_release(struct vmw_bo *vbo) mutex_lock(&res->dev_priv->cmdbuf_mutex); (void)vmw_resource_reserve(res, false, true); vmw_resource_mob_detach(res); + if (res->dirty) + res->func->dirty_free(res); if (res->coherent) vmw_bo_dirty_release(res->guest_memory_bo); res->guest_memory_bo = NULL; res->guest_memory_offset = 0; - vmw_resource_unreserve(res, false, false, false, NULL, + vmw_resource_unreserve(res, true, false, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -73,9 +75,9 @@ static void vmw_bo_free(struct ttm_buffer_object *bo) { struct vmw_bo *vbo = to_vmw_bo(&bo->base); - WARN_ON(vbo->dirty); WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_release(vbo); + WARN_ON(vbo->dirty); kfree(vbo); } @@ -443,7 +445,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, if (params->pin) ttm_bo_pin(&vmw_bo->tbo); - ttm_bo_unreserve(&vmw_bo->tbo); + if (!params->keep_resv) + ttm_bo_unreserve(&vmw_bo->tbo); return 0; } @@ -848,9 +851,9 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo) vmw_bo_placement_set(bo, domain, domain); } -void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +int vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) { - xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL); + return xa_err(xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL)); } void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 43b5439ec9f7..940c0a0b9c45 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -56,8 +56,9 @@ struct vmw_bo_params { u32 domain; u32 busy_domain; enum ttm_bo_type bo_type; - size_t size; bool pin; + bool keep_resv; + size_t size; struct dma_resv *resv; struct sg_table *sg; }; @@ -141,7 +142,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); void vmw_bo_swap_notify(struct ttm_buffer_object *bo); -void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +int vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 2825dd3149ed..2e84e1029732 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -401,7 +401,8 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_kernel, .size = PAGE_SIZE, - .pin = true + .pin = true, + .keep_resv = true, }; /* @@ -413,10 +414,6 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); - vmw_bo_pin_reserved(vbo, true); - ret = ttm_bo_kmap(&vbo->tbo, 0, 1, &map); if (likely(ret == 0)) { result = ttm_kmap_obj_virtual(&map, &dummy); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 2e52d73eba48..ea741bc4ac3f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -4086,6 +4086,23 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv, return 0; } +/* + * DMA fence callback to remove a seqno_waiter + */ +struct seqno_waiter_rm_context { + struct dma_fence_cb base; + struct vmw_private *dev_priv; +}; + +static void seqno_waiter_rm_cb(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct seqno_waiter_rm_context *ctx = + container_of(cb, struct seqno_waiter_rm_context, base); + + vmw_seqno_waiter_remove(ctx->dev_priv); + kfree(ctx); +} + int vmw_execbuf_process(struct drm_file *file_priv, struct vmw_private *dev_priv, void __user *user_commands, void *kernel_commands, @@ -4266,6 +4283,15 @@ int vmw_execbuf_process(struct drm_file *file_priv, } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); + struct seqno_waiter_rm_context *ctx = + kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx->dev_priv = dev_priv; + vmw_seqno_waiter_add(dev_priv); + if (dma_fence_add_callback(&fence->base, &ctx->base, + seqno_waiter_rm_cb) < 0) { + vmw_seqno_waiter_remove(dev_priv); + kfree(ctx); + } } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index b9857f37ca1a..ed5015ced392 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -206,6 +206,7 @@ struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, .bo_type = ttm_bo_type_sg, .size = attach->dmabuf->size, .pin = false, + .keep_resv = true, .resv = attach->dmabuf->resv, .sg = table, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 10d596cb4b40..5f99f7437ae6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -750,6 +750,7 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); struct vmw_bo *old_bo = NULL; struct vmw_bo *new_bo = NULL; + struct ww_acquire_ctx ctx; s32 hotspot_x, hotspot_y; int ret; @@ -769,9 +770,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, if (du->cursor_surface) du->cursor_age = du->cursor_surface->snooper.age; + ww_acquire_init(&ctx, &reservation_ww_class); + if (!vmw_user_object_is_null(&old_vps->uo)) { old_bo = vmw_user_object_buffer(&old_vps->uo); - ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL); + ret = ttm_bo_reserve(&old_bo->tbo, false, false, &ctx); if (ret != 0) return; } @@ -779,9 +782,14 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, if (!vmw_user_object_is_null(&vps->uo)) { new_bo = vmw_user_object_buffer(&vps->uo); if (old_bo != new_bo) { - ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL); - if (ret != 0) + ret = ttm_bo_reserve(&new_bo->tbo, false, false, &ctx); + if (ret != 0) { + if (old_bo) { + ttm_bo_unreserve(&old_bo->tbo); + ww_acquire_fini(&ctx); + } return; + } } else { new_bo = NULL; } @@ -803,10 +811,12 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x, hotspot_y); } - if (old_bo) - ttm_bo_unreserve(&old_bo->tbo); if (new_bo) ttm_bo_unreserve(&new_bo->tbo); + if (old_bo) + ttm_bo_unreserve(&old_bo->tbo); + + ww_acquire_fini(&ctx); du->cursor_x = new_state->crtc_x + du->set_gui_x; du->cursor_y = new_state->crtc_y + du->set_gui_y; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index a73af8a355fb..c4d5fe5f330f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -273,7 +273,7 @@ int vmw_user_resource_lookup_handle(struct vmw_private *dev_priv, goto out_bad_resource; res = converter->base_obj_to_res(base); - kref_get(&res->kref); + vmw_resource_reference(res); *p_res = res; ret = 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index a01ca3226d0a..69dfe69ce0f8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -896,7 +896,8 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_device, .size = size, - .pin = true + .pin = false, + .keep_resv = true, }; if (!vmw_shader_id_ok(user_key, shader_type)) @@ -906,10 +907,6 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto out; - ret = ttm_bo_reserve(&buf->tbo, false, true, NULL); - if (unlikely(ret != 0)) - goto no_reserve; - /* Map and copy shader bytecode. */ ret = ttm_bo_kmap(&buf->tbo, 0, PFN_UP(size), &map); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 5721c74da3e0..d7a8070330ba 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -658,7 +658,7 @@ static void vmw_user_surface_free(struct vmw_resource *res) struct vmw_user_surface *user_srf = container_of(srf, struct vmw_user_surface, srf); - WARN_ON_ONCE(res->dirty); + WARN_ON(res->dirty); if (user_srf->master) drm_master_put(&user_srf->master); kfree(srf->offsets); @@ -689,8 +689,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) * Dumb buffers own the resource and they'll unref the * resource themselves */ - if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb) - return; + WARN_ON(res && res->guest_memory_bo && res->guest_memory_bo->is_dumb); vmw_resource_unreference(&res); } @@ -871,7 +870,12 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, vmw_resource_unreference(&res); goto out_unlock; } - vmw_bo_add_detached_resource(res->guest_memory_bo, res); + + ret = vmw_bo_add_detached_resource(res->guest_memory_bo, res); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } } tmp = vmw_resource_reference(&srf->res); @@ -1670,6 +1674,14 @@ vmw_gb_surface_define_internal(struct drm_device *dev, } + if (res->guest_memory_bo) { + ret = vmw_bo_add_detached_resource(res->guest_memory_bo, res); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } + } + tmp = vmw_resource_reference(res); ret = ttm_prime_object_init(tfile, res->guest_memory_size, &user_srf->prime, VMW_RES_SURFACE, @@ -1684,7 +1696,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->handle = user_srf->prime.base.handle; rep->backup_size = res->guest_memory_size; if (res->guest_memory_bo) { - vmw_bo_add_detached_resource(res->guest_memory_bo, res); rep->buffer_map_handle = drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node); rep->buffer_size = res->guest_memory_bo->tbo.base.size; @@ -2358,12 +2369,19 @@ int vmw_dumb_create(struct drm_file *file_priv, vbo = res->guest_memory_bo; vbo->is_dumb = true; vbo->dumb_surface = vmw_res_to_srf(res); - + drm_gem_object_put(&vbo->tbo.base); + /* + * Unset the user surface dtor since this in not actually exposed + * to userspace. The suface is owned via the dumb_buffer's GEM handle + */ + struct vmw_user_surface *usurf = container_of(vbo->dumb_surface, + struct vmw_user_surface, srf); + usurf->prime.base.refcount_release = NULL; err: if (res) vmw_resource_unreference(&res); - if (ret) - ttm_ref_object_base_unref(tfile, arg.rep.handle); + + ttm_ref_object_base_unref(tfile, arg.rep.handle); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 621d98b376bb..5553892d7c3e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -572,15 +572,14 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv, .busy_domain = domain, .bo_type = ttm_bo_type_kernel, .size = bo_size, - .pin = true + .pin = true, + .keep_resv = true, }; ret = vmw_bo_create(dev_priv, &bo_params, &vbo); if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); ret = vmw_ttm_populate(vbo->tbo.bdev, vbo->tbo.ttm, &ctx); if (likely(ret == 0)) { struct vmw_ttm_tt *vmw_tt = diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 7bbe46a98ff1..93e742c1f21e 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe Graphics" - depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) + depends on DRM && PCI && MMU + depends on KUNIT || !KUNIT select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h index 8f86a16dc577..f58198cf2cf6 100644 --- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -52,6 +52,7 @@ struct guc_ct_buffer_desc { #define GUC_CTB_STATUS_OVERFLOW (1 << 0) #define GUC_CTB_STATUS_UNDERFLOW (1 << 1) #define GUC_CTB_STATUS_MISMATCH (1 << 2) +#define GUC_CTB_STATUS_DISABLED (1 << 3) u32 reserved[13]; } __packed; static_assert(sizeof(struct guc_ct_buffer_desc) == 64); diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index cb6c7598824b..9c4cf050059a 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -29,7 +29,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), NULL, size, start, end, - ttm_bo_type_kernel, flags); + ttm_bo_type_kernel, flags, 0); if (IS_ERR(bo)) { err = PTR_ERR(bo); bo = NULL; diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index c6e0c8d77a70..e164e2d71e11 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -96,6 +96,8 @@ int xe_display_create(struct xe_device *xe) spin_lock_init(&xe->display.fb_tracking.lock); xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); + if (!xe->display.hotplug.dp_wq) + return -ENOMEM; return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } @@ -352,6 +354,36 @@ void xe_display_pm_suspend(struct xe_device *xe) __xe_display_pm_suspend(xe, false); } +void xe_display_pm_shutdown(struct xe_device *xe) +{ + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) + return; + + intel_power_domains_disable(xe); + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (has_display(xe)) { + drm_kms_helper_poll_disable(&xe->drm); + intel_display_driver_disable_user_access(xe); + intel_display_driver_suspend(xe); + } + + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); + intel_hpd_cancel_work(xe); + + if (has_display(xe)) + intel_display_driver_suspend_access(xe); + + intel_encoder_suspend_all(display); + intel_encoder_shutdown_all(display); + + intel_opregion_suspend(display, PCI_D3cold); + + intel_dmc_suspend(xe); +} + void xe_display_pm_runtime_suspend(struct xe_device *xe) { if (!xe->info.probe_display) @@ -376,6 +408,19 @@ void xe_display_pm_suspend_late(struct xe_device *xe) intel_display_power_suspend_late(xe); } +void xe_display_pm_shutdown_late(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + /* + * The only requirement is to reboot with display DC states disabled, + * for now leaving all display power wells in the INIT power domain + * enabled. + */ + intel_power_domains_driver_remove(xe); +} + void xe_display_pm_resume_early(struct xe_device *xe) { if (!xe->info.probe_display) diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index bed55fd26f30..17afa537aee5 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -35,7 +35,9 @@ void xe_display_irq_reset(struct xe_device *xe); void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); void xe_display_pm_suspend(struct xe_device *xe); +void xe_display_pm_shutdown(struct xe_device *xe); void xe_display_pm_suspend_late(struct xe_device *xe); +void xe_display_pm_shutdown_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); void xe_display_pm_resume(struct xe_device *xe); void xe_display_pm_runtime_suspend(struct xe_device *xe); @@ -66,7 +68,9 @@ static inline void xe_display_irq_reset(struct xe_device *xe) {} static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} static inline void xe_display_pm_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_shutdown(struct xe_device *xe) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} +static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} static inline void xe_display_pm_resume(struct xe_device *xe) {} static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f99d901a3214..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -48,11 +42,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d if (!vma) return false; + /* Set scanout flag for WC mapping */ obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_ALIGN(size), ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT); + XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT); if (IS_ERR(obj)) { kfree(vma); return false; @@ -73,5 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { - /* TODO: add xe specific flush_map() for dsb buffer object. */ + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + + /* + * The memory barrier here is to ensure coherency of DSB vs MMIO, + * both for weak ordering archs and discrete cards. + */ + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index b58fc4ba2aac..0558b106f8b6 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -153,7 +153,10 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, } vma->dpt = dpt; - vma->node = dpt->ggtt_node; + vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -203,8 +206,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { - vma->node = bo->ggtt_node; + if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) { + vma->node = bo->ggtt_node[ggtt->tile->id]; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; @@ -318,8 +321,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: @@ -333,10 +334,12 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { + u8 tile_id = vma->node->ggtt->tile->id; + if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || - vma->bo->ggtt_node->base.start != vma->node->base.start) + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) || + vma->bo->ggtt_node[tile_id]->base.start != vma->node->base.start) xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index a50ab9eae40a..f99d38cc5d8e 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -194,8 +194,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, to_intel_plane(crtc->base.primary); struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); struct drm_framebuffer *fb; struct i915_vma *vma; @@ -241,14 +239,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); plane_config->vma = vma; - - /* - * Flip to the newly created mapping ASAP, so we can re-use the - * first part of GGTT for WOPCM, prevent flickering, and prevent - * the lookup of sysmem scratch pages. - */ - plane->check_plane(crtc_state, plane_state); - plane->async_flip(plane, crtc_state, plane_state, true); return; nofb: diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 10ec2920d31b..d4033278be9f 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1) +#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) #define MI_INVALIDATE_TLB REG_BIT(18) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7c78496e6213..192e571348f6 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -53,7 +53,6 @@ #define RING_CTL(base) XE_REG((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ -#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_START_UDW(base) XE_REG((base) + 0x48) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5404de2aea54..c160b015d178 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -157,6 +157,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 23f7dc5bbe99..51fd40ffafcb 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -128,7 +128,7 @@ struct xe_reg_mcr { * options. */ #define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \ - .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ + .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ }) static inline bool xe_reg_is_valid(struct xe_reg r) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 79be73b4a02b..bf3f97d0c9c7 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -47,7 +47,7 @@ static void read_l3cc_table(struct xe_gt *gt, u32 reg_val; u32 ret; - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); for (i = 0; i < info->num_mocs_regs; i++) { @@ -72,7 +72,7 @@ static void read_l3cc_table(struct xe_gt *gt, KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc, "l3cc idx=%u has incorrect val.\n", i); } - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); } static void read_mocs_table(struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 84e327b56925..5f745d9ed6cc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -702,6 +702,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } + /* Reject BO eviction if BO is bound to current VM. */ + if (evict && ctx->resv) { + struct drm_gpuvm_bo *vm_bo; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + + if (xe_vm_resv(vm) == ctx->resv && + xe_vm_in_preempt_fence_mode(vm)) { + ret = -EBUSY; + goto out; + } + } + } + /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -1115,6 +1130,8 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) { struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_tile *tile; + u8 id; if (bo->ttm.base.import_attach) drm_prime_gem_destroy(&bo->ttm.base, NULL); @@ -1122,8 +1139,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node && bo->ggtt_node->base.size) - xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); + for_each_tile(tile, xe, id) + if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size) + xe_ggtt_remove_bo(tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS if (bo->client) @@ -1293,6 +1311,10 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return ERR_PTR(-EINVAL); } + /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */ + if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) + return ERR_PTR(-EINVAL); + if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) || @@ -1439,7 +1461,8 @@ static struct xe_bo * __xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, - u16 cpu_caching, enum ttm_bo_type type, u32 flags) + u16 cpu_caching, enum ttm_bo_type type, u32 flags, + u64 alignment) { struct xe_bo *bo = NULL; int err; @@ -1468,6 +1491,8 @@ __xe_bo_create_locked(struct xe_device *xe, if (IS_ERR(bo)) return bo; + bo->min_align = alignment; + /* * Note that instead of taking a reference no the drm_gpuvm_resv_bo(), * to ensure the shared resv doesn't disappear under the bo, the bo @@ -1480,19 +1505,29 @@ __xe_bo_create_locked(struct xe_device *xe, bo->vm = vm; if (bo->flags & XE_BO_FLAG_GGTT) { - if (!tile && flags & XE_BO_FLAG_STOLEN) - tile = xe_device_get_root_tile(xe); + struct xe_tile *t; + u8 id; - xe_assert(xe, tile); + if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) { + if (!tile && flags & XE_BO_FLAG_STOLEN) + tile = xe_device_get_root_tile(xe); - if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { - err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, - start + bo->size, U64_MAX); - } else { - err = xe_ggtt_insert_bo(tile->mem.ggtt, bo); + xe_assert(xe, tile); + } + + for_each_tile(t, xe, id) { + if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t))) + continue; + + if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { + err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, + start + bo->size, U64_MAX); + } else { + err = xe_ggtt_insert_bo(t->mem.ggtt, bo); + } + if (err) + goto err_unlock_put_bo; } - if (err) - goto err_unlock_put_bo; } return bo; @@ -1508,16 +1543,18 @@ struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, u64 alignment) { - return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags); + return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, + flags, alignment); } struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags) { - return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags); + return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, + flags, 0); } struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, @@ -1527,7 +1564,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, cpu_caching, ttm_bo_type_device, - flags | XE_BO_FLAG_USER); + flags | XE_BO_FLAG_USER, 0); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -1551,6 +1588,17 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile size_t size, u64 offset, enum ttm_bo_type type, u32 flags) { + return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset, + type, flags, 0); +} + +struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, + struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags, + u64 alignment) +{ struct xe_bo *bo; int err; u64 start = offset == ~0ull ? 0 : offset; @@ -1561,7 +1609,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile flags |= XE_BO_FLAG_GGTT; bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS, + alignment); if (IS_ERR(bo)) return bo; @@ -1975,6 +2024,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct xe_file *xef = to_xe_file(file); struct drm_xe_gem_create *args = data; struct xe_vm *vm = NULL; + ktime_t end = 0; struct xe_bo *bo; unsigned int bo_flags; u32 handle; @@ -2047,6 +2097,10 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; + } + +retry: + if (vm) { err = xe_vm_lock(vm, true); if (err) goto out_vm; @@ -2060,6 +2114,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) { err = PTR_ERR(bo); + if (xe_vm_validate_should_retry(NULL, err, &end)) + goto retry; goto out_vm; } @@ -2333,14 +2389,18 @@ void xe_bo_put_commit(struct llist_head *deferred) void xe_bo_put(struct xe_bo *bo) { + struct xe_tile *tile; + u8 id; + might_sleep(); if (bo) { #ifdef CONFIG_PROC_FS if (bo->client) might_lock(&bo->client->bos_lock); #endif - if (bo->ggtt_node && bo->ggtt_node->ggtt) - might_lock(&bo->ggtt_node->ggtt->lock); + for_each_tile(tile, xe_bo_device(bo), id) + if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) + might_lock(&bo->ggtt_node[id]->ggtt->lock); drm_gem_object_put(&bo->ttm.base); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 6e4be52306df..d04159c59846 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -39,10 +39,22 @@ #define XE_BO_FLAG_NEEDS_64K BIT(15) #define XE_BO_FLAG_NEEDS_2M BIT(16) #define XE_BO_FLAG_GGTT_INVALIDATE BIT(17) +#define XE_BO_FLAG_GGTT0 BIT(18) +#define XE_BO_FLAG_GGTT1 BIT(19) +#define XE_BO_FLAG_GGTT2 BIT(20) +#define XE_BO_FLAG_GGTT3 BIT(21) +#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \ + XE_BO_FLAG_GGTT1 | \ + XE_BO_FLAG_GGTT2 | \ + XE_BO_FLAG_GGTT3) + /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) #define XE_BO_FLAG_INTERNAL_64K BIT(31) +#define XE_BO_FLAG_GGTTx(tile) \ + (XE_BO_FLAG_GGTT0 << (tile)->id) + #define XE_PTE_SHIFT 12 #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT) #define XE_PTE_MASK (XE_PAGE_SIZE - 1) @@ -77,7 +89,7 @@ struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags); + enum ttm_bo_type type, u32 flags, u64 alignment); struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); @@ -94,6 +106,12 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 offset, enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, + struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags, + u64 alignment); struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, enum ttm_bo_type type, u32 flags); @@ -188,14 +206,24 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) } static inline u32 -xe_bo_ggtt_addr(struct xe_bo *bo) +__xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { - if (XE_WARN_ON(!bo->ggtt_node)) + struct xe_ggtt_node *ggtt_node = bo->ggtt_node[tile_id]; + + if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(bo->ggtt_node->base.size > bo->size); - XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); - return bo->ggtt_node->base.start; + XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); + return ggtt_node->base.start; +} + +static inline u32 +xe_bo_ggtt_addr(struct xe_bo *bo) +{ + xe_assert(xe_bo_device(bo), bo->tile); + + return __xe_bo_ggtt_addr(bo, bo->tile->id); } int xe_bo_vmap(struct xe_bo *bo); @@ -314,7 +342,6 @@ static inline unsigned int xe_sg_segment_size(struct device *dev) #define i915_gem_object_flush_if_display(obj) ((void)(obj)) -#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) /** * xe_bo_is_mem_type - Whether the bo currently resides in the given * TTM memory type @@ -329,4 +356,3 @@ static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type) return bo->ttm.resource->mem_type == mem_type; } #endif -#endif diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 8fb2be061003..6a40eedd9db1 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -152,11 +152,17 @@ int xe_bo_restore_kernel(struct xe_device *xe) } if (bo->flags & XE_BO_FLAG_GGTT) { - struct xe_tile *tile = bo->tile; + struct xe_tile *tile; + u8 id; - mutex_lock(&tile->mem.ggtt->lock); - xe_ggtt_map_bo(tile->mem.ggtt, bo); - mutex_unlock(&tile->mem.ggtt->lock); + for_each_tile(tile, xe, id) { + if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) + continue; + + mutex_lock(&tile->mem.ggtt->lock); + xe_ggtt_map_bo(tile->mem.ggtt, bo); + mutex_unlock(&tile->mem.ggtt->lock); + } } /* diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 2ed558ac2264..aa298d33c250 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -13,6 +13,7 @@ #include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_placement.h> +#include "xe_device_types.h" #include "xe_ggtt_types.h" struct xe_device; @@ -39,8 +40,8 @@ struct xe_bo { struct ttm_place placements[XE_BO_MAX_PLACEMENTS]; /** @placement: current placement for this BO */ struct ttm_placement placement; - /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct xe_ggtt_node *ggtt_node; + /** @ggtt_node: Array of GGTT nodes if this BO is mapped in the GGTTs */ + struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE]; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ @@ -76,6 +77,11 @@ struct xe_bo { /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ struct list_head vram_userfault_link; + + /** @min_align: minimum alignment needed for this BO if different + * from default + */ + u64 min_align; }; #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index fe4319eb13fd..278ce1c37b39 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -147,7 +147,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, return -EINVAL; if (xe->wedged.mode == wedged_mode) - return 0; + return size; xe->wedged.mode = wedged_mode; @@ -156,6 +156,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, ret = xe_guc_ads_scheduler_policy_toggle_reset(>->uc.guc.ads); if (ret) { xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n"); + xe_pm_runtime_put(xe); return -EIO; } } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 85aa3ab0da3b..e412a70323cc 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -104,11 +104,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_puts(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(ss->ct, &p); - /* - * Don't add a new section header here because the mesa debug decoder - * tool expects the context information to be in the 'GuC CT' section. - */ - /* drm_puts(&p, "\n**** Contexts ****\n"); */ + drm_puts(&p, "\n**** Contexts ****\n"); xe_guc_exec_queue_snapshot_print(ss->ge, &p); drm_puts(&p, "\n**** Job ****\n"); @@ -201,7 +197,6 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); struct xe_device *xe = coredump_to_xe(coredump); - unsigned int fw_ref; /* * NB: Despite passing a GFP_ flags parameter here, more allocations are done @@ -215,12 +210,11 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_pm_runtime_get(xe); /* keep going if fw fails as we still want to save the memory and SW data */ - fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); xe_vm_snapshot_capture_delayed(ss->vm); xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); xe_pm_runtime_put(xe); @@ -247,9 +241,8 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; - bool cookie; int i; + bool cookie; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -272,7 +265,8 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, } /* keep going if fw fails as we still want to save the memory and SW data */ - fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); ss->ct = xe_guc_ct_snapshot_capture(&guc->ct, true); ss->ge = xe_guc_exec_queue_snapshot_capture(q); @@ -290,7 +284,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, queue_work(system_unbound_wq, &ss->work); - xe_force_wake_put(gt_to_fw(q->gt), fw_ref); + xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); } @@ -337,42 +331,34 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 * - * The output is split to multiple lines because some print targets, e.g. dmesg - * cannot handle arbitrarily long lines. Note also that printing to dmesg in - * piece-meal fashion is not possible, each separate call to drm_puts() has a - * line-feed automatically added! Therefore, the entire output line must be - * constructed in a local buffer first, then printed in one atomic output call. + * The output is split into multiple calls to drm_puts() because some print + * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may + * add newlines, as is the case with dmesg: each drm_puts() call creates a + * separate line. * * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. * - * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down. - * * @p: the printer object to output to * @prefix: optional prefix to add to output string + * @suffix: optional suffix to add at the end. 0 disables it and is + * not added to the output, which is useful when using multiple calls + * to dump data to @p * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; - /* - * Splitting blobs across multiple lines is not compatible with the mesa - * debug decoder tool. Note that even dropping the explicit '\n' below - * doesn't help because the GuC log is so big some underlying implementation - * still splits the lines at 512K characters. So just bail completely for - * the moment. - */ - return; - #define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -404,7 +390,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; drm_puts(p, line_buff); @@ -416,10 +401,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } } + if (suffix) + line_buff[line_pos++] = suffix; + if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; - drm_puts(p, line_buff); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index a4eebc285fc8..b231c8ad799f 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -26,7 +26,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index bb85208cf1a9..2e1d6d248d2e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -37,6 +37,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" @@ -374,6 +375,11 @@ err: return ERR_PTR(err); } +static bool xe_driver_flr_disabled(struct xe_device *xe) +{ + return xe_mmio_read32(xe_root_mmio_gt(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS; +} + /* * The driver-initiated FLR is the highest level of reset that we can trigger * from within the driver. It is different from the PCI FLR in that it doesn't @@ -387,17 +393,12 @@ err: * if/when a new instance of i915 is bound to the device it will do a full * re-init anyway. */ -static void xe_driver_flr(struct xe_device *xe) +static void __xe_driver_flr(struct xe_device *xe) { const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */ struct xe_gt *gt = xe_root_mmio_gt(xe); int ret; - if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) { - drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); - return; - } - drm_dbg(&xe->drm, "Triggering Driver-FLR\n"); /* @@ -438,6 +439,16 @@ static void xe_driver_flr(struct xe_device *xe) xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); } +static void xe_driver_flr(struct xe_device *xe) +{ + if (xe_driver_flr_disabled(xe)) { + drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); + return; + } + + __xe_driver_flr(xe); +} + static void xe_driver_flr_fini(void *arg) { struct xe_device *xe = arg; @@ -538,6 +549,7 @@ static void update_device_info(struct xe_device *xe) /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; @@ -694,7 +706,9 @@ int xe_device_probe(struct xe_device *xe) } /* Allocate and map stolen after potential VRAM resize */ - xe_ttm_stolen_mgr_init(xe); + err = xe_ttm_stolen_mgr_init(xe); + if (err) + return err; /* * Now that GT is initialized (TTM in particular), @@ -706,6 +720,12 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err; + for_each_tile(tile, xe, id) { + err = xe_tile_init(tile); + if (err) + goto err; + } + for_each_gt(gt, xe, id) { last_gt = id; @@ -789,6 +809,24 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + + drm_dbg(&xe->drm, "Shutting down device\n"); + + if (xe_driver_flr_disabled(xe)) { + xe_display_pm_shutdown(xe); + + xe_irq_suspend(xe); + + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); + + xe_display_pm_shutdown_late(xe); + } else { + /* BOOM! */ + __xe_driver_flr(xe); + } } /** @@ -835,31 +873,37 @@ void xe_device_td_flush(struct xe_device *xe) if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) return; - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { + gt = xe_root_mmio_gt(xe); + if (XE_WA(gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ xe_device_l2_flush(xe); - return; - } - - for_each_gt(gt, xe, id) { - if (xe_gt_is_media_type(gt)) - continue; - - if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) - return; - - xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); - /* - * FIXME: We can likely do better here with our choice of - * timeout. Currently we just assume the worst case, i.e. 150us, - * which is believed to be sufficient to cover the worst case - * scenario on current platforms if all cache entries are - * transient and need to be flushed.. - */ - if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0, - 150, NULL, false)) - xe_gt_err_once(gt, "TD flush timeout\n"); - - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + } else { + xe_guc_pc_apply_flush_freq_limit(>->uc.guc.pc); + + /* Execute TDF flush on all graphics GTs */ + for_each_gt(gt, xe, id) { + if (xe_gt_is_media_type(gt)) + continue; + + if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) + return; + + xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* + * FIXME: We can likely do better here with our choice of + * timeout. Currently we just assume the worst case, i.e. 150us, + * which is believed to be sufficient to cover the worst case + * scenario on current platforms if all cache entries are + * transient and need to be flushed.. + */ + if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0, + 150, NULL, false)) + xe_gt_err_once(gt, "TD flush timeout\n"); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + } + + xe_guc_pc_remove_flush_freq_limit(&xe_root_mmio_gt(xe)->uc.guc.pc); } } diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 68f309f5e981..78204578443f 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -58,7 +58,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) * 1) Avoid pinning in a placement not accessible to some importers. * 2) Pinning in VRAM requires PIN accounting which is a to-do. */ - if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) { + if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) { drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n"); return -EINVAL; } @@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { - struct dma_buf *dma_buf = attach->dmabuf; - struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); - - if (!xe_bo_is_vram(bo)) { + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index fb52a23e28f8..a89fbfbdab32 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -135,8 +135,8 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, XE_WARN_ON(bo->client); XE_WARN_ON(!list_empty(&bo->client_link)); - spin_lock(&client->bos_lock); bo->client = xe_drm_client_get(client); + spin_lock(&client->bos_lock); list_add_tail(&bo->client_link, &client->bos_list); spin_unlock(&client->bos_lock); } diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 1608a55edc84..a2577672f4e3 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -46,20 +46,4 @@ xe_force_wake_assert_held(struct xe_force_wake *fw, xe_gt_assert(fw->gt, fw->awake_domains & domain); } -/** - * xe_force_wake_ref_has_domain - verifies if the domains are in fw_ref - * @fw_ref : the force_wake reference - * @domain : forcewake domain to verify - * - * This function confirms whether the @fw_ref includes a reference to the - * specified @domain. - * - * Return: true if domain is refcounted. - */ -static inline bool -xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains domain) -{ - return fw_ref & domain; -} - #endif diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 904cf47925aa..ed9183599e31 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -28,10 +28,10 @@ "\n" \ "#endif\n" -static void print_usage(FILE *f) +static void print_usage(FILE *f, const char *progname) { fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n", - program_invocation_short_name); + progname); } static void print_parse_error(const char *err_msg, const char *line, @@ -144,7 +144,7 @@ int main(int argc, const char *argv[]) if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); - print_usage(stderr); + print_usage(stderr, argv[0]); return 1; } diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ff19eca5d358..76e1092f51d9 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -198,6 +198,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -254,6 +261,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); if (err) @@ -594,10 +605,10 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) u64 start; u64 offset, pte; - if (XE_WARN_ON(!bo->ggtt_node)) + if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id])) return; - start = bo->ggtt_node->base.start; + start = bo->ggtt_node[ggtt->tile->id]->base.start; for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); @@ -608,15 +619,16 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { + u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE; + u8 tile_id = ggtt->tile->id; int err; - u64 alignment = XE_PAGE_SIZE; if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node)) { + if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); return 0; } @@ -626,19 +638,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); - bo->ggtt_node = xe_ggtt_node_init(ggtt); - if (IS_ERR(bo->ggtt_node)) { - err = PTR_ERR(bo->ggtt_node); - bo->ggtt_node = NULL; + bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node[tile_id])) { + err = PTR_ERR(bo->ggtt_node[tile_id]); + bo->ggtt_node[tile_id] = NULL; goto out; } mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, - alignment, 0, start, end, 0); + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, + bo->size, alignment, 0, start, end, 0); if (err) { - xe_ggtt_node_fini(bo->ggtt_node); - bo->ggtt_node = NULL; + xe_ggtt_node_fini(bo->ggtt_node[tile_id]); + bo->ggtt_node[tile_id] = NULL; } else { xe_ggtt_map_bo(ggtt, bo); } @@ -687,13 +699,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - if (XE_WARN_ON(!bo->ggtt_node)) + u8 tile_id = ggtt->tile->id; + + if (XE_WARN_ON(!bo->ggtt_node[tile_id])) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); - xe_ggtt_node_remove(bo->ggtt_node, + xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 64b2ae6839db..400b2e9e89ab 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -51,7 +51,15 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) { - drm_sched_resubmit_jobs(&sched->base); + struct drm_sched_job *s_job; + + list_for_each_entry(s_job, &sched->base.pending_list, list) { + struct drm_sched_fence *s_fence = s_job->s_fence; + struct dma_fence *hw_fence = s_fence->parent; + + if (hw_fence && !dma_fence_is_signaled(hw_fence)) + sched->base.ops->run_job(s_job); + } } static inline bool diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 6fbea70d3d36..feb680d127e6 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -564,6 +564,28 @@ void xe_gsc_remove(struct xe_gsc *gsc) xe_gsc_proxy_remove(gsc); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. The proxy init is the last step, so we can just wait on + * that + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index e282b9ef6ec4..c31fe24c4b66 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_remove(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 2d6ea8c01445..85801de4fab1 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index c511ade6b863..e2498aa6de18 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -13,6 +13,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); void xe_gsc_proxy_remove(struct xe_gsc *gsc); +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index b940688c3613..3b53d46aad54 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -32,6 +32,7 @@ #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_vf.h" #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" @@ -113,7 +114,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } @@ -379,9 +380,7 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_gt(gt); xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(>->global_invl_lock); @@ -390,6 +389,8 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + xe_mocs_init_early(gt); + return 0; } @@ -469,6 +470,8 @@ static int all_fw_domain_init(struct xe_gt *gt) goto err_hw_fence_irq; xe_gt_mcr_set_implicit_defaults(gt); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); err = xe_gt_clock_init(gt); @@ -589,17 +592,15 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(>->fence_irq[i]); } - err = xe_gt_pagefault_init(gt); + err = xe_gt_sysfs_init(gt); if (err) return err; - xe_mocs_init_early(gt); - - err = xe_gt_sysfs_init(gt); + err = gt_fw_domain_init(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = xe_gt_pagefault_init(gt); if (err) return err; @@ -647,6 +648,9 @@ static int do_gt_reset(struct xe_gt *gt) { int err; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return xe_gt_sriov_vf_reset(gt); + xe_gsc_wa_14015076503(gt, true); xe_mmio_write32(gt, GDRST, GRDOM_FULL); @@ -764,6 +768,9 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_msg; + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); @@ -824,7 +831,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) { XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_uc_stop_prepare(>->uc); + xe_uc_suspend_prepare(>->uc); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); } @@ -861,6 +868,13 @@ err_msg: return err; } +void xe_gt_shutdown(struct xe_gt *gt) +{ + xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + do_gt_reset(gt); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); +} + /** * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. * @gt: the GT object diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index ee138e9768a2..881f1cbc2c49 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -48,6 +48,7 @@ void xe_gt_record_user_engines(struct xe_gt *gt); void xe_gt_suspend_prepare(struct xe_gt *gt); int xe_gt_suspend(struct xe_gt *gt); +void xe_gt_shutdown(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index ab76973f3e1e..a05fde2c7b12 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -32,6 +32,7 @@ * Xe's Freq provides a sysfs API for frequency management: * * device/tile#/gt#/freq0/<item>_freq *read-only* files: + * * - act_freq: The actual resolved frequency decided by PCODE. * - cur_freq: The current one requested by GuC PC to the PCODE. * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. @@ -39,6 +40,7 @@ * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. * * device/tile#/gt#/freq0/<item>_freq *read-write* files: + * * - min_freq: Min frequency request. * - max_freq: Max frequency request. * If max <= min, then freq_min becomes a fixed frequency request. diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 79c426dc2505..656c2ab6ca9f 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -423,9 +423,17 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, XE_MAX_EU_FUSE_BITS) * num_dss; - /* user can issue separate page faults per EU and per CS */ + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. + */ +#define PF_MULTIPLIER 8 pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); +#undef PF_MULTIPLIER pf_queue->gt = gt; pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 905f409db74b..57e9eddc092e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -5,14 +5,20 @@ #include <drm/drm_managed.h> +#include "regs/xe_guc_regs.h" #include "regs/xe_regs.h" +#include "xe_gt.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_service.h" +#include "xe_gt_sriov_printk.h" #include "xe_mmio.h" +#include "xe_pm.h" + +static void pf_worker_restart_func(struct work_struct *w); /* * VF's metadata is maintained in the flexible array where: @@ -38,6 +44,11 @@ static int pf_alloc_metadata(struct xe_gt *gt) return 0; } +static void pf_init_workers(struct xe_gt *gt) +{ + INIT_WORK(>->sriov.pf.workers.restart, pf_worker_restart_func); +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -62,6 +73,8 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + pf_init_workers(gt); + return 0; } @@ -89,14 +102,111 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) xe_gt_sriov_pf_service_update(gt); } +static u32 pf_get_vf_regs_stride(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; +} + +static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride) +{ + struct xe_reg pf_reg = vf_reg; + + pf_reg.vf = 0; + pf_reg.addr += stride * vfid; + + return pf_reg; +} + +static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid) +{ + u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt)); + struct xe_reg scratch; + int n, count; + + if (xe_gt_is_media_type(gt)) { + count = MED_VF_SW_FLAG_COUNT; + for (n = 0; n < count; n++) { + scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride); + xe_mmio_write32(gt, scratch, 0); + } + } else { + count = VF_SW_FLAG_COUNT; + for (n = 0; n < count; n++) { + scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride); + xe_mmio_write32(gt, scratch, 0); + } + } +} + /** - * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset. + * xe_gt_sriov_pf_sanitize_hw() - Reset hardware state related to a VF. * @gt: the &xe_gt + * @vfid: the VF identifier * * This function can only be called on PF. */ -void xe_gt_sriov_pf_restart(struct xe_gt *gt) +void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + pf_clear_vf_scratch_regs(gt, vfid); +} + +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(>->sriov.pf.workers.restart)) + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF. + */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) { + pf_cancel_restart(gt); +} + +static void pf_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); xe_gt_sriov_pf_config_restart(gt); xe_gt_sriov_pf_control_restart(gt); + xe_pm_runtime_put(xe); + + xe_gt_sriov_dbg(gt, "restart completed\n"); +} + +static void pf_worker_restart_func(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, typeof(*gt), sriov.pf.workers.restart); + + pf_restart(gt); +} + +static void pf_queue_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + if (!queue_work(xe->sriov.wq, >->sriov.pf.workers.restart)) + xe_gt_sriov_dbg(gt, "restart already in queue!\n"); +} + +/** + * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset. + * @gt: the &xe_gt + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_restart(struct xe_gt *gt) +{ + pf_queue_restart(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f0cb726a6919..165ba31d0391 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,6 +11,8 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); +void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -22,6 +24,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c9ed996b9cb0..786f0dba4143 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -323,6 +323,26 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) return err; } +static int pf_push_vf_cfg(struct xe_gt *gt, unsigned int vfid, bool reset) +{ + int err = 0; + + xe_gt_assert(gt, vfid); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (reset) + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err) + err = pf_push_full_vf_config(gt, vfid); + + return err; +} + +static int pf_refresh_vf_cfg(struct xe_gt *gt, unsigned int vfid) +{ + return pf_push_vf_cfg(gt, vfid, true); +} + static u64 pf_get_ggtt_alignment(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -419,6 +439,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) return err; pf_release_vf_config_ggtt(gt, config); + + err = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(err)) + return err; } xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); @@ -744,6 +768,10 @@ static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctx return ret; pf_release_config_ctxs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_ctxs) @@ -1041,6 +1069,10 @@ static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) return ret; pf_release_config_dbs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_dbs) @@ -2003,10 +2035,7 @@ int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh xe_gt_assert(gt, vfid); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - if (refresh) - err = pf_send_vf_cfg_reset(gt, vfid); - if (!err) - err = pf_push_full_vf_config(gt, vfid); + err = pf_push_vf_cfg(gt, vfid, refresh); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); if (unlikely(err)) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 02f7328bd6ce..b4fd5a81aff1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -9,6 +9,7 @@ #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" @@ -1008,7 +1009,7 @@ static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) return false; - /* XXX: placeholder */ + xe_gt_sriov_pf_sanitize_hw(gt, vfid); pf_enter_vf_flr_send_finish(gt, vfid); return true; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 28e1b130bf87..a69d128c4f45 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -32,7 +32,16 @@ struct xe_gt_sriov_metadata { }; /** + * struct xe_gt_sriov_pf_workers - GT level workers used by the PF. + */ +struct xe_gt_sriov_pf_workers { + /** @restart: worker that executes actions post GT reset */ + struct work_struct restart; +}; + +/** * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @workers: workers data. * @service: service data. * @control: control data. * @policy: policy data. @@ -40,6 +49,7 @@ struct xe_gt_sriov_metadata { * @vfs: metadata for all VFs. */ struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_workers workers; struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 4ebc82e607af..29badbd829ab 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -46,17 +46,40 @@ static int guc_action_vf_reset(struct xe_guc *guc) return ret > 0 ? -EPROTO : ret; } +#define GUC_RESET_VF_STATE_RETRY_MAX 10 static int vf_reset_guc_state(struct xe_gt *gt) { + unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX; struct xe_guc *guc = >->uc.guc; int err; - err = guc_action_vf_reset(guc); + do { + err = guc_action_vf_reset(guc); + if (!err || err != -ETIMEDOUT) + break; + } while (--retry); + if (unlikely(err)) xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err)); return err; } +/** + * xe_gt_sriov_vf_reset - Reset GuC VF internal state. + * @gt: the &xe_gt + * + * It requires functional `GuC MMIO based communication`_. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_vf_reset(struct xe_gt *gt) +{ + if (!xe_device_uc_enabled(gt_to_xe(gt))) + return -ENODEV; + + return vf_reset_guc_state(gt); +} + static int guc_action_match_version(struct xe_guc *guc, u32 wanted_branch, u32 wanted_major, u32 wanted_minor, u32 *branch, u32 *major, u32 *minor, u32 *patch) @@ -212,6 +235,9 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) { int err; + if (!xe_device_uc_enabled(gt_to_xe(gt))) + return -ENODEV; + err = vf_reset_guc_state(gt); if (unlikely(err)) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index e541ce57bec2..576ff5e795a8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -12,6 +12,7 @@ struct drm_printer; struct xe_gt; struct xe_reg; +int xe_gt_sriov_vf_reset(struct xe_gt *gt); int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index ace1fe831a7b..9deb9b44c3c3 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) int pending_seqno; /* + * we can get here before the CTs are even initialized if we're wedging + * very early, in which case there are not going to be any pending + * fences so we can bail immediately. + */ + if (!xe_guc_ct_initialized(>->uc.guc.ct)) + return; + + /* * CT channel is already disabled at this point. No new TLB requests can * appear. */ @@ -310,6 +318,13 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. + */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + /** * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range @@ -334,6 +349,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; int len = 0; xe_gt_assert(gt, fence); @@ -346,11 +362,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation) { + if (!xe->info.has_range_tlb_invalidation || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { u64 orig_start = start; - u64 length = end - start; u64 align; if (length < SZ_4K) @@ -399,6 +415,28 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, } /** + * xe_gt_tlb_invalidation_vm - Issue a TLB invalidation on this GT for a VM + * @gt: graphics tile + * @vm: VM to invalidate + * + * Invalidate entire VM's address space + */ +void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm) +{ + struct xe_gt_tlb_invalidation_fence fence; + u64 range = 1ull << vm->xe->info.va_bits; + int ret; + + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + + ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm->usm.asid); + if (ret < 0) + return; + + xe_gt_tlb_invalidation_fence_wait(&fence); +} + +/** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 672acfcdf0d7..abe9b03d543e 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -12,6 +12,7 @@ struct xe_gt; struct xe_guc; +struct xe_vm; struct xe_vma; int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); @@ -21,6 +22,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); +void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 52df28032a6f..96373cdb366b 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -985,7 +985,7 @@ timeout: BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS); BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1); - ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask, + ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask, 1000000, &header, false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != @@ -1175,7 +1175,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_guc_ct_print(&guc->ct, p, false); + xe_guc_ct_print(&guc->ct, p); xe_guc_submit_print(guc, p); } diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index d1902a8581ca..e144fd41c0a7 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -483,24 +483,52 @@ static void fill_engine_enable_masks(struct xe_gt *gt, engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); } -static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) +/* + * Write the offsets corresponding to the golden LRCs. The actual data is + * populated later by guc_golden_lrc_populate() + */ +static void guc_golden_lrc_init(struct xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), offsetof(struct __guc_ads_blob, system_info)); - u8 guc_class; + size_t alloc_size, real_size; + u32 addr_ggtt, offset; + int class; + + offset = guc_ads_golden_lrc_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + u8 guc_class; + + guc_class = xe_engine_class_to_guc_class(class); - for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { if (!info_map_read(xe, &info_map, engine_enabled_masks[guc_class])) continue; + real_size = xe_gt_lrc_size(gt, class); + alloc_size = PAGE_ALIGN(real_size); + + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists LRC registers. This is + * required to allow the GuC to restore just the engine state + * when a watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). + */ ads_blob_write(ads, ads.eng_state_size[guc_class], - guc_ads_golden_lrc_size(ads) - - xe_lrc_skip_size(xe)); + real_size - xe_lrc_skip_size(xe)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], - xe_bo_ggtt_addr(ads->bo) + - guc_ads_golden_lrc_offset(ads)); + addr_ggtt); + + addr_ggtt += alloc_size; } } @@ -710,7 +738,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); guc_policies_init(ads); - guc_prep_golden_lrc_null(ads); + guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); guc_doorbell_init(ads); @@ -736,7 +764,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); - guc_prep_golden_lrc_null(ads); + guc_golden_lrc_init(ads); guc_mapping_table_init(gt, &info_map); guc_capture_list_init(ads); guc_doorbell_init(ads); @@ -756,18 +784,22 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_ads_private_data_offset(ads)); } -static void guc_populate_golden_lrc(struct xe_guc_ads *ads) +/* + * After the golden LRC's are recorded for each engine class by the first + * submission, copy them to the ADS, as initialized earlier by + * guc_golden_lrc_init(). + */ +static void guc_golden_lrc_populate(struct xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); struct xe_gt *gt = ads_to_gt(ads); struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), offsetof(struct __guc_ads_blob, system_info)); size_t total_size = 0, alloc_size, real_size; - u32 addr_ggtt, offset; + u32 offset; int class; offset = guc_ads_golden_lrc_offset(ads); - addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { u8 guc_class; @@ -784,26 +816,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; - /* - * This interface is slightly confusing. We need to pass the - * base address of the full golden context and the size of just - * the engine state, which is the section of the context image - * that starts after the execlists LRC registers. This is - * required to allow the GuC to restore just the engine state - * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). - */ - ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); - ads_blob_write(ads, ads.golden_context_lrca[guc_class], - addr_ggtt); - xe_map_memcpy_to(xe, ads_to_map(ads), offset, gt->default_lrc[class], real_size); - addr_ggtt += alloc_size; offset += alloc_size; } @@ -812,7 +827,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) { - guc_populate_golden_lrc(ads); + guc_golden_lrc_populate(ads); } static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index cd6a5f09d631..f1ce4e14dcb5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -25,12 +25,53 @@ #include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" +#include "xe_guc_log.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" #include "xe_map.h" #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +enum { + /* Internal states, not error conditions */ + CT_DEAD_STATE_REARM, /* 0x0001 */ + CT_DEAD_STATE_CAPTURE, /* 0x0002 */ + + /* Error conditions */ + CT_DEAD_SETUP, /* 0x0004 */ + CT_DEAD_H2G_WRITE, /* 0x0008 */ + CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */ + CT_DEAD_G2H_READ, /* 0x0020 */ + CT_DEAD_G2H_RECV, /* 0x0040 */ + CT_DEAD_G2H_RELEASE, /* 0x0080 */ + CT_DEAD_DEADLOCK, /* 0x0100 */ + CT_DEAD_PROCESS_FAILED, /* 0x0200 */ + CT_DEAD_FAST_G2H, /* 0x0400 */ + CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */ + CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */ + CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */ + CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */ +}; + +static void ct_dead_worker_func(struct work_struct *w); +static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code); + +#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code) +#else +#define CT_DEAD(ct, ctb, reason) \ + do { \ + struct guc_ctb *_ctb = (ctb); \ + if (_ctb) \ + _ctb->info.broken = true; \ + } while (0) +#endif + /* Used when a CT send wants to block and / or receive data */ struct g2h_fence { u32 *response_buffer; @@ -147,14 +188,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -182,7 +220,11 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); - INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); + INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + spin_lock_init(&ct->dead.lock); + INIT_WORK(&ct->dead.worker, ct_dead_worker_func); +#endif init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); @@ -419,10 +461,22 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) if (ct_needs_safe_mode(ct)) ct_enter_safe_mode(ct); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + /* + * The CT has now been reset so the dumper can be re-armed + * after any existing dead state has been dumped. + */ + spin_lock_irq(&ct->dead.lock); + if (ct->dead.reason) + ct->dead.reason |= (1 << CT_DEAD_STATE_REARM); + spin_unlock_irq(&ct->dead.lock); +#endif + return 0; err_out: xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err)); + CT_DEAD(ct, NULL, SETUP); return err; } @@ -454,6 +508,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { + if (!xe_guc_ct_initialized(ct)) + return; + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -466,6 +523,19 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) if (cmd_len > h2g->info.space) { h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); + + if (h2g->info.head > h2g->info.size) { + struct xe_device *xe = ct_to_xe(ct); + u32 desc_status = desc_read(xe, h2g, status); + + desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW); + + xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u >= %u)\n", + h2g->info.head, h2g->info.size); + CT_DEAD(ct, h2g, H2G_HAS_ROOM); + return false; + } + h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head, h2g->info.size) - h2g->info.resv_space; @@ -521,10 +591,24 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) { + bool bad = false; + lockdep_assert_held(&ct->fast_lock); - xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <= - ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); - xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding); + + bad = ct->ctbs.g2h.info.space + g2h_len > + ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space; + bad |= !ct->g2h_outstanding; + + if (bad) { + xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n", + ct->ctbs.g2h.info.space, g2h_len, + ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space, + ct->ctbs.g2h.info.space + g2h_len, + ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space, + ct->g2h_outstanding); + CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE); + return; + } ct->ctbs.g2h.info.space += g2h_len; if (!--ct->g2h_outstanding) @@ -551,12 +635,43 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 full_len; struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, tail * sizeof(u32)); + u32 desc_status; full_len = len + GUC_CTB_HDR_LEN; lockdep_assert_held(&ct->lock); xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN); - xe_gt_assert(gt, tail <= h2g->info.size); + + desc_status = desc_read(xe, h2g, status); + if (desc_status) { + xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status); + goto corrupted; + } + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + u32 desc_tail = desc_read(xe, h2g, tail); + u32 desc_head = desc_read(xe, h2g, head); + + if (tail != desc_tail) { + desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH); + xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail); + goto corrupted; + } + + if (tail > h2g->info.size) { + desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW); + xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n", + tail, h2g->info.size); + goto corrupted; + } + + if (desc_head >= h2g->info.size) { + desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW); + xe_gt_err(gt, "CT write: invalid head offset %u >= %u)\n", + desc_head, h2g->info.size); + goto corrupted; + } + } /* Command will wrap, zero fill (NOPs), return and check credits again */ if (tail + full_len > h2g->info.size) { @@ -609,6 +724,10 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, desc_read(xe, h2g, head), h2g->info.tail); return 0; + +corrupted: + CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE); + return -EPIPE; } /* @@ -638,7 +757,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -716,7 +835,6 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); - struct drm_printer p = xe_gt_info_printer(gt); unsigned int sleep_period_ms = 1; int ret; @@ -769,8 +887,13 @@ try_again: goto broken; #undef g2h_avail - if (dequeue_one_g2h(ct) < 0) + ret = dequeue_one_g2h(ct); + if (ret < 0) { + if (ret != -ECANCELED) + xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)", + ERR_PTR(ret)); goto broken; + } goto try_again; } @@ -779,8 +902,7 @@ try_again: broken: xe_gt_err(gt, "No forward process on H2G, reset required\n"); - xe_guc_ct_print(ct, &p, true); - ct->ctbs.h2g.info.broken = true; + CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK); return -EDEADLK; } @@ -848,7 +970,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) #define ct_alive(ct) \ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \ !ct->ctbs.g2h.info.broken) - if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) + if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) return false; #undef ct_alive @@ -1046,6 +1168,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) else xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", type, fence); + CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE); return -EPROTO; } @@ -1053,6 +1176,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) g2h_fence = xa_erase(&ct->fence_lookup, fence); if (unlikely(!g2h_fence)) { /* Don't tear down channel, as send could've timed out */ + /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */ xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence); g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); return 0; @@ -1097,7 +1221,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n", origin); - ct->ctbs.g2h.info.broken = true; + CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN); return -EPROTO; } @@ -1115,7 +1239,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) default: xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n", type); - ct->ctbs.g2h.info.broken = true; + CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE); ret = -EOPNOTSUPP; } @@ -1192,9 +1316,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } - if (ret) + if (ret) { xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", action, ERR_PTR(ret)); + CT_DEAD(ct, NULL, PROCESS_FAILED); + } return 0; } @@ -1204,12 +1330,12 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; - u32 tail, head, len; + u32 tail, head, len, desc_status; s32 avail; u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) @@ -1223,6 +1349,63 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) xe_gt_assert(gt, xe_guc_ct_enabled(ct)); + desc_status = desc_read(xe, g2h, status); + if (desc_status) { + if (desc_status & GUC_CTB_STATUS_DISABLED) { + /* + * Potentially valid if a CLIENT_RESET request resulted in + * contexts/engines being reset. But should never happen as + * no contexts should be active when CLIENT_RESET is sent. + */ + xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n"); + desc_status &= ~GUC_CTB_STATUS_DISABLED; + } + + if (desc_status) { + xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status); + goto corrupted; + } + } + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + u32 desc_tail = desc_read(xe, g2h, tail); + /* + u32 desc_head = desc_read(xe, g2h, head); + + * info.head and desc_head are updated back-to-back at the end of + * this function and nowhere else. Hence, they cannot be different + * unless two g2h_read calls are running concurrently. Which is not + * possible because it is guarded by ct->fast_lock. And yet, some + * discrete platforms are reguarly hitting this error :(. + * + * desc_head rolling backwards shouldn't cause any noticeable + * problems - just a delay in GuC being allowed to proceed past that + * point in the queue. So for now, just disable the error until it + * can be root caused. + * + if (g2h->info.head != desc_head) { + desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH); + xe_gt_err(gt, "CT read: head was modified %u != %u\n", + desc_head, g2h->info.head); + goto corrupted; + } + */ + + if (g2h->info.head > g2h->info.size) { + desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW); + xe_gt_err(gt, "CT read: head out of range: %u vs %u\n", + g2h->info.head, g2h->info.size); + goto corrupted; + } + + if (desc_tail >= g2h->info.size) { + desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW); + xe_gt_err(gt, "CT read: invalid tail offset %u >= %u)\n", + desc_tail, g2h->info.size); + goto corrupted; + } + } + /* Calculate DW available to read */ tail = desc_read(xe, g2h, tail); avail = tail - g2h->info.head; @@ -1239,9 +1422,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) if (len > avail) { xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n", avail, len); - g2h->info.broken = true; - - return -EPROTO; + goto corrupted; } head = (g2h->info.head + 1) % g2h->info.size; @@ -1287,6 +1468,10 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) action, len, g2h->info.head, tail); return len; + +corrupted: + CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ); + return -EPROTO; } static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) @@ -1313,9 +1498,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) xe_gt_warn(gt, "NOT_POSSIBLE"); } - if (ret) + if (ret) { xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", action, ERR_PTR(ret)); + CT_DEAD(ct, NULL, FAST_G2H); + } } /** @@ -1375,7 +1562,6 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct) static void receive_g2h(struct xe_guc_ct *ct) { - struct xe_gt *gt = ct_to_gt(ct); bool ongoing; int ret; @@ -1412,9 +1598,8 @@ static void receive_g2h(struct xe_guc_ct *ct) mutex_unlock(&ct->lock); if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { - struct drm_printer p = xe_gt_info_printer(gt); - - xe_guc_ct_print(ct, &p, false); + xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret); + CT_DEAD(ct, NULL, G2H_RECV); kick_reset(ct); } } while (ret == 1); @@ -1442,9 +1627,8 @@ static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), atomic ? GFP_ATOMIC : GFP_KERNEL); - if (!snapshot->cmds) { - drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n"); + drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CT info will be available.\n"); return; } @@ -1525,7 +1709,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, atomic ? GFP_ATOMIC : GFP_KERNEL); if (!snapshot) { - drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n"); + xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n"); return NULL; } @@ -1589,16 +1773,119 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot) * xe_guc_ct_print - GuC CT Print. * @ct: GuC CT. * @p: drm_printer where it will be printed out. - * @atomic: Boolean to indicate if this is called from atomic context like - * reset or CTB handler or from some regular path like debugfs. * * This function quickly capture a snapshot and immediately print it out. */ -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic) +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) { struct xe_guc_ct_snapshot *snapshot; - snapshot = xe_guc_ct_snapshot_capture(ct, atomic); + snapshot = xe_guc_ct_snapshot_capture(ct, false); xe_guc_ct_snapshot_print(snapshot, p); xe_guc_ct_snapshot_free(snapshot); } + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code) +{ + struct xe_guc_log_snapshot *snapshot_log; + struct xe_guc_ct_snapshot *snapshot_ct; + struct xe_guc *guc = ct_to_guc(ct); + unsigned long flags; + bool have_capture; + + if (ctb) + ctb->info.broken = true; + + /* Ignore further errors after the first dump until a reset */ + if (ct->dead.reported) + return; + + spin_lock_irqsave(&ct->dead.lock, flags); + + /* And only capture one dump at a time */ + have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE); + ct->dead.reason |= (1 << reason_code) | + (1 << CT_DEAD_STATE_CAPTURE); + + spin_unlock_irqrestore(&ct->dead.lock, flags); + + if (have_capture) + return; + + snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true); + snapshot_ct = xe_guc_ct_snapshot_capture((ct), true); + + spin_lock_irqsave(&ct->dead.lock, flags); + + if (ct->dead.snapshot_log || ct->dead.snapshot_ct) { + xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n"); + xe_guc_log_snapshot_free(snapshot_log); + xe_guc_ct_snapshot_free(snapshot_ct); + } else { + ct->dead.snapshot_log = snapshot_log; + ct->dead.snapshot_ct = snapshot_ct; + } + + spin_unlock_irqrestore(&ct->dead.lock, flags); + + queue_work(system_unbound_wq, &(ct)->dead.worker); +} + +static void ct_dead_print(struct xe_dead_ct *dead) +{ + struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead); + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + static int g_count; + struct drm_printer ip = xe_gt_info_printer(gt); + struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count); + + if (!dead->reason) { + xe_gt_err(gt, "CTB is dead for no reason!?\n"); + return; + } + + drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason); + + /* Can't generate a genuine core dump at this point, so just do the good bits */ + drm_puts(&lp, "**** Xe Device Coredump ****\n"); + xe_device_snapshot_print(xe, &lp); + + drm_printf(&lp, "**** GT #%d ****\n", gt->info.id); + drm_printf(&lp, "\tTile: %d\n", gt->tile->id); + + drm_puts(&lp, "**** GuC Log ****\n"); + xe_guc_log_snapshot_print(dead->snapshot_log, &lp); + + drm_puts(&lp, "**** GuC CT ****\n"); + xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp); + + drm_puts(&lp, "Done.\n"); +} + +static void ct_dead_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker); + + if (!ct->dead.reported) { + ct->dead.reported = true; + ct_dead_print(&ct->dead); + } + + spin_lock_irq(&ct->dead.lock); + + xe_guc_log_snapshot_free(ct->dead.snapshot_log); + ct->dead.snapshot_log = NULL; + xe_guc_ct_snapshot_free(ct->dead.snapshot_ct); + ct->dead.snapshot_ct = NULL; + + if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) { + /* A reset has occurred so re-arm the error reporting */ + ct->dead.reason = 0; + ct->dead.reported = false; + } + + spin_unlock_irq(&ct->dead.lock); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 190202fce2d0..c7ac9407b861 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -21,7 +21,12 @@ xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic); void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p); void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic); +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); + +static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 761cb9031298..85e127ec91d7 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -86,6 +86,24 @@ enum xe_guc_ct_state { XE_GUC_CT_STATE_ENABLED, }; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +/** struct xe_dead_ct - Information for debugging a dead CT */ +struct xe_dead_ct { + /** @lock: protects memory allocation/free operations, and @reason updates */ + spinlock_t lock; + /** @reason: bit mask of CT_DEAD_* reason codes */ + unsigned int reason; + /** @reported: for preventing multiple dumps per error sequence */ + bool reported; + /** @worker: worker thread to get out of interrupt context before dumping */ + struct work_struct worker; + /** snapshot_ct: copy of CT state and CTB content at point of error */ + struct xe_guc_ct_snapshot *snapshot_ct; + /** snapshot_log: copy of GuC log at point of error */ + struct xe_guc_log_snapshot *snapshot_log; +}; +#endif + /** * struct xe_guc_ct - GuC command transport (CT) layer * @@ -128,6 +146,11 @@ struct xe_guc_ct { u32 msg[GUC_CTB_MSG_MAX_LEN]; /** @fast_msg: Message buffer */ u32 fast_msg[GUC_CTB_MSG_MAX_LEN]; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + /** @dead: information for debugging dead CTs */ + struct xe_dead_ct dead; +#endif }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index 4c39f01e4f52..a3f421e2adc0 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -20,6 +20,8 @@ struct xe_exec_queue; struct xe_guc_exec_queue { /** @q: Backpointer to parent xe_exec_queue */ struct xe_exec_queue *q; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; /** @sched: GPU scheduler for this xe_exec_queue */ struct xe_gpu_scheduler sched; /** @entity: Scheduler entity for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index be47780ec2a7..50851638003b 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -78,7 +78,7 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) xe_map_memcpy_from(xe, copy, &log->bo->vmap, 0, size); - xe_print_blob_ascii85(p, "Log data", copy, 0, size); + xe_print_blob_ascii85(p, "Log data", '\n', copy, 0, size); vfree(copy); } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 034b29984d5e..af02803c145b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -6,6 +6,9 @@ #include "xe_guc_pc.h" #include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <generated/xe_wa_oob.h> @@ -47,6 +50,12 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 + +#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ +#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -133,6 +142,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -584,6 +623,11 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { int ret; + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; + } + mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -793,6 +837,106 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + mutex_lock(&pc->freq_lock); + + if (!pc->freq_ready) { + mutex_unlock(&pc->freq_lock); + return; + } + + ret = pc_action_query_task_state(pc); + if (ret) { + mutex_unlock(&pc->freq_lock); + return; + } + + max_freq = pc_get_max_freq(pc); + if (max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + mutex_unlock(&pc->freq_lock); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); + + /* + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. + */ + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; + } + + mutex_unlock(&pc->freq_lock); + + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) { int ret = 0; @@ -975,7 +1119,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); ret = pc_action_reset(pc); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index efda432fadfc..7154b3aab0d8 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -34,5 +34,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 13810be015db..5b86d91296cb 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpe_freq: HW RPe frequency - The Efficient one */ diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index ade6162dc259..0e5b43e1518e 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -224,7 +224,7 @@ __relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u * with CTB lock held which is marked as used in the reclaim path. * Btw, that's one of the reason why we use mempool here! */ - txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL); + txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_NOWAIT); if (!txn) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fed23304e4da..cf6946424fc3 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -227,6 +227,17 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); xa_destroy(&guc->submission_state.exec_queue_lookup); } @@ -298,6 +309,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) primelockdep(guc); + guc->submission_state.initialized = true; + return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } @@ -826,6 +839,13 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); + /* + * If device is being wedged even before submission_state is + * initialized, there's nothing to do here. + */ + if (!guc->submission_state.initialized) + return; + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { @@ -1213,13 +1233,19 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); + release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); - release_guc_id(guc, q); + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&ge->sched.base.work_tdr); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); - kfree(ge); + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); xe_exec_queue_fini(q); xe_pm_runtime_put(guc_to_xe(guc)); } @@ -1405,6 +1431,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->guc = ge; ge->q = q; + init_rcu_head(&ge->rcu); init_waitqueue_head(&ge->suspend_wait); for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) @@ -1700,6 +1727,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index ed150fc09ad0..7842b71e68be 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -74,6 +74,11 @@ struct xe_guc { struct mutex lock; /** @submission_state.enabled: submission is enabled */ bool enabled; + /** + * @submission_state.initialized: mark when submission state is + * even initialized - before that not even the lock is valid + */ + bool initialized; /** @submission_state.fini_wq: submit fini wait queue */ wait_queue_head_t fini_wq; } submission_state; diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 2c32dc46f7d4..3d0278c3db93 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -19,39 +19,51 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } -/* - * xe_mark_range_accessed() - mark a range is accessed, so core mm - * have such information for memory eviction or write back to - * hard disk - * - * @range: the range to mark - * @write: if write to this range, we mark pages in this range - * as dirty - */ -static void xe_mark_range_accessed(struct hmm_range *range, bool write) +static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, + struct hmm_range *range, struct rw_semaphore *notifier_sem) { - struct page *page; - u64 i, npages; + unsigned long i, npages, hmm_pfn; + unsigned long num_chunks = 0; + int ret; + + /* HMM docs says this is needed. */ + ret = down_read_interruptible(notifier_sem); + if (ret) + return ret; + + if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { + up_read(notifier_sem); + return -EAGAIN; + } npages = xe_npages_in_range(range->start, range->end); - for (i = 0; i < npages; i++) { - page = hmm_pfn_to_page(range->hmm_pfns[i]); - if (write) - set_page_dirty_lock(page); + for (i = 0; i < npages;) { + unsigned long len; + + hmm_pfn = range->hmm_pfns[i]; + xe_assert(xe, hmm_pfn & HMM_PFN_VALID); - mark_page_accessed(page); + len = 1UL << hmm_pfn_to_map_order(hmm_pfn); + + /* If order > 0 the page may extend beyond range->start */ + len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); + i += len; + num_chunks++; } + up_read(notifier_sem); + + return sg_alloc_table(st, num_chunks, GFP_KERNEL); } -/* +/** * xe_build_sg() - build a scatter gather table for all the physical pages/pfn * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table * and will be used to program GPU page table later. - * * @xe: the xe device who will access the dma-address in sg table * @range: the hmm range that we build the sg table from. range->hmm_pfns[] * has the pfn numbers of pages that back up this hmm address range. * @st: pointer to the sg table. + * @notifier_sem: The xe notifier lock. * @write: whether we write to this range. This decides dma map direction * for system pages. If write we map it bi-diretional; otherwise * DMA_TO_DEVICE @@ -78,43 +90,88 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) * Returns 0 if successful; -ENOMEM if fails to allocate memory */ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, bool write) + struct sg_table *st, + struct rw_semaphore *notifier_sem, + bool write) { + unsigned long npages = xe_npages_in_range(range->start, range->end); struct device *dev = xe->drm.dev; - struct page **pages; - u64 i, npages; - int ret; + struct scatterlist *sgl; + struct page *page; + unsigned long i, j; - npages = xe_npages_in_range(range->start, range->end); - pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + lockdep_assert_held(notifier_sem); + + i = 0; + for_each_sg(st->sgl, sgl, st->nents, j) { + unsigned long hmm_pfn, size; + + hmm_pfn = range->hmm_pfns[i]; + page = hmm_pfn_to_page(hmm_pfn); + xe_assert(xe, !is_device_private_page(page)); + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= page_to_pfn(page) & (size - 1); + i += size; - for (i = 0; i < npages; i++) { - pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); - xe_assert(xe, !is_device_private_page(pages[i])); + if (unlikely(j == st->nents - 1)) { + xe_assert(xe, i >= npages); + if (i > npages) + size -= (i - npages); + + sg_mark_end(sgl); + } else { + xe_assert(xe, i < npages); + } + + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); } - ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT, - xe_sg_segment_size(dev), GFP_KERNEL); - if (ret) - goto free_pages; + return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); +} + +static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->userptr.notifier_lock); + + mutex_lock(&userptr->unmap_mutex); + xe_assert(vm->xe, !userptr->mapped); + userptr->mapped = true; + mutex_unlock(&userptr->unmap_mutex); +} - ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); - if (ret) { - sg_free_table(st); - st = NULL; +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + + if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && + !lockdep_is_held_type(&vm->lock, 0) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + /* Don't unmap in exec critical section. */ + xe_vm_assert_held(vm); + /* Don't unmap while mapping the sg. */ + lockdep_assert_held(&vm->lock); } -free_pages: - kvfree(pages); - return ret; + mutex_lock(&userptr->unmap_mutex); + if (userptr->sg && userptr->mapped) + dma_unmap_sgtable(xe->drm.dev, userptr->sg, + write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + userptr->mapped = false; + mutex_unlock(&userptr->unmap_mutex); } -/* +/** * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr - * * @uvma: the userptr vma which hold the scatter gather table * * With function xe_userptr_populate_range, we allocate storage of @@ -124,16 +181,9 @@ free_pages: void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) { struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - struct device *dev = xe->drm.dev; - - xe_assert(xe, userptr->sg); - dma_unmap_sgtable(dev, userptr->sg, - write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); + xe_hmm_userptr_unmap(uvma); sg_free_table(userptr->sg); userptr->sg = NULL; } @@ -166,13 +216,20 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, { unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns, flags = HMM_PFN_REQ_FAULT; + unsigned long *pfns; struct xe_userptr *userptr; struct xe_vma *vma = &uvma->vma; u64 userptr_start = xe_vma_userptr(vma); u64 userptr_end = userptr_start + xe_vma_size(vma); struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range; + struct hmm_range hmm_range = { + .pfn_flags_mask = 0, /* ignore pfns */ + .default_flags = HMM_PFN_REQ_FAULT, + .start = userptr_start, + .end = userptr_end, + .notifier = &uvma->userptr.notifier, + .dev_private_owner = vm->xe, + }; bool write = !xe_vma_read_only(vma); unsigned long notifier_seq; u64 npages; @@ -199,19 +256,14 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, return -ENOMEM; if (write) - flags |= HMM_PFN_REQ_WRITE; + hmm_range.default_flags |= HMM_PFN_REQ_WRITE; if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto free_pfns; } - hmm_range.default_flags = flags; hmm_range.hmm_pfns = pfns; - hmm_range.notifier = &userptr->notifier; - hmm_range.start = userptr_start; - hmm_range.end = userptr_end; - hmm_range.dev_private_owner = vm->xe; while (true) { hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); @@ -238,16 +290,36 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, if (ret) goto free_pfns; - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write); + ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); if (ret) goto free_pfns; - xe_mark_range_accessed(&hmm_range, write); + ret = down_read_interruptible(&vm->userptr.notifier_lock); + if (ret) + goto free_st; + + if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, + &vm->userptr.notifier_lock, write); + if (ret) + goto out_unlock; + userptr->sg = &userptr->sgt; + xe_hmm_userptr_set_mapped(uvma); userptr->notifier_seq = hmm_range.notifier_seq; + up_read(&vm->userptr.notifier_lock); + kvfree(pfns); + return 0; +out_unlock: + up_read(&vm->userptr.notifier_lock); +free_st: + sg_free_table(&userptr->sgt); free_pfns: kvfree(pfns); return ret; } - diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h index 909dc2bdcd97..0ea98d8e7bbc 100644 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -3,9 +3,16 @@ * Copyright © 2024 Intel Corporation */ +#ifndef _XE_HMM_H_ +#define _XE_HMM_H_ + #include <linux/types.h> struct xe_userptr_vma; int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); + void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 547919e8ce9e..0248597c6269 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -381,12 +381,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) blit_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - /* Use Fixed slice CCS mode */ - { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), - XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), - XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, - RCU_MODE_FIXED_SLICE_CCS_MODE)) - }, /* Disable WMTP if HW doesn't support it */ { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), @@ -417,7 +411,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) * Bspec: 72161 */ const u8 mocs_write_idx = gt->mocs.uc_index; - const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && + const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? gt->mocs.wb_index : gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | @@ -454,6 +448,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index b53e8d2accdb..a440442b4d72 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -32,14 +32,61 @@ bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max) return timeout >= min && timeout <= max; } -static void kobj_xe_hw_engine_release(struct kobject *kobj) +static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) { kfree(kobj); } +static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) { + xe_pm_runtime_get(xe); + ret = kattr->show(kobj, kattr, buf); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) { + xe_pm_runtime_get(xe); + ret = kattr->store(kobj, kattr, buf, count); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { + .show = xe_hw_engine_class_sysfs_attr_show, + .store = xe_hw_engine_class_sysfs_attr_store, +}; + static const struct kobj_type kobj_xe_hw_engine_type = { - .release = kobj_xe_hw_engine_release, - .sysfs_ops = &kobj_sysfs_ops + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &xe_hw_engine_class_sysfs_ops, +}; + +static const struct kobj_type kobj_xe_hw_engine_type_def = { + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, }; static ssize_t job_timeout_max_store(struct kobject *kobj, @@ -543,7 +590,7 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, if (!kobj) return -ENOMEM; - kobject_init(kobj, &kobj_xe_hw_engine_type); + kobject_init(kobj, &kobj_xe_hw_engine_type_def); err = kobject_add(kobj, parent, "%s", ".defaults"); if (err) goto err_object; @@ -559,57 +606,6 @@ err_object: return err; } -static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) -{ - kfree(kobj); -} - -static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->show) { - xe_pm_runtime_get(xe); - ret = kattr->show(kobj, kattr, buf); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, - size_t count) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->store) { - xe_pm_runtime_get(xe); - ret = kattr->store(kobj, kattr, buf, count); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { - .show = xe_hw_engine_class_sysfs_attr_show, - .store = xe_hw_engine_class_sysfs_attr_store, -}; - -static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { - .release = xe_hw_engine_sysfs_kobj_release, - .sysfs_ops = &xe_hw_engine_class_sysfs_ops, -}; static void hw_engine_class_sysfs_fini(void *arg) { @@ -640,7 +636,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) if (!kobj) return -ENOMEM; - kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type); + kobject_init(kobj, &kobj_xe_hw_engine_type); err = kobject_add(kobj, gt->sysfs, "engines"); if (err) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 0b4f12be3692..6e2221b60688 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) spin_unlock_irqrestore(&irq->lock, flags); dma_fence_end_signalling(tmp); } + + /* Safe release of the irq->lock used in dma_fence_init. */ + synchronize_rcu(); } void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5f2c368c35ad..14c3a476597a 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -173,7 +173,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) if (ccs_mask & (BIT(0)|BIT(1))) xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); } if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { @@ -504,7 +504,7 @@ static void gt_irq_reset(struct xe_tile *tile) if (ccs_mask & (BIT(0)|BIT(1))) xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); if ((tile->media_gt && xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 8999ac511555..485658f69fba 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size); pt->level = level; pt->bo = bo; @@ -91,6 +94,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index aec7db39c061..ce6d2167b94a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -694,7 +694,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; } @@ -874,7 +874,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -905,6 +905,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, int err; kref_init(&lrc->refcount); + lrc->gt = gt; lrc->flags = 0; lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); if (xe_gt_has_indirect_ring_state(gt)) @@ -923,7 +924,6 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, return PTR_ERR(lrc->bo); lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; lrc->ctx_timestamp = 0; diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 71ecb453f811..cd38586ae989 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,8 +25,8 @@ struct xe_lrc { /** @size: size of lrc including any indirect ring state page */ u32 size; - /** @tile: tile which this LRC belongs to */ - struct xe_tile *tile; + /** @gt: gt which this LRC belongs to */ + struct xe_gt *gt; /** @flags: LRC flags */ #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1b97d90aadda..c2da2691fd2b 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -860,7 +860,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) @@ -1177,7 +1177,7 @@ err: err_sync: /* Sync partial copies if any. FIXME: job_mutex? */ if (fence) { - dma_fence_wait(m->fence, false); + dma_fence_wait(fence, false); dma_fence_put(fence); } diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 78823f53d290..d306ed0a0443 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -36,11 +36,22 @@ #include "xe_pm.h" #include "xe_sched_job.h" #include "xe_sriov.h" +#include "xe_sync.h" #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX +enum xe_oa_submit_deps { + XE_OA_SUBMIT_NO_DEPS, + XE_OA_SUBMIT_ADD_DEPS, +}; + +enum xe_oa_user_extn_from { + XE_OA_USER_EXTN_FROM_OPEN, + XE_OA_USER_EXTN_FROM_CONFIG, +}; + struct xe_oa_reg { struct xe_reg addr; u32 value; @@ -63,13 +74,8 @@ struct xe_oa_config { struct rcu_head rcu; }; -struct flex { - struct xe_reg reg; - u32 offset; - u32 value; -}; - struct xe_oa_open_param { + struct xe_file *xef; u32 oa_unit_id; bool sample; u32 metric_set; @@ -81,6 +87,9 @@ struct xe_oa_open_param { struct xe_exec_queue *exec_q; struct xe_hw_engine *hwe; bool no_preempt; + struct drm_xe_sync __user *syncs_user; + int num_syncs; + struct xe_sync_entry *syncs; }; struct xe_oa_config_bo { @@ -90,6 +99,17 @@ struct xe_oa_config_bo { struct xe_bb *bb; }; +struct xe_oa_fence { + /* @base: dma fence base */ + struct dma_fence base; + /* @lock: lock for the fence */ + spinlock_t lock; + /* @work: work to signal @base */ + struct delayed_work work; + /* @cb: callback to schedule @work */ + struct dma_fence_cb cb; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -162,10 +182,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se return oa_config; } -static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) +static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence) { xe_oa_config_put(oa_bo->oa_config); - xe_bb_free(oa_bo->bb, NULL); + xe_bb_free(oa_bo->bb, last_fence); kfree(oa_bo); } @@ -515,6 +535,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf, mutex_unlock(&stream->stream_lock); } while (!offset && !ret); } else { + xe_oa_buffer_check_unlocked(stream); mutex_lock(&stream->stream_lock); ret = __xe_oa_read(stream, buf, count, &offset); mutex_unlock(&stream->stream_lock); @@ -567,32 +588,60 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) return ret; } -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) +static void xe_oa_lock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + down_read(&q->vm->lock); + xe_vm_lock(q->vm, false); + } +} + +static void xe_oa_unlock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + xe_vm_unlock(q->vm); + up_read(&q->vm->lock); + } +} + +static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, + struct xe_bb *bb) { + struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; struct xe_sched_job *job; struct dma_fence *fence; - long timeout; int err = 0; - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ - job = xe_bb_create_job(stream->k_exec_q, bb); + xe_oa_lock_vma(q); + + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { err = PTR_ERR(job); goto exit; } + job->ggtt = true; + + if (deps == XE_OA_SUBMIT_ADD_DEPS) { + for (int i = 0; i < stream->num_syncs && !err; i++) + err = xe_sync_entry_add_deps(&stream->syncs[i], job); + if (err) { + drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err); + goto err_put_job; + } + } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - if (timeout < 0) - err = timeout; - else if (!timeout) - err = -ETIME; + xe_oa_unlock_vma(q); + + return fence; +err_put_job: + xe_sched_job_put(job); exit: - return err; + xe_oa_unlock_vma(q); + return ERR_PTR(err); } static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) @@ -636,57 +685,34 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) xe_oa_config_put(stream->oa_config); llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) - free_oa_config_bo(oa_bo); -} - -static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - struct xe_bb *bb, const struct flex *flex, u32 count) -{ - u32 offset = xe_bo_ggtt_addr(lrc->bo); - - do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = flex->value; - - } while (flex++, --count); + free_oa_config_bo(oa_bo, stream->last_fence); + dma_fence_put(stream->last_fence); } -static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) { + struct dma_fence *fence; struct xe_bb *bb; int err; - bb = xe_bb_new(stream->gt, 4 * count, false); + bb = xe_bb_new(stream->gt, 2 * count + 1, false); if (IS_ERR(bb)) { err = PTR_ERR(bb); goto exit; } - xe_oa_store_flex(stream, lrc, bb, flex, count); - - err = xe_oa_submit_bb(stream, bb); - xe_bb_free(bb, NULL); -exit: - return err; -} + write_cs_mi_lri(bb, reg_lri, count); -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -{ - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 3, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto free_bb; } + xe_bb_free(bb, fence); + dma_fence_put(fence); - write_cs_mi_lri(bb, reg_lri, 1); - - err = xe_oa_submit_bb(stream, bb); + return 0; +free_bb: xe_bb_free(bb, NULL); exit: return err; @@ -695,70 +721,54 @@ exit: static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, { + OAR_OACONTROL, + oacontrol, + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) }, }; - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; - int err; - - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, ®_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, { + OAC_OACONTROL, + oacontrol + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; - int err; /* Set ccs select to enable programming of OAC_OACONTROL */ xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, ®_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) @@ -840,6 +850,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); xe_oa_free_configs(stream); + xe_file_put(stream->xef); } static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) @@ -910,11 +921,62 @@ out: return oa_bo; } +static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence) +{ + dma_fence_put(stream->last_fence); + stream->last_fence = dma_fence_get(fence); +} + +static void xe_oa_fence_work_fn(struct work_struct *w) +{ + struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work); + + /* Signal fence to indicate new OA configuration is active */ + dma_fence_signal(&ofence->base); + dma_fence_put(&ofence->base); +} + +static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + /* Additional empirical delay needed for NOA programming after registers are written */ +#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 + + struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb); + + INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn); + queue_delayed_work(system_unbound_wq, &ofence->work, + usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US)); + dma_fence_put(fence); +} + +static const char *xe_oa_get_driver_name(struct dma_fence *fence) +{ + return "xe_oa"; +} + +static const char *xe_oa_get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static const struct dma_fence_ops xe_oa_fence_ops = { + .get_driver_name = xe_oa_get_driver_name, + .get_timeline_name = xe_oa_get_timeline_name, +}; + static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) { #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 struct xe_oa_config_bo *oa_bo; - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; + struct xe_oa_fence *ofence; + int i, err, num_signal = 0; + struct dma_fence *fence; + + ofence = kzalloc(sizeof(*ofence), GFP_KERNEL); + if (!ofence) { + err = -ENOMEM; + goto exit; + } oa_bo = xe_oa_alloc_config_buffer(stream, config); if (IS_ERR(oa_bo)) { @@ -922,11 +984,50 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config goto exit; } - err = xe_oa_submit_bb(stream, oa_bo->bb); + /* Emit OA configuration batch */ + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto exit; + } - /* Additional empirical delay needed for NOA programming after registers are written */ - usleep_range(us, 2 * us); + /* Point of no return: initialize and set fence to signal */ + spin_lock_init(&ofence->lock); + dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0); + + for (i = 0; i < stream->num_syncs; i++) { + if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL) + num_signal++; + xe_sync_entry_signal(&stream->syncs[i], &ofence->base); + } + + /* Additional dma_fence_get in case we dma_fence_wait */ + if (!num_signal) + dma_fence_get(&ofence->base); + + /* Update last fence too before adding callback */ + xe_oa_update_last_fence(stream, fence); + + /* Add job fence callback to schedule work to signal ofence->base */ + err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb); + xe_gt_assert(stream->gt, !err || err == -ENOENT); + if (err == -ENOENT) + xe_oa_config_cb(fence, &ofence->cb); + + /* If nothing needs to be signaled we wait synchronously */ + if (!num_signal) { + dma_fence_wait(&ofence->base, false); + dma_fence_put(&ofence->base); + } + + /* Done with syncs */ + for (i = 0; i < stream->num_syncs; i++) + xe_sync_entry_cleanup(&stream->syncs[i]); + kfree(stream->syncs); + + return 0; exit: + kfree(ofence); return err; } @@ -997,6 +1098,262 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return xe_oa_emit_oa_config(stream, stream->oa_config); } +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return -EINVAL; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, ¶m->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->no_preempt = value; + return 0; +} + +static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->num_syncs = value; + return 0; +} + +static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->syncs_user = u64_to_user_ptr(value); + return 0; +} + +static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + return -EINVAL; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, +}; + +static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, + u64 extension, struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) != + ARRAY_SIZE(xe_oa_set_property_funcs_config)); + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open)); + + if (from == XE_OA_USER_EXTN_FROM_CONFIG) + return xe_oa_set_property_funcs_config[idx](oa, ext.value, param); + else + return xe_oa_set_property_funcs_open[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from, + u64 extension, struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension, + int ext_number, struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = xe_oa_user_extension_funcs[idx](oa, from, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param); + + return 0; +} + +static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + int ret, num_syncs, num_ufence = 0; + + if (param->num_syncs && !param->syncs_user) { + drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n"); + ret = -EINVAL; + goto exit; + } + + if (param->num_syncs) { + param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL); + if (!param->syncs) { + ret = -ENOMEM; + goto exit; + } + } + + for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { + ret = xe_sync_entry_parse(oa->xe, param->xef, ¶m->syncs[num_syncs], + ¶m->syncs_user[num_syncs], 0); + if (ret) + goto err_syncs; + + if (xe_sync_is_ufence(¶m->syncs[num_syncs])) + num_ufence++; + } + + if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) { + ret = -EINVAL; + goto err_syncs; + } + + return 0; + +err_syncs: + while (num_syncs--) + xe_sync_entry_cleanup(¶m->syncs[num_syncs]); + kfree(param->syncs); +exit: + return ret; +} + static void xe_oa_stream_enable(struct xe_oa_stream *stream) { stream->pollin = false; @@ -1090,36 +1447,38 @@ static int xe_oa_disable_locked(struct xe_oa_stream *stream) static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) { - struct drm_xe_ext_set_property ext; + struct xe_oa_open_param param = {}; long ret = stream->oa_config->id; struct xe_oa_config *config; int err; - err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); - if (XE_IOCTL_DBG(stream->oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || - XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) - return -EINVAL; + err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, ¶m); + if (err) + return err; - config = xe_oa_get_oa_config(stream->oa, ext.value); + config = xe_oa_get_oa_config(stream->oa, param.metric_set); if (!config) return -ENODEV; - if (config != stream->oa_config) { - err = xe_oa_emit_oa_config(stream, config); - if (!err) - config = xchg(&stream->oa_config, config); - else - ret = err; + param.xef = stream->xef; + err = xe_oa_parse_syncs(stream->oa, ¶m); + if (err) + goto err_config_put; + + stream->num_syncs = param.num_syncs; + stream->syncs = param.syncs; + + err = xe_oa_emit_oa_config(stream, config); + if (!err) { + config = xchg(&stream->oa_config, config); + drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n", + stream->oa_config->uuid); } +err_config_put: xe_oa_config_put(config); - return ret; + return err ?: ret; } static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) @@ -1358,10 +1717,14 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; + stream->periodic = param->period_exponent >= 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; + stream->xef = xe_file_get(param->xef); + stream->num_syncs = param->num_syncs; + stream->syncs = param->syncs; + /* * For Xe2+, when overrun mode is enabled, there are no partial reports at the end * of buffer, making the OA buffer effectively a non-power-of-2 size circular @@ -1458,6 +1821,7 @@ err_fw_put: err_free_configs: xe_oa_free_configs(stream); exit: + xe_file_put(stream->xef); return ret; } @@ -1567,27 +1931,6 @@ static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) } } -static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) -{ - u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); - u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); - u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); - u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); - int idx; - - for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { - const struct xe_oa_format *f = &oa->oa_formats[idx]; - - if (counter_size == f->counter_size && bc_report == f->bc_report && - type == f->type && counter_sel == f->counter_select) { - *name = idx; - return 0; - } - } - - return -EINVAL; -} - /** * xe_oa_unit_id - Return OA unit ID for a hardware engine * @hwe: @xe_hw_engine @@ -1634,155 +1977,6 @@ out: return ret; } -static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - if (value >= oa->oa_unit_ids) { - drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); - return -EINVAL; - } - param->oa_unit_id = value; - return 0; -} - -static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->sample = value; - return 0; -} - -static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->metric_set = value; - return 0; -} - -static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - int ret = decode_oa_format(oa, value, ¶m->oa_format); - - if (ret) { - drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); - return ret; - } - return 0; -} - -static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ -#define OA_EXPONENT_MAX 31 - - if (value > OA_EXPONENT_MAX) { - drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); - return -EINVAL; - } - param->period_exponent = value; - return 0; -} - -static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->disabled = value; - return 0; -} - -static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->exec_queue_id = value; - return 0; -} - -static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->engine_instance = value; - return 0; -} - -static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->no_preempt = value; - return 0; -} - -typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param); -static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { - [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, - [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, - [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, - [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, - [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, - [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, - [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, - [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, - [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, -}; - -static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || - XE_IOCTL_DBG(oa->xe, ext.pad)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); - return xe_oa_set_property_funcs[idx](oa, ext.value, param); -} - -typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param); -static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { - [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.pad) || - XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); - err = xe_oa_user_extension_funcs[idx](oa, extension, param); - if (XE_IOCTL_DBG(oa->xe, err)) - return err; - - if (ext.next_extension) - return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); - - return 0; -} - /** * xe_oa_stream_open_ioctl - Opens an OA stream * @dev: @drm_device @@ -1808,7 +2002,9 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f return -ENODEV; } - ret = xe_oa_user_extensions(oa, data, 0, ¶m); + param.xef = xef; + param.period_exponent = -1; + ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, ¶m); if (ret) return ret; @@ -1817,8 +2013,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; - if (param.exec_q->width > 1) - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) + return -EOPNOTSUPP; } /* @@ -1862,7 +2058,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - if (param.period_exponent > 0) { + if (param.period_exponent >= 0) { u64 oa_period, oa_freq_hz; /* Requesting samples from OAG buffer is a privileged operation */ @@ -1876,11 +2072,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); } + ret = xe_oa_parse_syncs(oa, ¶m); + if (ret) + goto err_exec_q; + mutex_lock(¶m.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); mutex_unlock(¶m.hwe->gt->oa.gt_lock); + if (ret < 0) + goto err_sync_cleanup; + + return ret; + +err_sync_cleanup: + while (param.num_syncs--) + xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]); + kfree(param.syncs); err_exec_q: - if (ret < 0 && param.exec_q) + if (param.exec_q) xe_exec_queue_put(param.exec_q); return ret; } @@ -1980,6 +2189,7 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 8862eca73fbe..fea9d981e414 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -238,5 +238,17 @@ struct xe_oa_stream { /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ u32 no_preempt; + + /** @xef: xe_file with which the stream was opened */ + struct xe_file *xef; + + /** @last_fence: fence to use in stream destroy when needed */ + struct dma_fence *last_fence; + + /** @num_syncs: size of @syncs array */ + u32 num_syncs; + + /** @syncs: syncs to wait on and to signal */ + struct xe_sync_entry *syncs; }; #endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 025d64943467..da09c26249f5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -164,7 +164,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .va_bits = 48, \ @@ -910,6 +909,7 @@ static int xe_pci_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3cold); return 0; } diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index aaceee748287..09ee8a06fe2e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -62,6 +62,55 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) xe_gt_sriov_pf_control_trigger_flr(gt, n); } +static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* caller must use pci_dev_put() */ + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + pdev->bus->number, + pci_iov_virtfn_devfn(pdev, vf_id)); +} + +static void pf_link_vfs(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev); + struct device_link *link; + struct pci_dev *pdev_vf; + unsigned int n; + + /* + * When both PF and VF devices are enabled on the host, during system + * resume they are resuming in parallel. + * + * But PF has to complete the provision of VF first to allow any VFs to + * successfully resume. + * + * Create a parent-child device link between PF and VF devices that will + * enforce correct resume order. + */ + for (n = 1; n <= num_vfs; n++) { + pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1); + + /* unlikely, something weird is happening, abort */ + if (!pdev_vf) { + xe_sriov_err(xe, "Cannot find VF%u device, aborting link%s creation!\n", + n, str_plural(num_vfs)); + break; + } + + link = device_link_add(&pdev_vf->dev, &pdev_pf->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + /* unlikely and harmless, continue with other VFs */ + if (!link) + xe_sriov_notice(xe, "Failed linking VF%u\n", n); + + pci_dev_put(pdev_vf); + } +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -92,6 +141,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err < 0) goto failed; + pf_link_vfs(xe, num_vfs); + xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); return num_vfs; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 33eb039053e4..46c73ff10c74 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -264,6 +264,15 @@ int xe_pm_init_early(struct xe_device *xe) return 0; } +static u32 vram_threshold_value(struct xe_device *xe) +{ + /* FIXME: D3Cold temporarily disabled by default on BMG */ + if (xe->info.platform == XE_BATTLEMAGE) + return 0; + + return DEFAULT_VRAM_THRESHOLD; +} + /** * xe_pm_init - Initialize Xe Power Management * @xe: xe device instance @@ -274,6 +283,7 @@ int xe_pm_init_early(struct xe_device *xe) */ int xe_pm_init(struct xe_device *xe) { + u32 vram_threshold; int err; /* For now suspend/resume is only allowed with GuC */ @@ -287,7 +297,8 @@ int xe_pm_init(struct xe_device *xe) if (err) return err; - err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + vram_threshold = vram_threshold_value(xe); + err = xe_pm_set_vram_threshold(xe, vram_threshold); if (err) return err; } @@ -671,11 +682,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 797576690356..fb94ff55c736 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -28,6 +28,8 @@ struct xe_pt_dir { struct xe_pt pt; /** @children: Array of page-table child nodes */ struct xe_ptw *children[XE_PDES]; + /** @staging: Array of page-table staging nodes */ + struct xe_ptw *staging[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -48,9 +50,10 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) return container_of(pt, struct xe_pt_dir, pt); } -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) +static struct xe_pt * +xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->children[index], struct xe_pt, base); + return container_of(pt_dir->staging[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -125,6 +128,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, } pt->bo = bo; pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; + pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -205,8 +209,8 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, + if (xe_pt_entry_staging(pt_dir, i)) + xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags, deferred); } } @@ -214,6 +218,20 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) } /** + * xe_pt_clear() - Clear a page-table. + * @xe: xe device. + * @pt: The page-table. + * + * Clears page-table by setting to zero. + */ +void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt) +{ + struct iosys_map *map = &pt->bo->vmap; + + xe_map_memset(xe, map, 0, 0, SZ_4K); +} + +/** * DOC: Pagetable building * * Below we use the term "page-table" for both page-directories, containing @@ -375,8 +393,10 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, /* Continue building a non-connected subtree. */ struct iosys_map *map = &parent->bo->vmap; - if (unlikely(xe_child)) + if (unlikely(xe_child)) { parent->base.children[offset] = &xe_child->base; + parent->base.staging[offset] = &xe_child->base; + } xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -613,6 +633,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_bind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .vm = xe_vma_vm(vma), .tile = tile, @@ -872,7 +893,7 @@ static void xe_pt_cancel_bind(struct xe_vma *vma, } } -static void xe_pt_commit_locks_assert(struct xe_vma *vma) +static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); @@ -884,6 +905,16 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_vm_assert_held(vm); } +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + + xe_pt_commit_prepare_locks_assert(vma); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); +} + static void xe_pt_commit(struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 num_entries, struct llist_head *deferred) @@ -894,13 +925,17 @@ static void xe_pt_commit(struct xe_vma *vma, for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; if (!pt->level) continue; + pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { struct xe_pt *oldpte = entries[i].pt_entries[j].pt; + int j_ = j + entries[i].ofs; + pt_dir->children[j_] = pt_dir->staging[j_]; xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); } } @@ -912,7 +947,7 @@ static void xe_pt_abort_bind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_pt *pt = entries[i].pt; @@ -927,10 +962,10 @@ static void xe_pt_abort_bind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = xe_pt_entry(pt_dir, j_); + struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); struct xe_pt *oldpte = entries[i].pt_entries[j].pt; - pt_dir->children[j_] = oldpte ? &oldpte->base : 0; + pt_dir->staging[j_] = oldpte ? &oldpte->base : 0; xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); } } @@ -942,7 +977,7 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, { u32 i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; @@ -960,10 +995,10 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, struct xe_pt *newpte = entries[i].pt_entries[j].pt; struct xe_pt *oldpte = NULL; - if (xe_pt_entry(pt_dir, j_)) - oldpte = xe_pt_entry(pt_dir, j_); + if (xe_pt_entry_staging(pt_dir, j_)) + oldpte = xe_pt_entry_staging(pt_dir, j_); - pt_dir->children[j_] = &newpte->base; + pt_dir->staging[j_] = &newpte->base; entries[i].pt_entries[j].pt = oldpte; } } @@ -1212,42 +1247,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; + if (xe_pt_userptr_inject_eagain(uvma)) + xe_vma_userptr_force_invalidate(uvma); - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; + notifier_seq = uvma->userptr.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) + notifier_seq)) return 0; - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm)) return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } + /* + * Just continue the operation since exec or rebind worker + * will take care of rebinding. + */ return 0; } @@ -1513,6 +1528,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_unbind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .tile = tile, .modified_start = xe_vma_start(vma), @@ -1554,7 +1570,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1567,7 +1583,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, continue; for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) - pt_dir->children[j] = + pt_dir->staging[j] = entries[i].pt_entries[j - entry->ofs].pt ? &entries[i].pt_entries[j - entry->ofs].pt->base : NULL; } @@ -1580,7 +1596,7 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; ++i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1594,8 +1610,8 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { entry->pt_entries[j - entry->ofs].pt = - xe_pt_entry(pt_dir, j); - pt_dir->children[j] = NULL; + xe_pt_entry_staging(pt_dir, j); + pt_dir->staging[j] = NULL; } } } diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 9ab386431cad..8e43912ae8e9 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -13,6 +13,7 @@ struct dma_fence; struct xe_bo; struct xe_device; struct xe_exec_queue; +struct xe_svm_range; struct xe_sync_entry; struct xe_tile; struct xe_vm; @@ -35,6 +36,8 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); +void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt); + int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops); struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops); diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index b8b3d2aea492..be602a763ff3 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,8 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->children ? parent->children : NULL; + struct xe_ptw **entries = walk->staging ? (parent->staging ?: NULL) : + (parent->children ?: NULL); const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index 5ecc4d2f0f65..5c02c244f7de 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -11,12 +11,14 @@ /** * struct xe_ptw - base class for driver pagetable subclassing. * @children: Pointer to an array of children if any. + * @staging: Pointer to an array of staging if any. * * Drivers could subclass this, and if it's a page-directory, typically * embed an array of xe_ptw pointers. */ struct xe_ptw { struct xe_ptw **children; + struct xe_ptw **staging; }; /** @@ -41,6 +43,8 @@ struct xe_pt_walk { * as shared pagetables. */ bool shared_pt_mode; + /** @staging: Walk staging PT structure */ + bool staging; }; /** diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 1c96375bd7df..6e7c940d7e22 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -366,6 +366,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; + int iter = 0; u8 id; if (query->size == 0) { @@ -383,12 +384,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; - gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; - gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[iter].gt_id = gt->info.id; + gt_list->gt_list[iter].reference_clock = gt->info.reference_clock; /* * The mem_regions indexes in the mask below need to * directly identify the struct @@ -404,19 +405,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query * assumption. */ if (!IS_DGFX(xe)) - gt_list->gt_list[id].near_mem_regions = 0x1; + gt_list->gt_list[iter].near_mem_regions = 0x1; else - gt_list->gt_list[id].near_mem_regions = + gt_list->gt_list[iter].near_mem_regions = BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].near_mem_regions; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; - gt_list->gt_list[id].ip_ver_major = + gt_list->gt_list[iter].ip_ver_major = REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_minor = + gt_list->gt_list[iter].ip_ver_minor = REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_rev = + gt_list->gt_list[iter].ip_ver_rev = REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); + + iter++; } if (copy_to_user(query_ptr, gt_list, size)) { @@ -679,7 +682,7 @@ static int query_oa_units(struct xe_device *xe, du->oa_unit_id = u->oa_unit_id; du->oa_unit_type = u->type; du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); - du->capabilities = DRM_XE_OA_CAPS_BASE; + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 0be4f489d3e1..fb31e09acb51 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -221,18 +221,18 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, static u32 get_ppgtt_flag(struct xe_sched_job *job) { - return job->q->vm ? BIT(8) : 0; + if (job->q->vm && !job->ggtt) + return BIT(8); + + return 0; } static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP; return i; } diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index fe2cb2a96f78..1d4254759006 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -57,8 +57,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; - - drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); if (bo->vmap.is_iomem) { @@ -72,6 +70,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); } + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, sa_manager); if (ret) diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index 0d3f76fb05ce..c207361bf43e 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -57,6 +57,8 @@ struct xe_sched_job { u32 migrate_flush_flags; /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ bool ring_ops_flush_tlb; + /** @ggtt: mapped in ggtt. */ + bool ggtt; /** @ptrs: per instance pointers. */ struct xe_job_ptrs ptrs[]; }; diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index dda5268507d8..36c87d7c72fb 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -167,17 +167,19 @@ int xe_tile_init_noalloc(struct xe_tile *tile) if (err) return err; + xe_wa_apply_tile_workarounds(tile); + + return xe_tile_sysfs_init(tile); +} + +int xe_tile_init(struct xe_tile *tile) +{ tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); - xe_wa_apply_tile_workarounds(tile); - - err = xe_tile_sysfs_init(tile); - return 0; } - void xe_tile_migrate_wait(struct xe_tile *tile) { xe_migrate_wait(tile->migrate); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 1c9e42ade6b0..eb939316d55b 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -12,6 +12,7 @@ struct xe_tile; int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); +int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index 9b1a1d4304ae..4ff023b5d040 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -55,8 +55,8 @@ TRACE_EVENT(xe_bo_move, TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) - __field(u32, new_placement) - __field(u32, old_placement) + __string(new_placement_name, xe_mem_type_to_name[new_placement]) + __string(old_placement_name, xe_mem_type_to_name[old_placement]) __string(device_id, __dev_name_bo(bo)) __field(bool, move_lacks_source) ), @@ -64,15 +64,15 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; __entry->size = bo->size; - __entry->new_placement = new_placement; - __entry->old_placement = old_placement; + __assign_str(new_placement_name); + __assign_str(old_placement_name); __assign_str(device_id); __entry->move_lacks_source = move_lacks_source; ), TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, - xe_mem_type_to_name[__entry->old_placement], - xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) + __get_str(old_placement_name), + __get_str(new_placement_name), __get_str(device_id)) ); DECLARE_EVENT_CLASS(xe_vma, @@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(xe_vm, ), TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), - __entry->vm, __entry->asid) + __entry->vm, __entry->asid) ); DEFINE_EVENT(xe_vm, xe_vm_kill, diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index f7113cf6109d..34e38bb167ba 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -57,12 +57,35 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); } +static u32 get_wopcm_size(struct xe_device *xe) +{ + u32 wopcm_size; + u64 val; + + val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED); + val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); + + switch (val) { + case 0x5 ... 0x6: + val--; + fallthrough; + case 0x0 ... 0x3: + wopcm_size = (1U << val) * SZ_1M; + break; + default: + WARN(1, "Missing case wopcm_size=%llx\n", val); + wopcm_size = 0; + } + + return wopcm_size; +} + static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *mmio = xe_root_mmio_gt(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - u64 stolen_size; + u64 stolen_size, wopcm_size; u64 tile_offset; u64 tile_size; @@ -74,7 +97,13 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base)) return 0; + /* Carve out the top of DSM as it contains the reserved WOPCM region */ + wopcm_size = get_wopcm_size(xe); + if (drm_WARN_ON(&xe->drm, !wopcm_size)) + return 0; + stolen_size = tile_size - mgr->stolen_base; + stolen_size -= wopcm_size; /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) @@ -89,29 +118,6 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) return ALIGN_DOWN(stolen_size, SZ_1M); } -static u32 get_wopcm_size(struct xe_device *xe) -{ - u32 wopcm_size; - u64 val; - - val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED); - val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); - - switch (val) { - case 0x5 ... 0x6: - val--; - fallthrough; - case 0x0 ... 0x3: - wopcm_size = (1U << val) * SZ_1M; - break; - default: - WARN(1, "Missing case wopcm_size=%llx\n", val); - wopcm_size = 0; - } - - return wopcm_size; -} - static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -201,17 +207,16 @@ static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) #endif } -void xe_ttm_stolen_mgr_init(struct xe_device *xe) +int xe_ttm_stolen_mgr_init(struct xe_device *xe) { - struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_ttm_stolen_mgr *mgr; u64 stolen_size, io_size; int err; - if (!mgr) { - drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n"); - return; - } + mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return -ENOMEM; if (IS_SRIOV_VF(xe)) stolen_size = 0; @@ -224,7 +229,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (!stolen_size) { drm_dbg_kms(&xe->drm, "No stolen memory support\n"); - return; + return 0; } /* @@ -240,7 +245,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) io_size, PAGE_SIZE); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); - return; + return err; } drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", @@ -248,6 +253,8 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (io_size) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); + + return 0; } u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h index 1777245ff810..8e877d1e839b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -12,7 +12,7 @@ struct ttm_resource; struct xe_bo; struct xe_device; -void xe_ttm_stolen_mgr_init(struct xe_device *xe); +int xe_ttm_stolen_mgr_init(struct xe_device *xe); int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe); u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 0d5e04158917..1fb12da21c9e 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -97,14 +97,6 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { }; static const struct xe_rtp_entry_sr lrc_tunings[] = { - { XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - /* read verification is ignored due to 1608008084. */ - XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2, - FF_MODE2_GS_TIMER_MASK, - FF_MODE2_GS_TIMER_224)) - }, - /* DG2 */ { XE_RTP_NAME("Tuning: L3 cache"), diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0d073a9987c2..bb03c524613f 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -241,7 +241,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -275,6 +275,12 @@ again: goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 506517c11333..ba2937ab94cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_remove(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c99380271de6..15fd497c920c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -8,6 +8,7 @@ #include <linux/dma-fence-array.h> #include <linux/nospec.h> +#include <drm/drm_drv.h> #include <drm/drm_exec.h> #include <drm/drm_print.h> #include <drm/ttm/ttm_execbuf_util.h> @@ -580,51 +581,26 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) { - struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); - struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_userptr *userptr = &uvma->userptr; struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; long err; - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - /* No need to stop gpu access if the userptr is not yet bound. */ - if (!userptr->initial_bind) { - up_write(&vm->userptr.notifier_lock); - return true; - } - /* * Tell exec and rebind worker they need to repin and rebind this * userptr. */ if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { spin_lock(&vm->userptr.invalidated_lock); list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } - up_write(&vm->userptr.notifier_lock); - /* * Preempt fences turn into schedule disables, pipeline these. * Note that even in fault mode, we need to wait for binds and @@ -642,11 +618,37 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, false, MAX_SCHEDULE_TIMEOUT); XE_WARN_ON(err <= 0); - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } + xe_hmm_userptr_unmap(uvma); +} + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->userptr.notifier_lock); trace_xe_vma_userptr_invalidate_complete(vma); return true; @@ -656,6 +658,34 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { .invalidate = vma_userptr_invalidate, }; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. + */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->userptr.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq)) + uvma->userptr.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + int xe_vm_userptr_pin(struct xe_vm *vm) { struct xe_userptr_vma *uvma, *next; @@ -667,20 +697,33 @@ int xe_vm_userptr_pin(struct xe_vm *vm) /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) { list_del_init(&uvma->userptr.invalidate_link); - list_move_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); - /* Pin and move to temporary list */ + /* Pin and move to bind list */ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, userptr.repin_link) { err = xe_vma_userptr_pin_pages(uvma); if (err == -EFAULT) { list_del_init(&uvma->userptr.repin_link); + /* + * We might have already done the pin once already, but + * then had to retry before the re-bind happened, due + * some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames. + */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); /* Wait for pending binds */ xe_vm_lock(vm, false); @@ -691,10 +734,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm) err = xe_vm_invalidate_vma(&uvma->vma); xe_vm_unlock(vm); if (err) - return err; + break; } else { - if (err < 0) - return err; + if (err) + break; list_del_init(&uvma->userptr.repin_link); list_move_tail(&uvma->vma.combined_links.rebind, @@ -702,7 +745,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm) } } - return 0; + if (err) { + down_write(&vm->userptr.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->userptr.notifier_lock); + } + return err; } /** @@ -987,6 +1042,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, INIT_LIST_HEAD(&userptr->invalidate_link); INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; + mutex_init(&userptr->unmap_mutex); err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, @@ -1028,6 +1084,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); + mutex_destroy(&userptr->unmap_mutex); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); @@ -1066,6 +1123,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { @@ -1419,8 +1477,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) * scheduler drops all the references of it, hence protecting the VM * for this case is necessary. */ - if (flags & XE_VM_FLAG_LR_MODE) + if (flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); xe_pm_runtime_get_noresume(xe); + } vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { @@ -1465,10 +1525,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = true; } - if (vm->flags & XE_VM_FLAG_LR_MODE) { - INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + if (vm->flags & XE_VM_FLAG_LR_MODE) vm->batch_invalidate_tlb = false; - } /* Fill pt_root after allocating scratch tables */ for_each_tile(tile, xe, id) { @@ -1524,9 +1582,40 @@ err_no_resv: static void xe_vm_close(struct xe_vm *vm) { + struct xe_device *xe = vm->xe; + bool bound; + int idx; + + bound = drm_dev_enter(&xe->drm, &idx); + down_write(&vm->lock); + vm->size = 0; + + if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { + struct xe_tile *tile; + struct xe_gt *gt; + u8 id; + + /* Wait for pending binds */ + dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + + if (bound) { + for_each_tile(tile, xe, id) + if (vm->pt_root[id]) + xe_pt_clear(xe, vm->pt_root[id]); + + for_each_gt(gt, xe, id) + xe_gt_tlb_invalidation_vm(gt, vm); + } + } + up_write(&vm->lock); + + if (bound) + drm_dev_exit(idx); } void xe_vm_close_and_put(struct xe_vm *vm) @@ -2258,8 +2347,17 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, break; } case DRM_GPUVA_OP_UNMAP: + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + break; case DRM_GPUVA_OP_PREFETCH: - /* FIXME: Need to skip some prefetch ops */ + vma = gpuva_to_vma(op->base.prefetch.va); + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + if (err) + return err; + } + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; default: diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index c864dba35e1d..d2406532fcc5 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -275,9 +275,17 @@ static inline void vm_dbg(const struct drm_device *dev, const char *format, ...) { /* noop */ } #endif -#endif struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7f9a303e51d8..a4b4091cfd0d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -59,12 +59,16 @@ struct xe_userptr { struct sg_table *sg; /** @notifier_seq: notifier sequence number */ unsigned long notifier_seq; + /** @unmap_mutex: Mutex protecting dma-unmapping */ + struct mutex unmap_mutex; /** * @initial_bind: user pointer has been bound at least once. * write: vm->userptr.notifier_lock in read mode and vm->resv held. * read: vm->userptr.notifier_lock in write mode or vm->resv held. */ bool initial_bind; + /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ + bool mapped; #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) u32 divisor; #endif @@ -227,8 +231,8 @@ struct xe_vm { * up for revalidation. Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires the @userptr.notifer_lock in - * write mode. + * items to the list requires either the @userptr.notifer_lock in + * write mode, OR @lock in write mode. */ struct list_head invalidated; } userptr; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 37e592b2bf06..aea6034a8107 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -606,6 +606,13 @@ static const struct xe_rtp_entry_sr engine_was[] = { }; static const struct xe_rtp_entry_sr lrc_was[] = { + { XE_RTP_NAME("16011163337"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + /* read verification is ignored due to 1608008084. */ + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2, + FF_MODE2_GS_TIMER_MASK, + FF_MODE2_GS_TIMER_224)) + }, { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3, @@ -776,6 +783,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 264d6e116499..93fa2708ee37 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -29,8 +29,10 @@ 13011645652 GRAPHICS_VERSION(2004) 14022293748 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019794406 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) diff --git a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c index f5781939de9c..a25b22238e3d 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c @@ -231,6 +231,8 @@ static int zynqmp_dpsub_probe(struct platform_device *pdev) if (ret) return ret; + dma_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); + /* Try the reserved memory. Proceed if there's none. */ of_reserved_mem_device_init(&pdev->dev); |