Diffstat (limited to 'drivers/gpu')
268 files changed, 3801 insertions, 1784 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 7f515be5185d..7fb0b93bc1ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -103,6 +103,23 @@ config DRM_AMDGPU_WERROR Add -Werror to the build flags for amdgpu.ko. Only enable this if you are fixing compiler warnings in amdgpu.ko. + +config GCOV_PROFILE_AMDGPU + bool "Enable GCOV profiling on amdgpu" + depends on DRM_AMDGPU + depends on GCOV_KERNEL + default n + help + Enable GCOV profiling on the amdgpu driver for checking which + functions/lines are executed during testing. This adds compiler + instrumentation flags to all amdgpu source files, producing + .gcda/.gcno coverage data accessible via debugfs. + + This increases the amdgpu module size by ~50% and adds ~2-5% + runtime overhead on GPU submission paths. + + If unsure, say N. + source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/amdkfd/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 6a7e9bfec59e..db66c6372199 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -27,6 +27,10 @@ FULL_AMD_PATH=$(src)/.. DISPLAY_FOLDER_NAME=display FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) +ifdef CONFIG_GCOV_PROFILE_AMDGPU +GCOV_PROFILE := y +endif + ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ -I$(FULL_AMD_PATH)/amdgpu \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 938fb0b2368d..8686c6dc2c08 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -179,7 +179,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, list_for_each_entry(tmp_adev, reset_device_list, reset_list) { /* For XGMI run all resets in parallel to speed up the process */ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - if (!queue_work(system_unbound_wq, + if (!queue_work(system_dfl_wq, &tmp_adev->reset_cntl->reset_work)) r = -EALREADY; } else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 49e7881750fa..8bc591deb546 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1045,11 +1045,6 @@ struct amdgpu_device { struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM]; const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM]; - /* xarray used to retrieve the user queue fence driver reference * in the EOP interrupt handler to signal the particular user * queue fence.
- */ - struct xarray userq_xa; /** * @userq_doorbell_xa: Global user queue map (doorbell index → queue) * Key: doorbell_index (unique global identifier for the queue) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4f27c75abedb..d9e283f3b57d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -805,7 +805,10 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) } else { tmp = adev->gmc.mem_partitions[mem_id].size; } - do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); + + if (adev->xcp_mgr->mem_alloc_mode == AMDGPU_PARTITION_MEM_CAPPING_EVEN) + do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); + return ALIGN_DOWN(tmp, PAGE_SIZE); } else if (adev->apu_prefer_gtt) { return (ttm_tt_pages_limit() << PAGE_SHIFT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 29b400cdd6d5..72a5a29e63f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1735,7 +1735,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; } else { - alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE | + AMDGPU_GEM_CREATE_VRAM_CLEARED; alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 9f38b7dd1011..3698dd0330ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1685,9 +1685,9 @@ static int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { /* Firmware request VRAM reservation for SR-IOV */ - adev->mman.fw_vram_usage_start_offset = (start_addr & - (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; - adev->mman.fw_vram_usage_size = size << 10; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_VRAM_USAGE, + (start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10, + size << 10, true); /* Use the default scratch size */ usage_bytes = 0; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index cd9aa5b45e94..6860a3a4d466 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -120,9 +120,9 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev, (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { /* Firmware request VRAM reservation for SR-IOV */ - adev->mman.fw_vram_usage_start_offset = (start_addr & - (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; - adev->mman.fw_vram_usage_size = fw_size << 10; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_VRAM_USAGE, + (start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10, + fw_size << 10, true); /* Use the default scratch size */ *usage_bytes = 0; } else { @@ -152,18 +152,18 @@ static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev, ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { /* Firmware request VRAM reservation for SR-IOV */ - adev->mman.fw_vram_usage_start_offset = (fw_start_addr & - (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; - 
adev->mman.fw_vram_usage_size = fw_size << 10; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_VRAM_USAGE, + (fw_start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10, + fw_size << 10, true); } if (amdgpu_sriov_vf(adev) && ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { /* driver request VRAM reservation for SR-IOV */ - adev->mman.drv_vram_usage_start_offset = (drv_start_addr & - (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; - adev->mman.drv_vram_usage_size = drv_size << 10; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_DRV_VRAM_USAGE, + (drv_start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10, + drv_size << 10, true); } *usage_bytes = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index b04fa9fd90b7..92c98e999efe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -866,6 +866,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) if (dret) { amdgpu_connector->detected_by_load = false; drm_edid_free(amdgpu_connector->edid); + amdgpu_connector->edid = NULL; amdgpu_connector_get_edid(connector); if (!amdgpu_connector->edid) { @@ -882,6 +883,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) */ if (amdgpu_connector->use_digital && amdgpu_connector->shared_ddc) { drm_edid_free(amdgpu_connector->edid); + amdgpu_connector->edid = NULL; ret = connector_status_disconnected; } else { ret = connector_status_connected; @@ -977,6 +979,7 @@ static void amdgpu_connector_shared_ddc(enum drm_connector_status *status, if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { drm_edid_free(amdgpu_connector->edid); + amdgpu_connector->edid = NULL; *status = connector_status_disconnected; } } @@ -1046,6 +1049,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) if (dret) { amdgpu_connector->detected_by_load = false; drm_edid_free(amdgpu_connector->edid); + amdgpu_connector->edid = NULL; amdgpu_connector_get_edid(connector); if (!amdgpu_connector->edid) { @@ -1062,6 +1066,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) */ if ((!amdgpu_connector->use_digital) && amdgpu_connector->shared_ddc) { drm_edid_free(amdgpu_connector->edid); + amdgpu_connector->edid = NULL; ret = connector_status_disconnected; } else { ret = connector_status_connected; @@ -1412,6 +1417,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) } drm_edid_free(amdgpu_connector->edid); + amdgpu_connector->edid = NULL; if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) || (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index c72c345334d0..4e6e390854e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -32,6 +32,8 @@ static const guid_t BOOT = BOOT_TYPE; static const guid_t CRASHDUMP = AMD_CRASHDUMP; static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR; +#define CPER_SIGNATURE_SZ (sizeof(((struct cper_hdr *)0)->signature)) + static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size) { hdr->record_length += size; @@ -425,23 +427,40 @@ int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev, static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos) { - struct cper_hdr *chdr; + char signature[CPER_SIGNATURE_SZ]; + + if ((pos << 2) >= 
ring->ring_size) + return false; - chdr = (struct cper_hdr *)&(ring->ring[pos]); - return strcmp(chdr->signature, "CPER") ? false : true; + if ((pos << 2) + CPER_SIGNATURE_SZ <= ring->ring_size) { + memcpy(signature, &ring->ring[pos], CPER_SIGNATURE_SZ); + } else { + u32 chunk = ring->ring_size - (pos << 2); + + memcpy(signature, &ring->ring[pos], chunk); + memcpy(signature + chunk, ring->ring, CPER_SIGNATURE_SZ - chunk); + } + + return !memcmp(signature, "CPER", CPER_SIGNATURE_SZ); } static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos) { - struct cper_hdr *chdr; + struct cper_hdr chdr; u64 p; u32 chunk, rec_len = 0; - chdr = (struct cper_hdr *)&(ring->ring[pos]); chunk = ring->ring_size - (pos << 2); - if (!strcmp(chdr->signature, "CPER")) { - rec_len = chdr->record_length; + if (amdgpu_cper_is_hdr(ring, pos)) { + if (chunk >= sizeof(chdr)) { + memcpy(&chdr, &ring->ring[pos], sizeof(chdr)); + } else { + memcpy(&chdr, &ring->ring[pos], chunk); + memcpy((u8 *)&chdr + chunk, ring->ring, sizeof(chdr) - chunk); + } + + rec_len = chdr.record_length; goto calc; } @@ -450,8 +469,7 @@ static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos) goto calc; for (p = pos + 1; p <= ring->buf_mask; p++) { - chdr = (struct cper_hdr *)&(ring->ring[p]); - if (!strcmp(chdr->signature, "CPER")) { + if (amdgpu_cper_is_hdr(ring, p)) { rec_len = (p - pos) << 2; goto calc; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index fddf4e1252bd..d386bc775d03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -210,12 +210,24 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, static ssize_t amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump) { + struct amdgpu_device *adev = coredump->adev; struct drm_printer p; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; + struct amdgpu_bo_va_mapping *mapping; struct amdgpu_ip_block *ip_block; - int ver; - + struct amdgpu_res_cursor cursor; + struct amdgpu_bo *abo, *root; + uint64_t va_start, offset; + struct amdgpu_ring *ring; + struct amdgpu_vm *vm; + u32 *ib_content; + uint8_t *kptr; + int ver, i, j, r; + u32 ring_idx, off; + bool sizing_pass; + + sizing_pass = buffer == NULL; iter.data = buffer; iter.offset = 0; iter.remain = count; @@ -303,23 +315,25 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf /* Add ring buffer information */ drm_printf(&p, "Ring buffer information\n"); - for (int i = 0; i < coredump->adev->num_rings; i++) { - int j = 0; - struct amdgpu_ring *ring = coredump->adev->rings[i]; - - drm_printf(&p, "ring name: %s\n", ring->name); - drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", - amdgpu_ring_get_rptr(ring), - amdgpu_ring_get_wptr(ring), - ring->buf_mask); - drm_printf(&p, "Ring size in dwords: %d\n", - ring->ring_size / 4); - drm_printf(&p, "Ring contents\n"); - drm_printf(&p, "Offset \t Value\n"); - - while (j < ring->ring_size) { - drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]); - j += 4; + if (coredump->num_rings) { + for (i = 0; i < coredump->num_rings; i++) { + ring_idx = coredump->rings[i].ring_index; + ring = coredump->adev->rings[ring_idx]; + off = coredump->rings[i].offset; + + drm_printf(&p, "ring name: %s\n", ring->name); + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", + coredump->rings[i].rptr, + coredump->rings[i].wptr, + 
ring->buf_mask); + drm_printf(&p, "Ring size in dwords: %d\n", + ring->ring_size / 4); + drm_printf(&p, "Ring contents\n"); + drm_printf(&p, "Offset \t Value\n"); + + for (j = 0; j < ring->ring_size; j += 4) + drm_printf(&p, "0x%x \t 0x%x\n", j, + coredump->rings_dw[off + j / 4]); } } @@ -328,6 +342,87 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf else if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->num_ibs) { + /* Don't try to look up the VM or map the BOs when calculating the + * size required to store the devcoredump. + */ + if (sizing_pass) + vm = NULL; + else + vm = amdgpu_vm_lock_by_pasid(adev, &root, coredump->pasid); + + for (int i = 0; i < coredump->num_ibs && (sizing_pass || vm); i++) { + ib_content = kvmalloc_array(coredump->ibs[i].ib_size_dw, 4, + GFP_KERNEL); + if (!ib_content) + continue; + + /* vm=NULL can only happen when 'sizing_pass' is true. Skip to the + * drm_printf() calls (ib_content doesn't need to be initialized + * as its content won't be written anywhere). + */ + if (!vm) + goto output_ib_content; + + va_start = coredump->ibs[i].gpu_addr & AMDGPU_GMC_HOLE_MASK; + mapping = amdgpu_vm_bo_lookup_mapping(vm, va_start / AMDGPU_GPU_PAGE_SIZE); + if (!mapping) + goto free_ib_content; + + offset = va_start - (mapping->start * AMDGPU_GPU_PAGE_SIZE); + abo = amdgpu_bo_ref(mapping->bo_va->base.bo); + r = amdgpu_bo_reserve(abo, false); + if (r) + goto free_ib_content; + + if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) { + off = 0; + + if (abo->tbo.resource->mem_type != TTM_PL_VRAM) + goto unreserve_abo; + + amdgpu_res_first(abo->tbo.resource, offset, + coredump->ibs[i].ib_size_dw * 4, + &cursor); + while (cursor.remaining) { + amdgpu_device_mm_access(adev, cursor.start / 4, + &ib_content[off], cursor.size / 4, + false); + off += cursor.size; + amdgpu_res_next(&cursor, cursor.size); + } + } else { + r = ttm_bo_kmap(&abo->tbo, 0, + PFN_UP(abo->tbo.base.size), + &abo->kmap); + if (r) + goto unreserve_abo; + + kptr = amdgpu_bo_kptr(abo); + kptr += offset; + memcpy(ib_content, kptr, + coredump->ibs[i].ib_size_dw * 4); + + amdgpu_bo_kunmap(abo); + } + +output_ib_content: + drm_printf(&p, "\nIB #%d 0x%llx %d dw\n", + i, coredump->ibs[i].gpu_addr, coredump->ibs[i].ib_size_dw); + for (int j = 0; j < coredump->ibs[i].ib_size_dw; j++) + drm_printf(&p, "0x%08x\n", ib_content[j]); +unreserve_abo: + if (vm) + amdgpu_bo_unreserve(abo); +free_ib_content: + kvfree(ib_content); + } + if (vm) { + amdgpu_bo_unreserve(root); + amdgpu_bo_unref(&root); + } + } + return count - iter.remain; } @@ -359,6 +454,8 @@ static void amdgpu_devcoredump_free(void *data) struct amdgpu_coredump_info *coredump = data; kvfree(coredump->formatted); + kvfree(coredump->rings); + kvfree(coredump->rings_dw); kvfree(data); } @@ -367,6 +464,9 @@ static void amdgpu_devcoredump_deferred_work(struct work_struct *work) struct amdgpu_device *adev = container_of(work, typeof(*adev), coredump_work); struct amdgpu_coredump_info *coredump = adev->coredump; + if (!coredump) + goto end; + /* Do a one-time preparation of the coredump output because * repeatedly calling drm_coredump_printer is very slow.
*/ @@ -395,13 +495,20 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, { struct drm_device *dev = adev_to_drm(adev); struct amdgpu_coredump_info *coredump; + size_t size = sizeof(*coredump); struct drm_sched_job *s_job; + u64 total_ring_size, ring_count; + struct amdgpu_ring *ring; + int i, off, idx; /* No need to generate a new coredump if there's one in progress already. */ - if (work_pending(&adev->coredump_work)) + if (work_busy(&adev->coredump_work)) return; - coredump = kzalloc_obj(*coredump, GFP_NOWAIT); + if (job && job->pasid) + size += sizeof(struct amdgpu_coredump_ib_info) * job->num_ibs; + + coredump = kzalloc(size, GFP_NOWAIT); if (!coredump) return; @@ -416,6 +523,12 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, coredump->reset_task_info = *ti; amdgpu_vm_put_task_info(ti); } + coredump->pasid = job->pasid; + coredump->num_ibs = job->num_ibs; + for (i = 0; i < job->num_ibs; ++i) { + coredump->ibs[i].gpu_addr = job->ibs[i].gpu_addr; + coredump->ibs[i].ib_size_dw = job->ibs[i].length_dw; + } } if (job) { @@ -423,6 +536,47 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, coredump->ring = to_amdgpu_ring(s_job->sched); } + /* Dump ring content if memory allocation succeeds. */ + ring_count = 0; + total_ring_size = 0; + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + + /* Only dump rings with unsignaled fences. */ + if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq && + coredump->ring != ring) + continue; + + total_ring_size += ring->ring_size; + ring_count++; + } + coredump->rings_dw = kzalloc(total_ring_size, GFP_NOWAIT); + coredump->rings = kcalloc(ring_count, sizeof(struct amdgpu_coredump_ring), GFP_NOWAIT); + if (coredump->rings && coredump->rings_dw) { + for (i = 0, off = 0, idx = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + + if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq && + coredump->ring != ring) + continue; + + coredump->rings[idx].ring_index = ring->idx; + coredump->rings[idx].rptr = amdgpu_ring_get_rptr(ring); + coredump->rings[idx].wptr = amdgpu_ring_get_wptr(ring); + coredump->rings[idx].offset = off; + + memcpy(&coredump->rings_dw[off], ring->ring, ring->ring_size); + off += ring->ring_size / 4; + idx++; + } + coredump->num_rings = idx; + } else { + kvfree(coredump->rings_dw); + kvfree(coredump->rings); + coredump->rings_dw = NULL; + coredump->rings = NULL; + } + coredump->adev = adev; ktime_get_ts64(&coredump->reset_time); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h index f8f2f4df129b..2371e20fc68b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -31,6 +31,18 @@ #define AMDGPU_COREDUMP_VERSION "1" +struct amdgpu_coredump_ring { + u64 rptr; + u64 wptr; + u32 ring_index; + u32 offset; +}; + +struct amdgpu_coredump_ib_info { + uint64_t gpu_addr; + u32 ib_size_dw; +}; + struct amdgpu_coredump_info { struct amdgpu_device *adev; struct amdgpu_task_info reset_task_info; @@ -39,11 +51,20 @@ struct amdgpu_coredump_info { bool skip_vram_check; bool reset_vram_lost; struct amdgpu_ring *ring; + + struct amdgpu_coredump_ring *rings; + u32 *rings_dw; + u32 num_rings; + /* Readable form of devcoredump, generated once to speed up * reading it (see drm_coredump_printer's documentation).
*/ ssize_t formatted_size; char *formatted; + + unsigned int pasid; + int num_ibs; + struct amdgpu_coredump_ib_info ibs[] __counted_by(num_ibs); }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9c936519bb2b..66ca043658ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1334,18 +1334,15 @@ static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); - if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) - return false; - - if (c->x86 == 6 && - adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) { + if (c->x86_vendor == X86_VENDOR_INTEL) { switch (c->x86_model) { case VFM_MODEL(INTEL_ALDERLAKE): case VFM_MODEL(INTEL_ALDERLAKE_L): case VFM_MODEL(INTEL_RAPTORLAKE): case VFM_MODEL(INTEL_RAPTORLAKE_P): case VFM_MODEL(INTEL_RAPTORLAKE_S): + case VFM_MODEL(INTEL_TIGERLAKE): + case VFM_MODEL(INTEL_TIGERLAKE_L): return true; default: return false; @@ -2842,8 +2839,12 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) * that checks whether the PSP is running. A solution for those issues * in the APU is to trigger a GPU reset, but this should be done during * the unload phase to avoid adding boot latency and screen flicker. + * GFX V11 has the GC block powered off by default. Every time the AMDGPU driver + * sends a request to PMFW to unload MP1, PMFW will put GC in reset and power down + * the voltage. Hence, skip the reset for APUs with GFX V11 or later. */ - if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) { + if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) { r = amdgpu_asic_reset(adev); if (r) dev_err(adev->dev, "asic reset on %s failed\n", __func__); @@ -3757,15 +3758,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); - xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); - INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); INIT_LIST_HEAD(&adev->pm.od_kobj_list); - xa_init(&adev->userq_doorbell_xa); + xa_init_flags(&adev->userq_doorbell_xa, XA_FLAGS_LOCK_IRQ); INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); @@ -4065,7 +4064,7 @@ fence_driver_init: } /* must succeed.
*/ amdgpu_ras_resume(adev); - queue_delayed_work(system_wq, &adev->delayed_init_work, + queue_delayed_work(system_dfl_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); } @@ -4630,7 +4629,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) if (r) goto exit; - queue_delayed_work(system_wq, &adev->delayed_init_work, + queue_delayed_work(system_dfl_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); exit: if (amdgpu_sriov_vf(adev)) { @@ -5339,7 +5338,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, list_for_each_entry(tmp_adev, device_list_handle, reset_list) { /* For XGMI run all resets in parallel to speed up the process */ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - if (!queue_work(system_unbound_wq, + if (!queue_work(system_dfl_wq, &tmp_adev->xgmi_reset_work)) r = -EALREADY; } else @@ -5520,8 +5519,6 @@ static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev, list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; - if (amdgpu_reset_in_dpc(adev)) - tmp_adev->pcie_reset_ctx.in_link_reset = true; } if (!list_is_first(&adev->reset_list, device_list)) list_rotate_to_front(&adev->reset_list, device_list); @@ -6293,6 +6290,9 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta amdgpu_reset_set_dpc_status(adev, true); mutex_lock(&hive->hive_lock); + } else { + if (amdgpu_device_bus_status_check(adev)) + amdgpu_reset_set_dpc_status(adev, true); } memset(&reset_context, 0, sizeof(reset_context)); INIT_LIST_HEAD(&device_list); @@ -6413,6 +6413,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) tmp_adev->pcie_reset_ctx.in_link_reset = true; } else { + adev->pcie_reset_ctx.in_link_reset = true; set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); } @@ -6469,9 +6470,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev) tmp_adev->pcie_reset_ctx.in_link_reset = false; list_add_tail(&tmp_adev->reset_list, &device_list); } - } else + } else { + adev->pcie_reset_ctx.in_link_reset = false; list_add_tail(&adev->reset_list, &device_list); - + } amdgpu_device_sched_resume(&device_list, NULL, NULL); amdgpu_device_gpu_resume(adev, &device_list, false); amdgpu_device_recovery_put_reset_lock(adev, &device_list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8ec5465c3349..8d99bfaa498f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -535,10 +535,11 @@ static int amdgpu_discovery_get_table_info(struct amdgpu_device *adev, *info = &bhdrv2->table_list[table_id]; break; case 1: + case 0: *info = &bhdr->table_list[table_id]; break; default: - dev_err(adev->dev, "Invalid ip discovery table version\n"); + dev_err(adev->dev, "Invalid ip discovery table version %d\n", bhdr->version_major); return -EINVAL; } @@ -3089,10 +3090,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): - adev->family = AMDGPU_FAMILY_GC_11_5_0; - break; case IP_VERSION(11, 5, 4): - adev->family = AMDGPU_FAMILY_GC_11_5_4; + adev->family = AMDGPU_FAMILY_GC_11_5_0; break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8ed637f92322..60debd543e44 100644 ---
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -641,9 +641,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); * CIK (Sea Islands) are second generation GCN GPUs, supported by both * drivers: radeon (old) and amdgpu (new). This parameter controls whether * amdgpu should support CIK. - * By default: - * - CIK dedicated GPUs are supported by amdgpu. - * - CIK APUs are supported by radeon (except when radeon is not built). + * By default, CIK dedicated GPUs and APUs are supported by amdgpu. * Only relevant when CONFIG_DRM_AMDGPU_CIK is enabled to build CIK support in amdgpu. * See also radeon.cik_support which should be disabled when amdgpu.cik_support is * enabled, and vice versa. @@ -2323,8 +2321,6 @@ static bool amdgpu_support_enabled(struct device *dev, case CHIP_BONAIRE: case CHIP_HAWAII: - support_by_default = true; - fallthrough; case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: @@ -2332,6 +2328,7 @@ static bool amdgpu_support_enabled(struct device *dev, param = "cik_support"; module_param = amdgpu_cik_support; amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_CIK); + support_by_default = true; break; default: @@ -3152,17 +3149,15 @@ static int __init amdgpu_init(void) r = amdgpu_sync_init(); if (r) - goto error_sync; - - r = amdgpu_userq_fence_slab_init(); - if (r) - goto error_fence; + return r; amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); - /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ - amdgpu_amdkfd_init(); + /* Ignore KFD init failures when CONFIG_HSA_AMD is not set. */ + r = amdgpu_amdkfd_init(); + if (r && r != -ENOENT) + goto error_fini_sync; if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); @@ -3173,10 +3168,8 @@ static int __init amdgpu_init(void) /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); -error_fence: +error_fini_sync: amdgpu_sync_fini(); - -error_sync: return r; } @@ -3187,7 +3180,6 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index bc772ca3dab7..b6f849d51c2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -262,12 +262,19 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) */ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { + int r; + if (adev->gart.bo != NULL) return 0; - return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, - NULL, (void *)&adev->gart.ptr); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, - NULL, (void *)&adev->gart.ptr); + if (r) + return r; + + memset_io(adev->gart.ptr, adev->gart.gart_pte_flags, adev->gart.table_size); + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index cab3196a87fb..b8ca876694ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1580,6 +1580,36 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return count; } +static ssize_t compute_partition_mem_alloc_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ +
struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int mode = adev->xcp_mgr->mem_alloc_mode; + + return sysfs_emit(buf, "%s\n", + amdgpu_gfx_compute_mem_alloc_mode_desc(mode)); +} + + +static ssize_t compute_partition_mem_alloc_mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (!strncasecmp("CAPPING", buf, strlen("CAPPING"))) + adev->xcp_mgr->mem_alloc_mode = AMDGPU_PARTITION_MEM_CAPPING_EVEN; + else if (!strncasecmp("ALL", buf, strlen("ALL"))) + adev->xcp_mgr->mem_alloc_mode = AMDGPU_PARTITION_MEM_ALLOC_ALL; + else + return -EINVAL; + + return count; +} + static const char *xcp_desc[] = { [AMDGPU_SPX_PARTITION_MODE] = "SPX", [AMDGPU_DPX_PARTITION_MODE] = "DPX", @@ -1935,6 +1965,10 @@ static DEVICE_ATTR(gfx_reset_mask, 0444, static DEVICE_ATTR(compute_reset_mask, 0444, amdgpu_gfx_get_compute_reset_mask, NULL); +static DEVICE_ATTR(compute_partition_mem_alloc_mode, 0644, + compute_partition_mem_alloc_mode_show, + compute_partition_mem_alloc_mode_store); + static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev) { struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; @@ -1955,6 +1989,11 @@ static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev) if (r) return r; + r = device_create_file(adev->dev, + &dev_attr_compute_partition_mem_alloc_mode); + if (r) + return r; + if (xcp_switch_supported) r = device_create_file(adev->dev, &dev_attr_available_compute_partition); @@ -1974,6 +2013,8 @@ static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev) (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode); device_remove_file(adev->dev, &dev_attr_current_compute_partition); + device_remove_file(adev->dev, &dev_attr_compute_partition_mem_alloc_mode); + if (xcp_switch_supported) device_remove_file(adev->dev, &dev_attr_available_compute_partition); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 2785eda6fea5..a0cf0a3b41da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -71,6 +71,11 @@ enum amdgpu_gfx_partition { AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2, }; +enum amdgpu_gfx_partition_mem_alloc_mode { + AMDGPU_PARTITION_MEM_CAPPING_EVEN = 0, + AMDGPU_PARTITION_MEM_ALLOC_ALL = 1, +}; + #define NUM_XCC(x) hweight16(x) enum amdgpu_gfx_ras_mem_id_type { @@ -677,4 +682,16 @@ static inline const char *amdgpu_gfx_compute_mode_desc(int mode) } } +static inline const char *amdgpu_gfx_compute_mem_alloc_mode_desc(int mode) +{ + switch (mode) { + case AMDGPU_PARTITION_MEM_CAPPING_EVEN: + return "CAPPING"; + case AMDGPU_PARTITION_MEM_ALLOC_ALL: + return "ALL"; + default: + return "UNKNOWN"; + } +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ec74f3971732..3d9497d121ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -314,7 +314,10 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, mc->gart_start = max_mc_address - mc->gart_size + 1; break; case AMDGPU_GART_PLACEMENT_LOW: - mc->gart_start = 0; + if (size_bf >= mc->gart_size) + mc->gart_start = 0; + else + mc->gart_start = ALIGN(mc->fb_end, four_gb); break; case AMDGPU_GART_PLACEMENT_BEST_FIT: default: @@ -1033,17 +1036,17 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int
hub_type, } } -void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) +void amdgpu_gmc_init_vga_resv_regions(struct amdgpu_device *adev) { unsigned size; + if (adev->gmc.is_app_apu) + return; + /* * Some ASICs need to reserve a region of video memory to avoid access * from driver */ - adev->mman.stolen_reserved_offset = 0; - adev->mman.stolen_reserved_size = 0; - /* * TODO: * Currently there is a bug where some memory client outside @@ -1060,8 +1063,8 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) */ #ifdef CONFIG_X86 if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) { - adev->mman.stolen_reserved_offset = 0x500000; - adev->mman.stolen_reserved_size = 0x200000; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_STOLEN_RESERVED, + 0x500000, 0x200000, false); } #endif break; @@ -1099,11 +1102,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) size = 0; if (size > AMDGPU_VBIOS_VGA_ALLOCATION) { - adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION; - adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_STOLEN_VGA, + 0, AMDGPU_VBIOS_VGA_ALLOCATION, false); + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_STOLEN_EXTENDED, + AMDGPU_VBIOS_VGA_ALLOCATION, + size - AMDGPU_VBIOS_VGA_ALLOCATION, false); } else { - adev->mman.stolen_vga_size = size; - adev->mman.stolen_extended_size = 0; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_STOLEN_VGA, + 0, size, false); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 32e73e8ba778..6ab4c1e297fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -456,7 +456,7 @@ extern void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, bool enable); -void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev); +void amdgpu_gmc_init_vga_resv_regions(struct amdgpu_device *adev); void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev); uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 569c5a89ff10..124fb38eb465 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -22,7 +22,7 @@ */ #include "amdgpu_ids.h" -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/dma-fence-array.h> @@ -40,8 +40,8 @@ * VMs are looked up from the PASID per amdgpu_device. */ -static DEFINE_IDR(amdgpu_pasid_idr); -static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock); +static DEFINE_XARRAY_FLAGS(amdgpu_pasid_xa, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC1); +static u32 amdgpu_pasid_xa_next; /* Helper to free pasid from a fence callback */ struct amdgpu_pasid_cb { @@ -62,36 +62,37 @@ struct amdgpu_pasid_cb { */ int amdgpu_pasid_alloc(unsigned int bits) { - int pasid; + u32 pasid; + int r; if (bits == 0) return -EINVAL; - spin_lock(&amdgpu_pasid_idr_lock); - /* TODO: Need to replace the idr with an xarry, and then - * handle the internal locking with ATOMIC safe paths. 
- */ - pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1, - 1U << bits, GFP_ATOMIC); - spin_unlock(&amdgpu_pasid_idr_lock); - - if (pasid >= 0) - trace_amdgpu_pasid_allocated(pasid); + r = xa_alloc_cyclic_irq(&amdgpu_pasid_xa, &pasid, xa_mk_value(0), + XA_LIMIT(1, (1U << bits) - 1), + &amdgpu_pasid_xa_next, GFP_KERNEL); + if (r < 0) + return r; + trace_amdgpu_pasid_allocated(pasid); return pasid; } /** * amdgpu_pasid_free - Free a PASID * @pasid: PASID to free + * + * Called in IRQ context. */ void amdgpu_pasid_free(u32 pasid) { + unsigned long flags; + trace_amdgpu_pasid_freed(pasid); - spin_lock(&amdgpu_pasid_idr_lock); - idr_remove(&amdgpu_pasid_idr, pasid); - spin_unlock(&amdgpu_pasid_idr_lock); + xa_lock_irqsave(&amdgpu_pasid_xa, flags); + __xa_erase(&amdgpu_pasid_xa, pasid); + xa_unlock_irqrestore(&amdgpu_pasid_xa, flags); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, @@ -634,7 +635,5 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) */ void amdgpu_pasid_mgr_cleanup(void) { - spin_lock(&amdgpu_pasid_idr_lock); - idr_destroy(&amdgpu_pasid_idr); - spin_unlock(&amdgpu_pasid_idr_lock); + xa_destroy(&amdgpu_pasid_xa); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 06efce38f323..71272f40feef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -873,68 +873,59 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { - int ret = 0; - unsigned int n, alloc_size; - uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SE_INDEX_SHIFT) & AMDGPU_INFO_MMR_SE_INDEX_MASK; unsigned int sh_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; - - if (!down_read_trylock(&adev->reset_domain->sem)) - return -ENOENT; + unsigned int alloc_size; + uint32_t *regs; + int ret; /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) se_num = 0xffffffff; - } else if (se_num >= AMDGPU_GFX_MAX_SE) { - ret = -EINVAL; - goto out; - } + else if (se_num >= AMDGPU_GFX_MAX_SE) + return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; - } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { - ret = -EINVAL; - goto out; - } + else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) + return -EINVAL; - if (info->read_mmr_reg.count > 128) { - ret = -EINVAL; - goto out; - } + if (info->read_mmr_reg.count > 128) + return -EINVAL; - regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) { - ret = -ENOMEM; - goto out; - } + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), + GFP_KERNEL); + if (!regs) + return -ENOMEM; + down_read(&adev->reset_domain->sem); alloc_size = info->read_mmr_reg.count * sizeof(*regs); - amdgpu_gfx_off_ctrl(adev, false); + ret = 0; for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); - kfree(regs); - amdgpu_gfx_off_ctrl(adev, true); ret = -EFAULT; - goto out; + break; } } amdgpu_gfx_off_ctrl(adev, true); - n = copy_to_user(out, regs, min(size, alloc_size)); - kfree(regs); - ret = (n ? 
-EFAULT : 0); -out: up_read(&adev->reset_domain->sem); + + if (!ret) { + ret = copy_to_user(out, regs, min(size, alloc_size)) + ? -EFAULT : 0; + } + kfree(regs); return ret; } case AMDGPU_INFO_DEV_INFO: { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index d39b695cd925..f0e4d020f4c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1076,24 +1076,25 @@ int psp_update_fw_reservation(struct psp_context *psp) return 0; } - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_FW); reserv_size = roundup(reserv_size, SZ_1M); - ret = amdgpu_bo_create_kernel_at(adev, reserv_addr, reserv_size, &adev->mman.fw_reserved_memory, NULL); + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW, + reserv_addr, reserv_size, false); + ret = amdgpu_ttm_mark_vram_reserved(adev, AMDGPU_RESV_FW); if (ret) { dev_err(adev->dev, "reserve fw region failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); return ret; } reserv_size_ext = roundup(reserv_size_ext, SZ_1M); - ret = amdgpu_bo_create_kernel_at(adev, reserv_addr_ext, reserv_size_ext, - &adev->mman.fw_reserved_memory_extend, NULL); + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_EXTEND, + reserv_addr_ext, reserv_size_ext, false); + ret = amdgpu_ttm_mark_vram_reserved(adev, AMDGPU_RESV_FW_EXTEND); if (ret) { dev_err(adev->dev, "reserve extend fw region failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, NULL); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 79a49cba8d40..7e94ec11c57e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -277,7 +277,6 @@ struct psp_memory_training_context { /*vram offset of the c2p training data*/ u64 c2p_train_data_offset; - struct amdgpu_bo *c2p_bo; enum psp_memory_training_init_flag init; u32 training_cnt; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6edcb7713299..6c644cfe6695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -5726,7 +5726,7 @@ out: static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev) { - amdgpu_ras_add_critical_region(adev, adev->mman.fw_reserved_memory); + amdgpu_ras_add_critical_region(adev, adev->mman.resv_region[AMDGPU_RESV_FW].bo); } static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index cdf4909592d2..0c57fe259894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1950,7 +1950,7 @@ void amdgpu_ras_check_bad_page_status(struct amdgpu_device *adev) if (!control || amdgpu_bad_page_threshold == 0) return; - if (control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) { + if (control->ras_num_bad_pages > ras->bad_page_cnt_threshold) { if (amdgpu_dpm_send_rma_reason(adev)) dev_warn(adev->dev, "Unable to send out-of-band RMA CPER"); else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 7a2fcb7ded1d..1b982b803e6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -116,7 +116,7 @@ static int amdgpu_reset_xgmi_reset_on_init_perform_reset( 
/* Mode1 reset needs to be triggered on all devices together */ list_for_each_entry(tmp_adev, reset_device_list, reset_list) { /* For XGMI run all resets in parallel to speed up the process */ - if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) + if (!queue_work(system_dfl_wq, &tmp_adev->xgmi_reset_work)) r = -EALREADY; if (r) { dev_err(tmp_adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index ce5af137ee40..715c9e43e13a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -559,15 +559,18 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, int amdgpu_ring_init_mqd(struct amdgpu_ring *ring); -static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx) +static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, uint32_t idx) { - return ib->ptr[idx]; + if (idx < ib->length_dw) + return ib->ptr[idx]; + return 0; } -static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx, +static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, uint32_t idx, uint32_t value) { - ib->ptr[idx] = value; + if (idx < ib->length_dw) + ib->ptr[idx] = value; } int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index afaaab6496de..3d2e00efc741 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -75,6 +75,9 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev, unsigned int type, uint64_t size_in_page) { + if (!size_in_page) + return 0; + return ttm_range_man_init(&adev->mman.bdev, type, false, size_in_page); } @@ -1671,87 +1674,160 @@ static struct ttm_device_funcs amdgpu_bo_driver = { .access_memory = &amdgpu_ttm_access_memory, }; -/* - * Firmware Reservation functions - */ -/** - * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram - * - * @adev: amdgpu_device pointer - * - * free fw reserved vram if it has been reserved. - */ -static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) +void amdgpu_ttm_init_vram_resv(struct amdgpu_device *adev, + enum amdgpu_resv_region_id id, + uint64_t offset, uint64_t size, + bool needs_cpu_map) { - amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo, - NULL, &adev->mman.fw_vram_usage_va); + struct amdgpu_vram_resv *resv; + + if (id >= AMDGPU_RESV_MAX) + return; + + resv = &adev->mman.resv_region[id]; + resv->offset = offset; + resv->size = size; + resv->needs_cpu_map = needs_cpu_map; } -/* - * Driver Reservation functions - */ -/** - * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram - * - * @adev: amdgpu_device pointer - * - * free drv reserved vram if it has been reserved. 
- */ -static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev) +static void amdgpu_ttm_init_fw_resv_region(struct amdgpu_device *adev) { - amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo, - NULL, - &adev->mman.drv_vram_usage_va); + uint32_t reserve_size = 0; + + if (!adev->discovery.reserve_tmr) + return; + + /* + * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all + * the use cases (IP discovery/G6 memory training/profiling/diagnostic data, etc.) + * + * Otherwise, fall back to the legacy approach to check and reserve tmr block for ip + * discovery data and G6 memory training data respectively + */ + if (adev->bios) + reserve_size = + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); + + if (!adev->bios && + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))) + reserve_size = max(reserve_size, (uint32_t)280 << 20); + else if (!adev->bios && + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) { + if (hweight32(adev->aid_mask) == 1) + reserve_size = max(reserve_size, (uint32_t)128 << 20); + else + reserve_size = max(reserve_size, (uint32_t)144 << 20); + } else if (!reserve_size) + reserve_size = DISCOVERY_TMR_OFFSET; + + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW, + adev->gmc.real_vram_size - reserve_size, + reserve_size, false); } -/** - * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw - * - * @adev: amdgpu_device pointer - * - * create bo vram reservation from fw. - */ -static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) +static void amdgpu_ttm_init_mem_train_resv_region(struct amdgpu_device *adev) +{ + uint64_t reserve_size; + uint64_t offset; + + if (!adev->discovery.reserve_tmr) + return; + + if (!adev->bios || amdgpu_sriov_vf(adev)) + return; + + if (!amdgpu_atomfirmware_mem_training_supported(adev)) + return; + + reserve_size = adev->mman.resv_region[AMDGPU_RESV_FW].size; + offset = ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_MEM_TRAIN, + offset, + GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES, + false); +} + +static void amdgpu_ttm_init_vram_resv_regions(struct amdgpu_device *adev) { uint64_t vram_size = adev->gmc.visible_vram_size; - adev->mman.fw_vram_usage_va = NULL; - adev->mman.fw_vram_usage_reserved_bo = NULL; + /* Initialize memory reservations as required for VGA. + * This is used for VGA emulation and pre-OS scanout buffers to + * avoid display artifacts while transitioning between pre-OS + * and driver. + */ + amdgpu_gmc_init_vga_resv_regions(adev); + amdgpu_ttm_init_fw_resv_region(adev); + amdgpu_ttm_init_mem_train_resv_region(adev); - if (adev->mman.fw_vram_usage_size == 0 || - adev->mman.fw_vram_usage_size > vram_size) - return 0; + if (adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].size > vram_size) + adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].size = 0; - return amdgpu_bo_create_kernel_at(adev, - adev->mman.fw_vram_usage_start_offset, - adev->mman.fw_vram_usage_size, - &adev->mman.fw_vram_usage_reserved_bo, - &adev->mman.fw_vram_usage_va); + if (adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].size > vram_size) + adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].size = 0; } -/** - * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver - * - * @adev: amdgpu_device pointer - * - * create bo vram reservation from drv.
- */ -static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) +int amdgpu_ttm_mark_vram_reserved(struct amdgpu_device *adev, + enum amdgpu_resv_region_id id) { - u64 vram_size = adev->gmc.visible_vram_size; + struct amdgpu_vram_resv *resv; + int ret; - adev->mman.drv_vram_usage_va = NULL; - adev->mman.drv_vram_usage_reserved_bo = NULL; + if (id >= AMDGPU_RESV_MAX) + return -EINVAL; - if (adev->mman.drv_vram_usage_size == 0 || - adev->mman.drv_vram_usage_size > vram_size) + resv = &adev->mman.resv_region[id]; + if (!resv->size) return 0; - return amdgpu_bo_create_kernel_at(adev, - adev->mman.drv_vram_usage_start_offset, - adev->mman.drv_vram_usage_size, - &adev->mman.drv_vram_usage_reserved_bo, - &adev->mman.drv_vram_usage_va); + ret = amdgpu_bo_create_kernel_at(adev, resv->offset, resv->size, + &resv->bo, + resv->needs_cpu_map ? &resv->cpu_ptr : NULL); + if (ret) { + dev_err(adev->dev, + "reserve vram failed: id=%d offset=0x%llx size=0x%llx ret=%d\n", + id, resv->offset, resv->size, ret); + memset(resv, 0, sizeof(*resv)); + } + + return ret; +} + +void amdgpu_ttm_unmark_vram_reserved(struct amdgpu_device *adev, + enum amdgpu_resv_region_id id) +{ + struct amdgpu_vram_resv *resv; + + if (id >= AMDGPU_RESV_MAX) + return; + + resv = &adev->mman.resv_region[id]; + if (!resv->bo) + return; + + amdgpu_bo_free_kernel(&resv->bo, NULL, + resv->needs_cpu_map ? &resv->cpu_ptr : NULL); + memset(resv, 0, sizeof(*resv)); +} + +/* + * Reserve all regions with non-zero size. Regions whose info is not + * yet available (e.g., fw extended region) may still be reserved + * during runtime. + */ +static int amdgpu_ttm_alloc_vram_resv_regions(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < AMDGPU_RESV_MAX; i++) { + r = amdgpu_ttm_mark_vram_reserved(adev, i); + if (r) + return r; + } + + return 0; } /* @@ -1770,25 +1846,23 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; - amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL); - ctx->c2p_bo = NULL; + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_MEM_TRAIN); return 0; } -static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, - uint32_t reserve_size) +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) { struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; + struct amdgpu_vram_resv *resv = + &adev->mman.resv_region[AMDGPU_RESV_MEM_TRAIN]; memset(ctx, 0, sizeof(*ctx)); - ctx->c2p_train_data_offset = - ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); + ctx->c2p_train_data_offset = resv->offset; ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); - ctx->train_data_size = - GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; + ctx->train_data_size = resv->size; DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", ctx->train_data_size, @@ -1796,78 +1870,6 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, ctx->c2p_train_data_offset); } -/* - * reserve TMR memory at the top of VRAM which holds - * IP Discovery data and is protected by PSP. 
- */ -static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) -{ - struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; - bool mem_train_support = false; - uint32_t reserve_size = 0; - int ret; - - if (adev->bios && !amdgpu_sriov_vf(adev)) { - if (amdgpu_atomfirmware_mem_training_supported(adev)) - mem_train_support = true; - else - DRM_DEBUG("memory training does not support!\n"); - } - - /* - * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all - * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc) - * - * Otherwise, fallback to legacy approach to check and reserve tmr block for ip - * discovery data and G6 memory training data respectively - */ - if (adev->bios) - reserve_size = - amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - - if (!adev->bios && - (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))) - reserve_size = max(reserve_size, (uint32_t)280 << 20); - else if (!adev->bios && - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) { - if (hweight32(adev->aid_mask) == 1) - reserve_size = max(reserve_size, (uint32_t)128 << 20); - else - reserve_size = max(reserve_size, (uint32_t)144 << 20); - } else if (!reserve_size) - reserve_size = DISCOVERY_TMR_OFFSET; - - if (mem_train_support) { - /* reserve vram for mem train according to TMR location */ - amdgpu_ttm_training_data_block_init(adev, reserve_size); - ret = amdgpu_bo_create_kernel_at(adev, - ctx->c2p_train_data_offset, - ctx->train_data_size, - &ctx->c2p_bo, - NULL); - if (ret) { - dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret); - amdgpu_ttm_training_reserve_vram_fini(adev); - return ret; - } - ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; - } - - ret = amdgpu_bo_create_kernel_at( - adev, adev->gmc.real_vram_size - reserve_size, reserve_size, - &adev->mman.fw_reserved_memory, NULL); - if (ret) { - dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, - NULL); - return ret; - } - - return 0; -} - static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) { int i; @@ -2115,63 +2117,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->gmc.visible_vram_size); #endif - /* - *The reserved vram for firmware must be pinned to the specified - *place on the VRAM, so reserve it early. - */ - r = amdgpu_ttm_fw_reserve_vram_init(adev); - if (r) - return r; + amdgpu_ttm_init_vram_resv_regions(adev); - /* - * The reserved VRAM for the driver must be pinned to a specific - * location in VRAM, so reserve it early. - */ - r = amdgpu_ttm_drv_reserve_vram_init(adev); + r = amdgpu_ttm_alloc_vram_resv_regions(adev); if (r) return r; - /* - * only NAVI10 and later ASICs support IP discovery. - * If IP discovery is enabled, a block of memory should be - * reserved for it. - */ - if (adev->discovery.reserve_tmr) { - r = amdgpu_ttm_reserve_tmr(adev); - if (r) - return r; - } + if (adev->mman.resv_region[AMDGPU_RESV_MEM_TRAIN].size) { + struct psp_memory_training_context *ctx = + &adev->psp.mem_train_ctx; - /* allocate memory as required for VGA - * This is used for VGA emulation and pre-OS scanout buffers to - * avoid display artifacts while transitioning between pre-OS - * and driver. 
- */ - if (!adev->gmc.is_app_apu) { - r = amdgpu_bo_create_kernel_at(adev, 0, - adev->mman.stolen_vga_size, - &adev->mman.stolen_vga_memory, - NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, - adev->mman.stolen_extended_size, - &adev->mman.stolen_extended_memory, - NULL); - - if (r) - return r; - - r = amdgpu_bo_create_kernel_at(adev, - adev->mman.stolen_reserved_offset, - adev->mman.stolen_reserved_size, - &adev->mman.stolen_reserved_memory, - NULL); - if (r) - return r; - } else { - DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); + amdgpu_ttm_training_data_block_init(adev); + ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } dev_info(adev->dev, " %uM of VRAM memory ready\n", @@ -2284,23 +2241,19 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ if (!adev->gmc.is_app_apu) { - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_STOLEN_VGA); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_STOLEN_EXTENDED); /* return the FW reserved memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, - NULL); - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, - NULL); - if (adev->mman.stolen_reserved_size) - amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, - NULL, NULL); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_FW); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_FW_EXTEND); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_STOLEN_RESERVED); } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); amdgpu_ttm_free_mmio_remap_bo(adev); - amdgpu_ttm_fw_reserve_vram_fini(adev); - amdgpu_ttm_drv_reserve_vram_fini(adev); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_FW_VRAM_USAGE); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_DRV_VRAM_USAGE); if (drm_dev_enter(adev_to_drm(adev), &idx)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 3b1973611446..f2f23a42b3cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -59,6 +59,26 @@ struct amdgpu_ttm_buffer_entity { u64 gart_window_offs[2]; }; +enum amdgpu_resv_region_id { + AMDGPU_RESV_STOLEN_VGA, + AMDGPU_RESV_STOLEN_EXTENDED, + AMDGPU_RESV_STOLEN_RESERVED, + AMDGPU_RESV_FW, + AMDGPU_RESV_FW_EXTEND, + AMDGPU_RESV_FW_VRAM_USAGE, + AMDGPU_RESV_DRV_VRAM_USAGE, + AMDGPU_RESV_MEM_TRAIN, + AMDGPU_RESV_MAX +}; + +struct amdgpu_vram_resv { + uint64_t offset; + uint64_t size; + struct amdgpu_bo *bo; + void *cpu_ptr; + bool needs_cpu_map; +}; + struct amdgpu_mman { struct ttm_device bdev; struct ttm_pool *ttm_pools; @@ -83,31 +103,9 @@ struct amdgpu_mman { struct amdgpu_gtt_mgr gtt_mgr; struct ttm_resource_manager preempt_mgr; - uint64_t stolen_vga_size; - struct amdgpu_bo *stolen_vga_memory; - uint64_t stolen_extended_size; - struct amdgpu_bo *stolen_extended_memory; bool keep_stolen_vga_memory; - struct amdgpu_bo *stolen_reserved_memory; - uint64_t stolen_reserved_offset; - uint64_t stolen_reserved_size; - - /* fw reserved memory */ - struct amdgpu_bo *fw_reserved_memory; - struct amdgpu_bo *fw_reserved_memory_extend; - - /* firmware VRAM reservation */ - u64 fw_vram_usage_start_offset; - u64 fw_vram_usage_size; - struct amdgpu_bo *fw_vram_usage_reserved_bo; - void 
*fw_vram_usage_va; - - /* driver VRAM reservation */ - u64 drv_vram_usage_start_offset; - u64 drv_vram_usage_size; - struct amdgpu_bo *drv_vram_usage_reserved_bo; - void *drv_vram_usage_va; + struct amdgpu_vram_resv resv_region[AMDGPU_RESV_MAX]; /* PAGE_SIZE'd BO for process memory r/w over SDMA. */ struct amdgpu_bo *sdma_access_bo; @@ -175,6 +173,15 @@ void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev); bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, struct ttm_resource *res); +void amdgpu_ttm_init_vram_resv(struct amdgpu_device *adev, + enum amdgpu_resv_region_id id, + uint64_t offset, uint64_t size, + bool needs_cpu_map); +int amdgpu_ttm_mark_vram_reserved(struct amdgpu_device *adev, + enum amdgpu_resv_region_id id); +void amdgpu_ttm_unmark_vram_reserved(struct amdgpu_device *adev, + enum amdgpu_resv_region_id id); + int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 366728ed03e3..de140a8ed135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -205,6 +205,19 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) msecs_to_jiffies(timeout_ms)); } +void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) +{ + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags; + + xa_lock_irqsave(xa, flags); + queue = xa_load(xa, doorbell); + if (queue) + amdgpu_userq_fence_driver_process(queue->fence_drv); + xa_unlock_irqrestore(xa, flags); +} + static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue) { INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work); @@ -239,13 +252,12 @@ int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, u64 size; int r = 0; + /* Caller must hold vm->root.bo reservation */ + dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv); + user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT; size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; - r = amdgpu_bo_reserve(vm->root.bo, false); - if (r) - return r; - va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr); if (!va_map) { r = -EINVAL; @@ -255,13 +267,11 @@ int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, if (user_addr >= va_map->start && va_map->last - user_addr + 1 >= size) { amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr); - amdgpu_bo_unreserve(vm->root.bo); return 0; } r = -EINVAL; out_err: - amdgpu_bo_unreserve(vm->root.bo); return r; } @@ -270,15 +280,13 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) struct amdgpu_bo_va_mapping *mapping; bool r; - if (amdgpu_bo_reserve(vm->root.bo, false)) - return false; + dma_resv_assert_held(vm->root.bo->tbo.base.resv); mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped)) r = true; else r = false; - amdgpu_bo_unreserve(vm->root.bo); return r; } @@ -314,25 +322,21 @@ static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, { struct amdgpu_userq_va_cursor *va_cursor, *tmp; struct amdgpu_bo_va_mapping *mapping; - int r; - r = amdgpu_bo_reserve(queue->vm->root.bo, false); - if (r) - return r; + /* Caller must hold vm->root.bo reservation */ + 
dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv); list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr); if (!mapping) { - r = -EINVAL; - goto err; + return -EINVAL; } dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", queue, va_cursor->gpu_addr); amdgpu_userq_buffer_va_list_del(mapping, va_cursor); } -err: - amdgpu_bo_unreserve(queue->vm->root.bo); - return r; + + return 0; } static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) @@ -427,23 +431,14 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) return r; } -static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) +static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) { - struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct dma_fence *f = queue->last_fence; - int ret = 0; - if (f && !dma_fence_is_signaled(f)) { - ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT); - if (ret <= 0) { - drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", - f->context, f->seqno); - queue->state = AMDGPU_USERQ_STATE_HUNG; - return -ETIME; - } - } + if (!f) + return; - return ret; + dma_fence_wait(f, false); } static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) @@ -455,19 +450,26 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) /* Wait for mode-1 reset to complete */ down_read(&adev->reset_domain->sem); - /* Drop the userq reference. */ - amdgpu_userq_buffer_vas_list_cleanup(adev, queue); uq_funcs->mqd_destroy(queue); - amdgpu_userq_fence_driver_free(queue); /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); + amdgpu_userq_fence_driver_free(queue); + queue->fence_drv = NULL; queue->userq_mgr = NULL; list_del(&queue->userq_va_list); - kfree(queue); up_read(&adev->reset_domain->sem); } +/** + * amdgpu_userq_ensure_ev_fence - ensure a valid, unsignaled eviction fence exists + * @uq_mgr: the usermode queue manager for this process + * @evf_mgr: the eviction fence manager to check and rearm + * + * Ensures that a valid and not yet signaled eviction fence is attached to the + * usermode queue before any queue operations proceed. If it is signalled, then + * rearm a new eviction fence. 
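To make the locking change above concrete: the validate/cleanup helpers now assert the root PD reservation with dma_resv_assert_held() instead of taking it themselves, so the reserve/unreserve pair moves out to the caller. A minimal caller-side sketch (it mirrors the amdgpu_userq_create() and amdgpu_userq_destroy() hunks later in this patch; the wrapper function itself is illustrative):

static int example_validate_queue_va(struct amdgpu_device *adev,
				     struct amdgpu_usermode_queue *queue,
				     struct amdgpu_vm *vm, u64 va, u64 size)
{
	int r;

	r = amdgpu_bo_reserve(vm->root.bo, false);
	if (r)
		return r;

	/* safe: the root PD reservation is held across the mapping lookup */
	r = amdgpu_userq_input_va_validate(adev, queue, va, size);

	amdgpu_bo_unreserve(vm->root.bo);
	return r;
}

This also lets a caller batch several validations under one reservation instead of re-reserving the PD for every buffer.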
+ */ void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr) @@ -627,6 +629,9 @@ static int amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_vm *vm = &fpriv->vm; + int r = 0; cancel_delayed_work_sync(&uq_mgr->resume_work); @@ -634,38 +639,38 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que /* Cancel any pending hang detection work and cleanup */ cancel_delayed_work_sync(&queue->hang_detect_work); + r = amdgpu_bo_reserve(vm->root.bo, false); + if (r) { + drm_file_err(uq_mgr->file, "Failed to reserve root bo during userqueue destroy\n"); + return r; + } + amdgpu_userq_buffer_vas_list_cleanup(adev, queue); + amdgpu_bo_unreserve(vm->root.bo); + mutex_lock(&uq_mgr->userq_mutex); queue->hang_detect_fence = NULL; amdgpu_userq_wait_for_last_fence(queue); - r = amdgpu_bo_reserve(queue->db_obj.obj, true); - if (!r) { - amdgpu_bo_unpin(queue->db_obj.obj); - amdgpu_bo_unreserve(queue->db_obj.obj); - } - amdgpu_bo_unref(&queue->db_obj.obj); - - r = amdgpu_bo_reserve(queue->wptr_obj.obj, true); - if (!r) { - amdgpu_bo_unpin(queue->wptr_obj.obj); - amdgpu_bo_unreserve(queue->wptr_obj.obj); - } - amdgpu_bo_unref(&queue->wptr_obj.obj); - - atomic_dec(&uq_mgr->userq_count[queue->queue_type]); #if defined(CONFIG_DEBUG_FS) debugfs_remove_recursive(queue->debugfs_queue); #endif amdgpu_userq_detect_and_reset_queues(uq_mgr); r = amdgpu_userq_unmap_helper(queue); - /*TODO: It requires a reset for userq hw unmap error*/ - if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) { - drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); - queue->state = AMDGPU_USERQ_STATE_HUNG; - } + atomic_dec(&uq_mgr->userq_count[queue->queue_type]); amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); + amdgpu_bo_reserve(queue->db_obj.obj, true); + amdgpu_bo_unpin(queue->db_obj.obj); + amdgpu_bo_unreserve(queue->db_obj.obj); + amdgpu_bo_unref(&queue->db_obj.obj); + + amdgpu_bo_reserve(queue->wptr_obj.obj, true); + amdgpu_bo_unpin(queue->wptr_obj.obj); + amdgpu_bo_unreserve(queue->wptr_obj.obj); + amdgpu_bo_unref(&queue->wptr_obj.obj); + kfree(queue); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; @@ -738,35 +743,25 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (r) return r; - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + r = pm_runtime_resume_and_get(adev_to_drm(adev)->dev); if (r < 0) { - drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n"); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + drm_file_err(uq_mgr->file, "pm_runtime_resume_and_get() failed for userqueue create\n"); return r; } - /* - * There could be a situation that we are creating a new queue while - * the other queues under this UQ_mgr are suspended. So if there is any - * resume work pending, wait for it to get done. - * - * This will also make sure we have a valid eviction fence ready to be used. 
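A side note on the runtime-PM change in this hunk: pm_runtime_get_sync() raises the usage count even when the resume fails, so its error path needs an explicit put, while pm_runtime_resume_and_get() already drops the reference on failure. That is why the pm_runtime_put_autosuspend() disappears from the error branch. The idiom, as a standalone sketch:

#include <linux/pm_runtime.h>

static int example_runtime_pm(struct device *dev)
{
	int r;

	/* on failure the usage count is already balanced, no put needed */
	r = pm_runtime_resume_and_get(dev);
	if (r < 0)
		return r;

	/* ... do the work that needs the device powered ... */

	pm_runtime_put_autosuspend(dev);
	return 0;
}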
- */ - amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - uq_funcs = adev->userq_funcs[args->in.ip_type]; if (!uq_funcs) { drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", args->in.ip_type); r = -EINVAL; - goto unlock; + goto err_pm_runtime; } queue = kzalloc_obj(struct amdgpu_usermode_queue); if (!queue) { drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); r = -ENOMEM; - goto unlock; + goto err_pm_runtime; } INIT_LIST_HEAD(&queue->userq_va_list); @@ -781,28 +776,35 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) db_info.doorbell_offset = args->in.doorbell_offset; queue->userq_mgr = uq_mgr; + /* Validate the userq virtual address.*/ + r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + if (r) + goto free_queue; + if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) || amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { r = -EINVAL; - goto free_queue; + amdgpu_bo_unreserve(fpriv->vm.root.bo); + goto clean_mapping; } + amdgpu_bo_unreserve(fpriv->vm.root.bo); /* Convert relative doorbell offset into absolute doorbell index */ index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); r = -EINVAL; - goto free_queue; + goto clean_mapping; } queue->doorbell_index = index; xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); - r = amdgpu_userq_fence_driver_alloc(adev, queue); + r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) { drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); - goto free_queue; + goto clean_mapping; } r = uq_funcs->mqd_create(queue, &args->in); @@ -811,6 +813,8 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto clean_fence_driver; } + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + /* don't map the queue if scheduling is halted */ if (adev->userq_halt_for_enforce_isolation && ((queue->queue_type == AMDGPU_HW_IP_GFX) || @@ -822,7 +826,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = amdgpu_userq_map_helper(queue); if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); - down_read(&adev->reset_domain->sem); goto clean_mqd; } } @@ -838,9 +841,8 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (r) { if (!skip_map_queue) amdgpu_userq_unmap_helper(queue); - r = -ENOMEM; - goto clean_mqd; + goto clean_reset_domain; } r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); @@ -848,8 +850,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) xa_erase(&uq_mgr->userq_xa, qid); if (!skip_map_queue) amdgpu_userq_unmap_helper(queue); - - goto clean_mqd; + goto clean_reset_domain; } up_read(&adev->reset_domain->sem); @@ -861,16 +862,21 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) mutex_unlock(&uq_mgr->userq_mutex); return 0; +clean_reset_domain: + up_read(&adev->reset_domain->sem); clean_mqd: + mutex_unlock(&uq_mgr->userq_mutex); uq_funcs->mqd_destroy(queue); - up_read(&adev->reset_domain->sem); clean_fence_driver: amdgpu_userq_fence_driver_free(queue); +clean_mapping: + amdgpu_bo_reserve(fpriv->vm.root.bo, true); + amdgpu_userq_buffer_vas_list_cleanup(adev, queue); + amdgpu_bo_unreserve(fpriv->vm.root.bo); free_queue: 
kfree(queue); -unlock: - mutex_unlock(&uq_mgr->userq_mutex); - +err_pm_runtime: + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; } @@ -1000,10 +1006,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, static int amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) { + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_usermode_queue *queue; unsigned long queue_id; int ret = 0, r; + + if (amdgpu_bo_reserve(vm->root.bo, false)) + return false; + mutex_lock(&uq_mgr->userq_mutex); /* Resume all the queues for this process */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { @@ -1021,9 +1033,11 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) } mutex_unlock(&uq_mgr->userq_mutex); + amdgpu_bo_unreserve(vm->root.bo); if (ret) - drm_file_err(uq_mgr->file, "Failed to map all the queues\n"); + drm_file_err(uq_mgr->file, + "Failed to map all the queues, restore failed ret=%d\n", ret); return ret; } @@ -1180,7 +1194,7 @@ retry_lock: bo = range->bo; ret = amdgpu_ttm_tt_get_user_pages(bo, range); if (ret) - goto unlock_all; + goto free_ranges; } invalidated = true; @@ -1207,6 +1221,7 @@ retry_lock: unlock_all: drm_exec_fini(&exec); +free_ranges: xa_for_each(&xa, tmp_key, range) { if (!range) continue; @@ -1230,13 +1245,11 @@ static void amdgpu_userq_restore_worker(struct work_struct *work) ret = amdgpu_userq_vm_validate(uq_mgr); if (ret) { - drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n"); + drm_file_err(uq_mgr->file, "Failed to validate BOs to restore ret=%d\n", ret); goto put_fence; } - ret = amdgpu_userq_restore_all(uq_mgr); - if (ret) - drm_file_err(uq_mgr->file, "Failed to restore all queues\n"); + amdgpu_userq_restore_all(uq_mgr); put_fence: dma_fence_put(ev_fence); @@ -1258,7 +1271,8 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) } if (ret) - drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n"); + drm_file_err(uq_mgr->file, + "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); return ret; } @@ -1279,46 +1293,28 @@ void amdgpu_userq_reset_work(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, &reset_context); } -static int +static void amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_usermode_queue *queue; unsigned long queue_id; - int ret; xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { struct dma_fence *f = queue->last_fence; - if (!f || dma_fence_is_signaled(f)) + if (!f) continue; - ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); - if (ret <= 0) { - drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", - f->context, f->seqno); - - return -ETIMEDOUT; - } + dma_fence_wait(f, false); } - - return 0; } void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr) { - struct amdgpu_device *adev = uq_mgr->adev; - int ret; - /* Wait for any pending userqueue fence work to finish */ - ret = amdgpu_userq_wait_for_signal(uq_mgr); - if (ret) - dev_err(adev->dev, "Not evicting userqueue, timeout waiting for work\n"); - - ret = amdgpu_userq_evict_all(uq_mgr); - if (ret) - dev_err(adev->dev, "Failed to evict userqueue\n"); - + amdgpu_userq_wait_for_signal(uq_mgr); + amdgpu_userq_evict_all(uq_mgr); } int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, @@ -1480,17 +1476,16 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, return ret; } -int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, - struct 
amdgpu_bo_va_mapping *mapping, - uint64_t saddr) +void amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t saddr) { u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); struct amdgpu_bo_va *bo_va = mapping->bo_va; struct dma_resv *resv = bo_va->base.bo->tbo.base.resv; - int ret = 0; if (!ip_mask) - return 0; + return; dev_warn_once(adev->dev, "now unmapping a vital queue va:%llx\n", saddr); /** @@ -1501,14 +1496,8 @@ int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, * unmap is only for one kind of userq VAs, so at this point suppose * the eviction fence is always unsignaled. */ - if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) { - ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, true, - MAX_SCHEDULE_TIMEOUT); - if (ret <= 0) - return -EBUSY; - } - - return 0; + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); } void amdgpu_userq_pre_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index a4d44abf24fa..8b8f345b60b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -156,11 +156,12 @@ void amdgpu_userq_reset_work(struct work_struct *work); void amdgpu_userq_pre_reset(struct amdgpu_device *adev); int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost); void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue); +void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell); int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, u64 addr, u64 expected_size); -int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t saddr); +void amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t saddr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index fe6d83e859a0..e2d5f04296e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -32,29 +32,9 @@ #include "amdgpu.h" #include "amdgpu_userq_fence.h" -static const struct dma_fence_ops amdgpu_userq_fence_ops; -static struct kmem_cache *amdgpu_userq_fence_slab; - #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) -int amdgpu_userq_fence_slab_init(void) -{ - amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", - sizeof(struct amdgpu_userq_fence), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!amdgpu_userq_fence_slab) - return -ENOMEM; - - return 0; -} - -void amdgpu_userq_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_userq_fence_slab); -} +static const struct dma_fence_ops amdgpu_userq_fence_ops; static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) { @@ -78,12 +58,15 @@ amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv, } int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, - struct amdgpu_usermode_queue *userq) + struct amdgpu_userq_fence_driver **fence_drv_req) { struct amdgpu_userq_fence_driver *fence_drv; - unsigned long flags; int r; + if (!fence_drv_req) + return -EINVAL; + *fence_drv_req = NULL; + fence_drv = kzalloc_obj(*fence_drv); if (!fence_drv) return -ENOMEM; @@ -104,19 +87,10 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, 
fence_drv->context = dma_fence_context_alloc(1); get_task_comm(fence_drv->timeline_name, current); - xa_lock_irqsave(&adev->userq_xa, flags); - r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index, - fence_drv, GFP_KERNEL)); - xa_unlock_irqrestore(&adev->userq_xa, flags); - if (r) - goto free_seq64; - - userq->fence_drv = fence_drv; + *fence_drv_req = fence_drv; return 0; -free_seq64: - amdgpu_seq64_free(adev, fence_drv->va); free_fence_drv: kfree(fence_drv); @@ -144,20 +118,29 @@ void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) { dma_fence_put(userq->last_fence); - + userq->last_fence = NULL; amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); xa_destroy(&userq->fence_drv_xa); - /* Drop the fence_drv reference held by user queue */ + /* Drop the queue's ownership reference to fence_drv explicitly */ amdgpu_userq_fence_driver_put(userq->fence_drv); } +static void +amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence) +{ + unsigned long i; + for (i = 0; i < userq_fence->fence_drv_array_count; i++) + amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); + userq_fence->fence_drv_array_count = 0; +} + void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) { struct amdgpu_userq_fence *userq_fence, *tmp; + LIST_HEAD(to_be_signaled); struct dma_fence *fence; unsigned long flags; u64 rptr; - int i; if (!fence_drv) return; @@ -165,21 +148,26 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); - list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { - fence = &userq_fence->base; - - if (rptr < fence->seqno) + list_for_each_entry(userq_fence, &fence_drv->fences, link) { + if (rptr < userq_fence->base.seqno) break; + } - dma_fence_signal(fence); - - for (i = 0; i < userq_fence->fence_drv_array_count; i++) - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); + list_cut_before(&to_be_signaled, &fence_drv->fences, + &userq_fence->link); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); - list_del(&userq_fence->link); + list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) { + fence = &userq_fence->base; + list_del_init(&userq_fence->link); + dma_fence_signal(fence); + /* Drop fence_drv_array outside fence_list_lock + * to avoid the recursion lock. 
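The rework of amdgpu_userq_fence_driver_process() above is an instance of a general detach-then-process pattern: find the completed prefix of the list under the spinlock, splice it off with list_cut_before(), drop the lock, and only then signal, so fence callbacks that take the same lock cannot recurse on it. A generic sketch under assumed types (entry_is_done() is a hypothetical predicate standing in for the rptr/seqno comparison):

#include <linux/list.h>
#include <linux/spinlock.h>

static bool entry_is_done(struct list_head *e);	/* hypothetical predicate */

static void process_completed(struct list_head *shared, spinlock_t *lock)
{
	struct list_head *pos, *tmp;
	unsigned long flags;
	LIST_HEAD(batch);

	spin_lock_irqsave(lock, flags);
	pos = shared->next;
	while (pos != shared && entry_is_done(pos))
		pos = pos->next;
	/* move the completed prefix (everything before @pos) onto @batch */
	list_cut_before(&batch, shared, pos);
	spin_unlock_irqrestore(lock, flags);

	list_for_each_safe(pos, tmp, &batch) {
		list_del_init(pos);
		/* signal/free the entry here, without @lock held */
	}
}

Note that list_cut_before() also covers the "everything completed" case: when the scan reaches the list head, the whole list is moved, matching the list_for_each_entry() loop above terminating without a break.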
+ */ + amdgpu_userq_fence_put_fence_drv_array(userq_fence); dma_fence_put(fence); } - spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + } void amdgpu_userq_fence_driver_destroy(struct kref *ref) @@ -187,11 +175,9 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref) struct amdgpu_userq_fence_driver *fence_drv = container_of(ref, struct amdgpu_userq_fence_driver, refcount); - struct amdgpu_userq_fence_driver *xa_fence_drv; struct amdgpu_device *adev = fence_drv->adev; struct amdgpu_userq_fence *fence, *tmp; - struct xarray *xa = &adev->userq_xa; - unsigned long index, flags; + unsigned long flags; struct dma_fence *f; spin_lock_irqsave(&fence_drv->fence_list_lock, flags); @@ -208,12 +194,6 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref) } spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); - xa_lock_irqsave(xa, flags); - xa_for_each(xa, index, xa_fence_drv) - if (xa_fence_drv == fence_drv) - __xa_erase(xa, index); - xa_unlock_irqrestore(xa, flags); - /* Free seq64 memory */ amdgpu_seq64_free(adev, fence_drv->va); kfree(fence_drv); @@ -231,7 +211,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) { - *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL); return *userq_fence ? 0 : -ENOMEM; } @@ -242,6 +222,7 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, struct amdgpu_userq_fence_driver *fence_drv; struct dma_fence *fence; unsigned long flags; + bool signaled = false; fence_drv = userq->fence_drv; if (!fence_drv) @@ -288,13 +269,17 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, /* Check if hardware has already processed the job */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled(fence)) + if (!dma_fence_is_signaled(fence)) { list_add_tail(&userq_fence->link, &fence_drv->fences); - else + } else { + signaled = true; dma_fence_put(fence); - + } spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + if (signaled) + amdgpu_userq_fence_put_fence_drv_array(userq_fence); + *f = fence; return 0; @@ -337,7 +322,7 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu) amdgpu_userq_fence_driver_put(fence_drv); kvfree(userq_fence->fence_drv_array); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); } static void amdgpu_userq_fence_release(struct dma_fence *f) @@ -540,7 +525,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); if (r) { mutex_unlock(&userq_mgr->userq_mutex); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); goto put_gobj_write; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d76add2afc77..d355a0eecc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -58,13 +58,10 @@ struct amdgpu_userq_fence_driver { char timeline_name[TASK_COMM_LEN]; }; -int amdgpu_userq_fence_slab_init(void); -void amdgpu_userq_fence_slab_fini(void); - void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, - struct amdgpu_usermode_queue *userq); + 
struct amdgpu_userq_fence_driver **fence_drv_req); void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index eb4a15db2ef2..efdebd9c0a1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -680,6 +680,9 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, uint64_t addr; int r; + if (lo >= ib->length_dw || hi >= ib->length_dw) + return -EINVAL; + if (index == 0xffffffff) index = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 03d95dca93d7..debb82a2e031 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -34,6 +34,7 @@ #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_vcn.h" +#include "amdgpu_reset.h" #include "soc15d.h" /* Firmware Names */ @@ -361,7 +362,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ - if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) + if (in_ras_intr || amdgpu_reset_in_dpc(adev)) return 0; return amdgpu_vcn_save_vcpu_bo_inst(adev, i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index dba7ea16a10d..e8d180a412d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -437,12 +437,9 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev, struct eeprom_table_record bp; uint64_t retired_page; uint32_t bp_idx, bp_cnt; - void *vram_usage_va = NULL; - - if (adev->mman.fw_vram_usage_va) - vram_usage_va = adev->mman.fw_vram_usage_va; - else - vram_usage_va = adev->mman.drv_vram_usage_va; + void *fw_va = adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].cpu_ptr; + void *drv_va = adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].cpu_ptr; + void *vram_usage_va = fw_va ? 
fw_va : drv_va; memset(&bp, 0, sizeof(bp)); @@ -710,15 +707,17 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev) void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) { uint32_t *pfvf_data = NULL; + void *fw_va = adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].cpu_ptr; + void *drv_va = adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].cpu_ptr; adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; adev->virt.vf2pf_update_retry_cnt = 0; - if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { + if (fw_va && drv_va) { dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!"); - } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { + } else if (fw_va || drv_va) { /* go through this logic in ip_init and reset to init workqueue*/ amdgpu_virt_exchange_data(adev); @@ -763,41 +762,43 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) uint64_t bp_block_offset = 0; uint32_t bp_block_size = 0; struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; + void *fw_va = adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].cpu_ptr; + void *drv_va = adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].cpu_ptr; - if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { - if (adev->mman.fw_vram_usage_va) { + if (fw_va || drv_va) { + if (fw_va) { if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) { adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.fw_vram_usage_va + + (fw_va + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset); adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.fw_vram_usage_va + + (fw_va + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset + (AMD_SRIOV_MSG_SIZE_KB << 10)); adev->virt.fw_reserve.ras_telemetry = - (adev->mman.fw_vram_usage_va + + (fw_va + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset); } else { adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); + (fw_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); + (fw_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.ras_telemetry = - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); + (fw_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); } - } else if (adev->mman.drv_vram_usage_va) { + } else if (drv_va) { adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); + (drv_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); + (drv_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.ras_telemetry = - (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); + (drv_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); } amdgpu_virt_read_pf2vf_data(adev); @@ -1081,13 +1082,14 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) } /* reserved memory starts from crit region base offset with the size of 5MB */ - 
adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset; - adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10; + amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_VRAM_USAGE, + adev->virt.crit_regn.offset, + adev->virt.crit_regn.size_kb << 10, true); dev_info(adev->dev, "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n", init_data_hdr->version, - adev->mman.fw_vram_usage_start_offset, - adev->mman.fw_vram_usage_size >> 10); + adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].offset, + adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].size >> 10); adev->virt.is_dynamic_crit_regn_enabled = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 73abac6be5b3..9ba9de16a27a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1978,7 +1978,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; struct amdgpu_vm *vm = bo_va->base.vm; bool valid = true; - int r; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2003,12 +2002,8 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * during user requests GEM unmap IOCTL except for forcing the unmap * from user space. */ - if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) { - r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); - if (unlikely(r == -EBUSY)) - dev_warn_once(adev->dev, - "Attempt to unmap an active userq buffer\n"); - } + if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) + amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); @@ -2955,6 +2950,50 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } /** + * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible. + * @adev: amdgpu device pointer + * @root: root BO of the VM + * @pasid: PASID of the VM + * The caller needs to unreserve and unref the root bo on success. + */ +struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev, + struct amdgpu_bo **root, u32 pasid) +{ + unsigned long irqflags; + struct amdgpu_vm *vm; + int r; + + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL; + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); + + if (!*root) + return NULL; + + r = amdgpu_bo_reserve(*root, true); + if (r) + goto error_unref; + + /* Double check that the VM still exists */ + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + if (vm && vm->root.bo != *root) + vm = NULL; + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); + if (!vm) + goto error_unlock; + + return vm; +error_unlock: + amdgpu_bo_unreserve(*root); + +error_unref: + amdgpu_bo_unref(root); + return NULL; +} + +/** * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM @@ -2969,50 +3008,40 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. 
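The contract of the new amdgpu_vm_lock_by_pasid() helper (per its kernel-doc: on success the root PD is reserved and referenced, and the caller must undo both) keeps call sites short. A usage sketch, assuming a caller that just needs the page tables stable (the wrapper name is illustrative):

static bool example_with_vm_locked(struct amdgpu_device *adev, u32 pasid)
{
	struct amdgpu_bo *root;
	struct amdgpu_vm *vm;

	vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
	if (!vm)
		return false;	/* VM already gone, or reservation failed */

	/* ... walk/update page tables with the root PD reserved ... */

	amdgpu_bo_unreserve(root);
	amdgpu_bo_unref(&root);
	return true;
}

The double xa_load() inside the helper is what makes the reference safe: the VM could be torn down between dropping the pasid xarray lock and reserving the root BO, so the helper re-validates that the pasid still maps to the same root once the reservation succeeds.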
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, - bool write_fault) + u32 vmid, u32 node_id, uint64_t addr, + uint64_t ts, bool write_fault) { bool is_compute_context = false; struct amdgpu_bo *root; - unsigned long irqflags; uint64_t value, flags; struct amdgpu_vm *vm; int r; - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm) { - root = amdgpu_bo_ref(vm->root.bo); - is_compute_context = vm->is_compute_context; - } else { - root = NULL; - } - xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); - - if (!root) + vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); + if (!vm) return false; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr >> PAGE_SHIFT, ts, write_fault)) { + is_compute_context = vm->is_compute_context; + + if (is_compute_context) { + /* Unreserve root since svm_range_restore_pages might try to reserve it. */ + /* TODO: rework svm_range_restore_pages so that this isn't necessary. */ + amdgpu_bo_unreserve(root); + + if (!svm_range_restore_pages(adev, pasid, vmid, + node_id, addr >> PAGE_SHIFT, ts, write_fault)) { + amdgpu_bo_unref(&root); + return true; + } amdgpu_bo_unref(&root); - return true; + + /* Re-acquire the VM lock, could be that the VM was freed in between. */ + vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); + if (!vm) + return false; } addr /= AMDGPU_GPU_PAGE_SIZE; - - r = amdgpu_bo_reserve(root, true); - if (r) - goto error_unref; - - /* Double check that the VM still exists */ - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm && vm->root.bo != root) - vm = NULL; - xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); - if (!vm) - goto error_unlock; - flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; @@ -3051,7 +3080,6 @@ error_unlock: if (r < 0) dev_err(adev->dev, "Can't handle page fault (%d)\n", r); -error_unref: amdgpu_bo_unref(&root); return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 3b32f41c3655..d083d7aab75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -592,6 +592,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); +struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev, + struct amdgpu_bo **root, u32 pasid); + void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 22e2e5b47341..f078db3fef79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -21,6 +21,8 @@ */ #include "amdgpu_vm.h" +#include "amdgpu.h" +#include "amdgpu_reset.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -108,11 +110,19 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { + struct amdgpu_device *adev = p->adev; + if (p->needs_flush) atomic64_inc(&p->vm->tlb_seq); mb(); - amdgpu_device_flush_hdp(p->adev, NULL); + /* A reset flushed the HDP anyway, so that here can be skipped when a reset is ongoing */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return 0; + + 
amdgpu_device_flush_hdp(adev, NULL); + up_read(&adev->reset_domain->sem); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 31a437ce9570..a930f1522f96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -693,8 +693,11 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, !(flags & AMDGPU_PTE_VALID) && !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) { - /* Workaround for fault priority problem on GMC9 */ - flags |= AMDGPU_PTE_EXECUTABLE; + /* Workaround for fault priority problem on GMC9 and GFX12, + * EXECUTABLE for GMC9 fault priority and init_pte_flags + * (e.g. AMDGPU_PTE_IS_PTE on GFX12) + */ + flags |= AMDGPU_PTE_EXECUTABLE | adev->gmc.init_pte_flags; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index cc5f4e01e38f..42be8ee155dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -181,6 +181,7 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) } xcp_mgr->num_xcps = num_xcps; + xcp_mgr->mem_alloc_mode = AMDGPU_PARTITION_MEM_CAPPING_EVEN; amdgpu_xcp_update_partition_sched_list(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 8058e8f35d41..878c1c422893 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -132,6 +132,8 @@ struct amdgpu_xcp_mgr { struct amdgpu_xcp_cfg *xcp_cfg; uint32_t supp_xcp_modes; uint32_t avail_xcp_modes; + /* used to determin KFD memory alloc mode for each partition */ + uint32_t mem_alloc_mode; }; struct amdgpu_xcp_mgr_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ae39b9e1f7d6..d40ab1e95480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -64,6 +64,11 @@ #define regPC_CONFIG_CNTL_1 0x194d #define regPC_CONFIG_CNTL_1_BASE_IDX 1 +#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0 0x0030 +#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0_BASE_IDX 1 +#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0 0x0031 +#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0_BASE_IDX 1 + #define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 #define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 #define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 @@ -5234,11 +5239,27 @@ static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, true); } else { preempt_disable(); - clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); - clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); - clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); - if (clock_counter_hi_pre != clock_counter_hi_after) - clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + if (amdgpu_ip_version(adev, SMUIO_HWIP, 0) < IP_VERSION(15, 0, 0)) { + clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, + regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, + regGOLDEN_TSC_COUNT_LOWER); + clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, + regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, + regGOLDEN_TSC_COUNT_LOWER); + } else { + clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, 
+ regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0); + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, + regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0); + clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, + regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, + regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0); + } preempt_enable(); } clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); @@ -6502,15 +6523,7 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct amdgpu_userq_fence_driver *fence_drv = NULL; - struct xarray *xa = &adev->userq_xa; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - fence_drv = xa_load(xa, doorbell_offset); - if (fence_drv) - amdgpu_userq_fence_driver_process(fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index a418ae609c36..0e0b1e5b88fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4854,15 +4854,7 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct amdgpu_userq_fence_driver *fence_drv = NULL; - struct xarray *xa = &adev->userq_xa; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - fence_drv = xa_load(xa, doorbell_offset); - if (fence_drv) - amdgpu_userq_fence_driver_process(fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c index db49582a211f..68db1bc73bc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c @@ -3643,15 +3643,7 @@ static int gfx_v12_1_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct amdgpu_userq_fence_driver *fence_drv = NULL; - struct xarray *xa = &adev->userq_xa; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - fence_drv = xa_load(xa, doorbell_offset); - if (fence_drv) - amdgpu_userq_fence_driver_process(fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 73223d97a87f..ac90d8e9d86a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1571,6 +1571,71 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +/** + * gfx_v6_0_setup_tcc() - setup which TCCs are used + * + * @adev: amdgpu_device pointer + * + * Verify whether the current GPU has any TCCs disabled, + * which can happen when the GPU is harvested and some + * memory channels are disabled, reducing the memory bus width. + * For example, on the Radeon HD 7870 XT (Tahiti LE). + * + * If some TCCs are disabled, we need to make sure that + * the disabled TCCs are not used, and the remaining TCCs + * are used optimally. 
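A worked example of the nibble repacking that gfx_v6_0_setup_tcc() below performs (the harvest configuration here is hypothetical): with 4 TCCs, TCC1 fused off, and a golden steer that packs indices 0,1,2,3 one per nibble, the enabled indices are compacted into the low nibbles and the bank count shrinks accordingly:

	u64 chan_steer = 0x3210;	/* nibble i steers TCP channel i to a TCC */
	u32 dis_mask = 0x2;		/* TCC1 disabled (hypothetical) */
	u64 patched = 0;
	u32 active = 0, i, tcc;

	for (i = 0; i < 4; i++) {
		tcc = (chan_steer >> (4 * i)) & 0xf;
		if (!(BIT(tcc) & dis_mask))
			patched |= (u64)tcc << (4 * active++);
	}
	/* patched == 0x320, active == 3, NUM_TCC_BANKS field = active - 1 = 2 */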
+ * + * TCP_CHAN_STEER_LO/HI control which TCC is used by TCP channels. + * TCP_ADDR_CONFIG.NUM_TCC_BANKS controls how many channels are used. + * + * For optimal performance: + * - Rely on the CHAN_STEER from the golden registers table, + * only skip disabled TCCs but keep the mapping order. + * - Limit NUM_TCC_BANKS to number of active TCCs to avoid thrashing, + * which performs better than using the same TCC twice. + */ +static void gfx_v6_0_setup_tcc(struct amdgpu_device *adev) +{ + u32 i, tcc, tcp_addr_config, num_active_tcc = 0; + u64 chan_steer, patched_chan_steer = 0; + const u32 num_max_tcc = adev->gfx.config.max_texture_channel_caches; + const u32 dis_tcc_mask = + amdgpu_gfx_create_bitmask(num_max_tcc) & + (REG_GET_FIELD(RREG32(mmCGTS_TCC_DISABLE), + CGTS_TCC_DISABLE, TCC_DISABLE) | + REG_GET_FIELD(RREG32(mmCGTS_USER_TCC_DISABLE), + CGTS_USER_TCC_DISABLE, TCC_DISABLE)); + + /* When no TCC is disabled, the golden registers table already has optimal TCC setup */ + if (!dis_tcc_mask) + return; + + /* Each 4-bit nibble contains the index of a TCC used by all TCPs */ + chan_steer = RREG32(mmTCP_CHAN_STEER_LO) | ((u64)RREG32(mmTCP_CHAN_STEER_HI) << 32ull); + + /* Patch the TCP to TCC mapping to skip disabled TCCs */ + for (i = 0; i < num_max_tcc; ++i) { + tcc = (chan_steer >> (u64)(4 * i)) & 0xf; + + if (!((1 << tcc) & dis_tcc_mask)) { + /* Copy enabled TCC indices to the patched register value. */ + patched_chan_steer |= (u64)tcc << (u64)(4 * num_active_tcc); + ++num_active_tcc; + } + } + + WARN_ON(num_active_tcc != num_max_tcc - hweight32(dis_tcc_mask)); + + /* Patch number of TCCs used by TCPs */ + tcp_addr_config = REG_SET_FIELD(RREG32(mmTCP_ADDR_CONFIG), + TCP_ADDR_CONFIG, NUM_TCC_BANKS, + num_active_tcc - 1); + + WREG32(mmTCP_ADDR_CONFIG, tcp_addr_config); + WREG32(mmTCP_CHAN_STEER_HI, upper_32_bits(patched_chan_steer)); + WREG32(mmTCP_CHAN_STEER_LO, lower_32_bits(patched_chan_steer)); +} + static void gfx_v6_0_config_init(struct amdgpu_device *adev) { adev->gfx.config.double_offchip_lds_buf = 0; @@ -1729,6 +1794,7 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_tiling_mode_table_init(adev); gfx_v6_0_setup_rb(adev); + gfx_v6_0_setup_tcc(adev); gfx_v6_0_setup_spi(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 95be105671ec..86c7c2a429b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5660,9 +5660,6 @@ static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index fd691b2a6e21..e1ace7d44ffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -860,8 +860,6 @@ static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_gmc_get_vbios_allocations(adev); - /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index e6db87b94eb1..94d6631ce0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -834,8 +834,6 @@ static int gmc_v11_0_sw_init(struct 
amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_gmc_get_vbios_allocations(adev); - /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 6e184ea069ef..e10ac9788d13 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -924,8 +924,6 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_gmc_get_vbios_allocations(adev); - #ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) { r = amdgpu_gmc_init_mem_ranges(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 886bf77309a5..cc272a96fcef 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -854,8 +854,6 @@ static int gmc_v6_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_gmc_get_vbios_allocations(adev); - r = amdgpu_bo_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index d25fdedb0d9f..bb16ba2ef6fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1034,8 +1034,6 @@ static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_gmc_get_vbios_allocations(adev); - /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 4910e5557a67..a59174f6bcc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1149,8 +1149,6 @@ static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_gmc_get_vbios_allocations(adev); - /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index d865059e884a..e7b78027002b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2010,8 +2010,6 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_gmc_get_vbios_allocations(adev); - if (amdgpu_is_multi_aid(adev)) { r = amdgpu_gmc_init_mem_ranges(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 9fe8d10ab270..cffb1e6bab35 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -802,6 +802,7 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 20983f126b49..13a6e24c624a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -693,6 +693,7 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, @@ -724,6 +725,7 @@ static const struct amdgpu_ring_funcs 
jpeg_v2_5_dec_ring_vm_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 98f5e0622bc5..d0445df39d2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -594,6 +594,7 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 0bd83820dd20..6fd4238a8471 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -759,6 +759,7 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 4b4aa9553624..0c746580de11 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -736,15 +736,35 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) */ void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { - if (!amdgpu_sriov_vf(ring->adev)) { + struct amdgpu_device *adev = ring->adev; + + if (!amdgpu_sriov_vf(adev)) { + int jpeg_inst = GET_INST(JPEG, ring->me); + uint32_t value = 0x80004000; /* default DS14 */ + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + + /* PCTL0__MMHUB_DEEPSLEEP_IB could be different on different mmhub version */ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + amdgpu_ring_write(ring, 0x69004); + value = 0x80010000; + break; + case IP_VERSION(4, 2, 0): + amdgpu_ring_write(ring, 0x60804); + if (jpeg_inst & 1) + value = 0x80010000; + break; + default: + amdgpu_ring_write(ring, 0x62a04); + break; + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x80004000); + amdgpu_ring_write(ring, value); } } @@ -757,15 +777,35 @@ void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) */ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { - if (!amdgpu_sriov_vf(ring->adev)) { + struct amdgpu_device *adev = ring->adev; + + if (!amdgpu_sriov_vf(adev)) { + int jpeg_inst = GET_INST(JPEG, ring->me); + uint32_t value = 0x00004000; /* default DS14 */ + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); + + /* PCTL0__MMHUB_DEEPSLEEP_IB could be different on different mmhub version */ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + amdgpu_ring_write(ring, 0x69004); + value = 0x00010000; + break; + case IP_VERSION(4, 2, 0): + 
amdgpu_ring_write(ring, 0x60804); + if (jpeg_inst & 1) + value = 0x00010000; + break; + default: + amdgpu_ring_write(ring, 0x62a04); + break; + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x00004000); + amdgpu_ring_write(ring, value); } } @@ -1179,6 +1219,7 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 54fd9c800c40..a43582b9c876 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -804,6 +804,7 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 46bf15dce2bd..72a4b2d0676f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -680,6 +680,7 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index edecbfe66c79..250316704dfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -884,6 +884,7 @@ static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c index 285c459379c4..7a4ecea6b39a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c @@ -703,6 +703,7 @@ static const struct amd_ip_funcs jpeg_v5_0_2_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_2_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_2_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_2_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_2_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c index 1821dced936f..e7546816baba 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c @@ -661,6 +661,7 @@ static const struct amd_ip_funcs jpeg_v5_3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_3_0_dec_ring_get_rptr, .get_wptr = 
jpeg_v5_3_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_3_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index faac21ee5739..5b4121ddc78c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -31,89 +31,68 @@ #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE static int -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) -{ - int ret; - - ret = amdgpu_bo_reserve(bo, true); - if (ret) { - DRM_ERROR("Failed to reserve bo. ret %d\n", ret); - goto err_reserve_bo_failed; - } - - ret = amdgpu_ttm_alloc_gart(&bo->tbo); - if (ret) { - DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); - goto err_map_bo_gart_failed; - } - - amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); - - return 0; - -err_map_bo_gart_failed: - amdgpu_bo_unreserve(bo); -err_reserve_bo_failed: - return ret; -} - -static int mes_userq_create_wptr_mapping(struct amdgpu_device *adev, struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, uint64_t wptr) { struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + struct amdgpu_bo *obj; + struct amdgpu_vm *vm = queue->vm; + struct drm_exec exec; int ret; - wptr_vm = queue->vm; - ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (ret) - return ret; - wptr &= AMDGPU_GMC_HOLE_MASK; - wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - DRM_ERROR("Failed to lookup wptr bo\n"); - return -EINVAL; - } - wptr_obj->obj = wptr_mapping->bo_va->base.bo; - if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { - DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); - return -EINVAL; - } + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; + + wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT); + if (!wptr_mapping) { + ret = -EINVAL; + goto fail_lock; + } - ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); - if (ret) { - DRM_ERROR("Failed to map wptr bo to GART\n"); - return ret; + obj = wptr_mapping->bo_va->base.bo; + ret = drm_exec_lock_obj(&exec, &obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; } - ret = amdgpu_bo_reserve(wptr_obj->obj, true); - if (ret) { - DRM_ERROR("Failed to reserve wptr bo\n"); - return ret; + wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo); + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + ret = -EINVAL; + goto fail_map; } /* TODO use eviction fence instead of pinning. */ ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT); if (ret) { - drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n"); - goto unresv_bo; + DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret); + goto fail_map; + } + + ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. 
ret %d\n", ret); + goto fail_map; } queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); - amdgpu_bo_unreserve(wptr_obj->obj); + drm_exec_fini(&exec); return 0; -unresv_bo: - amdgpu_bo_unreserve(wptr_obj->obj); +fail_map: + amdgpu_bo_unref(&wptr_obj->obj); +fail_lock: + drm_exec_fini(&exec); return ret; } @@ -322,8 +301,14 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, goto free_mqd; } + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) { + kfree(compute_mqd); + goto free_mqd; + } r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va, 2048); + amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(compute_mqd); goto free_mqd; @@ -365,14 +350,22 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, userq_props->tmz_queue = mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) { + kfree(mqd_gfx_v11); + goto free_mqd; + } r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va, shadow_info.shadow_size); if (r) { + amdgpu_bo_unreserve(queue->vm->root.bo); kfree(mqd_gfx_v11); goto free_mqd; } + r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va, shadow_info.csa_size); + amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(mqd_gfx_v11); goto free_mqd; @@ -394,8 +387,15 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, r = -ENOMEM; goto free_mqd; } + + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) { + kfree(mqd_sdma_v11); + goto free_mqd; + } r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va, 32); + amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(mqd_sdma_v11); goto free_mqd; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c index 0e9089544769..cec801278126 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c @@ -2028,7 +2028,7 @@ static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, int num_xcc = NUM_XCC(adev->gfx.xcc_mask); int sdma_ring_align = 0x10, compute_ring_align = 0x100; uint32_t tmp, xcc_offset; - int r = 0, i, wptr = 0; + int r = 0, i, j, wptr = 0; if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { if (!adev->mes.enable_coop_mode) { @@ -2077,11 +2077,11 @@ static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSCRATCH_REG0); } else { - for (i = 0; i < num_xcc; i++) { - if (xcc_id != adev->mes.master_xcc_ids[i]) + for (j = 0; j < num_xcc; j++) { + if (xcc_id != adev->mes.master_xcc_ids[j]) continue; - tmp = RREG32_SOC15(GC, GET_INST(GC, i), + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regSCRATCH_REG0); if (tmp != 0xDEADBEEF) break; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c index db14a1a326d2..b6f832c53860 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -54,6 +54,8 @@ #define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL_nbif_4_10_BASE_IDX 3 #define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL1_nbif_4_10 0x4f0af6 #define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL1_nbif_4_10_BASE_IDX 3 +#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_nbif_4_10 0x0021 +#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_nbif_4_10_BASE_IDX 2 static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) { @@ -65,7 +67,12 @@ static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) static u32 
nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + u32 tmp; + + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) + tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_nbif_4_10); + else + tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c index 73a709773e85..2a8582e87f2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c @@ -32,6 +32,7 @@ #include "mp/mp_15_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/psp_15_0_0_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_15_0_0_ta.bin"); static int psp_v15_0_0_init_microcode(struct psp_context *psp) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 44f0f23e1148..e64f2f6df9a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -889,7 +889,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -899,7 +899,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se addr += 4; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index b005672f2f96..8ca46e1e474e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1662,17 +1662,8 @@ static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev, u32 doorbell_offset = entry->src_data[0]; if (adev->enable_mes && doorbell_offset) { - struct amdgpu_userq_fence_driver *fence_drv = NULL; - struct xarray *xa = &adev->userq_xa; - unsigned long flags; - doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; - - xa_lock_irqsave(xa, flags); - fence_drv = xa_load(xa, doorbell_offset); - if (fence_drv) - amdgpu_userq_fence_driver_process(fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 5679a94d0815..37191e2918d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1594,17 +1594,8 @@ static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev, u32 doorbell_offset = entry->src_data[0]; if (adev->enable_mes && doorbell_offset) { - struct amdgpu_userq_fence_driver *fence_drv = NULL; - struct xarray *xa = &adev->userq_xa; - unsigned long flags; - doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; - - xa_lock_irqsave(xa, flags); - fence_drv = xa_load(xa, doorbell_offset); - if (fence_drv) - amdgpu_userq_fence_driver_process(fence_drv); - xa_unlock_irqrestore(xa, flags); + 
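/*
 * The identical open-coded lookup removed from the SDMA v6.0 and v7.0
 * fence IRQ handlers is now behind a single helper. Presumably - the
 * helper body is not part of this hunk, so this is an inference from
 * the removed code - it keeps the same shape, keyed by doorbell
 * offset:
 *
 *	xa_lock_irqsave(xa, flags);
 *	fence_drv = xa_load(xa, doorbell_offset);
 *	if (fence_drv)
 *		amdgpu_userq_fence_driver_process(fence_drv);
 *	xa_unlock_irqrestore(xa, flags);
 *
 * leaving each IP-specific handler to normalize the doorbell offset
 * and call:
 */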
amdgpu_userq_process_fence_irq(adev, doorbell_offset); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c index f20e0fc3fc74..061934a2e93a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c @@ -1268,6 +1268,18 @@ static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + switch (amdgpu_user_queue) { + case -1: + default: + adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = true; + break; + case 0: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + } + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) { DRM_ERROR("Failed to init sdma firmware!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index fea576a7f397..efb3fde919ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -242,6 +242,10 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) uint64_t addr; uint32_t size; + /* When the keyselect is already set, don't perturb it. */ + if (RREG32(mmUVD_FW_START)) + return; + /* program the VCPU memory controller bits 0-27 */ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; @@ -284,6 +288,12 @@ static int uvd_v3_1_fw_validate(struct amdgpu_device *adev) int i; uint32_t keysel = adev->uvd.keyselect; + if (RREG32(mmUVD_FW_START) & UVD_FW_STATUS__PASS_MASK) { + dev_dbg(adev->dev, "UVD keyselect already set: 0x%x (on CPU: 0x%x)\n", + RREG32(mmUVD_FW_START), adev->uvd.keyselect); + return 0; + } + WREG32(mmUVD_FW_START, keysel); for (i = 0; i < 10; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e35fae9cdaf6..0442bfcfd384 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2113,6 +2113,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, @@ -2145,6 +2146,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, .set_wptr = vcn_v2_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 006a15451197..8b8184fe6764 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1778,6 +1778,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v2_5_dec_ring_get_rptr, .get_wptr = vcn_v2_5_dec_ring_get_wptr, @@ -1879,6 +1880,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v2_5_enc_ring_get_rptr, .get_wptr = vcn_v2_5_enc_ring_get_wptr, .set_wptr = vcn_v2_5_enc_ring_set_wptr, diff --git 
a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 02d5c5af65f2..81bba3ec2a93 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1856,6 +1856,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, @@ -1909,7 +1910,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_device *adev = p->adev; struct amdgpu_bo_va_mapping *map; - uint32_t *msg, num_buffers; + uint32_t *msg, num_buffers, len_dw; struct amdgpu_bo *bo; uint64_t start, end; unsigned int i; @@ -1930,6 +1931,11 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, return -EINVAL; } + if (end - addr < 16) { + DRM_ERROR("VCN messages must be at least 4 DWORDs!\n"); + return -EINVAL; + } + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -1946,8 +1952,8 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, msg = ptr + addr - start; - /* Check length */ if (msg[1] > end - addr) { + DRM_ERROR("VCN message header does not fit in BO!\n"); r = -EINVAL; goto out; } @@ -1955,9 +1961,19 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, if (msg[3] != RDECODE_MSG_CREATE) goto out; + len_dw = msg[1] / 4; num_buffers = msg[2]; + + /* Verify that all indices fit within the claimed length. 
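The walk below starts at msg[6] and consumes 4 DWORDs per record, while msg[1] is the message size in bytes, so len_dw = msg[1] / 4 bounds how many records can exist. Worked example: msg[1] = 40 gives len_dw = 10, which admits exactly one buffer record (6 + 1 * 4 = 10); a claimed num_buffers of 2 would need 14 DWORDs and is rejected before the loop reads past the mapping. The same check is duplicated for VCN 4.0 below.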
Each index is 4 DWORDs */ + if (num_buffers > len_dw || 6 + num_buffers * 4 > len_dw) { + DRM_ERROR("VCN message has too many buffers!\n"); + r = -EINVAL; + goto out; + } + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { uint32_t offset, size, *create; + uint64_t buf_end; if (msg[0] != RDECODE_MESSAGE_CREATE) continue; @@ -1965,14 +1981,16 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, offset = msg[1]; size = msg[2]; - if (offset + size > end) { + if (size < 4 || check_add_overflow(offset, size, &buf_end) || + buf_end > end - addr) { + DRM_ERROR("VCN message buffer exceeds BO bounds!\n"); r = -EINVAL; goto out; } create = ptr + addr + offset - start; - /* H246, HEVC and VP9 can run on any instance */ + /* H264, HEVC and VP9 can run on any instance */ if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; @@ -2021,6 +2039,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, @@ -2123,6 +2142,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v3_0_enc_ring_get_rptr, .get_wptr = vcn_v3_0_enc_ring_get_wptr, .set_wptr = vcn_v3_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index d17219be50f3..ff7269bafae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1826,7 +1826,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_device *adev = p->adev; struct amdgpu_bo_va_mapping *map; - uint32_t *msg, num_buffers; + uint32_t *msg, num_buffers, len_dw; struct amdgpu_bo *bo; uint64_t start, end; unsigned int i; @@ -1847,6 +1847,11 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, return -EINVAL; } + if (end - addr < 16) { + DRM_ERROR("VCN messages must be at least 4 DWORDs!\n"); + return -EINVAL; + } + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -1863,8 +1868,8 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, msg = ptr + addr - start; - /* Check length */ if (msg[1] > end - addr) { + DRM_ERROR("VCN message header does not fit in BO!\n"); r = -EINVAL; goto out; } @@ -1872,9 +1877,19 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, if (msg[3] != RDECODE_MSG_CREATE) goto out; + len_dw = msg[1] / 4; num_buffers = msg[2]; + + /* Verify that all indices fit within the claimed length. 
Each index is 4 DWORDs */ + if (num_buffers > len_dw || 6 + num_buffers * 4 > len_dw) { + DRM_ERROR("VCN message has too many buffers!\n"); + r = -EINVAL; + goto out; + } + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { uint32_t offset, size, *create; + uint64_t buf_end; if (msg[0] != RDECODE_MESSAGE_CREATE) continue; @@ -1882,7 +1897,9 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, offset = msg[1]; size = msg[2]; - if (offset + size > end) { + if (size < 4 || check_add_overflow(offset, size, &buf_end) || + buf_end > end - addr) { + DRM_ERROR("VCN message buffer exceeds BO bounds!\n"); r = -EINVAL; goto out; } @@ -1913,9 +1930,10 @@ out: static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start) { int i; + uint32_t len; - for (i = start; i < ib->length_dw && ib->ptr[i] >= 8; i += ib->ptr[i] / 4) { - if (ib->ptr[i + 1] == id) + for (i = start; (len = amdgpu_ib_get_value(ib, i)) >= 8; i += len / 4) { + if (amdgpu_ib_get_value(ib, i + 1) == id) return i; } return -1; @@ -1926,8 +1944,6 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib) { struct amdgpu_ring *ring = amdgpu_job_ring(job); - struct amdgpu_vcn_decode_buffer *decode_buffer; - uint64_t addr; uint32_t val; int idx = 0, sidx; @@ -1938,20 +1954,22 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) { val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; + uint32_t valid_buf_flag = amdgpu_ib_get_value(ib, idx + 6); + uint64_t msg_buffer_addr; - if (!(decode_buffer->valid_buf_flag & 0x1)) + if (!(valid_buf_flag & 0x1)) return 0; - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_dec_msg(p, job, addr); + msg_buffer_addr = ((u64)amdgpu_ib_get_value(ib, idx + 7)) << 32 | + amdgpu_ib_get_value(ib, idx + 8); + return vcn_v4_0_dec_msg(p, job, msg_buffer_addr); } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx); - if (sidx >= 0 && ib->ptr[sidx + 2] == RENCODE_ENCODE_STANDARD_AV1) + if (sidx >= 0 && + amdgpu_ib_get_value(ib, sidx + 2) == RENCODE_ENCODE_STANDARD_AV1) return vcn_v4_0_limit_sched(p, job); } - idx += ib->ptr[idx] / 4; + idx += amdgpu_ib_get_value(ib, idx) / 4; } return 0; } @@ -1978,6 +1996,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index ff3013b97abd..10e8fc2821f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1775,6 +1775,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v4_0_3_unified_ring_get_rptr, .get_wptr = vcn_v4_0_3_unified_ring_get_wptr, .set_wptr = vcn_v4_0_3_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 1f6a22983c0d..1571cc5a148c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1483,6 +1483,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 6109124f852e..d5f49fa33bee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1207,6 +1207,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_0_unified_ring_get_rptr, .get_wptr = vcn_v5_0_0_unified_ring_get_wptr, .set_wptr = vcn_v5_0_0_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index c28c6aff17aa..54fbf8d73ca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -1419,6 +1419,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_1_unified_ring_get_rptr, .get_wptr = vcn_v5_0_1_unified_ring_get_wptr, .set_wptr = vcn_v5_0_1_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c index c3d3cc023058..bbc172db91a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c @@ -994,6 +994,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_2_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_2_unified_ring_get_rptr, .get_wptr = vcn_v5_0_2_unified_ring_get_wptr, .set_wptr = vcn_v5_0_2_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 462a32abf720..f95bf6d95534 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/fs.h> #include <linux/file.h> +#include <linux/overflow.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uaccess.h> @@ -776,6 +777,9 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, goto out_unlock; } + if (args->num_of_nodes > kfd_topology_get_num_devices()) + return -EINVAL; + /* Fill in process-aperture information for all available * nodes, but not more than args->num_of_nodes as that is * the amount of memory allocated by user @@ -1356,7 +1360,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); if (WARN_ON_ONCE(!peer_pdd)) continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(peer_pdd); } kfree(devices_arr); @@ -1451,7 +1455,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, if (WARN_ON_ONCE(!peer_pdd)) continue; if (flush_tlb) - kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(peer_pdd); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ err = 
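/*
 * All kfd_flush_tlb() call sites in this series drop the explicit
 * TLB_FLUSH_LEGACY/TLB_FLUSH_HEAVYWEIGHT argument; the inline helper
 * (see the kfd_priv.h hunk below) now always issues a heavyweight
 * flush:
 *
 *	amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT,
 *				    pdd->dev->xcc_mask);
 *
 * so the flush type is no longer chosen per call site. The DMA unmap
 * that follows still has to run after the flush to avoid
 * IO_PAGE_FAULT, as the pre-existing comment notes:
 */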
amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); @@ -1692,6 +1696,16 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(pdd->dev, &args->anon_fd); } +static int kfd_ioctl_svm_validate(void *kdata, unsigned int usize) +{ + struct kfd_ioctl_svm_args *args = kdata; + size_t expected = struct_size(args, attrs, args->nattr); + + if (expected == SIZE_MAX || usize < expected) + return -EINVAL; + return 0; +} + #if IS_ENABLED(CONFIG_HSA_AMD_SVM) static int kfd_ioctl_set_xnack_mode(struct file *filep, @@ -3206,7 +3220,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ - .cmd_drv = 0, .name = #ioctl} + .validate = NULL, .cmd_drv = 0, .name = #ioctl} + +#define AMDKFD_IOCTL_DEF_V(ioctl, _func, _validate, _flags) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ + .validate = _validate, .cmd_drv = 0, .name = #ioctl} /** Ioctl table */ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { @@ -3303,7 +3321,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, kfd_ioctl_smi_events, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), + AMDKFD_IOCTL_DEF_V(AMDKFD_IOC_SVM, kfd_ioctl_svm, + kfd_ioctl_svm_validate, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), @@ -3428,6 +3447,12 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) memset(kdata, 0, usize); } + if (ioctl->validate) { + retcode = ioctl->validate(kdata, usize); + if (retcode) + goto err_i1; + } + retcode = func(filep, process, kdata); if (cmd & IOC_OUT) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8ff97bf7d95a..b7f8f7ff8198 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1737,37 +1737,6 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr return false; } -/* check if there is kfd process still uses adev */ -static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev) -{ - struct kfd_process *p; - struct hlist_node *p_temp; - unsigned int temp; - struct kfd_node *dev; - - mutex_lock(&kfd_processes_mutex); - - if (hash_empty(kfd_processes_table)) { - mutex_unlock(&kfd_processes_mutex); - return true; - } - - /* check if there is device still use adev */ - hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { - for (int i = 0; i < p->n_pdds; i++) { - dev = p->pdds[i]->dev; - if (dev->adev == adev) { - mutex_unlock(&kfd_processes_mutex); - return false; - } - } - } - - mutex_unlock(&kfd_processes_mutex); - - return true; -} - /** kgd2kfd_teardown_processes - gracefully tear down existing * kfd processes that use adev * @@ -1800,7 +1769,7 @@ void kgd2kfd_teardown_processes(struct amdgpu_device *adev) mutex_unlock(&kfd_processes_mutex); /* wait all kfd processes use adev terminate */ - while (!kgd2kfd_check_device_idle(adev)) + while (!!atomic_read(&adev->kfd.dev->kfd_processes_count)) cond_resched(); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ab3b2e7be9bd..9185ebe4c079 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -572,7 +572,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, 
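/*
 * Background on the amdkfd ioctl validate hook introduced above: the
 * ioctl table gains an optional per-entry ->validate(kdata, usize)
 * callback that kfd_ioctl() runs after copy-in but before the
 * handler. kfd_ioctl_svm_validate() uses it to check that the
 * variable-length attrs[] array really fits in what userspace passed:
 *
 *	size_t expected = struct_size(args, attrs, args->nattr);
 *
 *	if (expected == SIZE_MAX || usize < expected)
 *		return -EINVAL;
 *
 * struct_size() saturates to SIZE_MAX on overflow, so a huge nattr
 * cannot wrap the size computation and slip past the usize check.
 */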
qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, @@ -610,7 +610,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, if (flush_texture_cache_nocpsch(q->device, qpd)) dev_err(dev, "Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -1284,7 +1284,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->adev, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } /* Take a safe reference to the mm_struct, which may otherwise diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index fa025bea9b4f..7b5b12206919 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1047,10 +1047,13 @@ extern struct srcu_struct kfd_processes_srcu; typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, void *data); +typedef int amdkfd_ioctl_validate_t(void *kdata, unsigned int usize); + struct amdkfd_ioctl_desc { unsigned int cmd; int flags; amdkfd_ioctl_t *func; + amdkfd_ioctl_validate_t *validate; unsigned int cmd_drv; const char *name; }; @@ -1191,6 +1194,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, return NULL; } int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev); +uint32_t kfd_topology_get_num_devices(void); int kfd_numa_node_to_apic_id(int numa_node_id); uint32_t kfd_gpu_node_num(void); @@ -1550,13 +1554,13 @@ void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); void kfd_signal_process_terminate_event(struct kfd_process *p); -static inline void kfd_flush_tlb(struct kfd_process_device *pdd, - enum TLB_FLUSH_TYPE type) +static inline void kfd_flush_tlb(struct kfd_process_device *pdd) { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); + amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT, + pdd->dev->xcc_mask); } static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index bcd21204aa50..d28ca581cad0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -689,7 +689,8 @@ void kfd_procfs_del_queue(struct queue *q) int kfd_process_create_wq(void) { if (!kfd_process_wq) - kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); + kfd_process_wq = alloc_workqueue("kfd_process_wq", WQ_UNBOUND, + 0); if (!kfd_restore_wq) kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", WQ_FREEZABLE); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b120fdb0ef77..35ec67d9739b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1366,6 +1366,12 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("CPU[0x%llx 0x%llx] -> GPU[0x%llx 0x%llx]\n", start, last, gpu_start, gpu_end); + + if (!amdgpu_vm_ready(vm)) { + pr_debug("VM not ready, 
canceling unmap\n"); + return -EINVAL; + } + return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, gpu_start, gpu_end, init_pte_value, 0, 0, NULL, NULL, fence); @@ -1418,7 +1424,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(pdd); } return r; @@ -1443,6 +1449,11 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, last_start, last_start + npages - 1, readonly); + if (!amdgpu_vm_ready(vm)) { + pr_debug("VM not ready, canceling map\n"); + return -EINVAL; + } + for (i = offset; i < offset + npages; i++) { uint64_t gpu_start; uint64_t gpu_end; @@ -1560,7 +1571,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, } } - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 995f2c2528a9..29dee26261ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2297,6 +2297,17 @@ int kfd_topology_remove_device(struct kfd_node *gpu) return res; } +uint32_t kfd_topology_get_num_devices(void) +{ + uint32_t num_devices; + + down_read(&topology_lock); + num_devices = sys_props.num_devices; + up_read(&topology_lock); + + return num_devices; +} + /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD * topology. If GPU device is found @idx, then valid kfd_dev pointer is * returned through @kdev diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 21635e80349a..5fc5d5608506 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -95,6 +95,7 @@ #include <drm/drm_utils.h> #include <drm/drm_vblank.h> #include <drm/drm_audio_component.h> +#include <drm/drm_colorop.h> #include <drm/drm_gem_atomic_helper.h> #include <media/cec-notifier.h> @@ -572,7 +573,7 @@ static void schedule_dc_vmin_vmax(struct amdgpu_device *adev, offload_work->stream = stream; offload_work->adjust = adjust_copy; - queue_work(system_wq, &offload_work->work); + queue_work(system_percpu_wq, &offload_work->work); } static void dm_vupdate_high_irq(void *interrupt_params) @@ -1902,7 +1903,11 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } - init_data.asic_id.chip_family = adev->family; + /* special handling for early revisions of GC 11.5.4 */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4)) + init_data.asic_id.chip_family = AMDGPU_FAMILY_GC_11_5_4; + else + init_data.asic_id.chip_family = adev->family; init_data.asic_id.pci_revision_id = adev->pdev->revision; init_data.asic_id.hw_internal_rev = adev->external_rev_id; @@ -2255,6 +2260,10 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) adev->dm.idle_workqueue = NULL; } + /* Disable ISM before dc_destroy() invalidates dm->dc */ + scoped_guard(mutex, &adev->dm.dc_lock) + amdgpu_dm_ism_disable(&adev->dm); + amdgpu_dm_destroy_drm_device(&adev->dm); #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) @@ -3833,6 +3842,66 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { .atomic_commit_setup = amdgpu_dm_atomic_setup_commit, }; +#define DDC_MANUFACTURERNAME_SAMSUNG 0x2D4C + +static void dm_set_panel_type(struct amdgpu_dm_connector *aconnector) +{ + struct drm_connector *connector 
= &aconnector->base; + struct drm_display_info *display_info = &connector->display_info; + struct dc_link *link = aconnector->dc_link; + struct amdgpu_device *adev; + + adev = drm_to_adev(connector->dev); + + link->panel_type = PANEL_TYPE_NONE; + + switch (display_info->amd_vsdb.panel_type) { + case AMD_VSDB_PANEL_TYPE_OLED: + link->panel_type = PANEL_TYPE_OLED; + break; + case AMD_VSDB_PANEL_TYPE_MINILED: + link->panel_type = PANEL_TYPE_MINILED; + break; + } + + /* If VSDB didn't determine panel type, check DPCD ext caps */ + if (link->panel_type == PANEL_TYPE_NONE) { + if (link->dpcd_sink_ext_caps.bits.miniled == 1) + link->panel_type = PANEL_TYPE_MINILED; + if (link->dpcd_sink_ext_caps.bits.oled == 1) + link->panel_type = PANEL_TYPE_OLED; + } + + /* + * TODO: get panel type from DID2 that has device technology field + * to specify if it's OLED or not. But we need to wait for DID2 + * support in DC and EDID parser to be able to use it here. + */ + + if (link->panel_type == PANEL_TYPE_NONE) { + struct drm_amd_vsdb_info *vsdb = &display_info->amd_vsdb; + u32 lum1_max = vsdb->luminance_range1.max_luminance; + u32 lum2_max = vsdb->luminance_range2.max_luminance; + + if (vsdb->version && link->local_sink && + link->local_sink->edid_caps.manufacturer_id == + DDC_MANUFACTURERNAME_SAMSUNG && + lum1_max >= ((lum2_max * 3) / 2)) + link->panel_type = PANEL_TYPE_MINILED; + } + + if (link->panel_type == PANEL_TYPE_OLED) + drm_object_property_set_value(&connector->base, + adev_to_drm(adev)->mode_config.panel_type_property, + DRM_MODE_PANEL_TYPE_OLED); + else + drm_object_property_set_value(&connector->base, + adev_to_drm(adev)->mode_config.panel_type_property, + DRM_MODE_PANEL_TYPE_UNKNOWN); + + drm_dbg_kms(aconnector->base.dev, "Panel type: %d\n", link->panel_type); +} + static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) { const struct drm_panel_backlight_quirk *panel_backlight_quirk; @@ -3854,10 +3923,6 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; caps->aux_support = false; - drm_object_property_set_value(&conn_base->base, - adev_to_drm(adev)->mode_config.panel_type_property, - caps->ext_caps->bits.oled ? DRM_MODE_PANEL_TYPE_OLED : DRM_MODE_PANEL_TYPE_UNKNOWN); - if (caps->ext_caps->bits.oled == 1 /* * || @@ -3938,7 +4003,7 @@ void amdgpu_dm_update_connector_after_detect( if (sink) { if (aconnector->dc_sink) { - amdgpu_dm_update_freesync_caps(connector, NULL); + amdgpu_dm_update_freesync_caps(connector, NULL, true); /* * retain and release below are used to * bump up refcount for sink because the link doesn't point @@ -3950,9 +4015,9 @@ void amdgpu_dm_update_connector_after_detect( aconnector->dc_sink = sink; dc_sink_retain(aconnector->dc_sink); amdgpu_dm_update_freesync_caps(connector, - aconnector->drm_edid); + aconnector->drm_edid, true); } else { - amdgpu_dm_update_freesync_caps(connector, NULL); + amdgpu_dm_update_freesync_caps(connector, NULL, true); if (!aconnector->dc_sink) { aconnector->dc_sink = aconnector->dc_em_sink; dc_sink_retain(aconnector->dc_sink); @@ -3996,7 +4061,7 @@ void amdgpu_dm_update_connector_after_detect( * If yes, put it here. 
*/ if (aconnector->dc_sink) { - amdgpu_dm_update_freesync_caps(connector, NULL); + amdgpu_dm_update_freesync_caps(connector, NULL, true); dc_sink_release(aconnector->dc_sink); } @@ -4029,12 +4094,13 @@ void amdgpu_dm_update_connector_after_detect( "failed to create aconnector->requested_timing\n"); } - amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid); + amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid, true); update_connector_ext_caps(aconnector); + dm_set_panel_type(aconnector); } else { hdmi_cec_unset_edid(aconnector); drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); - amdgpu_dm_update_freesync_caps(connector, NULL); + amdgpu_dm_update_freesync_caps(connector, NULL, true); aconnector->num_modes = 0; dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; @@ -4211,7 +4277,7 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector) dc_sink_retain(aconnector->hdmi_prev_sink); /* Schedule delayed detection. */ - if (mod_delayed_work(system_wq, + if (mod_delayed_work(system_percpu_wq, &aconnector->hdmi_hpd_debounce_work, msecs_to_jiffies(aconnector->hdmi_hpd_debounce_delay_ms))) drm_dbg_kms(dev, "HDMI HPD: Re-scheduled debounce work\n"); @@ -8798,7 +8864,7 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, * drm_edid_connector_add_modes() and need to be * restored here. */ - amdgpu_dm_update_freesync_caps(connector, drm_edid); + amdgpu_dm_update_freesync_caps(connector, drm_edid, false); } else { amdgpu_dm_connector->num_modes = 0; } @@ -9342,9 +9408,21 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, if (acrtc_state) { timing = &acrtc_state->stream->timing; - if (amdgpu_ip_version(adev, DCE_HWIP, 0) < - IP_VERSION(3, 5, 0) || - !(adev->flags & AMD_IS_APU)) { + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= + IP_VERSION(3, 2, 0) && + !(adev->flags & AMD_IS_APU)) { + /* + * DGPUs NV3x and newer that support idle optimizations + * experience intermittent flip-done timeouts on cursor + * updates. Restore 5s offdelay behavior for now. + * + * Discussion on the issue: + * https://lore.kernel.org/amd-gfx/20260217191632.1243826-1-sysdadmin@m1k.cloud/ + */ + config.offdelay_ms = 5000; + config.disable_immediate = false; + } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) < + IP_VERSION(3, 5, 0)) { /* * Older HW and DGPU have issues with instant off; * use a 2 frame offdelay. @@ -11142,8 +11220,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (!adev->in_suspend) { /* return the stolen vga memory back to VRAM */ if (!adev->mman.keep_stolen_vga_memory) - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_STOLEN_VGA); + amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_STOLEN_EXTENDED); } /* @@ -12273,6 +12351,38 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm */ /** + * dm_plane_color_pipeline_active() - Check if a plane's color pipeline active. + * @state: DRM atomic state + * @plane: DRM plane to check + * @use_old: if true, inspect the old colorop states; otherwise the new ones + * + * A color pipeline may be selected (color_pipeline != NULL) but still is + * inactive if every colorop in the chain is bypassed. Only return + * true when at least one colorop has bypass == false, meaning the cursor + * would be subjected to the transformation in native mode. 
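+ * As an illustration (a hypothetical state, not taken from this patch):
+ * a plane whose selected pipeline contains only a curve colorop left in
+ * bypass reports inactive here, so the cursor may stay in native mode;
+ * once that colorop is programmed with bypass == false the pipeline
+ * counts as active and the cursor has to move to overlay mode.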
+ * + * Return: true if the pipeline modifies pixels, false otherwise. + */ +static bool dm_plane_color_pipeline_active(struct drm_atomic_state *state, + struct drm_plane *plane, + bool use_old) +{ + struct drm_colorop *colorop; + struct drm_colorop_state *old_colorop_state, *new_colorop_state; + int i; + + for_each_oldnew_colorop_in_state(state, colorop, old_colorop_state, new_colorop_state, i) { + struct drm_colorop_state *cstate = use_old ? old_colorop_state : new_colorop_state; + + if (cstate->colorop->plane != plane) + continue; + if (!cstate->bypass) + return true; + } + return false; +} + +/** * dm_crtc_get_cursor_mode() - Determine the required cursor mode on crtc * @adev: amdgpu device * @state: DRM atomic state @@ -12283,8 +12393,8 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm * the dm_crtc_state. * * The cursor should be enabled in overlay mode if there exists an underlying - * plane - on which the cursor may be blended - that is either YUV formatted, or - * scaled differently from the cursor. + * plane - on which the cursor may be blended - that is either YUV formatted, + * scaled differently from the cursor, or has a color pipeline active. * * Since zpos info is required, drm_atomic_normalize_zpos must be called before * calling this function. @@ -12322,7 +12432,7 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev, /* * Cursor mode can change if a plane's format changes, scale changes, is - * enabled/disabled, or z-order changes. + * enabled/disabled, z-order changes, or color management properties change. */ for_each_oldnew_plane_in_state(state, plane, old_plane_state, plane_state, i) { int new_scale_w, new_scale_h, old_scale_w, old_scale_h; @@ -12347,6 +12457,12 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev, consider_mode_change = true; break; } + + if (dm_plane_color_pipeline_active(state, plane, true) != + dm_plane_color_pipeline_active(state, plane, false)) { + consider_mode_change = true; + break; + } } if (!consider_mode_change && !crtc_state->zpos_changed) @@ -12387,6 +12503,12 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev, return 0; } + /* Underlying plane has an active color pipeline - cursor would be transformed */ + if (dm_plane_color_pipeline_active(state, plane, false)) { + *cursor_mode = DM_CURSOR_OVERLAY_MODE; + return 0; + } + dm_get_plane_scale(plane_state, &underlying_scale_w, &underlying_scale_h); dm_get_plane_scale(cursor_state, @@ -12766,7 +12888,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } else if (required_cursor_mode == DM_CURSOR_OVERLAY_MODE) { drm_dbg_driver(crtc->dev, - "[CRTC:%d:%s] Cannot enable native cursor due to scaling or YUV restrictions\n", + "[CRTC:%d:%s] Cannot enable native cursor due to scaling, YUV, or color pipeline restrictions\n", crtc->base.id, crtc->name); ret = -EINVAL; goto fail; @@ -13031,6 +13153,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, vsdb->amd_vsdb_version = output->amd_vsdb.amd_vsdb_version; vsdb->min_refresh_rate_hz = output->amd_vsdb.min_frame_rate; vsdb->max_refresh_rate_hz = output->amd_vsdb.max_frame_rate; + vsdb->freesync_mccs_vcp_code = output->amd_vsdb.freesync_mccs_vcp_code; } else { drm_warn(adev_to_drm(dm->adev), "Unknown EDID CEA parser results\n"); return false; @@ -13065,6 +13188,8 @@ static bool parse_edid_cea_dmcu(struct amdgpu_display_manager *dm, vsdb_info->amd_vsdb_version = version; vsdb_info->min_refresh_rate_hz = min_rate; 
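/*
 * Tying the colorop hunks above together: dm_crtc_get_cursor_mode()
 * calls dm_plane_color_pipeline_active() twice per plane - once with
 * use_old = true, once with use_old = false - and treats a difference
 * as a potential cursor mode change, exactly like a format or scale
 * change:
 *
 *	if (dm_plane_color_pipeline_active(state, plane, true) !=
 *	    dm_plane_color_pipeline_active(state, plane, false))
 *		consider_mode_change = true;
 *
 * and any underlying plane whose new state has an active pipeline
 * forces DM_CURSOR_OVERLAY_MODE so the cursor is not run through that
 * plane's color transform.
 */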
vsdb_info->max_refresh_rate_hz = max_rate; + /* Not enabled on DMCU*/ + vsdb_info->freesync_mccs_vcp_code = 0; return true; } /* not amd vsdb */ @@ -13155,56 +13280,15 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, } } -static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, - const struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) +static int get_amd_vsdb(struct amdgpu_dm_connector *aconnector, + struct amdgpu_hdmi_vsdb_info *vsdb_info) { - u8 *edid_ext = NULL; - int i; - int j = 0; - int total_ext_block_len; - - if (edid == NULL || edid->extensions == 0) - return -ENODEV; - - /* Find DisplayID extension */ - for (i = 0; i < edid->extensions; i++) { - edid_ext = (void *)(edid + (i + 1)); - if (edid_ext[0] == DISPLAYID_EXT) - break; - } - - total_ext_block_len = EDID_LENGTH * edid->extensions; - while (j < total_ext_block_len - sizeof(struct amd_vsdb_block)) { - struct amd_vsdb_block *amd_vsdb = (struct amd_vsdb_block *)&edid_ext[j]; - unsigned int ieeeId = (amd_vsdb->ieee_id[2] << 16) | (amd_vsdb->ieee_id[1] << 8) | (amd_vsdb->ieee_id[0]); - - if (ieeeId == HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID && - amd_vsdb->version == HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3) { - u8 panel_type; - vsdb_info->replay_mode = (amd_vsdb->feature_caps & AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE) ? true : false; - vsdb_info->amd_vsdb_version = HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3; - drm_dbg_kms(aconnector->base.dev, "Panel supports Replay Mode: %d\n", vsdb_info->replay_mode); - panel_type = (amd_vsdb->color_space_eotf_support & AMD_VDSB_VERSION_3_PANEL_TYPE_MASK) >> AMD_VDSB_VERSION_3_PANEL_TYPE_SHIFT; - switch (panel_type) { - case AMD_VSDB_PANEL_TYPE_OLED: - aconnector->dc_link->panel_type = PANEL_TYPE_OLED; - break; - case AMD_VSDB_PANEL_TYPE_MINILED: - aconnector->dc_link->panel_type = PANEL_TYPE_MINILED; - break; - default: - aconnector->dc_link->panel_type = PANEL_TYPE_NONE; - break; - } - drm_dbg_kms(aconnector->base.dev, "Panel type: %d\n", - aconnector->dc_link->panel_type); + struct drm_connector *connector = &aconnector->base; - return true; - } - j++; - } + vsdb_info->replay_mode = connector->display_info.amd_vsdb.replay_mode; + vsdb_info->amd_vsdb_version = connector->display_info.amd_vsdb.version; - return false; + return connector->display_info.amd_vsdb.version != 0; } static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, @@ -13244,6 +13328,10 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, * * @connector: Connector to query. * @drm_edid: DRM EDID from monitor + * @do_mccs: Controls whether MCCS (Monitor Control Command Set) over + * DDC (Display Data Channel) transactions are performed. When true, + * the driver queries the monitor to get or update additional FreeSync + * capability information. When false, these transactions are skipped. * * Amdgpu supports Freesync in DP and HDMI displays, and it is required to keep * track of some of the display information in the internal data struct used by @@ -13251,7 +13339,7 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, * FreeSync parameters. 
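 * Callers that only need the cached EDID/VSDB state - the .get_modes
 * path, for instance - pass do_mccs = false so no DDC/MCCS traffic is
 * generated; the detection paths pass true so the FreeSync VCP query
 * (and, when supported, the enabling VCP set) can reach the monitor.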
*/ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, - const struct drm_edid *drm_edid) + const struct drm_edid *drm_edid, bool do_mccs) { int i = 0; struct amdgpu_dm_connector *amdgpu_dm_connector = @@ -13307,7 +13395,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, freesync_capable = true; } - parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); + get_amd_vsdb(amdgpu_dm_connector, &vsdb_info); if (vsdb_info.replay_mode) { amdgpu_dm_connector->vsdb_info.replay_mode = vsdb_info.replay_mode; @@ -13317,14 +13405,19 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, } else if (drm_edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) { i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); - if (i >= 0 && vsdb_info.freesync_supported) { - amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz; - amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; + if (i >= 0) { + amdgpu_dm_connector->vsdb_info = vsdb_info; + sink->edid_caps.freesync_vcp_code = vsdb_info.freesync_mccs_vcp_code; - connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; - connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; + if (vsdb_info.freesync_supported) { + amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz; + amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz; + if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; + + connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; + connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; + } } } @@ -13333,22 +13426,38 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) { i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); - if (i >= 0 && vsdb_info.freesync_supported && vsdb_info.amd_vsdb_version > 0) { - - amdgpu_dm_connector->pack_sdp_v1_3 = true; - amdgpu_dm_connector->as_type = as_type; + if (i >= 0) { amdgpu_dm_connector->vsdb_info = vsdb_info; + sink->edid_caps.freesync_vcp_code = vsdb_info.freesync_mccs_vcp_code; - amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz; - amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; + if (vsdb_info.freesync_supported && vsdb_info.amd_vsdb_version > 0) { + amdgpu_dm_connector->pack_sdp_v1_3 = true; + amdgpu_dm_connector->as_type = as_type; + + amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz; + amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz; + if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; - connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; - connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; + connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; + connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; + } } } + /* Handle MCCS */ + if (do_mccs) + dm_helpers_read_mccs_caps(adev->dm.dc->ctx, amdgpu_dm_connector->dc_link, sink); + + if ((sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A || + as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) && + (!sink->edid_caps.freesync_vcp_code || + 
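/*
 * ...or a VCP code exists but the monitor's MCCS reply did not set the
 * FreeSync bit. For HDMI sinks and whitelisted PCONs this means
 * EDID-advertised FreeSync is only honored once the MCCS handshake
 * confirms it; the two clauses reduce to
 *
 *	!vcp_code || (vcp_code && !mccs_ok)  ==  !(vcp_code && mccs_ok)
 *
 * i.e. freesync_capable requires both a VCP code from the VSDB and a
 * positive MCCS capability reply.
 */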
(sink->edid_caps.freesync_vcp_code && !sink->mccs_caps.freesync_supported))) + freesync_capable = false; + + if (do_mccs && sink->mccs_caps.freesync_supported && freesync_capable) + dm_helpers_mccs_vcp_set(adev->dm.dc->ctx, amdgpu_dm_connector->dc_link, sink); + update: if (dm_con_state) dm_con_state->freesync_capable = freesync_capable; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index d1a14e0c12bd..74a8fe1a1999 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -53,12 +53,6 @@ #define AMDGPU_DMUB_NOTIFICATION_MAX 8 -#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A -#define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 -#define AMD_VDSB_VERSION_3_PANEL_TYPE_MASK 0xC0 -#define AMD_VDSB_VERSION_3_PANEL_TYPE_SHIFT 6 -#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3 - enum amd_vsdb_panel_type { AMD_VSDB_PANEL_TYPE_DEFAULT = 0, AMD_VSDB_PANEL_TYPE_MINILED, @@ -97,14 +91,6 @@ struct dc_plane_state; struct dmub_notification; struct dmub_cmd_fused_request; -struct amd_vsdb_block { - unsigned char ieee_id[3]; - unsigned char version; - unsigned char feature_caps; - unsigned char reserved[3]; - unsigned char color_space_eotf_support; -}; - struct common_irq_params { struct amdgpu_device *adev; enum dc_irq_source irq_src; @@ -773,6 +759,11 @@ struct amdgpu_hdmi_vsdb_info { unsigned int max_refresh_rate_hz; /** + * @freesync_mccs_vcp_code: MCCS VCP code for freesync state + */ + unsigned int freesync_mccs_vcp_code; + + /** * @replay_mode: Replay supported */ bool replay_mode; @@ -1080,7 +1071,7 @@ void dm_restore_drm_connector_state(struct drm_device *dev, struct drm_connector *connector); void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, - const struct drm_edid *drm_edid); + const struct drm_edid *drm_edid, bool do_mccs); void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index c3c588294665..d69f5a75b685 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -101,23 +101,22 @@ bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) /** * amdgpu_dm_crtc_set_panel_sr_feature() - Manage panel self-refresh features. - * - * @vblank_work: is a pointer to a struct vblank_control_work object. - * @vblank_enabled: indicates whether the DRM vblank counter is currently - * enabled (true) or disabled (false). - * @allow_sr_entry: represents whether entry into the self-refresh mode is - * allowed (true) or not allowed (false). + * @dm: amdgpu display manager instance. + * @acrtc: CRTC whose panel self-refresh state is being updated. + * @stream: DC stream associated with @acrtc. + * @vblank_enabled: Whether the DRM vblank counter is currently enabled. + * @allow_sr_entry: Whether entry into self-refresh mode is allowed. 
* * The DRM vblank counter enable/disable action is used as the trigger to enable * or disable various panel self-refresh features: * * Panel Replay and PSR SU * - Enable when: - * - VRR is disabled - * - vblank counter is disabled - * - entry is allowed: usermode demonstrates an adequate number of fast - * commits) - * - CRC capture window isn't active + * - VRR is disabled + * - vblank counter is disabled + * - entry is allowed: usermode demonstrates an adequate number of fast + * commits + * - CRC capture window isn't active * - Keep enabled even when vblank counter gets enabled * * PSR1 @@ -458,9 +457,12 @@ static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *cr static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc) { - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + /* + * amdgpu_dm_ism_fini() is intentionally called in amdgpu_dm_fini(). + * It must be called before dc_destroy() in amdgpu_dm_fini() + * to avoid ISM accessing an invalid dc handle once dc is released. + */ - amdgpu_dm_ism_fini(&acrtc->ism); drm_crtc_cleanup(crtc); kfree(crtc); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index f3fa8eb4bcce..a3cb05490dc9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -49,6 +49,45 @@ #include "ddc_service_types.h" #include "clk_mgr.h" +#define MCCS_DEST_ADDR (0x6E >> 1) +#define MCCS_SRC_ADDR 0x51 +#define MCCS_LENGTH_OFFSET 0x80 +#define MCCS_MAX_DATA_SIZE 0x20 + +enum mccs_op_code { + MCCS_OP_CODE_VCP_REQUEST = 0x01, + MCCS_OP_CODE_VCP_REPLY = 0x02, + MCCS_OP_CODE_VCP_SET = 0x03, + MCCS_OP_CODE_VCP_RESET = 0x09, + MCCS_OP_CODE_CAP_REQUEST = 0xF3, + MCCS_OP_CODE_CAP_REPLY = 0xE3 +}; + +enum mccs_op_buff_size { + MCCS_OP_BUFF_SIZE__WR_VCP_REQUEST = 5, + MCCS_OP_BUFF_SIZE_RD_VCP_REQUEST = 11, + MCCS_OP_BUFF_SIZE_WR_VCP_SET = 7, +}; + +enum vcp_reply_mask { + FREESYNC_SUPPORTED = 0x1 +}; + +union vcp_reply { + struct { + unsigned char src_addr; + unsigned char length; /* Length is offset by MccsLengthOffs = 0x80 */ + unsigned char reply_op_code; /* Should return MCCS_OP_CODE_VCP_REPLY = 0x02 */ + unsigned char result_code; /* 00h No Error, 01h Unsupported VCP Code */ + unsigned char request_code; /* Should return mccs vcp code sent in the vcp request */ + unsigned char type_code; /* VCP type code: 00h Set parameter, 01h Momentary */ + unsigned char max_value[2]; /* 2 bytes returning max value current value */ + unsigned char present_value[2]; /* NOTE: Byte0 is MSB, Byte1 is LSB */ + unsigned char check_sum; + } bytes; + unsigned char raw[11]; +}; + static u32 edid_extract_panel_id(struct edid *edid) { return (u32)edid->mfg_id[0] << 24 | @@ -993,6 +1032,45 @@ dm_helpers_read_acpi_edid(struct amdgpu_dm_connector *aconnector) return drm_edid_read_custom(connector, dm_helpers_probe_acpi_edid, connector); } +static const struct drm_edid * +dm_helpers_read_vbios_hardcoded_edid(struct dc_link *link, struct amdgpu_dm_connector *aconnector) +{ + struct dc_bios *bios = link->ctx->dc_bios; + struct embedded_panel_info info; + const struct drm_edid *edid; + enum bp_result r; + + if (!dc_is_embedded_signal(link->connector_signal) || + !bios->funcs->get_embedded_panel_info) + return NULL; + + memset(&info, 0, sizeof(info)); + r = bios->funcs->get_embedded_panel_info(bios, &info); + + if (r != BP_RESULT_OK) { + dm_error("Error when reading embedded panel info: %u\n", r); + return NULL; + } + + if 
 static u32 edid_extract_panel_id(struct edid *edid)
 {
 	return (u32)edid->mfg_id[0] << 24 |
@@ -993,6 +1032,45 @@ dm_helpers_read_acpi_edid(struct amdgpu_dm_connector *aconnector)
 	return drm_edid_read_custom(connector, dm_helpers_probe_acpi_edid, connector);
 }
 
+static const struct drm_edid *
+dm_helpers_read_vbios_hardcoded_edid(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+{
+	struct dc_bios *bios = link->ctx->dc_bios;
+	struct embedded_panel_info info;
+	const struct drm_edid *edid;
+	enum bp_result r;
+
+	if (!dc_is_embedded_signal(link->connector_signal) ||
+	    !bios->funcs->get_embedded_panel_info)
+		return NULL;
+
+	memset(&info, 0, sizeof(info));
+	r = bios->funcs->get_embedded_panel_info(bios, &info);
+
+	if (r != BP_RESULT_OK) {
+		dm_error("Error when reading embedded panel info: %u\n", r);
+		return NULL;
+	}
+
+	if (!info.fake_edid || !info.fake_edid_size) {
+		dm_error("Embedded panel info doesn't contain an EDID\n");
+		return NULL;
+	}
+
+	edid = drm_edid_alloc(info.fake_edid, info.fake_edid_size);
+
+	if (!drm_edid_valid(edid)) {
+		dm_error("EDID from embedded panel info is invalid\n");
+		drm_edid_free(edid);
+		return NULL;
+	}
+
+	aconnector->base.display_info.width_mm = info.panel_width_mm;
+	aconnector->base.display_info.height_mm = info.panel_height_mm;
+
+	return edid;
+}
+
 void populate_hdmi_info_from_connector(struct drm_hdmi_info *hdmi, struct dc_edid_caps *edid_caps)
 {
 	edid_caps->scdc_present = hdmi->scdc.supported;
@@ -1013,6 +1091,9 @@ enum dc_edid_status dm_helpers_read_local_edid(
 
 	if (link->aux_mode)
 		ddc = &aconnector->dm_dp_aux.aux.ddc;
+	else if (link->ddc_hw_inst == GPIO_DDC_LINE_UNKNOWN &&
+		 dc_is_embedded_signal(link->connector_signal))
+		ddc = NULL;
 	else
 		ddc = &aconnector->i2c->base;
 
@@ -1026,6 +1107,8 @@ enum dc_edid_status dm_helpers_read_local_edid(
 		drm_edid = dm_helpers_read_acpi_edid(aconnector);
 		if (drm_edid)
 			drm_info(connector->dev, "Using ACPI provided EDID for %s\n", connector->name);
+		else if (!ddc)
+			drm_edid = dm_helpers_read_vbios_hardcoded_edid(link, aconnector);
 		else
 			drm_edid = drm_edid_read_ddc(connector, ddc);
 		drm_edid_connector_update(connector, drm_edid);
@@ -1400,6 +1483,8 @@ static bool dm_is_freesync_pcon_whitelist(const uint32_t branch_dev_id)
 	case DP_BRANCH_DEVICE_ID_0060AD:
 	case DP_BRANCH_DEVICE_ID_00E04C:
 	case DP_BRANCH_DEVICE_ID_90CC24:
+	case DP_BRANCH_DEVICE_ID_001CF8:
+	case DP_BRANCH_DEVICE_ID_001FF2:
 		ret_val = true;
 		break;
 	default:
@@ -1439,3 +1524,203 @@ bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream
 	// TODO
 	return false;
 }
+
+static int mccs_operation_vcp_request(unsigned int vcp_code, struct dc_link *link,
+				      union vcp_reply *reply)
+{
+	const unsigned char retry_interval_ms = 40;
+	unsigned char retry = 5;
+	struct amdgpu_dm_connector *aconnector = link->priv;
+	struct i2c_adapter *ddc;
+	struct i2c_msg msg = {0};
+	int ret = 0;
+	int idx;
+
+	unsigned char wr_data[MCCS_OP_BUFF_SIZE_WR_VCP_REQUEST] = {
+		MCCS_SRC_ADDR,			/* Byte0 - Src Addr */
+		MCCS_LENGTH_OFFSET + 2,		/* Byte1 - Length */
+		MCCS_OP_CODE_VCP_REQUEST,	/* Byte2 - MCCS Command */
+		(unsigned char) vcp_code,	/* Byte3 - VCP Code */
+		MCCS_DEST_ADDR << 1		/* Byte4 - CheckSum */
+	};
+
+	/* calculate checksum */
+	for (idx = 0; idx < (MCCS_OP_BUFF_SIZE_WR_VCP_REQUEST - 1); idx++)
+		wr_data[MCCS_OP_BUFF_SIZE_WR_VCP_REQUEST - 1] ^= wr_data[idx];
+
+	if (link->aux_mode)
+		ddc = &aconnector->dm_dp_aux.aux.ddc;
+	else
+		ddc = &aconnector->i2c->base;
+
+	do {
+		msg.addr = MCCS_DEST_ADDR;
+		msg.flags = 0;
+		msg.len = MCCS_OP_BUFF_SIZE_WR_VCP_REQUEST;
+		msg.buf = wr_data;
+
+		ret = i2c_transfer(ddc, &msg, 1);
+		if (ret != 1)
+			goto mccs_retry;
+
+		msleep(retry_interval_ms);
+
+		msg.addr = MCCS_DEST_ADDR;
+		msg.flags = I2C_M_RD;
+		msg.len = MCCS_OP_BUFF_SIZE_RD_VCP_REQUEST;
+		msg.buf = reply->raw;
+
+		ret = i2c_transfer(ddc, &msg, 1);
+
+		/* sink might reply with null msg if it can't reply in time */
+		if (ret == 1 && reply->bytes.length > MCCS_LENGTH_OFFSET)
+			break;
+mccs_retry:
+		retry--;
+		msleep(retry_interval_ms);
+	} while (retry);
+
+	if (!retry) {
+		drm_dbg_driver(aconnector->base.dev,
+			       "%s: MCCS VCP request failed after retries", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
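The request helper above hands back an 11-byte reply whose present_value pair is big-endian on the wire. A minimal, self-contained sketch (hypothetical names, not part of the patch) of folding that pair into a host-order value, as dm_helpers_read_mccs_caps() does next:

#include <stdint.h>

/* Hypothetical stand-in for the two present_value bytes of union vcp_reply. */
struct vcp_present_value {
	uint8_t msb;	/* present_value[0] */
	uint8_t lsb;	/* present_value[1] */
};

/* Byte0 is the MSB and Byte1 the LSB, so shift the first byte up by 8. */
static uint16_t vcp_present_value_to_u16(struct vcp_present_value v)
{
	return (uint16_t)(((uint16_t)v.msb << 8) | v.lsb);
}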
+void dm_helpers_read_mccs_caps(struct dc_context *ctx, struct dc_link *link,
+			       struct dc_sink *sink)
+{
+	bool mccs_op = false;
+	struct dpcd_caps *dpcd_caps;
+	struct drm_device *dev;
+	uint16_t freesync_vcp_value = 0;
+	union vcp_reply vcp_reply_value = {0};
+
+	if (!ctx)
+		return;
+	dev = adev_to_drm(ctx->driver_context);
+
+	if (!link || !sink) {
+		drm_dbg_driver(dev, "%s: link or sink is NULL", __func__);
+		return;
+	}
+
+	sink->mccs_caps.freesync_supported = false;
+	dpcd_caps = &link->dpcd_caps;
+
+	if (sink->edid_caps.freesync_vcp_code != 0) {
+		if (dc_is_dp_signal(link->connector_signal)) {
+			if ((dpcd_caps->dpcd_rev.raw >= DPCD_REV_14) &&
+			    (dpcd_caps->dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) &&
+			    dm_is_freesync_pcon_whitelist(dpcd_caps->branch_dev_id) &&
+			    dpcd_caps->adaptive_sync_caps.dp_adap_sync_caps.bits.ADAPTIVE_SYNC_SDP_SUPPORT)
+				mccs_op = true;
+
+			if ((dpcd_caps->dongle_type != DISPLAY_DONGLE_NONE &&
+			     dpcd_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER)) {
+				if (!mccs_op)
+					drm_dbg_driver(dev, "%s: Legacy Pcon support", __func__);
+				mccs_op = true;
+			}
+
+			if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+				// TODO: Freesync over MST
+				mccs_op = false;
+			}
+		}
+
+		if (dc_is_hdmi_signal(link->connector_signal)) {
+			drm_dbg_driver(dev, "%s: Local HDMI sink", __func__);
+			mccs_op = true;
+		}
+
+		if (mccs_op) {
+			// MCCS VCP request to get VCP value
+			if (!mccs_operation_vcp_request(sink->edid_caps.freesync_vcp_code, link,
+							&vcp_reply_value)) {
+				freesync_vcp_value = vcp_reply_value.bytes.present_value[1];
+				freesync_vcp_value |= (uint16_t) vcp_reply_value.bytes.present_value[0] << 8;
+			}
+			// If VCP value bit 0 is 1, freesync is supported
+			sink->mccs_caps.freesync_supported =
+				freesync_vcp_value & FREESYNC_SUPPORTED;
+		}
+	}
+}
+
+static int mccs_operation_vcp_set(unsigned int vcp_code, struct dc_link *link, uint16_t value)
+{
+	const unsigned char retry_interval_ms = 40;
+	unsigned char retry = 5;
+	struct amdgpu_dm_connector *aconnector = link->priv;
+	struct i2c_adapter *ddc;
+	struct i2c_msg msg = {0};
+	int ret = 0;
+	int idx;
+
+	unsigned char wr_data[MCCS_OP_BUFF_SIZE_WR_VCP_SET] = {
+		MCCS_SRC_ADDR,			/* Byte0 - Src Addr */
+		MCCS_LENGTH_OFFSET + 4,		/* Byte1 - Length */
+		MCCS_OP_CODE_VCP_SET,		/* Byte2 - MCCS Command */
+		(unsigned char)vcp_code,	/* Byte3 - VCP Code */
+		(unsigned char)(value >> 8),	/* Byte4 - Value High Byte */
+		(unsigned char)(value & 0xFF),	/* Byte5 - Value Low Byte */
+		MCCS_DEST_ADDR << 1		/* Byte6 - CheckSum */
+	};
+
+	/* calculate checksum */
+	for (idx = 0; idx < (MCCS_OP_BUFF_SIZE_WR_VCP_SET - 1); idx++)
+		wr_data[MCCS_OP_BUFF_SIZE_WR_VCP_SET - 1] ^= wr_data[idx];
+
+	if (link->aux_mode)
+		ddc = &aconnector->dm_dp_aux.aux.ddc;
+	else
+		ddc = &aconnector->i2c->base;
+
+	do {
+		msg.addr = MCCS_DEST_ADDR;
+		msg.flags = 0;
+		msg.len = MCCS_OP_BUFF_SIZE_WR_VCP_SET;
+		msg.buf = wr_data;
+
+		ret = i2c_transfer(ddc, &msg, 1);
+		if (ret == 1)
+			break;
+
+		retry--;
+		msleep(retry_interval_ms);
+	} while (retry);
+
+	if (!retry)
+		return -EIO;
+
+	return 0;
+}
+
+void dm_helpers_mccs_vcp_set(struct dc_context *ctx, struct dc_link *link,
+			     struct dc_sink *sink)
+{
+	struct drm_device *dev;
+	const uint16_t enable = 0x0101;
+
+	if (!ctx)
+		return;
+	dev = adev_to_drm(ctx->driver_context);
+
+	if (!link || !sink) {
+		drm_dbg_driver(dev, "%s: link or sink is NULL", __func__);
+		return;
+	}
+
+	if (!sink->mccs_caps.freesync_supported) {
+		drm_dbg_driver(dev, "%s: MCCS freesync not supported on this sink", __func__);
+		return;
+	}
+
+	if (mccs_operation_vcp_set(sink->edid_caps.freesync_vcp_code, link, enable))
+		drm_dbg_driver(dev, "%s: Failed to 
set VCP code %d", __func__, + sink->edid_caps.freesync_vcp_code); +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c index 65a5cfe1e106..a64e95860e99 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c @@ -35,6 +35,9 @@ /** * dm_ism_next_state - Get next state based on current state and event + * @current_state: current ISM state + * @event: event being processed + * @next_state: place to store the next state * * This function defines the idle state management FSM. Invalid transitions * are ignored and will not progress the FSM. @@ -148,6 +151,11 @@ static uint64_t dm_ism_get_sso_delay(const struct amdgpu_dm_ism *ism, /** * dm_ism_get_idle_allow_delay - Calculate hysteresis-based idle allow delay + * @ism: ISM instance containing configuration, history, and current state + * @stream: display stream used to derive frame timing values for delay + * + * Calculates the delay before allowing idle optimizations based on recent + * idle history and the current stream timing. */ static uint64_t dm_ism_get_idle_allow_delay(const struct amdgpu_dm_ism *ism, const struct dc_stream_state *stream) @@ -212,6 +220,7 @@ static uint64_t dm_ism_get_idle_allow_delay(const struct amdgpu_dm_ism *ism, /** * dm_ism_insert_record - Insert a record into the circular history buffer + * @ism: ISM instance */ static void dm_ism_insert_record(struct amdgpu_dm_ism *ism) { @@ -261,7 +270,6 @@ static void dm_ism_commit_idle_optimization_state(struct amdgpu_dm_ism *ism, struct amdgpu_crtc *acrtc = ism_to_amdgpu_crtc(ism); struct amdgpu_device *adev = drm_to_adev(acrtc->base.dev); struct amdgpu_display_manager *dm = &adev->dm; - int r; trace_amdgpu_dm_ism_commit(dm->active_vblank_irq_count, vblank_enabled, @@ -314,16 +322,7 @@ static void dm_ism_commit_idle_optimization_state(struct amdgpu_dm_ism *ism, */ if (!vblank_enabled && dm->active_vblank_irq_count == 0) { dc_post_update_surfaces_to_stream(dm->dc); - - r = amdgpu_dpm_pause_power_profile(adev, true); - if (r) - dev_warn(adev->dev, "failed to set default power profile mode\n"); - dc_allow_idle_optimizations(dm->dc, true); - - r = amdgpu_dpm_pause_power_profile(adev, false); - if (r) - dev_warn(adev->dev, "failed to restore the power profile mode\n"); } } @@ -463,6 +462,9 @@ void amdgpu_dm_ism_commit_event(struct amdgpu_dm_ism *ism, /* ISM transitions must be called with mutex acquired */ ASSERT(mutex_is_locked(&dm->dc_lock)); + /* ISM should not run after dc is destroyed */ + ASSERT(dm->dc); + if (!acrtc_state) { trace_amdgpu_dm_ism_event(acrtc->crtc_id, "NO_STATE", "NO_STATE", "N/A"); @@ -536,6 +538,8 @@ void amdgpu_dm_ism_disable(struct amdgpu_display_manager *dm) struct amdgpu_crtc *acrtc; struct amdgpu_dm_ism *ism; + ASSERT(mutex_is_locked(&dm->dc_lock)); + drm_for_each_crtc(crtc, dm->ddev) { acrtc = to_amdgpu_crtc(crtc); ism = &acrtc->ism; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5d8c4c7020b1..be038d9014bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -474,7 +474,7 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) if (aconnector->dc_sink) { amdgpu_dm_update_freesync_caps( - connector, aconnector->drm_edid); + connector, aconnector->drm_edid, true); #if defined(CONFIG_DRM_AMD_DC_FP) if 
(!validate_dsc_caps_on_connector(aconnector))
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 8ba9b4f56f87..172999cc84e5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -59,7 +59,7 @@ inline void dc_assert_fp_enabled(void)
 }
 
 /**
- * dc_assert_fp_enabled - Check if FPU protection is enabled
+ * dc_is_fp_enabled - Check if FPU protection is enabled
 *
 * This function tells if the code is already under FPU protection or not. A
 * function that works as an API for a set of FPU operations can use this
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index dd362071a6c9..c307f42fe0b9 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -794,11 +794,13 @@ static enum bp_result bios_parser_external_encoder_control(
 
 static enum bp_result bios_parser_dac_load_detection(
 	struct dc_bios *dcb,
-	enum engine_id engine_id)
+	enum engine_id engine_id,
+	struct graphics_object_id ext_enc_id)
 {
 	struct bios_parser *bp = BP_FROM_DCB(dcb);
 	struct dc_context *ctx = dcb->ctx;
 	struct bp_load_detection_parameters bp_params = {0};
+	struct bp_external_encoder_control ext_cntl = {0};
 	enum bp_result bp_result = BP_RESULT_UNSUPPORTED;
 	uint32_t bios_0_scratch;
 	uint32_t device_id_mask = 0;
@@ -824,6 +826,13 @@ static enum bp_result bios_parser_dac_load_detection(
 		bp_params.engine_id = engine_id;
 
 		bp_result = bp->cmd_tbl.dac_load_detection(bp, &bp_params);
+	} else if (ext_enc_id.id) {
+		if (!bp->cmd_tbl.external_encoder_control)
+			return BP_RESULT_UNSUPPORTED;
+
+		ext_cntl.action = EXTERNAL_ENCODER_CONTROL_DAC_LOAD_DETECT;
+		ext_cntl.encoder_id = ext_enc_id;
+		bp_result = bp->cmd_tbl.external_encoder_control(bp, &ext_cntl);
 	}
 
 	if (bp_result != BP_RESULT_OK)
@@ -1304,6 +1313,63 @@ static enum bp_result bios_parser_get_embedded_panel_info(
 	return BP_RESULT_FAILURE;
 }
 
+static enum bp_result get_embedded_panel_extra_info(
+	struct bios_parser *bp,
+	struct embedded_panel_info *info,
+	const uint32_t table_offset)
+{
+	uint8_t *record = bios_get_image(&bp->base, table_offset, 1);
+	ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record;
+	ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record;
+
+	if (!record)
+		return BP_RESULT_BADBIOSTABLE;
+
+	while (*record != ATOM_RECORD_END_TYPE) {
+		switch (*record) {
+		case LCD_MODE_PATCH_RECORD_MODE_TYPE:
+			record += sizeof(ATOM_PATCH_RECORD_MODE);
+			break;
+		case LCD_RTS_RECORD_TYPE:
+			record += sizeof(ATOM_LCD_RTS_RECORD);
+			break;
+		case LCD_CAP_RECORD_TYPE:
+			record += sizeof(ATOM_LCD_MODE_CONTROL_CAP);
+			break;
+		case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
+			fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record;
+			if (fake_edid_record->ucFakeEDIDLength) {
+				if (fake_edid_record->ucFakeEDIDLength == 128)
+					info->fake_edid_size =
+						fake_edid_record->ucFakeEDIDLength;
+				else
+					info->fake_edid_size =
+						fake_edid_record->ucFakeEDIDLength * 128;
+
+				info->fake_edid = fake_edid_record->ucFakeEDIDString;
+
+				record += struct_size(fake_edid_record,
+						      ucFakeEDIDString,
+						      info->fake_edid_size);
+			} else {
+				/* empty fake edid record must be 3 bytes long */
+				record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
+			}
+			break;
+		case LCD_PANEL_RESOLUTION_RECORD_TYPE:
+			panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
+			info->panel_width_mm = panel_res_record->usHSize;
+			info->panel_height_mm = panel_res_record->usVSize;
+			record += sizeof(ATOM_PANEL_RESOLUTION_PATCH_RECORD);
+
break; + default: + return BP_RESULT_BADBIOSTABLE; + } + } + + return BP_RESULT_OK; +} + static enum bp_result get_embedded_panel_info_v1_2( struct bios_parser *bp, struct embedded_panel_info *info) @@ -1420,6 +1483,10 @@ static enum bp_result get_embedded_panel_info_v1_2( if (ATOM_PANEL_MISC_API_ENABLED & lvds->ucLVDS_Misc) info->lcd_timing.misc_info.API_ENABLED = true; + if (lvds->usExtInfoTableOffset) + return get_embedded_panel_extra_info(bp, info, + le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info)); + return BP_RESULT_OK; } @@ -1545,6 +1612,10 @@ static enum bp_result get_embedded_panel_info_v1_3( (uint32_t) (ATOM_PANEL_MISC_V13_GREY_LEVEL & lvds->ucLCD_Misc) >> ATOM_PANEL_MISC_V13_GREY_LEVEL_SHIFT; + if (lvds->usExtInfoTableOffset) + return get_embedded_panel_extra_info(bp, info, + le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info)); + return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn42/dcn42_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn42/dcn42_clk_mgr.c index ec888aed207d..6a97ce69a562 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn42/dcn42_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn42/dcn42_clk_mgr.c @@ -611,80 +611,6 @@ static struct clk_bw_params dcn42_bw_params = { }; -static struct wm_table ddr5_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - } -}; - -static struct wm_table lpddr5_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 28.0, - .sr_enter_plus_exit_time_us = 30.0, - .valid = true, - }, - } -}; - struct dcn42_ss_info_table dcn42_ss_info_table = { .ss_divider = 1000, .ss_percentage = {0, 0, 375, 375, 375} @@ -1141,10 +1067,6 @@ void dcn42_clk_mgr_construct( if (ctx->dc_bios->integrated_info) { clk_mgr->base.base.dentist_vco_freq_khz = ctx->dc_bios->integrated_info->dentist_vco_freq; - if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) - dcn42_bw_params.wm_table = lpddr5_wm_table; - else - dcn42_bw_params.wm_table = ddr5_wm_table; dcn42_bw_params.vram_type = ctx->dc_bios->integrated_info->memory_type; dcn42_bw_params.dram_channel_width_bytes = ctx->dc_bios->integrated_info->memory_type == 0x22 ? 8 : 4; dcn42_bw_params.num_channels = ctx->dc_bios->integrated_info->ma_channel_number ? 
ctx->dc_bios->integrated_info->ma_channel_number : 1; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index db86e346307c..7333f5905330 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -40,7 +40,6 @@ #include "dcn10/dcn10_hubbub.h" #include "dce/dmub_hw_lock_mgr.h" -#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) #define MAX_NUM_MCACHE 8 /* used as index in array of black_color_format */ @@ -230,7 +229,7 @@ const uint16_t *find_color_matrix(enum dc_color_space color_space, int i; enum dc_color_space_type type; const uint16_t *val = NULL; - int arr_size = NUM_ELEMENTS(output_csc_matrix); + int arr_size = ARRAY_SIZE(output_csc_matrix); type = get_color_space_type(color_space); for (i = 0; i < arr_size; i++) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 66597a1f5b78..05991a10f8bf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5062,13 +5062,19 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, option = DITHER_OPTION_SPATIAL8; break; case COLOR_DEPTH_101010: - option = DITHER_OPTION_TRUN10; + option = DITHER_OPTION_SPATIAL10; break; default: option = DITHER_OPTION_DISABLE; } } + if (stream->ctx->dce_version < DCE_VERSION_8_0 && + stream->timing.display_color_depth >= COLOR_DEPTH_101010) { + /* DCE 6.x doesn't support 10-bit truncation or dither options. */ + option = DITHER_OPTION_DISABLE; + } + if (option == DITHER_OPTION_DISABLE) return; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 55ec281db3b7..37714d4371fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -63,7 +63,7 @@ struct dcn_dsc_reg_state; struct dcn_optc_reg_state; struct dcn_dccg_reg_state; -#define DC_VER "3.2.376" +#define DC_VER "3.2.378" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -562,6 +562,7 @@ struct dc_config { bool frame_update_cmd_version2; struct spl_sharpness_range dcn_sharpness_range; struct spl_sharpness_range dcn_override_sharpness_range; + bool no_native422_support; }; enum visual_confirm { @@ -986,7 +987,6 @@ struct link_service; * causing an issue or not. 
*/ struct dc_debug_options { - bool native422_support; bool disable_dsc; enum visual_confirm visual_confirm; int visual_confirm_rect_height; @@ -1061,9 +1061,11 @@ struct dc_debug_options { bool hdmi20_disable; bool skip_detection_link_training; uint32_t edid_read_retry_times; - unsigned int force_odm_combine; //bit vector based on otg inst - unsigned int seamless_boot_odm_combine; - unsigned int force_odm_combine_4to1; //bit vector based on otg inst + + uint8_t force_odm_combine; //bit vector based on otg inst + uint8_t seamless_boot_odm_combine; + uint8_t force_odm_combine_4to1; //bit vector based on otg inst + int minimum_z8_residency_time; int minimum_z10_residency_time; bool disable_z9_mpc; @@ -1680,7 +1682,7 @@ struct dc_scratch_space { struct dc_link_training_overrides preferred_training_settings; struct dp_audio_test_data audio_test_data; - uint8_t ddc_hw_inst; + enum gpio_ddc_line ddc_hw_inst; uint8_t hpd_src; @@ -2725,6 +2727,7 @@ struct dc_sink { struct stereo_3d_features features_3d[TIMING_3D_FORMAT_MAX]; bool converter_disable_audio; + struct mccs_caps mccs_caps; struct scdc_caps scdc_caps; struct dc_sink_dsc_caps dsc_caps; struct dc_sink_fec_caps fec_caps; diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 6f96c5cf39fe..526f71616f94 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -102,7 +102,8 @@ struct dc_vbios_funcs { struct bp_external_encoder_control *cntl); enum bp_result (*dac_load_detection)( struct dc_bios *bios, - enum engine_id engine_id); + enum engine_id engine_id, + struct graphics_object_id ext_enc_id); enum bp_result (*transmitter_control)( struct dc_bios *bios, struct bp_transmitter_control *cntl); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index 9d18f1c08079..101bce6b8de6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -52,6 +52,7 @@ struct dc_dsc_policy { uint32_t max_target_bpp; uint32_t min_target_bpp; bool enable_dsc_when_not_needed; + bool ycbcr422_simple; }; struct dc_dsc_config_options { diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 86394203cee7..7c38fa6f8cb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -162,13 +162,13 @@ struct test_pattern { #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR) struct dc_stream_debug_options { - char force_odm_combine_segments; + uint8_t force_odm_combine_segments; /* * When force_odm_combine_segments is non zero, allow dc to * temporarily transition to ODM bypass when minimal transition state * is required to prevent visual glitches showing on the screen */ - char allow_transition_for_forced_odm; + uint8_t allow_transition_for_forced_odm; }; #define LUMINANCE_DATA_TABLE_SIZE 10 diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index fd8ec1660312..c08d5c005df6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -205,6 +205,8 @@ struct dc_edid_caps { uint32_t audio_latency; uint32_t video_latency; + unsigned char freesync_vcp_code; + uint8_t qs_bit; uint8_t qy_bit; @@ -1313,6 +1315,10 @@ struct dc_panel_config { } rio; }; +struct mccs_caps { + bool freesync_supported; +}; + #define MAX_SINKS_PER_LINK 4 /* diff --git 
a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 34e54fdb9d13..25c13822fede 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -57,8 +57,6 @@ #define CALC_PLL_CLK_SRC_ERR_TOLERANCE 1 #define MAX_PLL_CALC_ERROR 0xFFFFFFFF -#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) - static const struct spread_spectrum_data *get_ss_data_entry( struct dce110_clk_src *clk_src, enum signal_type signal, @@ -1271,7 +1269,7 @@ const struct pixel_rate_range_table_entry *look_up_in_video_optimized_rate_tlb( { int i; - for (i = 0; i < NUM_ELEMENTS(video_optimized_pixel_rates); i++) { + for (i = 0; i < ARRAY_SIZE(video_optimized_pixel_rates); i++) { const struct pixel_rate_range_table_entry *e = &video_optimized_pixel_rates[i]; if (e->range_min_khz <= pixel_rate_khz && pixel_rate_khz <= e->range_max_khz) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 5f40ae9e3120..e15fd1454d3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -1102,7 +1102,9 @@ void dce110_link_encoder_hw_init( ASSERT(result == BP_RESULT_OK); } - aux_initialize(enc110); + + if (enc110->aux_regs) + aux_initialize(enc110); /* reinitialize HPD. * hpd_initialize() will pass DIG_FE id to HW context. diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c index 6f2a0d5d963b..62fe5c3b18dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c @@ -40,8 +40,8 @@ #define FN(reg_name, field_name) \ mcif_wb30->mcif_wb_shift->field_name, mcif_wb30->mcif_wb_mask->field_name -#define MCIF_ADDR(addr) (((unsigned long long)addr & 0xffffffffff) + 0xFE) >> 8 -#define MCIF_ADDR_HIGH(addr) (unsigned long long)addr >> 40 +#define MCIF_ADDR(addr) ((uint32_t)((((unsigned long long)(addr) & 0xffffffffffULL) + 0xFEULL) >> 8)) +#define MCIF_ADDR_HIGH(addr) ((uint32_t)(((unsigned long long)(addr)) >> 40)) /* wbif programming guide: * 1. 
set up wbif parameter: diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 2818df555e62..107aec6a1265 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -181,6 +181,16 @@ enum dc_edid_status dm_helpers_read_local_edid( struct dc_link *link, struct dc_sink *sink); +void dm_helpers_read_mccs_caps( + struct dc_context *ctx, + struct dc_link *link, + struct dc_sink *sink); + +void dm_helpers_mccs_vcp_set( + struct dc_context *ctx, + struct dc_link *link, + struct dc_sink *sink); + bool dm_helpers_dp_handle_test_pattern_request( struct dc_context *ctx, const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 887744d56d6a..e82f2d531211 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2399,7 +2399,7 @@ static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_li return low_pstate_lvl; } -void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +void dcn21_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) { struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h index aed00039ca62..8b2226c5bbbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -78,7 +78,7 @@ int dcn21_populate_dml_pipes_from_context(struct dc *dc, enum dc_validate_mode validate_mode); bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, enum dc_validate_mode, display_e2e_pipe_params_st *pipes); -void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn21_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 1a28061bb9ff..ad23215da9f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -587,7 +587,7 @@ void dcn31_calculate_wm_and_dlg_fp( context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - total_det; } -void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +void dcn31_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) { struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct clk_limit_table *clk_table = &bw_params->clk_table; @@ -665,7 +665,7 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31); } -void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +void dcn315_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) { struct clk_limit_table *clk_table = &bw_params->clk_table; int i, max_dispclk_mhz = 0, max_dppclk_mhz = 0; @@ -726,7 +726,7 @@ void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param 
dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN315); } -void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +void dcn316_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) { struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct clk_limit_table *clk_table = &bw_params->clk_table; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index dfcc5d50071e..0b7fcbbfd17b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -44,9 +44,9 @@ void dcn31_calculate_wm_and_dlg_fp( int pipe_cnt, int vlevel); -void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); -void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); -void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn31_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); +void dcn315_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); +void dcn316_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc); int dcn_get_approx_det_segs_required_for_pstate( struct _vcs_dpi_soc_bounding_box_st *soc, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index e29497204df7..eb199215d298 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1610,38 +1610,6 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) return false; } -static void dcn20_adjust_freesync_v_startup(const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start) -{ - struct dc_crtc_timing patched_crtc_timing; - uint32_t asic_blank_end = 0; - uint32_t asic_blank_start = 0; - uint32_t newVstartup = 0; - - patched_crtc_timing = *dc_crtc_timing; - - if (patched_crtc_timing.flags.INTERLACE == 1) { - if (patched_crtc_timing.v_front_porch < 2) - patched_crtc_timing.v_front_porch = 2; - } else { - if (patched_crtc_timing.v_front_porch < 1) - patched_crtc_timing.v_front_porch = 1; - } - - /* blank_start = frame end - front porch */ - asic_blank_start = patched_crtc_timing.v_total - - patched_crtc_timing.v_front_porch; - - /* blank_end = blank_start - active */ - asic_blank_end = asic_blank_start - - patched_crtc_timing.v_border_bottom - - patched_crtc_timing.v_addressable - - patched_crtc_timing.v_border_top; - - newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); - - *vstartup_start = ((newVstartup > *vstartup_start) ? 
newVstartup : *vstartup_start); -} - static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel) @@ -1756,11 +1724,6 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, } } - if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid) - dcn20_adjust_freesync_v_startup( - &context->res_ctx.pipe_ctx[i].stream->timing, - &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start); - pipe_idx++; } /* If DCN isn't making memory requests we can allow pstate change and lower clocks */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile index 2625943d7f7e..8a451c36fdb3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile @@ -100,6 +100,7 @@ DML21 += src/dml2_mcg/dml2_mcg_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn3.o DML21 += src/dml2_pmo/dml2_pmo_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o +DML21 += src/dml2_pmo/dml2_pmo_dcn42.o DML21 += src/dml2_standalone_libraries/lib_float_math.o DML21 += dml21_translation_helper.o DML21 += dml21_wrapper.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c index 8e8935995fca..698d62fb9cf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c @@ -1812,6 +1812,8 @@ static dml_float_t CalculateWriteBackDISPCLK( dml_uint_t WritebackLineBufferSize, dml_float_t DISPCLKDPPCLKVCOSpeed) { + (void)WritebackPixelFormat; + (void)WritebackVRatio; dml_float_t DISPCLK_H, DISPCLK_V, DISPCLK_HB; DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; @@ -1830,6 +1832,8 @@ static dml_float_t CalculateWriteBackDelay( dml_uint_t WritebackSourceHeight, dml_uint_t HTotal) { + (void)WritebackPixelFormat; + (void)WritebackHRatio; dml_float_t CalculateWriteBackDelay; dml_float_t Line_length; dml_float_t Output_lines_last_notclamped; @@ -1977,6 +1981,7 @@ static void CalculateFlipSchedule( dml_float_t *final_flip_bw, dml_bool_t *ImmediateFlipSupportedForPipe) { + (void)HostVMMinPageSize; dml_float_t min_row_time = 0.0; dml_uint_t HostVMDynamicLevelsTrips = 0; dml_float_t TimeForFetchingMetaPTEImmediateFlip = 0; @@ -2118,6 +2123,11 @@ static void CalculateDCCConfiguration( dml_uint_t *IndependentBlockLuma, dml_uint_t *IndependentBlockChroma) { + (void)SurfaceWidthChroma; + (void)SurfaceHeightChroma; + (void)TilingFormat; + (void)BytePerPixelDETY; + (void)BytePerPixelDETC; dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024; dml_uint_t yuv420; @@ -2489,6 +2499,7 @@ static dml_uint_t CalculateVMAndRowBytes( dml_uint_t *DPDE0BytesFrame, dml_uint_t *MetaPTEBytesFrame) { + (void)SourcePixelFormat; dml_uint_t MPDEBytesFrame; dml_uint_t DCCMetaSurfaceBytes; dml_uint_t ExtraDPDEBytesFrame; @@ -3662,6 +3673,8 @@ static void CalculateVMGroupAndRequestTimes( dml_float_t TimePerVMRequestVBlank[], dml_float_t TimePerVMRequestFlip[]) { + (void)dpte_row_width_luma_ub; + (void)dpte_row_width_chroma_ub; dml_uint_t num_group_per_lower_vm_stage; dml_uint_t num_req_per_lower_vm_stage; @@ -3762,6 +3775,7 @@ static void CalculateVMGroupAndRequestTimes( static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch, struct CalculateStutterEfficiency_params_st *p) { + (void)scratch; dml_float_t DETBufferingTimeY = 0; dml_float_t SwathWidthYCriticalSurface = 
0; dml_float_t SwathHeightYCriticalSurface = 0; @@ -4085,6 +4099,7 @@ static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scrat static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch, struct CalculateSwathAndDETConfiguration_params_st *p) { + (void)scratch; dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__]; dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__]; dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__]; @@ -4331,6 +4346,7 @@ static void CalculateSwathWidth( dml_uint_t swath_width_luma_ub[], // per-pipe dml_uint_t swath_width_chroma_ub[]) // per-pipe { + (void)BytePerPixY; enum dml_odm_mode MainSurfaceODMMode; dml_uint_t surface_width_ub_l; dml_uint_t surface_height_ub_l; @@ -5029,6 +5045,7 @@ static void CalculateMaxDETAndMinCompressedBufferSize( dml_uint_t *nomDETInKByte, dml_uint_t *MinCompressedBufferSizeInKByte) { + (void)ROBBufferSizeInKByte; *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; *nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) *MaxTotalDETInKByte / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c index 4022f91193ed..b2fada6c44c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c @@ -178,6 +178,7 @@ dml_float_t dml_log2(dml_float_t x) dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding) { + (void)bankers_rounding; // if (bankers_rounding) // return (dml_float_t) lrint(val); // else { @@ -217,6 +218,7 @@ dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *rq_regs) { + (void)rq_regs; dml_print("DML: ===================================== \n"); dml_print("DML: DISPLAY_PLANE_RQ_REGS_ST\n"); dml_print("DML: chunk_size = 0x%x\n", rq_regs->chunk_size); @@ -248,6 +250,7 @@ void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs) void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs) { + (void)dlg_regs; dml_print("DML: ===================================== \n"); dml_print("DML: DISPLAY_DLG_REGS_ST \n"); dml_print("DML: refcyc_h_blank_end = 0x%x\n", dlg_regs->refcyc_h_blank_end); @@ -299,6 +302,7 @@ void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs) void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs) { + (void)ttu_regs; dml_print("DML: ===================================== \n"); dml_print("DML: DISPLAY_TTU_REGS_ST \n"); dml_print("DML: qos_level_low_wm = 0x%x\n", ttu_regs->qos_level_low_wm); @@ -326,6 +330,7 @@ void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs) void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy) { + (void)policy; dml_print("DML: ===================================== \n"); dml_print("DML: DML_MODE_EVAL_POLICY_ST\n"); dml_print("DML: Policy: UseUnboundedRequesting = 0x%x\n", policy->UseUnboundedRequesting); @@ -353,6 +358,8 @@ void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy) void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j) { + (void)j; + (void)mode_lib; dml_print("DML: MODE SUPPORT: ===============================================\n"); dml_print("DML: MODE SUPPORT: Voltage State %d\n", j); 
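/*
 * Aside (illustrative, not part of the patch): the (void) casts added
 * throughout these DML files are the conventional C idiom for marking a
 * parameter as intentionally unused, so builds with -Wunused-parameter
 * stay quiet when dml_print() or another debug path compiles away to
 * nothing:
 *
 *	static void show_state(int verbose)
 *	{
 *		(void)verbose;	// only referenced in debug builds
 *	}
 */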
dml_print("DML: MODE SUPPORT: Mode Supported : %s\n", mode_lib->ms.support.ModeSupport[j] == true ? "Supported" : "NOT Supported"); @@ -526,6 +533,7 @@ void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *supp void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane) { + (void)timing; for (dml_uint_t i = 0; i < num_plane; i++) { dml_print("DML: timing_cfg: plane=%d, HTotal = %d\n", i, timing->HTotal[i]); dml_print("DML: timing_cfg: plane=%d, VTotal = %d\n", i, timing->VTotal[i]); @@ -542,6 +550,7 @@ void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dm void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane) { + (void)plane; dml_print("DML: plane_cfg: num_plane = %d\n", num_plane); dml_print("DML: plane_cfg: GPUVMEnable = %d\n", plane->GPUVMEnable); dml_print("DML: plane_cfg: HostVMEnable = %d\n", plane->HostVMEnable); @@ -590,6 +599,7 @@ void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_u void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane) { + (void)surface; for (dml_uint_t i = 0; i < num_plane; i++) { dml_print("DML: surface_cfg: plane=%d, PitchY = %d\n", i, surface->PitchY[i]); dml_print("DML: surface_cfg: plane=%d, SurfaceWidthY = %d\n", i, surface->SurfaceWidthY[i]); @@ -609,6 +619,7 @@ void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane) { + (void)hw; for (dml_uint_t i = 0; i < num_plane; i++) { dml_print("DML: hw_resource: plane=%d, ODMMode = %d\n", i, hw->ODMMode[i]); dml_print("DML: hw_resource: plane=%d, DPPPerSurface = %d\n", i, hw->DPPPerSurface[i]); @@ -620,6 +631,7 @@ void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, __DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state) { + (void)state; dml_print("DML: state_bbox: socclk_mhz = %f\n", state->socclk_mhz); dml_print("DML: state_bbox: dscclk_mhz = %f\n", state->dscclk_mhz); dml_print("DML: state_bbox: phyclk_mhz = %f\n", state->phyclk_mhz); @@ -649,6 +661,7 @@ __DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_ __DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc) { + (void)soc; dml_print("DML: soc_bbox: dprefclk_mhz = %f\n", soc->dprefclk_mhz); dml_print("DML: soc_bbox: xtalclk_mhz = %f\n", soc->xtalclk_mhz); dml_print("DML: soc_bbox: pcierefclk_mhz = %f\n", soc->pcierefclk_mhz); @@ -686,6 +699,7 @@ __DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box __DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg) { + (void)clk_cfg; dml_print("DML: clk_cfg: 0-use_required, 1-use pipe.clks_cfg, 2-use state bbox\n"); dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option); dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option); diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c index 2f0e0048bea8..476030193f14 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c @@ -389,7 +389,9 @@ static void populate_dml21_dummy_surface_cfg(struct dml2_surface_cfg *surface, c 
surface->tiling = dml2_sw_64kb_2d; } -static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, const struct dc_stream_state *stream) +static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, + const struct dc_stream_state *stream, + const struct dml2_soc_bb *soc_bb) { unsigned int width, height; @@ -433,7 +435,8 @@ static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, plane->pixel_format = dml2_444_32; plane->dynamic_meta_data.enable = false; - plane->overrides.gpuvm_min_page_size_kbytes = 256; + plane->overrides.gpuvm_min_page_size_kbytes = soc_bb->gpuvm_min_page_size_kbytes; + plane->overrides.hostvm_min_page_size_kbytes = soc_bb->hostvm_min_page_size_kbytes; } static void populate_dml21_surface_config_from_plane_state( @@ -441,6 +444,7 @@ static void populate_dml21_surface_config_from_plane_state( struct dml2_surface_cfg *surface, const struct dc_plane_state *plane_state) { + (void)in_dc; surface->plane0.pitch = plane_state->plane_size.surface_pitch; surface->plane1.pitch = plane_state->plane_size.chroma_pitch; surface->plane0.height = plane_state->plane_size.surface_size.height; @@ -503,7 +507,7 @@ static const struct scaler_data *get_scaler_data_for_plane( static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dml_ctx, struct dml2_plane_parameters *plane, const struct dc_plane_state *plane_state, - const struct dc_state *context, unsigned int stream_index) + const struct dc_state *context, unsigned int stream_index, const struct dml2_soc_bb *soc_bb) { const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); struct dc_stream_state *stream = context->streams[stream_index]; @@ -647,7 +651,8 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->composition.rotation_angle = (enum dml2_rotation_angle) plane_state->rotation; plane->stream_index = stream_index; - plane->overrides.gpuvm_min_page_size_kbytes = 256; + plane->overrides.gpuvm_min_page_size_kbytes = soc_bb->gpuvm_min_page_size_kbytes; + plane->overrides.hostvm_min_page_size_kbytes = soc_bb->hostvm_min_page_size_kbytes; plane->immediate_flip = plane_state->flip_immediate; @@ -785,7 +790,9 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (context->stream_status[stream_index].plane_count == 0) { disp_cfg_plane_location = dml_dispcfg->num_planes++; populate_dml21_dummy_surface_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->streams[stream_index]); - populate_dml21_dummy_plane_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->streams[stream_index]); + populate_dml21_dummy_plane_cfg( + &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], + context->streams[stream_index], &dml_ctx->v21.dml_init.soc_bb); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; } else { for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { @@ -797,7 +804,10 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); - populate_dml21_plane_config_from_plane_state(dml_ctx, 
&dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); + populate_dml21_plane_config_from_plane_state( + dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], + context->stream_status[stream_index].plane_states[plane_index], + context, stream_index, &dml_ctx->v21.dml_init.soc_bb); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) @@ -873,6 +883,7 @@ static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_wat void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx) { + (void)in_dc; const struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming; unsigned int wm_index; @@ -907,6 +918,7 @@ void dml21_get_pipe_mcache_config( struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config) { + (void)context; mcache_pipe_config->plane0.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x; mcache_pipe_config->plane0.viewport_width = pipe_ctx->plane_res.scl_data.viewport.width; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c index 4724b08c77e1..732de97335fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c @@ -88,6 +88,7 @@ int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], int dml_plane_idx) { + (void)in_dc; unsigned int dml_stream_index; unsigned int main_stream_id; unsigned int dc_plane_index; @@ -282,6 +283,7 @@ static struct dc_plane_state *dml21_add_phantom_plane(struct dml2_context *dml_c struct dc_plane_state *main_plane, struct dml2_per_plane_programming *plane_programming) { + (void)plane_programming; struct dc_plane_state *phantom_plane; phantom_plane = dml_ctx->config.svp_pstate.callbacks.create_phantom_plane(dc, context, main_plane); diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c index 7398f8b69adb..8bed59e976d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c @@ -58,8 +58,8 @@ bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const s void dml21_destroy(struct dml2_context *dml2) { - vfree(dml2->v21.dml_init.dml2_instance); - vfree(dml2->v21.mode_programming.programming); + DC_RUN_WITH_PREEMPTION_ENABLED(vfree(dml2->v21.dml_init.dml2_instance)); + DC_RUN_WITH_PREEMPTION_ENABLED(vfree(dml2->v21.mode_programming.programming)); } void dml21_copy(struct dml2_context *dst_dml_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper_fpu.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper_fpu.c index cc992af6ac9c..de40d7bae252 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper_fpu.c @@ -51,6 +51,8 @@ void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const st static void 
dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt) { + (void)out_new_hw_state; + (void)pipe_cnt; unsigned int dml_prog_idx = 0, dc_pipe_index = 0, num_dpps_required = 0; struct dml2_per_plane_programming *pln_prog = NULL; struct dml2_per_stream_programming *stream_prog = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn42_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn42_soc_bb.h index c75778ea7a2c..040d89f6de35 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn42_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn42_soc_bb.h @@ -68,6 +68,7 @@ static const struct dml2_soc_qos_parameters dml_dcn42_variant_a_soc_qos_params = .qos_type = dml2_qos_param_type_dcn3, }; +/* Default SOC bounding box for DCN42 based on LPDDR5/LPCAMM2 latencies*/ static const struct dml2_soc_bb dml2_socbb_dcn42 = { .clk_table = { .wck_ratio = { @@ -185,12 +186,13 @@ static const struct dml2_soc_bb dml2_socbb_dcn42 = { .qos_type = dml2_qos_param_type_dcn3, }, + /* DCN42 params for LPDDR5/LPCAMM2 */ .power_management_parameters = { - .dram_clk_change_blackout_us = 29, + .dram_clk_change_blackout_us = 36, .fclk_change_blackout_us = 0, .g7_ppt_blackout_us = 0, - .stutter_enter_plus_exit_latency_us = 11, - .stutter_exit_latency_us = 9, + .stutter_enter_plus_exit_latency_us = 14, + .stutter_exit_latency_us = 12, .z8_stutter_enter_plus_exit_latency_us = 300, .z8_stutter_exit_latency_us = 200, }, @@ -203,12 +205,12 @@ static const struct dml2_soc_bb dml2_socbb_dcn42 = { .xtalclk_mhz = 24, .pcie_refclk_mhz = 100, .dchub_refclk_mhz = 50, - .mall_allocated_for_dcn_mbytes = 64, + .mall_allocated_for_dcn_mbytes = 0, .max_outstanding_reqs = 256, .fabric_datapath_to_dcn_data_return_bytes = 32, .return_bus_width_bytes = 64, .hostvm_min_page_size_kbytes = 4, - .gpuvm_min_page_size_kbytes = 256, + .gpuvm_min_page_size_kbytes = 4, .gpuvm_max_page_table_levels = 1, .hostvm_max_non_cached_page_table_levels = 2, .phy_downspread_percent = 0.38, @@ -222,6 +224,17 @@ static const struct dml2_soc_bb dml2_socbb_dcn42 = { .max_fclk_for_uclk_dpm_khz = 2200 * 1000, }; +/* DCN42 params for DDR5 */ +struct dml2_soc_power_management_parameters dcn42_ddr5_power_management_parameters = { + .dram_clk_change_blackout_us = 36, + .fclk_change_blackout_us = 0, + .g7_ppt_blackout_us = 0, + .stutter_enter_plus_exit_latency_us = 23.5, + .stutter_exit_latency_us = 21.5, + .z8_stutter_enter_plus_exit_latency_us = 300, + .z8_stutter_exit_latency_us = 200, +}; + static const struct dml2_ip_capabilities dml2_dcn42_max_ip_caps = { .pipe_count = 4, .otg_count = 4, @@ -234,7 +247,7 @@ static const struct dml2_ip_capabilities dml2_dcn42_max_ip_caps = { .config_return_buffer_segment_size_in_kbytes = 64, .meta_fifo_size_in_kentries = 32, .compressed_buffer_segment_size_in_kbytes = 64, - .cursor_buffer_size = 24, + .cursor_buffer_size = 42, .max_flip_time_us = 110, .max_flip_time_lines = 50, .hostvm_mode = 0, diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h index 4e9abe1a568d..79dfba54344c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h @@ -26,20 +26,6 @@ enum dml2_swizzle_mode { 
dml2_gfx11_sw_64kb_r_x, dml2_gfx11_sw_256kb_d_x, dml2_gfx11_sw_256kb_r_x, - - dml2_sw_linear_256b, // GFX10 SW_LINEAR only accepts 256 byte aligned pitch - dml2_gfx10_sw_64kb_r_x, - dml2_gfx102_sw_64kb_s, - dml2_gfx102_sw_64kb_s_t, - dml2_gfx102_sw_64kb_s_x, - dml2_gfx102_sw_64kb_r_x, - - dml2_linear_64elements, // GFX7 LINEAR_ALIGNED accepts pitch alignment of the maximum of 64 elements or 256 bytes - dml2_gfx7_1d_thin, - dml2_gfx7_2d_thin_gen_zero, - dml2_gfx7_2d_thin_gen_one, - dml2_gfx7_2d_thin_arlene, - dml2_gfx7_2d_thin_anubis }; enum dml2_source_format_class { diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c index 99fc2f0666e2..858e7bbc511f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c @@ -135,7 +135,7 @@ struct dml2_core_ip_params core_dcn42_ip_caps_base = { .cursor_64bpp_support = true, .dynamic_metadata_vm_enabled = false, - .max_num_hdmi_frl_outputs = 0, + .max_num_hdmi_frl_outputs = 1, .max_num_dp2p0_outputs = 2, .max_num_dp2p0_streams = 4, .imall_supported = 1, @@ -155,7 +155,7 @@ struct dml2_core_ip_params core_dcn42_ip_caps_base = { .min_meta_chunk_size_bytes = 256, .dchub_arb_to_ret_delay = 102, - .hostvm_mode = 1, + .hostvm_mode = 0, }; static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params) @@ -281,6 +281,7 @@ static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) { + (void)main_stream; memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); phantom->stream_index = phantom_stream_index; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index f6402e199354..827bd9143c87 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -840,6 +840,7 @@ static void CalculateSwathWidth( unsigned int swath_width_luma_ub[], // per-pipe unsigned int swath_width_chroma_ub[]) // per-pipe { + (void)BytePerPixY; enum dml2_odm_mode MainSurfaceODMMode; double odm_hactive_factor = 1.0; unsigned int req_width_horz_y; @@ -1283,6 +1284,8 @@ static double TruncToValidBPP( // Output unsigned int *RequiredSlots) { + (void)DSCInputBitPerComponent; + (void)RequiredSlots; double MaxLinkBPP; unsigned int MinDSCBPP; double MaxDSCBPP; @@ -1922,6 +1925,7 @@ static void CalculateRowBandwidth( double *dpte_row_bw, double *meta_row_bw) { + (void)use_one_row_for_frame; if (!DCCEnable || !mrq_present) { *meta_row_bw = 0; } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { @@ -2020,6 +2024,11 @@ static void CalculateDCCConfiguration( unsigned int *IndependentBlockLuma, unsigned int *IndependentBlockChroma) { + (void)SurfaceWidthChroma; + (void)SurfaceHeightChroma; + (void)TilingFormat; + (void)BytePerPixelDETY; + (void)BytePerPixelDETC; unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; unsigned int 
segment_order_horz_contiguous_luma; @@ -2270,6 +2279,7 @@ static void calculate_mcache_row_bytes( struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_mcache_row_bytes_params *p) { + (void)scratch; unsigned int vmpg_bytes = 0; unsigned int blk_bytes = 0; float meta_per_mvmpg_per_channel = 0; @@ -3642,6 +3652,8 @@ static double CalculateWriteBackDelay( unsigned int WritebackSourceHeight, unsigned int HTotal) { + (void)WritebackPixelFormat; + (void)WritebackHRatio; double CalculateWriteBackDelay; double Line_length; double Output_lines_last_notclamped; @@ -3959,6 +3971,7 @@ static enum dml2_odm_mode DecideODMMode(unsigned int HActive, double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, double SurfaceRequiredDISPCLKWithODMCombineFourToOne) { + (void)SurfaceRequiredDISPCLKWithODMCombineFourToOne; enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock; enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive; enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive; @@ -4460,6 +4473,8 @@ static double CalculateWriteBackDISPCLK( unsigned int HTotal, unsigned int WritebackLineBufferSize) { + (void)WritebackPixelFormat; + (void)WritebackVRatio; double DISPCLK_H, DISPCLK_V, DISPCLK_HB; DISPCLK_H = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio; @@ -4561,6 +4576,10 @@ static void CalculateSurfaceSizeInMall( unsigned int SurfaceSizeInMALL[], bool *ExceededMALLSize) { + (void)Read256BytesBlockWidthY; + (void)Read256BytesBlockWidthC; + (void)Read256BytesBlockHeightY; + (void)Read256BytesBlockHeightC; unsigned int TotalSurfaceSizeInMALLForSS = 0; unsigned int TotalSurfaceSizeInMALLForSubVP = 0; unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; @@ -4620,6 +4639,7 @@ static void calculate_tdlut_setting( struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_tdlut_setting_params *p) { + (void)scratch; // locals unsigned int tdlut_bpe = 8; unsigned int tdlut_width; @@ -6503,6 +6523,7 @@ static void CalculateFlipSchedule( double *final_flip_bw, bool *ImmediateFlipSupportedForPipe) { + (void)use_one_row_for_frame_flip; struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; @@ -7381,7 +7402,7 @@ static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_inter s->tdlut_bytes_per_group, s->HostVMInefficiencyFactor, s->HostVMInefficiencyFactorPrefetch, - mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.hostvm_min_page_size_kbytes * 1024, mode_lib->soc.qos_parameters.qos_type, !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), mode_lib->soc.max_outstanding_reqs, @@ -7477,12 +7498,11 @@ static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_inter CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; 
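The HostVMMinPageSize assignments just above show the unit fix threaded through this file: hostvm_min_page_size_kbytes is stored in kilobytes, but CalculateVMRowAndSwath and the prefetch-schedule code consume HostVMMinPageSize as a byte count, so passing the raw field understated the host VM page size by a factor of 1024. The patch open-codes the conversion at each call site; expressed as a helper purely for illustration (this function, and the unsigned type it assumes for the field, are not from the patch):

/* Illustrative only; the patch itself open-codes "* 1024" per call site. */
static inline unsigned int hostvm_min_page_size_bytes(const struct dml2_soc_bb *soc)
{
	/* e.g. 4 kbytes in the DCN42 bounding box -> 4096 bytes */
	return soc->hostvm_min_page_size_kbytes * 1024;
}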
CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; @@ -8965,7 +8985,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY; CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; @@ -9968,6 +9988,8 @@ static void CalculateVMGroupAndRequestTimes( double TimePerVMRequestVBlank[], double TimePerVMRequestFlip[]) { + (void)dpte_row_width_luma_ub; + (void)dpte_row_width_chroma_ub; unsigned int num_group_per_lower_vm_stage = 0; unsigned int num_req_per_lower_vm_stage = 0; unsigned int num_group_per_lower_vm_stage_flip; @@ -10755,7 +10777,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY; CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; @@ -10971,7 +10993,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->tdlut_bytes_per_group, s->HostVMInefficiencyFactor, s->HostVMInefficiencyFactorPrefetch, - mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.hostvm_min_page_size_kbytes * 1024, mode_lib->soc.qos_parameters.qos_type, !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), mode_lib->soc.max_outstanding_reqs, @@ -11264,12 +11286,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; 
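The (void) casts added throughout these calculation routines are the standard C idiom for a parameter that must remain in the prototype (these functions are wired into shared interfaces and table-driven call sites) but is unused by a particular implementation: the cast evaluates the argument name and discards it, silencing -Wunused-parameter without touching any signature. A standalone illustration modeled on the dummy hooks in this series, with a hypothetical function name:

/* Keeping an interface-mandated parameter while marking it unused. */
static bool dummy_hook_example(struct dml2_display_cfg *in_out)
{
	(void)in_out;	/* required by the hook signature, intentionally unused */
	return true;
}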
CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency; CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch; CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc; CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k]; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h index 953f40fde1e1..080bc3c3d244 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h @@ -269,6 +269,9 @@ struct dml2_core_internal_mode_support_info { bool global_dram_clock_change_supported; bool global_fclk_change_supported; bool global_temp_read_or_ppt_supported; + bool fclk_pstate_schedule_admissible; + bool temp_read_pstate_schedule_admissible; + bool ppt_pstate_schedule_admissible; bool USRRetrainingSupport; bool AvgBandwidthSupport; bool UrgVactiveBandwidthSupport; @@ -1063,6 +1066,8 @@ struct dml2_core_calcs_mode_support_locals { bool dummy_boolean_array[2][DML2_MAX_PLANES]; double dummy_single[3]; double dummy_single_array[DML2_MAX_PLANES]; + double dummy_double_array[3][DML2_MAX_PLANES]; + enum dml2_pstate_method dummy_pstate_method_array[DML2_MAX_PLANES]; struct dml2_core_internal_watermarks dummy_watermark; double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; double surface_dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; @@ -1721,30 +1726,30 @@ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_param double ReturnBW; bool SynchronizeTimings; bool SynchronizeDRRDisplaysForUCLKPStateChange; - unsigned int *dpte_group_bytes; + const unsigned int *dpte_group_bytes; struct dml2_core_internal_SOCParametersList mmSOCParameters; unsigned int WritebackChunkSize; double SOCCLK; double DCFClkDeepSleep; - unsigned int *DETBufferSizeY; - unsigned int *DETBufferSizeC; - unsigned int *SwathHeightY; - unsigned int *SwathHeightC; - unsigned int *SwathWidthY; - unsigned int *SwathWidthC; - unsigned int *DPPPerSurface; - double *BytePerPixelDETY; - double *BytePerPixelDETC; - unsigned int *DSTXAfterScaler; - unsigned int *DSTYAfterScaler; + const unsigned int *DETBufferSizeY; + const unsigned int *DETBufferSizeC; + const unsigned int *SwathHeightY; + const unsigned int *SwathHeightC; + const unsigned int *SwathWidthY; + const unsigned int *SwathWidthC; + const unsigned int *DPPPerSurface; + const double *BytePerPixelDETY; + const double *BytePerPixelDETC; + const unsigned int *DSTXAfterScaler; + const unsigned int *DSTYAfterScaler; bool UnboundedRequestEnabled; unsigned int CompressedBufferSizeInkByte; bool max_outstanding_when_urgent_expected; - unsigned int max_outstanding_requests; - unsigned int max_request_size_bytes; - unsigned int *meta_row_height_l; - unsigned int *meta_row_height_c; - 
enum dml2_pstate_method *uclk_pstate_switch_modes; + const unsigned int max_outstanding_requests; + const unsigned int max_request_size_bytes; + const unsigned int *meta_row_height_l; + const unsigned int *meta_row_height_c; + const enum dml2_pstate_method *uclk_pstate_switch_modes; // Output struct dml2_core_internal_watermarks *Watermark; @@ -1931,7 +1936,6 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { bool DynamicMetadataVMEnabled; unsigned int DynamicMetadataLinesBeforeActiveRequired; unsigned int DynamicMetadataTransmittedBytes; - double UrgentLatency; double ExtraLatencyPrefetch; double TCalc; unsigned int vm_bytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c index 6930ba7ce5b7..4f5533dc0430 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c @@ -428,10 +428,6 @@ bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_c unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) { - if (dml2_core_utils_get_gfx_version(sw_mode) == 10 || dml2_core_utils_get_gfx_version(sw_mode) == 7) { - return dml2_core_utils_get_tile_block_size_bytes_backcompat(sw_mode, byte_per_pixel); - } - if (sw_mode == dml2_sw_linear) return 256; else if (sw_mode == dml2_sw_256b_2d) @@ -462,56 +458,14 @@ unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw }; } -unsigned int dml2_core_utils_get_tile_block_size_bytes_backcompat(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) -{ - if (sw_mode == dml2_sw_linear_256b) - return 256; - else if (sw_mode == dml2_gfx10_sw_64kb_r_x) - return 65536; - else if (sw_mode == dml2_gfx102_sw_64kb_s) - return 65536; - else if (sw_mode == dml2_gfx102_sw_64kb_s_t) - return 65536; - else if (sw_mode == dml2_gfx102_sw_64kb_s_x) - return 65536; - else if (sw_mode == dml2_gfx102_sw_64kb_r_x) - return 65536; - else if (sw_mode == dml2_linear_64elements) - return 256; - else if (sw_mode == dml2_gfx7_1d_thin) - return 256; - else if (sw_mode == dml2_gfx7_2d_thin_gen_zero) - return (128 * 64 * byte_per_pixel); - else if (sw_mode == dml2_gfx7_2d_thin_gen_one) - return (128 * 128 * byte_per_pixel); - else if (sw_mode == dml2_gfx7_2d_thin_arlene) - return (64 * 32 * byte_per_pixel); - else if (sw_mode == dml2_gfx7_2d_thin_anubis) - return (128 * 128 * byte_per_pixel); - else { - DML_ASSERT(0); - return 256; - }; -} - bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) { - if (dml2_core_utils_get_gfx_version(sw_mode) == 10 || dml2_core_utils_get_gfx_version(sw_mode) == 7) { - return dml2_core_utils_get_segment_horizontal_contiguous_backcompat(sw_mode, byte_per_pixel); - } else { - return (byte_per_pixel != 2); - } -} - -bool dml2_core_utils_get_segment_horizontal_contiguous_backcompat(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) -{ - return !((byte_per_pixel == 4) && - ((sw_mode == dml2_gfx10_sw_64kb_r_x) || (sw_mode == dml2_gfx102_sw_64kb_s) || (sw_mode == dml2_gfx102_sw_64kb_s_t) || (sw_mode == dml2_gfx102_sw_64kb_s_x))); + return (byte_per_pixel != 2); } bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode) { - return (sw_mode == dml2_sw_linear || sw_mode == dml2_sw_linear_256b || sw_mode == dml2_linear_64elements); + return sw_mode 
== dml2_sw_linear; }; @@ -544,20 +498,6 @@ int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_gfx11_sw_256kb_d_x || sw_mode == dml2_gfx11_sw_256kb_r_x) version = 11; - else if (sw_mode == dml2_sw_linear_256b || - sw_mode == dml2_gfx10_sw_64kb_r_x || - sw_mode == dml2_gfx102_sw_64kb_s || - sw_mode == dml2_gfx102_sw_64kb_s_t || - sw_mode == dml2_gfx102_sw_64kb_s_x || - sw_mode == dml2_gfx102_sw_64kb_r_x) - version = 10; - else if (sw_mode == dml2_linear_64elements || - sw_mode == dml2_gfx7_1d_thin || - sw_mode == dml2_gfx7_2d_thin_gen_zero || - sw_mode == dml2_gfx7_2d_thin_gen_one || - sw_mode == dml2_gfx7_2d_thin_arlene || - sw_mode == dml2_gfx7_2d_thin_anubis) - version = 7; else { DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); DML_ASSERT(0); @@ -648,6 +588,7 @@ static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) { + (void)main_stream; memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); phantom->stream_index = phantom_stream_index; @@ -845,3 +786,11 @@ bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) return false; } } + +double dml2_core_utils_get_frame_time_us(const struct dml2_stream_parameters *stream) +{ + double otg_vline_time_us = (double)stream->timing.h_total / (double)stream->timing.pixel_clock_khz * 1000.0; + double non_vtotal = stream->timing.vblank_nom + stream->timing.v_active; + double frame_time_us = non_vtotal * otg_vline_time_us; + return frame_time_us; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h index 471e73ed671c..60fa2abfef85 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h @@ -22,8 +22,6 @@ void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_in bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); -unsigned int dml2_core_utils_get_tile_block_size_bytes_backcompat(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); -bool dml2_core_utils_get_segment_horizontal_contiguous_backcompat(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode); int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); @@ -41,5 +39,6 @@ bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stre bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); +double dml2_core_utils_get_frame_time_us(const struct dml2_stream_parameters *stream); #endif /* __DML2_CORE_UTILS_H__ */ diff --git 
a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index ab0b4a4b5d65..5ffe211a6643 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -552,6 +552,7 @@ static int get_displays_without_vactive_margin_mask(struct dml2_dpmm_map_mode_to static int get_displays_with_fams_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us) { + (void)latency_hiding_requirement_us; unsigned int i; int displays_with_fams_mask = 0x0; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index 1f2d9e97f5fd..39965ff2e111 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -8,11 +8,13 @@ static bool dummy_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { + (void)in_out; return true; } static bool dummy_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) { + (void)in_out; return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c index 3dcd2c250633..fb0b0ac547c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -9,6 +9,7 @@ static bool dummy_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) { + (void)in_out; return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn42.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn42.c new file mode 100644 index 000000000000..30fd5efe4b87 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn42.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2026 Advanced Micro Devices, Inc. + +#include "dml2_pmo_dcn42.h" +#include "lib_float_math.h" +#include "dml2_debug.h" +#include "dml2_pmo_dcn4_fams2.h" + +/* + * DCN42 PMO Policy Implementation + * This implementation provides VBlank-only strategies for 1, 2, 3, and 4 display + * configurations, ensuring p-state watermark support in the blank period only. 
+ */ + +static const struct dml2_pmo_pstate_strategy dcn42_strategy_list_1_display[] = { + // VBlank only + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, +}; + +static const int dcn42_strategy_list_1_display_size = sizeof(dcn42_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy dcn42_strategy_list_2_display[] = { + // VBlank only for both displays + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, +}; + +static const int dcn42_strategy_list_2_display_size = sizeof(dcn42_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy dcn42_strategy_list_3_display[] = { + // VBlank only for all three displays + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, + .allow_state_increase = true, + }, +}; + +static const int dcn42_strategy_list_3_display_size = sizeof(dcn42_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy dcn42_strategy_list_4_display[] = { + // VBlank only for all four displays + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, + .allow_state_increase = true, + }, +}; + +static const int dcn42_strategy_list_4_display_size = sizeof(dcn42_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); + +bool pmo_dcn42_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + const struct dml2_pmo_scratch *s = &in_out->instance->scratch; + const int REQUIRED_RESERVED_TIME = + (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; + bool p_state_supported = true; + unsigned int stream_index; + + if (in_out->base_display_config->display_config.overrides.all_streams_blanked) + return true; + + if (s->pmo_dcn4.cur_pstate_candidate < 0) + return false; + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank) { + if (dcn4_get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || + dcn4_get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > 0) { + p_state_supported = false; + break; + } + } else { + p_state_supported = false; + break; + } + } + + return p_state_supported; +} + +bool pmo_dcn42_initialize(struct dml2_pmo_initialize_in_out *in_out) +{ + int i = 0; + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int base_list_size = 0; + const struct dml2_pmo_pstate_strategy *base_list = NULL; + unsigned int *expanded_list_size = NULL; + struct dml2_pmo_pstate_strategy *expanded_list = NULL; + + DML_LOG_COMP_IF_ENTER(); + + pmo->soc_bb = in_out->soc_bb; + pmo->ip_caps = in_out->ip_caps; + pmo->mpc_combine_limit = 2; + pmo->odm_combine_limit = 4; + pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; + + /* + * DCN42 does not support FAMS features like SubVP and DRR. 
+ * These parameters are initialized to safe values but won't be used + * since our strategies only use VBlank. + */ + pmo->fams_params.v2.subvp.refresh_rate_limit_max = 0; + pmo->fams_params.v2.subvp.refresh_rate_limit_min = 0; + pmo->fams_params.v2.drr.refresh_rate_limit_max = 0; + pmo->fams_params.v2.drr.refresh_rate_limit_min = 0; + + pmo->options = in_out->options; + + /* Generate permutations of p-state configs from base strategy list */ + for (i = 0; i < PMO_DCN4_MAX_DISPLAYS; i++) { + switch (i+1) { + case 1: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = dcn42_strategy_list_1_display; + base_list_size = dcn42_strategy_list_1_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display; + + break; + case 2: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = dcn42_strategy_list_2_display; + base_list_size = dcn42_strategy_list_2_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display; + + break; + case 3: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = dcn42_strategy_list_3_display; + base_list_size = dcn42_strategy_list_3_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display; + + break; + case 4: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = dcn42_strategy_list_4_display; + base_list_size = dcn42_strategy_list_4_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display; + + break; + } + + DML_ASSERT(base_list_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* + * Populate list using DCN4 FAMS2 expansion function. + * Since our strategies only contain VBlank methods, the expansion + * will not introduce any FAMS-specific logic. + */ + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_list, + base_list_size, + i + 1, + expanded_list, + expanded_list_size); + } + + DML_LOG_DEBUG("%s exit with true\n", __func__); + DML_LOG_COMP_IF_EXIT(); + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn42.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn42.h new file mode 100644 index 000000000000..31ba8575351d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn42.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2026 Advanced Micro Devices, Inc. 
+ */ + +#ifndef __DML2_PMO_DCN42_H__ +#define __DML2_PMO_DCN42_H__ + +#include "dml2_internal_shared_types.h" + +struct dml2_pmo_initialize_in_out; +struct dml2_pmo_test_for_pstate_support_in_out; + +bool pmo_dcn42_initialize(struct dml2_pmo_initialize_in_out *in_out); +bool pmo_dcn42_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); + +#endif /* __DML2_PMO_DCN42_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index e8691983c0eb..b348c65a0f75 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -428,6 +428,7 @@ static void insert_strategy_into_expanded_list( struct dml2_pmo_pstate_strategy *expanded_strategy_list, unsigned int *num_expanded_strategies) { + (void)stream_count; if (expanded_strategy_list && num_expanded_strategies) { memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); @@ -520,6 +521,7 @@ static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_ const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], const unsigned int stream_count) { + (void)variant_strategy; bool valid = true; unsigned int i; @@ -1180,6 +1182,7 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) { + (void)stream_count; scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy; scratch->pmo_dcn4.num_pstate_candidates++; } @@ -1659,7 +1662,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins return is_config_schedulable(pmo, display_cfg, pstate_strategy); } -static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) +int dcn4_get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) { unsigned int i; int min_vactive_margin_us = 0xFFFFFFF; @@ -1847,6 +1850,7 @@ static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo, struct display_configuation_with_meta *display_config, int stream_index) { + (void)display_config; struct dml2_implicit_svp_meta *stream_svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; @@ -1903,7 +1907,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= (int)(MIN_VACTIVE_MARGIN_PCT * pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us)) + if (dcn4_get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= (int)(MIN_VACTIVE_MARGIN_PCT * pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us)) set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); /* FAMS2 meta */ @@ -1990,6 +1994,7 @@ static void 
setup_planes_for_drr_by_mask(struct display_configuation_with_meta * struct dml2_pmo_instance *pmo, int plane_mask) { + (void)pmo; unsigned int plane_index; struct dml2_plane_parameters *plane; @@ -2177,7 +2182,9 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ return success; } -static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) +int dcn4_get_minimum_reserved_time_us_for_planes( + const struct display_configuation_with_meta *display_config, + int plane_mask) { int min_time_us = 0xFFFFFF; unsigned int plane_index = 0; @@ -2217,16 +2224,16 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { - if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || + if (dcn4_get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { - if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < + if (dcn4_get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || - get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { + dcn4_get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { p_state_supported = false; break; } @@ -2238,7 +2245,7 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp } } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pstate_method_fw_drr) || - get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { + dcn4_get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { p_state_supported = false; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h index 6baab7ad6ecc..f0afa8002a2f 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -7,6 +7,16 @@ #include "dml2_internal_shared_types.h" +struct display_configuation_with_meta; + +int dcn4_get_vactive_pstate_margin( + const struct display_configuation_with_meta *display_cfg, + int plane_mask); + +int dcn4_get_minimum_reserved_time_us_for_planes( + const struct display_configuation_with_meta *display_config, + int plane_mask); + bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out); bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c index 4d687fa86caa..83802aac11cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -9,16 +9,19 @@ static bool dummy_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) { + (void)in_out; return false; } static bool dummy_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) { + (void)in_out; return true; } static bool dummy_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) { + (void)in_out; return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c index e17b5ceba447..dc5bc649f3ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -23,7 +23,7 @@ double math_mod(const double arg1, const double arg2) return arg2; if (isNaN(arg2)) return arg1; - return arg1 - arg1 * ((int)(arg1 / arg2)); + return arg1 - arg2 * ((int)(arg1 / arg2)); } double math_min2(const double arg1, const double arg2) diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c index 4a7c4c62111e..fa20a91c6e16 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c @@ -17,6 +17,7 @@ static void setup_unoptimized_display_config_with_meta(const struct dml2_instanc static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) { + (void)dml; memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); out->stage1.min_clk_index_for_latency = 0; } @@ -472,6 +473,7 @@ static unsigned int count_elements_in_span(int *array, unsigned int array_size, static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) { + (void)h_active; int i, slice_width; const char MAX_SCL_VP_OVERLAP = 3; bool success = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c index 6ef93c6fc1cd..6b78334c2554 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c @@ -178,6 +178,10 @@ static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx, static bool validate_pipe_assignment(const struct dml2_context *ctx, const struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, const struct dml2_dml_to_dc_pipe_mapping *mapping) { + (void)ctx; + (void)disp_cfg; + (void)mapping; + (void)state; // int i, j, k; // // unsigned int plane_id; @@ -292,6 +296,7 @@ static unsigned int find_last_resort_pipe_candidates(const struct dc_state *exis const unsigned int stream_id, unsigned int *last_resort_pipe_candidates) { + (void)stream_id; unsigned int num_last_resort_candidates = 0; int i; @@ -541,6 +546,7 @@ static void add_odm_slice_to_odm_tree(struct dml2_context *ctx, struct dc_pipe_mapping_scratch *scratch, unsigned int odm_slice_index) { + (void)ctx; struct pipe_ctx *pipe = NULL; int i; @@ -567,6 +573,8 @@ static struct pipe_ctx *add_plane_to_blend_tree(struct dml2_context *ctx, unsigned int odm_slice, struct pipe_ctx *top_pipe) { + (void)ctx; + (void)plane; int i; for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) { @@ -722,6 +730,7 @@ static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_stat static void remove_pipes_from_blend_trees(struct dml2_context *ctx, struct dc_state *state, struct dc_plane_pipe_pool *pipe_pool, unsigned int odm_slice) { + (void)ctx; struct pipe_ctx *pipe; int i; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c index cf3a69aba638..8e0997441ee0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c @@ -33,6 +33,7 @@ void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out) { + (void)in_dc; switch (dml2->v20.dml_core_ctx.project) { case dml_project_dcn32: case dml_project_dcn321: @@ -244,6 +245,7 @@ void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, stru void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out) { + (void)in_dc; out->dprefclk_mhz = dml2->config.bbox_overrides.dprefclk_mhz; out->xtalclk_mhz = dml2->config.bbox_overrides.xtalclk_mhz; out->pcierefclk_mhz = 100; @@ -328,6 +330,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out) { + (void)in_dc; struct dml2_policy_build_synthetic_soc_states_scratch *s = &dml2->v20.scratch.create_scratch.build_synthetic_socbb_scratch; struct dml2_policy_build_synthetic_soc_states_params *p = &dml2->v20.scratch.build_synthetic_socbb_params; int dcfclk_stas_mhz[NUM_DCFCLK_STAS] = {0}; @@ -782,6 +785,7 @@ static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st * static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, const struct dc_stream_state *in, const struct pipe_ctx *pipe, struct dml2_context *dml2) { + (void)pipe; unsigned int output_bpc; out->DSCEnable[location] = (enum dml_dsc_enable)in->timing.flags.DSC; @@ -1133,6 +1137,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, const 
struct dc_stream_state *stream, const struct dml_display_cfg_st *dml_dispcfg) { + (void)dml_dispcfg; int i = 0; int location = -1; @@ -1173,6 +1178,7 @@ static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *conte static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane, const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index) { + (void)dml_dispcfg; unsigned int plane_id; unsigned int i = 0; unsigned int location = UINT_MAX; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c index 6c7cdf102906..86567e232415 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c @@ -465,6 +465,7 @@ void dml2_initialize_det_scratch(struct dml2_context *in_ctx) static unsigned int find_planes_per_stream_and_stream_count(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg, int *num_of_planes_per_stream) { + (void)in_ctx; unsigned int plane_index, stream_index = 0, num_of_streams; for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c index 93b7613fc4f2..1772e74349c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c @@ -108,6 +108,17 @@ bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options return true; } +void dml2_destroy(struct dml2_context *dml2) +{ + if (!dml2) + return; + + if (dml2->architecture == dml2_architecture_21) + dml21_destroy(dml2); + + DC_RUN_WITH_PREEMPTION_ENABLED(vfree(dml2)); +} + void dml2_reinit(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper_fpu.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper_fpu.c index 66624cfc27b1..a14e3004a7b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper_fpu.c @@ -548,16 +548,6 @@ void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *dml2) } } -void dml2_destroy(struct dml2_context *dml2) -{ - if (!dml2) - return; - - if (dml2->architecture == dml2_architecture_21) - dml21_destroy(dml2); - vfree(dml2); -} - void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, unsigned int *fclk_change_support, unsigned int *dram_clk_change_support) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c index 00d22e542469..18962bbf455b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c @@ -563,6 +563,7 @@ void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs, void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param) { + (void)mode_lib; memset(arb_param, 0, sizeof(*arb_param)); arb_param->max_req_outstanding = 256; arb_param->min_req_outstanding = 256; // turn off the sat level feature if this set to max diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c index f8f6019d8304..2bdd063cc1e1 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c @@ -49,9 +49,6 @@ #define FN(reg_name, field_name) \ dpp->tf_shift->field_name, dpp->tf_mask->field_name -#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) - - enum dcn10_coef_filter_type_sel { SCL_COEF_LUMA_VERT_FILTER = 0, SCL_COEF_LUMA_HORZ_FILTER = 1, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 821d5173b59f..4f3b48ed8679 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -49,9 +49,6 @@ #define FN(reg_name, field_name) \ dpp->tf_shift->field_name, dpp->tf_mask->field_name -#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) - - enum dcn401_coef_filter_type_sel { SCL_COEF_LUMA_VERT_FILTER = 0, SCL_COEF_LUMA_HORZ_FILTER = 1, diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 5b3584ad5b6b..8dfb6dd14eb2 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -680,9 +680,6 @@ static void get_dsc_enc_caps( } else { build_dsc_enc_caps(dsc, dsc_enc_caps); } - - if (dsc->ctx->dc->debug.native422_support) - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; } /* Returns 'false' if no intersection was found for at least one capability. @@ -1100,13 +1097,14 @@ static bool setup_dsc_config( branch_max_throughput_mps = dsc_sink_caps->branch_overall_throughput_0_mps; break; case PIXEL_ENCODING_YCBCR422: - is_dsc_possible = (bool)dsc_common_caps.color_formats.bits.YCBCR_NATIVE_422; - sink_per_slice_throughput_mps = dsc_sink_caps->throughput_mode_1_mps; - branch_max_throughput_mps = dsc_sink_caps->branch_overall_throughput_1_mps; - if (!is_dsc_possible) { + if (policy.ycbcr422_simple) { is_dsc_possible = (bool)dsc_common_caps.color_formats.bits.YCBCR_SIMPLE_422; dsc_cfg->ycbcr422_simple = is_dsc_possible; sink_per_slice_throughput_mps = dsc_sink_caps->throughput_mode_0_mps; + } else { + is_dsc_possible = (bool)dsc_common_caps.color_formats.bits.YCBCR_NATIVE_422; + sink_per_slice_throughput_mps = dsc_sink_caps->throughput_mode_1_mps; + branch_max_throughput_mps = dsc_sink_caps->branch_overall_throughput_1_mps; } break; case PIXEL_ENCODING_YCBCR420: @@ -1406,6 +1404,7 @@ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, policy->min_target_bpp = 8; /* DP specs limits to 3 x bpc */ policy->max_target_bpp = 3 * bpc; + policy->ycbcr422_simple = true; break; case PIXEL_ENCODING_YCBCR420: /* DP specs limits to 6 */ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index 242f1e6f0d8f..6e1e759462bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -100,7 +100,7 @@ void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) dsc_enc_caps->color_formats.bits.RGB = 1; dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c index 
e712985f7abd..17acb64a9d80 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c @@ -128,7 +128,7 @@ void dsc35_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int m dsc_enc_caps->color_formats.bits.RGB = 1; dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 3bf737195bac..41c3b814b6bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -78,7 +78,7 @@ static void dsc401_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsign dsc_enc_caps->color_formats.bits.RGB = 1; dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index a2c46350e44e..95f8b7c7d657 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -646,6 +646,9 @@ failure: enum gpio_ddc_line dal_ddc_get_line( const struct ddc *ddc) { + if (!ddc) + return GPIO_DDC_LINE_UNKNOWN; + return (enum gpio_ddc_line)dal_gpio_get_enum(ddc->pin_data); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5273ca09fe12..f0abbb7c2cb2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -665,16 +665,45 @@ void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) } static void -dce110_dac_encoder_control(struct pipe_ctx *pipe_ctx, bool enable) +dce110_external_encoder_control(enum bp_external_encoder_control_action action, + struct dc_link *link, + struct dc_crtc_timing *timing) { - struct dc_link *link = pipe_ctx->stream->link; + struct dc *dc = link->ctx->dc; struct dc_bios *bios = link->ctx->dc_bios; - struct bp_encoder_control encoder_control = {0}; + const struct dc_link_settings *link_settings = &link->cur_link_settings; + enum bp_result bp_result = BP_RESULT_OK; + struct bp_external_encoder_control ext_cntl = { + .action = action, + .connector_obj_id = link->link_enc->connector, + .encoder_id = link->ext_enc_id, + .lanes_number = link_settings->lane_count, + .link_rate = link_settings->link_rate, + + /* Use signal type of the real link encoder, i.e. DP */ + .signal = link->connector_signal, + + /* We don't know the timing yet when executing the SETUP action, + * so use a reasonably high default value. It seems that ENABLE + * can change the actual pixel clock but doesn't work with higher + * pixel clocks than what SETUP was called with. + */ + .pixel_clock = timing ? timing->pix_clk_100hz / 10 : 300000, + .color_depth = timing ? timing->display_color_depth : COLOR_DEPTH_888, + }; + DC_LOGGER_INIT(dc->ctx); - encoder_control.action = enable ?
ENCODER_CONTROL_ENABLE : ENCODER_CONTROL_DISABLE; - encoder_control.engine_id = link->link_enc->analog_engine; - encoder_control.pixel_clock = pipe_ctx->stream->timing.pix_clk_100hz / 10; - bios->funcs->encoder_control(bios, &encoder_control); + bp_result = bios->funcs->external_encoder_control(bios, &ext_cntl); + + if (bp_result != BP_RESULT_OK) + DC_LOG_ERROR("Failed to execute external encoder action: 0x%x\n", action); +} + +static void +dce110_prepare_ddc(struct dc_link *link) +{ + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DDC_SETUP, link, NULL); } static bool @@ -684,7 +713,8 @@ dce110_dac_load_detect(struct dc_link *link) struct link_encoder *link_enc = link->link_enc; enum bp_result bp_result; - bp_result = bios->funcs->dac_load_detection(bios, link_enc->analog_engine); + bp_result = bios->funcs->dac_load_detection( + bios, link_enc->analog_engine, link->ext_enc_id); return bp_result == BP_RESULT_OK; } @@ -700,7 +730,6 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) uint32_t early_control = 0; struct timing_generator *tg = pipe_ctx->stream_res.tg; - link_hwss->setup_stream_attribute(pipe_ctx); link_hwss->setup_stream_encoder(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -719,8 +748,8 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) tg->funcs->set_early_control(tg, early_control); - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - dce110_dac_encoder_control(pipe_ctx, true); + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_ENABLE, link, timing); } static enum bp_result link_transmitter_control( @@ -1219,8 +1248,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) link_enc->transmitter - TRANSMITTER_UNIPHY_A); } - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - dce110_dac_encoder_control(pipe_ctx, false); + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DISABLE, link, NULL); } void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, @@ -1603,22 +1632,6 @@ static enum dc_status dce110_enable_stream_timing( return DC_OK; } -static void -dce110_select_crtc_source(struct pipe_ctx *pipe_ctx) -{ - struct dc_link *link = pipe_ctx->stream->link; - struct dc_bios *bios = link->ctx->dc_bios; - struct bp_crtc_source_select crtc_source_select = {0}; - enum engine_id engine_id = link->link_enc->preferred_engine; - - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - engine_id = link->link_enc->analog_engine; - crtc_source_select.controller_id = CONTROLLER_ID_D0 + pipe_ctx->stream_res.tg->inst; - crtc_source_select.color_depth = pipe_ctx->stream->timing.display_color_depth; - crtc_source_select.engine_id = engine_id; - crtc_source_select.sink_signal = pipe_ctx->stream->signal; - bios->funcs->select_crtc_source(bios, &crtc_source_select); -} enum dc_status dce110_apply_single_controller_ctx_to_hw( struct pipe_ctx *pipe_ctx, @@ -1639,10 +1652,6 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( hws->funcs.disable_stream_gating(dc, pipe_ctx); } - if (pipe_ctx->stream->signal == SIGNAL_TYPE_RGB) { - dce110_select_crtc_source(pipe_ctx); - } - if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output = {0}; @@ -1722,8 +1731,7 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.tg->funcs->set_static_screen_control( pipe_ctx->stream_res.tg, event_triggers, 2); - if (!dc_is_virtual_signal(pipe_ctx->stream->signal) && - !dc_is_rgb_signal(pipe_ctx->stream->signal)) + if 
(!dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); @@ -3376,6 +3384,15 @@ void dce110_enable_tmds_link_output(struct dc_link *link, link->phy_state.symclk_state = SYMCLK_ON_TX_ON; } +static void dce110_enable_analog_link_output( + struct dc_link *link, + uint32_t pix_clk_100hz) +{ + link->link_enc->funcs->enable_analog_output( + link->link_enc, + pix_clk_100hz); +} + void dce110_enable_dp_link_output( struct dc_link *link, const struct link_resource *link_res, @@ -3423,6 +3440,11 @@ void dce110_enable_dp_link_output( } } + if (link->ext_enc_id.id) { + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_INIT, link, NULL); + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_SETUP, link, NULL); + } + if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { if (dc->clk_mgr->funcs->notify_link_rate_change) dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link); @@ -3513,8 +3535,10 @@ static const struct hw_sequencer_funcs dce110_funcs = { .enable_lvds_link_output = dce110_enable_lvds_link_output, .enable_tmds_link_output = dce110_enable_tmds_link_output, .enable_dp_link_output = dce110_enable_dp_link_output, + .enable_analog_link_output = dce110_enable_analog_link_output, .disable_link_output = dce110_disable_link_output, .dac_load_detect = dce110_dac_load_detect, + .prepare_ddc = dce110_prepare_ddc, }; static const struct hwseq_private_funcs dce110_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 59851924bfcd..794dd6a95918 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -781,10 +781,8 @@ static void restore_phy_clocks_for_destructive_link_verification(const struct dc } static void verify_link_capability_destructive(struct dc_link *link, - struct dc_sink *sink, enum dc_detect_reason reason) { - (void)sink; bool should_prepare_phy_clocks = should_prepare_phy_clocks_for_link_verification(link->dc, reason); @@ -857,11 +855,11 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, return destrictive; } -static void verify_link_capability(struct dc_link *link, struct dc_sink *sink, +static void verify_link_capability(struct dc_link *link, enum dc_detect_reason reason) { if (should_verify_link_capability_destructively(link, reason)) - verify_link_capability_destructive(link, sink, reason); + verify_link_capability_destructive(link, reason); else verify_link_capability_non_destructive(link); } @@ -1236,6 +1234,20 @@ static bool detect_link_and_local_sink(struct dc_link *link, if (dc_is_hdmi_signal(link->connector_signal)) read_scdc_caps(link->ddc, link->local_sink); + /* When FreeSync is toggled through OSD, + * we see the same EDID no matter what. Check MCCS caps + * to see if we should update FreeSync caps now.
+ */ + dm_helpers_read_mccs_caps( + link->ctx, + link, + sink); + + if (prev_sink != NULL) { + if (memcmp(&sink->mccs_caps, &prev_sink->mccs_caps, sizeof(struct mccs_caps))) + same_edid = false; + } + if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && sink_caps.transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) { @@ -1455,8 +1467,9 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason) is_local_sink_detect_success = detect_link_and_local_sink(link, reason); - if (is_local_sink_detect_success && link->local_sink) - verify_link_capability(link, link->local_sink, reason); + if (is_local_sink_detect_success && link->local_sink) { + verify_link_capability(link, reason); + } DC_LOG_DC("%s: link_index=%d is_local_sink_detect_success=%d pre_link_type=%d link_type=%d\n", __func__, link->link_index, is_local_sink_detect_success, pre_link_type, link->type); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index b4f46408a000..e12c25896364 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -181,7 +181,8 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link) /* link can be also enabled by vbios. In this case it is not recorded * in pipe_ctx. Disable link phy here to make sure it is completely off */ - dp_disable_link_phy(link, &link_res, link->connector_signal); + if (dc_is_dp_signal(link->connector_signal)) + dp_disable_link_phy(link, &link_res, link->connector_signal); } void link_resume(struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 7e7682d7dfc8..ae4c4ad05baa 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -568,7 +568,9 @@ static bool construct_phy(struct dc_link *link, goto ddc_create_fail; } - if (!link->ddc->ddc_pin) { + /* Embedded display connectors such as LVDS may not have DDC. */ + if (!link->ddc->ddc_pin && + !dc_is_embedded_signal(link->connector_signal)) { DC_ERROR("Failed to get I2C info for connector!\n"); goto ddc_create_fail; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index e12bf3dd3e46..782a45caa13d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -743,8 +743,6 @@ static bool decide_dp_link_settings(struct dc_link *link, struct dc_link_setting { struct dc_link_settings initial_link_setting = { LANE_COUNT_ONE, LINK_RATE_LOW, LINK_SPREAD_DISABLED, false, 0}; - if (link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) - initial_link_setting.link_rate = link->preferred_link_setting.link_rate; struct dc_link_settings current_link_setting = initial_link_setting; uint32_t link_bw; @@ -752,6 +750,9 @@ static bool decide_dp_link_settings(struct dc_link *link, struct dc_link_setting if (req_bw > dp_link_bandwidth_kbps(link, &link->verified_link_cap)) return false; + if (link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) + initial_link_setting.link_rate = link->preferred_link_setting.link_rate; + /* search for the minimum link setting that: * 1. is supported according to the link training result * 2. 
could support the b/w requested by the timing diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c index ea73473b970a..fa600593f4c1 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c @@ -43,8 +43,6 @@ #define FN(reg_name, field_name) \ mpc20->mpc_shift->field_name, mpc20->mpc_mask->field_name -#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) - void mpc2_update_blending( struct mpc *mpc, struct mpcc_blnd_cfg *blnd_cfg, diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c index 4c7bb0522a8c..4e91e9f6f11a 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c @@ -40,10 +40,6 @@ #define FN(reg_name, field_name) \ mpc30->mpc_shift->field_name, mpc30->mpc_mask->field_name - -#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) - - void mpc3_mpc_init(struct mpc *mpc) { struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 6a25dcfcdf17..d2d56a1c4b8b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -753,7 +753,8 @@ static struct link_encoder *dce60_link_encoder_create( enc_init_data, &link_enc_feature, &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->channel == CHANNEL_ID_UNKNOWN ? + NULL : &link_enc_aux_regs[enc_init_data->channel - 1], enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 33be49b3c1b1..6c00497e9a01 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -760,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create( enc_init_data, &link_enc_feature, &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->channel == CHANNEL_ID_UNKNOWN ? + NULL : &link_enc_aux_regs[enc_init_data->channel - 1], enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? 
NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 54ebf8cf607f..84f6d9dc443f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -1394,6 +1394,13 @@ static enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *pla return dcn20_patch_unknown_plane_state(plane_state); } +static void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn21_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); +} + static const struct resource_funcs dcn21_res_pool_funcs = { .destroy = dcn21_destroy_resource_pool, .link_enc_create = dcn21_link_encoder_create, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index ee4bc2c2e73a..39944d90ea98 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1854,6 +1854,13 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; +static void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn31_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); +} + static struct resource_funcs dcn31_res_pool_funcs = { .destroy = dcn31_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -1996,6 +2003,8 @@ static bool dcn31_resource_construct( dc->config.use_pipe_ctx_sync_logic = true; dc->config.disable_hbr_audio_dp2 = true; + dc->config.no_native422_support = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 2ca673114841..975e14f3f5fa 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1849,6 +1849,13 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; +static void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn315_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); +} + static struct resource_funcs dcn315_res_pool_funcs = { .destroy = dcn315_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -1959,6 +1966,8 @@ static bool dcn315_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->config.no_native422_support = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 2242df112a3f..914d91df174c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -1725,6 +1725,13 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; +static void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn316_update_bw_bounding_box_fpu(dc, bw_params); + 
DC_FP_END(); +} + static struct resource_funcs dcn316_res_pool_funcs = { .destroy = dcn316_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c index 146a6e47934b..e723b4d0aff3 100644 --- a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c @@ -155,6 +155,10 @@ static void dcn42_update_soc_bb_with_values_from_clk_mgr(struct dml2_soc_bb *soc dcn42_convert_dc_clock_table_to_soc_bb_clock_table(&soc_bb->clk_table, &soc_bb->vmin_limit, dc->clk_mgr->bw_params); } + + if (dc->clk_mgr->bw_params->vram_type == Ddr5MemType) { + soc_bb->power_management_parameters = dcn42_ddr5_power_management_parameters; + } } static void apply_soc_bb_updates(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config) diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h b/drivers/gpu/drm/amd/display/include/ddc_service_types.h index 1c603b12957f..53210e3aa0e0 100644 --- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h +++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h @@ -36,6 +36,7 @@ #define DP_BRANCH_DEVICE_ID_006037 0x006037 #define DP_BRANCH_DEVICE_ID_001CF8 0x001CF8 #define DP_BRANCH_DEVICE_ID_0060AD 0x0060AD +#define DP_BRANCH_DEVICE_ID_001FF2 0x001FF2 #define DP_BRANCH_HW_REV_10 0x10 #define DP_BRANCH_HW_REV_20 0x20 diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h index 38a77fa9b4af..a0f03fb67605 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h @@ -153,6 +153,10 @@ struct embedded_panel_info { uint32_t drr_enabled; uint32_t min_drr_refresh_rate; bool realtek_eDPToLVDS; + uint16_t panel_width_mm; + uint16_t panel_height_mm; + uint16_t fake_edid_size; + const uint8_t *fake_edid; }; struct dc_firmware_info { diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index c9150019aab0..f3b41087678b 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -153,7 +153,7 @@ unsigned int mod_freesync_calc_v_total_from_refresh( * round down the vtotal value to avoid stretching vblank over * panel's vtotal boundary. 
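The freesync hunk below converts a frame duration into a vertical total three times, and the only difference between the branches is the bias added before the final divide by 1000000. A minimal standalone sketch of that arithmetic, using plain 64-bit division where the kernel code uses the div64_u64 helpers (function and parameter names here are ours, not the driver's):

#include <stdint.h>

/* v_total = frame_duration_ns * pix_clk_khz / h_total / 1000000.
 * The bias added before the last divide selects the rounding mode. */
static uint32_t calc_v_total(uint64_t frame_duration_ns, uint64_t pix_clk_khz,
                             uint32_t h_total, uint32_t bias)
{
        uint64_t scaled_lines = frame_duration_ns * pix_clk_khz / h_total;

        return (uint32_t)((scaled_lines + bias) / 1000000);
}

/*
 * bias == 0            floor: at max refresh, never stretch vblank past
 *                      the panel's vtotal boundary
 * bias == 1000000 - 1  ceiling: at min refresh, avoid the off-by-one that
 *                      would put v_total_min below the panel's lower bound
 * bias == 500000       round to nearest, for refresh rates in between
 */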
*/ - v_total = div64_u64(div64_u64(((unsigned long long)( + v_total = (unsigned int)div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000000); } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) { @@ -161,11 +161,11 @@ unsigned int mod_freesync_calc_v_total_from_refresh( * round up the vtotal value to prevent off-by-one error causing * v_total_min to be below the panel's lower bound */ - v_total = div64_u64(div64_u64(((unsigned long long)( + v_total = (unsigned int)div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total) + (1000000 - 1), 1000000); } else { - v_total = div64_u64(div64_u64(((unsigned long long)( + v_total = (unsigned int)div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total) + 500000, 1000000); } @@ -196,11 +196,11 @@ static unsigned int calc_v_total_from_duration( uint32_t h_total_up_scaled; h_total_up_scaled = stream->timing.h_total * 10000; - v_total = div_u64((unsigned long long)duration_in_us + v_total = (unsigned int)div_u64((unsigned long long)duration_in_us * stream->timing.pix_clk_100hz + (h_total_up_scaled - 1), h_total_up_scaled); //ceiling for MMax and MMin for MVRR } else { - v_total = div64_u64(div64_u64(((unsigned long long)( + v_total = (unsigned int)div64_u64(div64_u64(((unsigned long long)( duration_in_us) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000); } @@ -232,22 +232,28 @@ static void update_v_total_for_static_ramp( target_duration_in_us; /* Calculate ratio between new and current frame duration with 3 digit */ - unsigned int frame_duration_ratio = div64_u64(1000000, + uint64_t frame_duration_ratio_u64 = div64_u64(1000000, (1000 + div64_u64(((unsigned long long)( STATIC_SCREEN_RAMP_DELTA_REFRESH_RATE_PER_FRAME) * current_duration_in_us), 1000000))); + ASSERT(frame_duration_ratio_u64 <= 0xFFFFFFFF); + unsigned int frame_duration_ratio = (unsigned int)frame_duration_ratio_u64; /* Calculate delta between new and current frame duration in us */ - unsigned int frame_duration_delta = div64_u64(((unsigned long long)( + uint64_t frame_duration_delta_u64 = div64_u64(((unsigned long long)( current_duration_in_us) * (1000 - frame_duration_ratio)), 1000); + ASSERT(frame_duration_delta_u64 <= 0xFFFFFFFF); + unsigned int frame_duration_delta = (unsigned int)frame_duration_delta_u64; /* Adjust frame duration delta based on ratio between current and * standard frame duration (frame duration at 60 Hz refresh rate). 
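Each conversion above widens to u64, asserts the result fits in 32 bits, then narrows with an explicit cast; the same sequence reappears in power_helpers.c and vmid.c further down. Condensed into one hypothetical helper (ASSERT is assumed to be DC's debug assert; the helper name is ours):

#include <stdint.h>

/* Do the arithmetic in 64 bits, prove the result fits, then narrow. */
static inline unsigned int narrow_u64_to_u32(uint64_t v)
{
        ASSERT(v <= 0xFFFFFFFF);
        return (unsigned int)v;
}

The open-coded form in the patch trades this one-liner for keeping each ASSERT textually next to the computation it guards.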
*/ - unsigned int ramp_rate_interpolated = div64_u64(((unsigned long long)( + uint64_t ramp_rate_interpolated_u64 = div64_u64(((unsigned long long)( frame_duration_delta) * current_duration_in_us), 16666); + ASSERT(ramp_rate_interpolated_u64 <= 0xFFFFFFFF); + unsigned int ramp_rate_interpolated = (unsigned int)ramp_rate_interpolated_u64; /* Going to a higher refresh rate (lower frame duration) */ if (ramp_direction_is_up) { @@ -277,7 +283,7 @@ static void update_v_total_for_static_ramp( } } - v_total = div64_u64(div64_u64(((unsigned long long)( + v_total = (unsigned int)div64_u64(div64_u64(((unsigned long long)( current_duration_in_us) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000); @@ -1058,8 +1064,12 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, else in_out_vrr->fixed_refresh_in_uhz = 0; - refresh_range = div_u64(in_out_vrr->max_refresh_in_uhz + 500000, 1000000) - - div_u64(in_out_vrr->min_refresh_in_uhz + 500000, 1000000); + { + uint64_t rr_tmp = div_u64(in_out_vrr->max_refresh_in_uhz + 500000, 1000000) - + div_u64(in_out_vrr->min_refresh_in_uhz + 500000, 1000000); + ASSERT(rr_tmp <= 0xFFFFFFFF); + refresh_range = (unsigned int)rr_tmp; + } in_out_vrr->supported = true; } diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index df3b8383b06d..5d444e9eb38f 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -250,10 +250,12 @@ static void fill_backlight_transform_table(struct dmcu_iram_parameters params, unsigned int lut_index; table->backlight_thresholds[0] = 0; - table->backlight_offsets[0] = params.backlight_lut_array[0]; + ASSERT(params.backlight_lut_array[0] <= 0xFFFF); + table->backlight_offsets[0] = (uint16_t)params.backlight_lut_array[0]; table->backlight_thresholds[num_entries-1] = 0xFFFF; + ASSERT(params.backlight_lut_array[params.backlight_lut_array_size - 1] <= 0xFFFF); table->backlight_offsets[num_entries-1] = - params.backlight_lut_array[params.backlight_lut_array_size - 1]; + (uint16_t)params.backlight_lut_array[params.backlight_lut_array_size - 1]; /* Setup all brightness levels between 0% and 100% exclusive * Fills brightness-to-backlight transform table. 
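Both fill_backlight_transform_table variants map interior table entry i onto the backlight LUT, but with different index formulas: v1 scales by (lut_size - 1)/(num_entries - 1) rounding down, while v2.2 scales by lut_size/num_entries rounding up via DIV_ROUNDUP. Side by side as standalone sketches (function names ours; in the patch both results are ASSERTed to stay inside the LUT before use):

static unsigned int lut_index_v1(unsigned int i, unsigned int lut_size,
                                 unsigned int num_entries)
{
        return (lut_size - 1) * i / (num_entries - 1);
}

static unsigned int lut_index_v2_2(unsigned int i, unsigned int lut_size,
                                   unsigned int num_entries)
{
        /* DIV_ROUNDUP(i * lut_size, num_entries) */
        return (i * lut_size + num_entries - 1) / num_entries;
}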
Backlight custom curve @@ -265,12 +267,17 @@ static void fill_backlight_transform_table(struct dmcu_iram_parameters params, */ for (i = 1; i+1 < num_entries; i++) { lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1); + ASSERT(lut_index < params.backlight_lut_array_size); - table->backlight_thresholds[i] = - cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries)); - table->backlight_offsets[i] = - cpu_to_be16(params.backlight_lut_array[lut_index]); + unsigned int threshold_val = DIV_ROUNDUP((i * 65536), num_entries); + unsigned int offset_val = params.backlight_lut_array[lut_index]; + + ASSERT(threshold_val <= 0xFFFF); + ASSERT(offset_val <= 0xFFFF); + + table->backlight_thresholds[i] = cpu_to_be16((uint16_t)threshold_val); + table->backlight_offsets[i] = cpu_to_be16((uint16_t)offset_val); } } @@ -282,10 +289,12 @@ static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters par unsigned int lut_index; table->backlight_thresholds[0] = 0; - table->backlight_offsets[0] = params.backlight_lut_array[0]; + ASSERT(params.backlight_lut_array[0] <= 0xFFFF); + table->backlight_offsets[0] = (uint16_t)params.backlight_lut_array[0]; table->backlight_thresholds[num_entries-1] = 0xFFFF; + ASSERT(params.backlight_lut_array[params.backlight_lut_array_size - 1] <= 0xFFFF); table->backlight_offsets[num_entries-1] = - params.backlight_lut_array[params.backlight_lut_array_size - 1]; + (uint16_t)params.backlight_lut_array[params.backlight_lut_array_size - 1]; /* Setup all brightness levels between 0% and 100% exclusive * Fills brightness-to-backlight transform table. Backlight custom curve @@ -299,12 +308,16 @@ static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters par lut_index = DIV_ROUNDUP((i * params.backlight_lut_array_size), num_entries); ASSERT(lut_index < params.backlight_lut_array_size); + unsigned int threshold_val = DIV_ROUNDUP((i * 65536), num_entries); + unsigned int offset_val = params.backlight_lut_array[lut_index]; + + ASSERT(threshold_val <= 0xFFFF); + ASSERT(offset_val <= 0xFFFF); + table->backlight_thresholds[i] = (big_endian) ? - cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries)) : - cpu_to_le16(DIV_ROUNDUP((i * 65536), num_entries)); + cpu_to_be16((uint16_t)threshold_val) : cpu_to_le16((uint16_t)threshold_val); table->backlight_offsets[i] = (big_endian) ? 
- cpu_to_be16(params.backlight_lut_array[lut_index]) : - cpu_to_le16(params.backlight_lut_array[lut_index]); + cpu_to_be16((uint16_t)offset_val) : cpu_to_le16((uint16_t)offset_val); } } @@ -740,9 +753,12 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, } if (params.backlight_ramping_override) { + + ASSERT(params.backlight_ramping_reduction <= 0xFFFF); + ASSERT(params.backlight_ramping_start <= 0xFFFF); for (i = 0; i < NUM_AGGR_LEVEL; i++) { - config.blRampReduction[i] = params.backlight_ramping_reduction; - config.blRampStart[i] = params.backlight_ramping_start; + config.blRampReduction[i] = (uint16_t)params.backlight_ramping_reduction; + config.blRampStart[i] = (uint16_t)params.backlight_ramping_start; } } else { for (i = 0; i < NUM_AGGR_LEVEL; i++) { @@ -1060,6 +1076,7 @@ void calculate_replay_link_off_frame_count(struct dc_link *link, bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps) { unsigned int data_points_size; + uint64_t caps_size; if (config_no >= ARRAY_SIZE(custom_backlight_profiles)) return false; @@ -1067,7 +1084,9 @@ bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_back data_points_size = custom_backlight_profiles[config_no].num_data_points * sizeof(custom_backlight_profiles[config_no].data_points[0]); - caps->size = sizeof(struct dm_acpi_atif_backlight_caps) - sizeof(caps->data_points) + data_points_size; + caps_size = sizeof(struct dm_acpi_atif_backlight_caps) - sizeof(caps->data_points) + data_points_size; + ASSERT(caps_size <= 0xFFFF); + caps->size = (uint16_t)caps_size; caps->flags = 0; caps->error_code = 0; caps->ac_level_percentage = custom_backlight_profiles[config_no].ac_level_percentage; diff --git a/drivers/gpu/drm/amd/display/modules/vmid/vmid.c b/drivers/gpu/drm/amd/display/modules/vmid/vmid.c index 9f408cb11ac9..179b505f7777 100644 --- a/drivers/gpu/drm/amd/display/modules/vmid/vmid.c +++ b/drivers/gpu/drm/amd/display/modules/vmid/vmid.c @@ -57,7 +57,10 @@ static void clear_entry_from_vmid_table(struct core_vmid *core_vmid, unsigned in static void evict_vmids(struct core_vmid *core_vmid) { int i; - uint16_t ord = dc_get_vmid_use_vector(core_vmid->dc); + int ord_int = dc_get_vmid_use_vector(core_vmid->dc); + + ASSERT(ord_int >= 0 && ord_int <= 0xFFFF); + uint16_t ord = (uint16_t)ord_int; // At this point any positions with value 0 are unused vmids, evict them for (i = 1; i < core_vmid->num_vmid; i++) { @@ -120,7 +123,8 @@ uint8_t mod_vmid_get_for_ptb(struct mod_vmid *mod_vmid, uint64_t ptb) ASSERT(0); } - return vmid; + ASSERT(vmid >= 0 && vmid <= 0xFF); + return (uint8_t)vmid; } void mod_vmid_reset(struct mod_vmid *mod_vmid) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 62b0b1ef0d10..736304e73ca4 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -995,12 +995,15 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev, return ret; ret = amdgpu_dpm_emit_clock_levels(adev, type, buf, &size); - if (ret) - return ret; + if (ret) { + size = ret; + goto out_pm_put; + } if (size == 0) size = sysfs_emit(buf, "\n"); +out_pm_put: amdgpu_pm_put_access(adev); return size; @@ -3902,11 +3905,14 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev, return ret; ret = amdgpu_dpm_emit_clock_levels(adev, od_type, buf, &size); - if (ret) - return ret; + if (ret) { + size = ret; + goto out_pm_put; + } if (size == 0) size = sysfs_emit(buf, "\n"); +out_pm_put: amdgpu_pm_put_access(adev); return 
size; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 8c37aa452569..55e2375e1dad 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -3062,9 +3062,6 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr) smu7_set_private_data_based_on_pptable_v0(hwmgr); } - if (result) - goto fail; - data->is_tlu_enabled = false; hwmgr->platform_descriptor.hardwareActivityPerformanceLevels = diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 731355bdb9bc..3650e7beeb67 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -1333,12 +1333,13 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr) dev_id = adev->pdev->device; - if ((dpm_table->mclk_table.count >= 2) - && ((dev_id == 0x67B0) || (dev_id == 0x67B1))) { - smu_data->smc_state_table.MemoryLevel[1].MinVddci = - smu_data->smc_state_table.MemoryLevel[0].MinVddci; - smu_data->smc_state_table.MemoryLevel[1].MinMvdd = - smu_data->smc_state_table.MemoryLevel[0].MinMvdd; + if ((dpm_table->mclk_table.count >= 2) && + ((dev_id == 0x67B0) || (dev_id == 0x67B1)) && + (adev->pdev->revision == 0)) { + smu_data->smc_state_table.MemoryLevel[1].MinVddc = + smu_data->smc_state_table.MemoryLevel[0].MinVddc; + smu_data->smc_state_table.MemoryLevel[1].MinVddcPhases = + smu_data->smc_state_table.MemoryLevel[0].MinVddcPhases; } smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 609f5ab07d8a..d76e0b005308 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -584,6 +584,7 @@ struct cmn2asic_mapping { /* Message flags for smu_msg_args */ #define SMU_MSG_FLAG_ASYNC BIT(0) /* Async send - skip post-poll */ #define SMU_MSG_FLAG_LOCK_HELD BIT(1) /* Caller holds ctl->lock */ +#define SMU_MSG_FLAG_FORCE_READ_ARG BIT(2) /* force read smu arg from pmfw */ /* smu_msg_ctl flags */ #define SMU_MSG_CTL_DEBUG_MAILBOX BIT(0) /* Debug mailbox supported */ @@ -2164,4 +2165,21 @@ static inline void smu_feature_init(struct smu_context *smu, int feature_num) smu_feature_list_clear_all(smu, SMU_FEATURE_LIST_ALLOWED); } +/* + * smu_safe_u16_nn - Make u16 safe by filtering negative overflow errors + * @val: Input u16 value, may contain invalid negative overflows + * + * Convert u16 to non-negative value. Cast to s16 to detect negative values + * caused by calculation errors. Return 0 for negative errors, return + * original value if valid. + * + * Return: Valid u16 value or 0 + */ +static inline u16 smu_safe_u16_nn(u16 val) +{ + s16 tmp = (s16)val; + + return tmp < 0 ? 
0 : val; +} + #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index dc056f1e4b64..9d8b1227388f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -425,6 +425,7 @@ static int aldebaran_set_default_dpm_table(struct smu_context *smu) dpm_table->dpm_levels[0].enabled = true; dpm_table->dpm_levels[1].value = pptable->GfxclkFmax; dpm_table->dpm_levels[1].enabled = true; + dpm_table->flags |= SMU_DPM_TABLE_FINE_GRAINED; } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; @@ -1846,6 +1847,7 @@ static int aldebaran_mode2_reset(struct smu_context *smu) amdgpu_device_load_pci_state(adev->pdev); dev_dbg(adev->dev, "wait for reset ack\n"); + ret = -ETIME; while (ret == -ETIME && timeout) { ret = smu_msg_wait_response(ctl, 0); /* Wait a bit more time for getting ACK */ @@ -1855,7 +1857,7 @@ static int aldebaran_mode2_reset(struct smu_context *smu) continue; } - if (ret != 1) { + if (ret != 0) { dev_err(adev->dev, "failed to send mode2 message \tparam: 0x%08x response %#x\n", SMU_RESET_MODE_2, ret); goto out; @@ -1865,10 +1867,9 @@ static int aldebaran_mode2_reset(struct smu_context *smu) } else { dev_err(adev->dev, "smu fw 0x%x does not support MSG_GfxDeviceDriverReset MSG\n", smu->smc_fw_version); + ret = -EOPNOTSUPP; } - if (ret == 1) - ret = 0; out: mutex_unlock(&ctl->lock); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index b414a74d29fd..0a7f5fa3c1d3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -773,13 +773,13 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->AverageGfxclkFrequencyPreDs; break; case METRICS_AVERAGE_FCLK: - if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD) *value = metrics->AverageFclkFrequencyPostDs; else *value = metrics->AverageFclkFrequencyPreDs; break; case METRICS_AVERAGE_UCLK: - if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD) *value = metrics->AverageMemclkFrequencyPostDs; else *value = metrics->AverageMemclkFrequencyPreDs; @@ -800,7 +800,7 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->AverageGfxActivity; break; case METRICS_AVERAGE_MEMACTIVITY: - *value = metrics->AverageUclkActivity; + *value = smu_safe_u16_nn(metrics->AverageUclkActivity); break; case METRICS_AVERAGE_VCNACTIVITY: *value = max(metrics->Vcn0ActivityPercentage, @@ -2085,7 +2085,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, metrics->AvgTemperature[TEMP_VR_MEM1]); gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; - gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity); gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, metrics->Vcn1ActivityPercentage); @@ -2102,7 +2102,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, else gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; - if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD) + if 
(smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD) gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; else gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 54a86eb77cd5..fe929bd89058 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -479,9 +479,14 @@ static int smu_v13_0_12_get_system_metrics_table(struct smu_context *smu) } amdgpu_hdp_invalidate(smu->adev, NULL); + + ret = smu_cmn_vram_cpy(smu, sys_table->cache.buffer, + table->cpu_addr, + smu_v13_0_12_get_system_metrics_size()); + if (ret) + return ret; + smu_table_cache_update_time(sys_table, jiffies); - memcpy(sys_table->cache.buffer, table->cpu_addr, - smu_v13_0_12_get_system_metrics_size()); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 475541189782..0df8c05a7fce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -778,7 +778,10 @@ int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, } amdgpu_hdp_invalidate(smu->adev, NULL); - memcpy(smu_table->metrics_table, table->cpu_addr, table_size); + ret = smu_cmn_vram_cpy(smu, smu_table->metrics_table, + table->cpu_addr, table_size); + if (ret) + return ret; smu_table->metrics_time = jiffies; } @@ -857,9 +860,9 @@ int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu) } amdgpu_hdp_invalidate(smu->adev, NULL); - memcpy(smu_table->metrics_table, table->cpu_addr, table_size); - return 0; + return smu_cmn_vram_cpy(smu, smu_table->metrics_table, + table->cpu_addr, table_size); } static void smu_v13_0_6_update_caps(struct smu_context *smu) @@ -1126,6 +1129,7 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) /* gfxclk dpm table setup */ dpm_table = &dpm_context->dpm_tables.gfx_table; dpm_table->clk_type = SMU_GFXCLK; + dpm_table->flags = SMU_DPM_TABLE_FINE_GRAINED; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) { /* In the case of gfxclk, only fine-grained dpm is honored. * Get min/max values from FW. 
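The memcpy()-to-smu_cmn_vram_cpy() conversions in the smu_v13_0_6 hunks above, and the smu_v13_0_12/smu_v15_0_8 ones elsewhere in this diff, share one caller-side shape: copy first, and only stamp the cache as fresh once the copy is known good. A condensed sketch under those assumptions (the wrapper function is hypothetical; smu_cmn_vram_cpy itself is added in smu_cmn.c further down):

/* Copy a driver table out of VRAM into a CPU-side cache.  A RAS fatal
 * error poisons anything read back, so the timestamp is only updated
 * on success. */
static int refresh_metrics_cache(struct smu_context *smu,
                                 struct smu_table *table,
                                 void *cache_buf, size_t size)
{
        int ret;

        ret = smu_cmn_vram_cpy(smu, cache_buf, table->cpu_addr, size);
        if (ret)
                return ret;     /* -EHWPOISON after a RAS fatal error */

        smu->smu_table.metrics_time = jiffies;
        return 0;
}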
@@ -2404,13 +2408,15 @@ static int smu_v13_0_6_request_i2c_xfer(struct smu_context *smu, table_size = smu_table->tables[SMU_TABLE_I2C_COMMANDS].size; - memcpy(table->cpu_addr, table_data, table_size); + ret = smu_cmn_vram_cpy(smu, table->cpu_addr, table_data, table_size); + if (ret) + return ret; + /* Flush hdp cache */ amdgpu_hdp_flush(adev, NULL); - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RequestI2cTransaction, - NULL); - return ret; + return smu_cmn_send_smc_msg(smu, SMU_MSG_RequestI2cTransaction, + NULL); } static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index fd0b6215364f..5abf2b0703c6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -783,13 +783,13 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu, *value = metrics->AverageGfxclkFrequencyPreDs; break; case METRICS_AVERAGE_FCLK: - if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD) *value = metrics->AverageFclkFrequencyPostDs; else *value = metrics->AverageFclkFrequencyPreDs; break; case METRICS_AVERAGE_UCLK: - if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD) *value = metrics->AverageMemclkFrequencyPostDs; else *value = metrics->AverageMemclkFrequencyPreDs; @@ -814,7 +814,7 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu, *value = metrics->AverageGfxActivity; break; case METRICS_AVERAGE_MEMACTIVITY: - *value = metrics->AverageUclkActivity; + *value = smu_safe_u16_nn(metrics->AverageUclkActivity); break; case METRICS_AVERAGE_SOCKETPOWER: *value = metrics->AverageSocketPower << 8; @@ -2091,7 +2091,7 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu, metrics->AvgTemperature[TEMP_VR_MEM1]); gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; - gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity); gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, metrics->Vcn1ActivityPercentage); @@ -2104,7 +2104,7 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu, else gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; - if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD) gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; else gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 31f9566f7979..5ce4e982ca33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -661,13 +661,13 @@ static int smu_v14_0_2_get_smu_metrics_data(struct smu_context *smu, *value = metrics->AverageGfxclkFrequencyPreDs; break; case METRICS_AVERAGE_FCLK: - if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD) *value = metrics->AverageFclkFrequencyPostDs; else *value = metrics->AverageFclkFrequencyPreDs; break; case 
METRICS_AVERAGE_UCLK: - if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD) *value = metrics->AverageMemclkFrequencyPostDs; else *value = metrics->AverageMemclkFrequencyPreDs; @@ -688,7 +688,7 @@ static int smu_v14_0_2_get_smu_metrics_data(struct smu_context *smu, *value = metrics->AverageGfxActivity; break; case METRICS_AVERAGE_MEMACTIVITY: - *value = metrics->AverageUclkActivity; + *value = smu_safe_u16_nn(metrics->AverageUclkActivity); break; case METRICS_AVERAGE_VCNACTIVITY: *value = max(metrics->AverageVcn0ActivityPercentage, @@ -2147,7 +2147,7 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, metrics->AvgTemperature[TEMP_VR_MEM1]); gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; - gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity); gpu_metrics->average_mm_activity = max(metrics->AverageVcn0ActivityPercentage, metrics->Vcn1ActivityPercentage); @@ -2159,7 +2159,7 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, else gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; - if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD) gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; else gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; @@ -2214,17 +2214,61 @@ static void smu_v14_0_2_dump_od_table(struct smu_context *smu, od_table->OverDriveTable.UclkFmax); } +#define OD_ERROR_MSG_MAP(msg) \ + [msg] = #msg + +static const char *od_error_message[] = { + OD_ERROR_MSG_MAP(OD_REQUEST_ADVANCED_NOT_SUPPORTED), + OD_ERROR_MSG_MAP(OD_UNSUPPORTED_FEATURE), + OD_ERROR_MSG_MAP(OD_INVALID_FEATURE_COMBO_ERROR), + OD_ERROR_MSG_MAP(OD_GFXCLK_VF_CURVE_OFFSET_ERROR), + OD_ERROR_MSG_MAP(OD_VDD_GFX_VMAX_ERROR), + OD_ERROR_MSG_MAP(OD_VDD_SOC_VMAX_ERROR), + OD_ERROR_MSG_MAP(OD_PPT_ERROR), + OD_ERROR_MSG_MAP(OD_FAN_MIN_PWM_ERROR), + OD_ERROR_MSG_MAP(OD_FAN_ACOUSTIC_TARGET_ERROR), + OD_ERROR_MSG_MAP(OD_FAN_ACOUSTIC_LIMIT_ERROR), + OD_ERROR_MSG_MAP(OD_FAN_TARGET_TEMP_ERROR), + OD_ERROR_MSG_MAP(OD_FAN_ZERO_RPM_STOP_TEMP_ERROR), + OD_ERROR_MSG_MAP(OD_FAN_CURVE_PWM_ERROR), + OD_ERROR_MSG_MAP(OD_FAN_CURVE_TEMP_ERROR), + OD_ERROR_MSG_MAP(OD_FULL_CTRL_GFXCLK_ERROR), + OD_ERROR_MSG_MAP(OD_FULL_CTRL_UCLK_ERROR), + OD_ERROR_MSG_MAP(OD_FULL_CTRL_FCLK_ERROR), + OD_ERROR_MSG_MAP(OD_FULL_CTRL_VDD_GFX_ERROR), + OD_ERROR_MSG_MAP(OD_FULL_CTRL_VDD_SOC_ERROR), + OD_ERROR_MSG_MAP(OD_TDC_ERROR), + OD_ERROR_MSG_MAP(OD_GFXCLK_ERROR), + OD_ERROR_MSG_MAP(OD_UCLK_ERROR), + OD_ERROR_MSG_MAP(OD_FCLK_ERROR), + OD_ERROR_MSG_MAP(OD_OP_TEMP_ERROR), + OD_ERROR_MSG_MAP(OD_OP_GFX_EDC_ERROR), + OD_ERROR_MSG_MAP(OD_OP_GFX_PCC_ERROR), + OD_ERROR_MSG_MAP(OD_POWER_FEATURE_CTRL_ERROR), +}; + static int smu_v14_0_2_upload_overdrive_table(struct smu_context *smu, OverDriveTableExternal_t *od_table) { - int ret; - ret = smu_cmn_update_table(smu, - SMU_TABLE_OVERDRIVE, - 0, - (void *)od_table, - true); - if (ret) - dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + uint32_t read_arg = 0; + int ret, od_error_type; + + ret = smu_cmn_update_table_read_arg(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + &read_arg, + true); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table, ret:%d\n", ret); + if ((read_arg & 
0xff) == TABLE_TRANSFER_FAILED) { + od_error_type = read_arg >> 16; + dev_err(smu->adev->dev, "Invalid overdrive table content: %s (%d)\n", + od_error_type < ARRAY_SIZE(od_error_message) ? + od_error_message[od_error_type] : "unknown", + od_error_type); + } + } return ret; } @@ -2374,6 +2418,7 @@ static int smu_v14_0_2_od_restore_table_single(struct smu_context *smu, long inp } od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + od_table->OverDriveTable.FeatureCtrlMask &= ~BIT(PP_OD_FEATURE_FAN_LEGACY_BIT); break; case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: od_table->OverDriveTable.FanZeroRpmEnable = @@ -2402,7 +2447,8 @@ static int smu_v14_0_2_od_restore_table_single(struct smu_context *smu, long inp od_table->OverDriveTable.FanMinimumPwm = boot_overdrive_table->OverDriveTable.FanMinimumPwm; od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; - od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_LEGACY_BIT); + od_table->OverDriveTable.FeatureCtrlMask &= ~BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); @@ -2572,6 +2618,7 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FanLinearPwmPoints[input[0]] = input[2]; od_table->OverDriveTable.FanMode = FAN_MODE_MANUAL_LINEAR; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + od_table->OverDriveTable.FeatureCtrlMask &= ~BIT(PP_OD_FEATURE_FAN_LEGACY_BIT); break; case PP_OD_EDIT_ACOUSTIC_LIMIT: @@ -2641,7 +2688,7 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, break; case PP_OD_EDIT_FAN_MINIMUM_PWM: - if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_LEGACY_BIT)) { dev_warn(adev->dev, "Fan curve setting not supported!\n"); return -ENOTSUPP; } @@ -2659,7 +2706,8 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FanMinimumPwm = input[0]; od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; - od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_LEGACY_BIT); + od_table->OverDriveTable.FeatureCtrlMask &= ~BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c index cc2babc6a341..db85186f2d66 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c @@ -344,7 +344,12 @@ static int smu_v15_0_8_get_metrics_table_internal(struct smu_context *smu, uint3 } amdgpu_device_invalidate_hdp(smu->adev, NULL); - memcpy(smu_table->metrics_table, table->cpu_addr, table_size); + ret = smu_cmn_vram_cpy(smu, smu_table->metrics_table, + table->cpu_addr, table_size); + if (ret) { + mutex_unlock(&smu_table->metrics_lock); + return ret; + } smu_table->metrics_time = jiffies; } @@ -551,9 +556,14 @@ static int smu_v15_0_8_get_system_metrics_table(struct smu_context *smu) } amdgpu_hdp_invalidate(smu->adev, NULL); + + ret = smu_cmn_vram_cpy(smu, sys_table->cache.buffer, + table->cpu_addr, + sizeof(SystemMetricsTable_t)); + if (ret) + return ret; + smu_table_cache_update_time(sys_table, jiffies); - memcpy(sys_table->cache.buffer, 
table->cpu_addr, - sizeof(SystemMetricsTable_t)); return 0; } @@ -988,9 +998,9 @@ static int smu_v15_0_8_get_static_metrics_table(struct smu_context *smu) } amdgpu_hdp_invalidate(smu->adev, NULL); - memcpy(smu_table->metrics_table, table->cpu_addr, table_size); - return 0; + return smu_cmn_vram_cpy(smu, smu_table->metrics_table, + table->cpu_addr, table_size); } static int smu_v15_0_8_fru_get_product_info(struct smu_context *smu, @@ -1601,8 +1611,6 @@ static ssize_t smu_v15_0_8_get_gpu_metrics(struct smu_context *smu, void **table uint32_t mid_mask = adev->aid_mask; MetricsTable_t *metrics; - metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); - ret = smu_v15_0_8_get_metrics_table_internal(smu, 1, NULL); if (ret) return ret; @@ -1775,7 +1783,7 @@ static int smu_v15_0_8_get_power_limit(struct smu_context *smu, *current_power_limit = power_limit; if (default_power_limit) - *max_power_limit = pptable->MaxSocketPowerLimit; + *default_power_limit = pptable->MaxSocketPowerLimit; if (max_power_limit) *max_power_limit = pptable->MaxSocketPowerLimit; @@ -1901,42 +1909,36 @@ static int smu_v15_0_8_set_soft_freq_limited_range(struct smu_context *smu, if (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) return -EINVAL; - if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - if (min >= max) { - dev_err(smu->adev->dev, - "Minimum clk should be less than the maximum allowed clock\n"); - return -EINVAL; - } + if (min >= max) { + dev_err(smu->adev->dev, + "Minimum clk should be less than the maximum allowed clock\n"); + return -EINVAL; + } - if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK) { - if ((min == pstate_table->gfxclk_pstate.curr.min) && - (max == pstate_table->gfxclk_pstate.curr.max)) - return 0; + if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK) { + if ((min == pstate_table->gfxclk_pstate.curr.min) && + (max == pstate_table->gfxclk_pstate.curr.max)) + return 0; - ret = smu_v15_0_8_set_gfx_soft_freq_limited_range(smu, - min, max); - if (!ret) { - pstate_table->gfxclk_pstate.curr.min = min; - pstate_table->gfxclk_pstate.curr.max = max; - } + ret = smu_v15_0_8_set_gfx_soft_freq_limited_range(smu, min, + max); + if (!ret) { + pstate_table->gfxclk_pstate.curr.min = min; + pstate_table->gfxclk_pstate.curr.max = max; } + } - if (clk_type == SMU_UCLK) { - if (max == pstate_table->uclk_pstate.curr.max) - return 0; - - ret = smu_v15_0_set_soft_freq_limited_range(smu, - SMU_UCLK, - 0, max, - false); - if (!ret) - pstate_table->uclk_pstate.curr.max = max; - } + if (clk_type == SMU_UCLK) { + if (max == pstate_table->uclk_pstate.curr.max) + return 0; - return ret; + ret = smu_v15_0_set_soft_freq_limited_range(smu, SMU_UCLK, 0, + max, false); + if (!ret) + pstate_table->uclk_pstate.curr.max = max; } - return 0; + return ret; } static int smu_v15_0_8_od_edit_dpm_table(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 7bd8c435466a..90c7127beabf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -496,7 +496,8 @@ static int smu_msg_v1_send_msg(struct smu_msg_ctl *ctl, } /* Read output args */ - if (ret == 0 && args->num_out_args > 0) { + if ((ret == 0 || (args->flags & SMU_MSG_FLAG_FORCE_READ_ARG)) && + args->num_out_args > 0) { __smu_msg_v1_read_out_args(ctl, args); dev_dbg(adev->dev, "smu send message: %s(%d) resp : 0x%08x", smu_get_message_name(smu, args->msg), index, reg); @@ -1060,20 +1061,24 @@ int smu_cmn_check_fw_version(struct smu_context *smu) return 0; } -int 
smu_cmn_update_table(struct smu_context *smu, - enum smu_table_id table_index, - int argument, - void *table_data, - bool drv2smu) +int smu_cmn_update_table_read_arg(struct smu_context *smu, + enum smu_table_id table_index, + int argument, + void *table_data, + uint32_t *read_arg, + bool drv2smu) { - struct smu_table_context *smu_table = &smu->smu_table; struct amdgpu_device *adev = smu->adev; + struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *table = &smu_table->driver_table; + struct smu_msg_ctl *ctl = &smu->msg_ctl; + struct smu_msg_args args; int table_id = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_TABLE, table_index); uint32_t table_size; int ret = 0; + if (!table_data || table_index >= SMU_TABLE_COUNT || table_id < 0) return -EINVAL; @@ -1088,11 +1093,19 @@ int smu_cmn_update_table(struct smu_context *smu, amdgpu_hdp_flush(adev, NULL); } - ret = smu_cmn_send_smc_msg_with_param(smu, drv2smu ? - SMU_MSG_TransferTableDram2Smu : - SMU_MSG_TransferTableSmu2Dram, - table_id | ((argument & 0xFFFF) << 16), - NULL); + args.msg = drv2smu ? SMU_MSG_TransferTableDram2Smu : SMU_MSG_TransferTableSmu2Dram; + args.args[0] = ((argument & 0xFFFF) << 16) | (table_id & 0xffff); + args.num_args = 1; + args.out_args[0] = 0; + args.num_out_args = read_arg ? 1 : 0; + args.flags = read_arg ? SMU_MSG_FLAG_FORCE_READ_ARG : 0; + args.timeout = 0; + + ret = ctl->ops->send_msg(ctl, &args); + + if (read_arg) + *read_arg = args.out_args[0]; + if (ret) return ret; @@ -1104,6 +1117,18 @@ int smu_cmn_update_table(struct smu_context *smu, return 0; } +int smu_cmn_vram_cpy(struct smu_context *smu, void *dst, const void *src, + size_t len) +{ + memcpy(dst, src, len); + + /* Don't trust the copy operation if RAS fatal error happened. */ + if (amdgpu_ras_get_fed_status(smu->adev)) + return -EHWPOISON; + + return 0; +} + int smu_cmn_write_watermarks_table(struct smu_context *smu) { void *watermarks_table = smu->smu_table.watermarks_table; @@ -1345,7 +1370,7 @@ int smu_cmn_print_dpm_clk_levels(struct smu_context *smu, level_index = 1; } - if (!is_fine_grained) { + if (!is_fine_grained || count == 1) { for (i = 0; i < count; i++) { freq_match = !is_deep_sleep && smu_cmn_freqs_match( diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index b76e86df5da7..c6ac0e876aea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -102,6 +102,9 @@ int smu_msg_send_async_locked(struct smu_msg_ctl *ctl, #define SMU_DPM_PCIE_GEN_IDX(gen) smu_cmn_dpm_pcie_gen_idx((gen)) #define SMU_DPM_PCIE_WIDTH_IDX(width) smu_cmn_dpm_pcie_width_idx((width)) +#define smu_cmn_update_table(smu, table_index, argument, table_data, drv2smu) \ + smu_cmn_update_table_read_arg((smu), (table_index), (argument), (table_data), NULL, (drv2smu)) + extern const int link_speed[]; /* Helper to Convert from PCIE Gen 1/2/3/4/5/6 to 0.1 GT/s speed units */ @@ -168,11 +171,15 @@ int smu_cmn_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version); -int smu_cmn_update_table(struct smu_context *smu, - enum smu_table_id table_index, - int argument, - void *table_data, - bool drv2smu); +int smu_cmn_update_table_read_arg(struct smu_context *smu, + enum smu_table_id table_index, + int argument, + void *table_data, + uint32_t *read_arg, + bool drv2smu); + +int smu_cmn_vram_cpy(struct smu_context *smu, void *dst, + const void *src, size_t len); int smu_cmn_write_watermarks_table(struct smu_context *smu); diff --git 
a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index d213eea71cff..ad8862d43263 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -290,13 +290,10 @@ static int amdgpu_ras_mgr_sw_init(struct amdgpu_ip_block *ip_block) /* Disabled by default */ con->uniras_enabled = false; - /* Enabled only in debug mode */ - if (adev->debug_enable_ras_aca) { + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14) || + adev->debug_enable_ras_aca) con->uniras_enabled = true; - RAS_DEV_INFO(adev, "Debug amdgpu uniras!"); - } - - if (!con->uniras_enabled) + else return 0; ras_mgr = kzalloc_obj(*ras_mgr); diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c index c83357307c55..fb4d375e87b2 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c @@ -36,22 +36,22 @@ static int amdgpu_virt_ras_get_cmd_shared_mem(struct ras_core_context *ras_core, struct amdgpu_device *adev = ras_core->dev; struct amdsriov_ras_telemetry *ras_telemetry_cpu; struct amdsriov_ras_telemetry *ras_telemetry_gpu; + void *fw_va = adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].cpu_ptr; + void *drv_va = adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].cpu_ptr; uint64_t fw_vram_usage_start_offset = 0; uint64_t ras_telemetry_offset = 0; if (!adev->virt.fw_reserve.ras_telemetry) return -EINVAL; - if (adev->mman.fw_vram_usage_va && - adev->mman.fw_vram_usage_va <= adev->virt.fw_reserve.ras_telemetry) { - fw_vram_usage_start_offset = adev->mman.fw_vram_usage_start_offset; + if (fw_va && fw_va <= adev->virt.fw_reserve.ras_telemetry) { + fw_vram_usage_start_offset = adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].offset; ras_telemetry_offset = (uintptr_t)adev->virt.fw_reserve.ras_telemetry - - (uintptr_t)adev->mman.fw_vram_usage_va; - } else if (adev->mman.drv_vram_usage_va && - adev->mman.drv_vram_usage_va <= adev->virt.fw_reserve.ras_telemetry) { - fw_vram_usage_start_offset = adev->mman.drv_vram_usage_start_offset; + (uintptr_t)fw_va; + } else if (drv_va && drv_va <= adev->virt.fw_reserve.ras_telemetry) { + fw_vram_usage_start_offset = adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].offset; ras_telemetry_offset = (uintptr_t)adev->virt.fw_reserve.ras_telemetry - - (uintptr_t)adev->mman.drv_vram_usage_va; + (uintptr_t)drv_va; } else { return -EINVAL; } @@ -517,14 +517,9 @@ int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev) (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; struct vram_blocks_ecc *blks_ecc = &virt_ras->blocks_ecc; - if (blks_ecc->shared_mem.cpu_addr) { - __set_cmd_auto_update(adev, - RAS_CMD__GET_ALL_BLOCK_ECC_STATUS, - blks_ecc->shared_mem.gpa, - blks_ecc->shared_mem.size, false); - + if (blks_ecc->shared_mem.cpu_addr) memset(blks_ecc->shared_mem.cpu_addr, 0, blks_ecc->shared_mem.size); - } + memset(blks_ecc, 0, sizeof(*blks_ecc)); return 0; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index ca4dea226f4b..ef7be20a59cd 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -345,10 +345,14 @@ static int dw_mipi_dsi_host_attach(struct mipi_dsi_host *host, if (pdata->host_ops && pdata->host_ops->attach) { ret = pdata->host_ops->attach(pdata->priv_data, device); if (ret < 0) - return ret; + goto err_remove_bridge; } return 0; + 
+err_remove_bridge: + drm_bridge_remove(&dsi->bridge); + return ret; } static int dw_mipi_dsi_host_detach(struct mipi_dsi_host *host, diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c index e6eaf9fd0251..a4bfd3ad166d 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c @@ -540,10 +540,14 @@ static int dw_mipi_dsi2_host_attach(struct mipi_dsi_host *host, if (pdata->host_ops && pdata->host_ops->attach) { ret = pdata->host_ops->attach(pdata->priv_data, device); if (ret < 0) - return ret; + goto err_remove_bridge; } return 0; + +err_remove_bridge: + drm_bridge_remove(&dsi2->bridge); + return ret; } static int dw_mipi_dsi2_host_detach(struct mipi_dsi_host *host, diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index d9b388165de1..6c427bc75896 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1293,7 +1293,7 @@ static const struct drm_edid *tda998x_edid_read(struct tda998x_priv *priv, * can't handle signals gracefully. */ if (tda998x_edid_delay_wait(priv)) - return 0; + return NULL; if (priv->rev == TDA19988) reg_clear(priv, REG_TX4, TX4_PD_RAM); @@ -1762,7 +1762,7 @@ static const struct drm_bridge_funcs tda998x_bridge_funcs = { static int tda998x_get_audio_ports(struct tda998x_priv *priv, struct device_node *np) { - const u32 *port_data; + const __be32 *port_data; u32 size; int i; diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 26953ed6b53e..a768398a1884 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1916,7 +1916,7 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, ret = wait_event_timeout(*queue, state->crtcs[i].last_vblank_count != drm_crtc_vblank_count(crtc), - msecs_to_jiffies(100)); + msecs_to_jiffies(1000)); WARN(!ret, "[CRTC:%d:%s] vblank wait timed out\n", crtc->base.id, crtc->name); @@ -3751,6 +3751,13 @@ drm_atomic_helper_duplicate_state(struct drm_device *dev, err = PTR_ERR(plane_state); goto free; } + + if (plane_state->color_pipeline) { + err = drm_atomic_add_affected_colorops(state, plane); + if (err) + goto free; + } + } drm_connector_list_iter_begin(dev, &conn_iter); @@ -3856,6 +3863,8 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, int i, ret; struct drm_plane *plane; struct drm_plane_state *new_plane_state; + struct drm_colorop *colorop; + struct drm_colorop_state *new_colorop_state; struct drm_connector *connector; struct drm_connector_state *new_conn_state; struct drm_crtc *crtc; @@ -3863,6 +3872,9 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, state->acquire_ctx = ctx; + for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) + state->colorops[i].old_state = colorop->state; + for_each_new_plane_in_state(state, plane, new_plane_state, i) state->planes[i].old_state = plane->state; diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index c598b99673fc..e7db4e4ea700 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -831,7 +831,7 @@ static void fill_palette_332(struct drm_crtc *crtc, u16 r, u16 g, u16 b, } /** - * drm_crtc_fill_palette_332 - Programs a default palette for R332-like formats + * drm_crtc_fill_palette_332 - Programs a default palette for RGB332-like formats * @crtc: The displaying CRTC * @set_palette: Callback for 
programming the hardware gamma LUT * diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 5f9fcd7d9ce4..404208bf23a6 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -99,6 +99,29 @@ enum drm_edid_internal_quirk { }; #define MICROSOFT_IEEE_OUI 0xca125c +#define AMD_IEEE_OUI 0x00001A + +#define AMD_VSDB_V3_PAYLOAD_MIN_LEN 15 +#define AMD_VSDB_V3_PAYLOAD_MAX_LEN 20 + +struct amd_vsdb_v3_payload { + u8 oui[3]; + u8 version; + u8 feature_caps; + u8 rsvd0[3]; + u8 cs_eotf_support; + u8 lum1_max; + u8 lum1_min; + u8 lum2_max; + u8 lum2_min; + u8 rsvd1[2]; + /* + * Bytes beyond AMD_VSDB_V3_PAYLOAD_MIN_LEN are optional; a + * monitor may provide a payload as short as 15 bytes. Always + * check cea_db_payload_len() before accessing extra[]. + */ + u8 extra[AMD_VSDB_V3_PAYLOAD_MAX_LEN - AMD_VSDB_V3_PAYLOAD_MIN_LEN]; +} __packed; struct detailed_mode_closure { struct drm_connector *connector; @@ -5205,6 +5228,13 @@ static bool cea_db_is_microsoft_vsdb(const struct cea_db *db) cea_db_payload_len(db) == 21; } +static bool cea_db_is_amd_vsdb(const struct cea_db *db) +{ + return cea_db_is_vendor(db, AMD_IEEE_OUI) && + cea_db_payload_len(db) >= AMD_VSDB_V3_PAYLOAD_MIN_LEN && + cea_db_payload_len(db) <= AMD_VSDB_V3_PAYLOAD_MAX_LEN; +} + static bool cea_db_is_vcdb(const struct cea_db *db) { return cea_db_is_extended_tag(db, CTA_EXT_DB_VIDEO_CAP) && @@ -6401,6 +6431,45 @@ static void drm_parse_microsoft_vsdb(struct drm_connector *connector, connector->base.id, connector->name, version, db[5]); } +static void drm_parse_amd_vsdb(struct drm_connector *connector, + const struct cea_db *db) +{ + struct drm_display_info *info = &connector->display_info; + const u8 *data = cea_db_data(db); + const struct amd_vsdb_v3_payload *p; + + p = (const struct amd_vsdb_v3_payload *)data; + + if (p->version != 0x03) { + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Unsupported AMD VSDB version %u\n", + connector->base.id, connector->name, p->version); + return; + } + + info->amd_vsdb.version = p->version; + info->amd_vsdb.replay_mode = p->feature_caps & 0x40; + info->amd_vsdb.panel_type = (p->cs_eotf_support & 0xC0) >> 6; + info->amd_vsdb.luminance_range1.max_luminance = p->lum1_max; + info->amd_vsdb.luminance_range1.min_luminance = p->lum1_min; + info->amd_vsdb.luminance_range2.max_luminance = p->lum2_max; + info->amd_vsdb.luminance_range2.min_luminance = p->lum2_min; + + /* + * The AMD VSDB v3 payload length is variable (15..20 bytes). + * All fields through p->rsvd1 (byte 14) are always present, + * but p->extra[] (bytes 15+) may not be. Any future access to + * extra[] must be guarded with a runtime length check to avoid + * out-of-bounds reads on shorter (but spec-valid) payloads. 
+ * For example:
+ *
+ *	int len = cea_db_payload_len(db);
+ *
+ *	if (len > AMD_VSDB_V3_PAYLOAD_MIN_LEN)
+ *		info->amd_vsdb.foo = p->extra[0];
+ */
+}
+
 static void drm_parse_cea_ext(struct drm_connector *connector,
 			      const struct drm_edid *drm_edid)
 {
@@ -6449,6 +6518,8 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
 			drm_parse_hdmi_forum_scds(connector, data);
 		else if (cea_db_is_microsoft_vsdb(db))
 			drm_parse_microsoft_vsdb(connector, data);
+		else if (cea_db_is_amd_vsdb(db))
+			drm_parse_amd_vsdb(connector, db);
 		else if (cea_db_is_y420cmdb(db))
 			parse_cta_y420cmdb(connector, db, &y420cmdb_map);
 		else if (cea_db_is_y420vdb(db))
@@ -6641,6 +6712,7 @@ static void drm_reset_display_info(struct drm_connector *connector)
 
 	info->quirks = 0;
 	info->source_physical_address = CEC_PHYS_ADDR_INVALID;
+	memset(&info->amd_vsdb, 0, sizeof(info->amd_vsdb));
 }
 
 static void update_displayid_info(struct drm_connector *connector,
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index a80a335f4148..1541fc8a9ac2 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -490,7 +490,7 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off,
 	 * the number of horizontal pixels that need an update.
 	 */
 	off_t bit_off = (off % line_length) * 8;
-	off_t bit_end = (end % line_length) * 8;
+	off_t bit_end = bit_off + len * 8;
 	x1 = bit_off / info->var.bits_per_pixel;
 	x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index d6424267260b..51a887cc7fd7 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1019,7 +1019,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file_priv)
 {
 	struct drm_gem_change_handle *args = data;
-	struct drm_gem_object *obj;
+	struct drm_gem_object *obj, *idrobj;
 	int handle, ret;
 
 	if (!drm_core_check_feature(dev, DRIVER_GEM))
@@ -1042,8 +1042,29 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
 
 	mutex_lock(&file_priv->prime.lock);
 	spin_lock(&file_priv->table_lock);
+
+	/* When drm_gem_handle_create_tail() allocates an idr entry for an
+	 * object, it must first allocate the slot as NULL and only replace
+	 * it with the object once setup is complete. That is not necessary
+	 * here, because the only operations that could race are drm_prime
+	 * bookkeeping, and we hold the prime lock.
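+	 *
+	 * For reference, the create_tail pattern is roughly (sketch only,
+	 * not the exact code):
+	 *
+	 *	ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT);
+	 *	... finish setting up the object ...
+	 *	idr_replace(&file_priv->object_idr, obj, handle);
+	 *
+	 * so that concurrent lookups never observe a half-initialized object.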
+ */ ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1, GFP_NOWAIT); + + if (ret < 0) { + spin_unlock(&file_priv->table_lock); + goto out_unlock; + } + + idrobj = idr_replace(&file_priv->object_idr, NULL, handle); + if (idrobj != obj) { + idr_replace(&file_priv->object_idr, idrobj, handle); + idr_remove(&file_priv->object_idr, args->new_handle); + spin_unlock(&file_priv->table_lock); + ret = -ENOENT; + goto out_unlock; + } + spin_unlock(&file_priv->table_lock); if (ret < 0) @@ -1055,6 +1076,8 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, if (ret < 0) { spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); + idrobj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(idrobj != NULL); spin_unlock(&file_priv->table_lock); goto out_unlock; } diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 9166c353f131..88808e972cc1 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -172,8 +172,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, } for (i = 0; i < info->num_planes; i++) { - unsigned int width = mode_cmd->width / (i ? info->hsub : 1); - unsigned int height = mode_cmd->height / (i ? info->vsub : 1); + unsigned int width = drm_format_info_plane_width(info, mode_cmd->width, i); + unsigned int height = drm_format_info_plane_height(info, mode_cmd->height, i); unsigned int min_size; objs[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index df4232d7e135..3cc50d697c89 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -116,16 +116,18 @@ int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit) */ mutex_lock(&gpu->sched_lock); + ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, + NULL, xa_limit_32b, &gpu->next_user_fence, + GFP_KERNEL); + if (ret < 0) + goto out_unlock; + drm_sched_job_arm(&submit->sched_job); submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); - ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, - submit->out_fence, xa_limit_32b, - &gpu->next_user_fence, GFP_KERNEL); - if (ret < 0) { - drm_sched_job_cleanup(&submit->sched_job); - goto out_unlock; - } + + xa_store(&gpu->user_fences, submit->out_fence_id, + submit->out_fence, GFP_KERNEL); /* the scheduler holds on to the job now */ kref_get(&submit->refcount); diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 29a8366513fa..e68c954ec3e6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -423,7 +423,9 @@ static int exynos_mic_probe(struct platform_device *pdev) mic->bridge.of_node = dev->of_node; - drm_bridge_add(&mic->bridge); + ret = devm_drm_bridge_add(dev, &mic->bridge); + if (ret) + goto err; pm_runtime_enable(dev); @@ -443,12 +445,8 @@ err: static void exynos_mic_remove(struct platform_device *pdev) { - struct exynos_mic *mic = platform_get_drvdata(pdev); - component_del(&pdev->dev, &exynos_mic_component_ops); pm_runtime_disable(&pdev->dev); - - drm_bridge_remove(&mic->bridge); } static const struct of_device_id exynos_mic_of_match[] = { diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index b8189cd5d864..03de219f7a64 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ 
b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -209,6 +209,8 @@ static struct intel_crtc *intel_crtc_alloc(void) crtc->base.state = &crtc_state->uapi; crtc->config = crtc_state; + INIT_LIST_HEAD(&crtc->pipe_head); + return crtc; } @@ -222,6 +224,8 @@ static void intel_crtc_destroy(struct drm_crtc *_crtc) { struct intel_crtc *crtc = to_intel_crtc(_crtc); + list_del(&crtc->pipe_head); + cpu_latency_qos_remove_request(&crtc->vblank_pm_qos); drm_crtc_cleanup(&crtc->base); @@ -308,6 +312,20 @@ static const struct drm_crtc_funcs i8xx_crtc_funcs = { .get_vblank_timestamp = intel_crtc_get_vblank_timestamp, }; +static void add_crtc_to_pipe_list(struct intel_display *display, struct intel_crtc *crtc) +{ + struct intel_crtc *iter; + + list_for_each_entry(iter, &display->pipe_list, pipe_head) { + if (crtc->pipe < iter->pipe) { + list_add_tail(&crtc->pipe_head, &iter->pipe_head); + return; + } + } + + list_add_tail(&crtc->pipe_head, &display->pipe_list); +} + static int __intel_crtc_init(struct intel_display *display, enum pipe pipe) { struct intel_plane *primary, *cursor; @@ -393,11 +411,11 @@ static int __intel_crtc_init(struct intel_display *display, enum pipe pipe) cpu_latency_qos_add_request(&crtc->vblank_pm_qos, PM_QOS_DEFAULT_VALUE); - drm_WARN_ON(display->drm, drm_crtc_index(&crtc->base) != crtc->pipe); - if (HAS_CASF(display) && crtc->num_scalers >= 2) drm_crtc_create_sharpness_strength_property(&crtc->base); + add_crtc_to_pipe_list(display, crtc); + return 0; fail: @@ -406,6 +424,31 @@ fail: return ret; } +#define HAS_PIPE(display, pipe) (DISPLAY_RUNTIME_INFO(display)->pipe_mask & BIT(pipe)) + +/* + * Expose the pipes in order A, C, B, D on discrete platforms to trick user + * space into using pipes that are more likely to be available for both a) user + * space if pipe B has been reserved for the joiner, and b) the joiner if pipe A + * doesn't need the joiner. + * + * Swap pipes B and C only if both are available i.e. not fused off. + */ +static enum pipe reorder_pipe(struct intel_display *display, enum pipe pipe) +{ + if (!display->platform.dgfx || !HAS_PIPE(display, PIPE_B) || !HAS_PIPE(display, PIPE_C)) + return pipe; + + switch (pipe) { + case PIPE_B: + return PIPE_C; + case PIPE_C: + return PIPE_B; + default: + return pipe; + } +} + int intel_crtc_init(struct intel_display *display) { enum pipe pipe; @@ -415,7 +458,7 @@ int intel_crtc_init(struct intel_display *display) INTEL_NUM_PIPES(display), str_plural(INTEL_NUM_PIPES(display))); for_each_pipe(display, pipe) { - ret = __intel_crtc_init(display, pipe); + ret = __intel_crtc_init(display, reorder_pipe(display, pipe)); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 10b6c6fcb03f..ad2fe10b6b1f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5939,17 +5939,6 @@ static int intel_atomic_check_joiner(struct intel_atomic_state *state, return -EINVAL; } - /* - * The state copy logic assumes the primary crtc gets processed - * before the secondary crtc during the main compute_config loop. - * This works because the crtcs are created in pipe order, - * and the hardware requires primary pipe < secondary pipe as well. - * Should that change we need to rethink the logic. 
- */ - if (WARN_ON(drm_crtc_index(&primary_crtc->base) > - drm_crtc_index(&secondary_crtc->base))) - return -EINVAL; - drm_dbg_kms(display->drm, "[CRTC:%d:%s] Used as secondary for joiner primary [CRTC:%d:%s]\n", secondary_crtc->base.base.id, secondary_crtc->base.name, @@ -6327,9 +6316,7 @@ static int intel_atomic_check_config(struct intel_atomic_state *state, for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { if (!intel_crtc_needs_modeset(new_crtc_state)) { - if (intel_crtc_is_joiner_secondary(new_crtc_state)) - copy_joiner_crtc_state_nomodeset(state, crtc); - else + if (!intel_crtc_is_joiner_secondary(new_crtc_state)) intel_crtc_copy_uapi_to_hw_state_nomodeset(state, crtc); continue; } @@ -6460,8 +6447,11 @@ int intel_atomic_check(struct drm_device *dev, goto fail; for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { - if (!intel_crtc_needs_modeset(new_crtc_state)) + if (!intel_crtc_needs_modeset(new_crtc_state)) { + if (intel_crtc_is_joiner_secondary(new_crtc_state)) + copy_joiner_crtc_state_nomodeset(state, crtc); continue; + } if (intel_crtc_is_joiner_secondary(new_crtc_state)) { drm_WARN_ON(display->drm, new_crtc_state->uapi.enable); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 552a59d19e0f..1e76a455d7c4 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -212,22 +212,23 @@ enum phy_fia { base.head) \ for_each_if((intel_plane)->pipe == (intel_crtc)->pipe) -#define for_each_intel_crtc(dev, intel_crtc) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) +#define for_each_intel_crtc(dev, crtc) \ + list_for_each_entry((crtc), \ + &to_intel_display(dev)->pipe_list, \ + pipe_head) -#define for_each_intel_crtc_in_pipe_mask(dev, intel_crtc, pipe_mask) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) \ - for_each_if((pipe_mask) & BIT(intel_crtc->pipe)) +#define for_each_intel_crtc_reverse(dev, crtc) \ + list_for_each_entry_reverse((crtc), \ + &to_intel_display(dev)->pipe_list, \ + pipe_head) + +#define for_each_intel_crtc_in_pipe_mask(dev, crtc, pipe_mask) \ + for_each_intel_crtc((dev), (crtc)) \ + for_each_if((pipe_mask) & BIT((crtc)->pipe)) -#define for_each_intel_crtc_in_pipe_mask_reverse(dev, intel_crtc, pipe_mask) \ - list_for_each_entry_reverse((intel_crtc), \ - &(dev)->mode_config.crtc_list, \ - base.head) \ - for_each_if((pipe_mask) & BIT((intel_crtc)->pipe)) +#define for_each_intel_crtc_in_pipe_mask_reverse(dev, crtc, pipe_mask) \ + for_each_intel_crtc_reverse((dev), (crtc)) \ + for_each_if((pipe_mask) & BIT((crtc)->pipe)) #define for_each_intel_encoder(dev, intel_encoder) \ list_for_each_entry(intel_encoder, \ @@ -269,14 +270,6 @@ enum phy_fia { (__i)++) \ for_each_if(plane) -#define for_each_old_intel_crtc_in_state(__state, crtc, old_crtc_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_crtc && \ - ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ - (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), 1); \ - (__i)++) \ - for_each_if(crtc) - #define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \ for ((__i) = 0; \ (__i) < (__state)->base.dev->mode_config.num_total_plane && \ @@ -285,22 +278,6 @@ enum phy_fia { (__i)++) \ for_each_if(plane) -#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ - for ((__i) = 0; \ - (__i) < 
(__state)->base.dev->mode_config.num_crtc && \ - ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ - (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ - (__i)++) \ - for_each_if(crtc) - -#define for_each_new_intel_crtc_in_state_reverse(__state, crtc, new_crtc_state, __i) \ - for ((__i) = (__state)->base.dev->mode_config.num_crtc - 1; \ - (__i) >= 0 && \ - ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ - (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ - (__i)--) \ - for_each_if(crtc) - #define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \ for ((__i) = 0; \ (__i) < (__state)->base.dev->mode_config.num_total_plane && \ @@ -310,23 +287,32 @@ enum phy_fia { (__i)++) \ for_each_if(plane) +#define for_each_old_intel_crtc_in_state(__state, crtc, old_crtc_state, __i) \ + for_each_intel_crtc((__state)->base.dev, (crtc)) \ + for_each_if(((__i) = drm_crtc_index(&(crtc)->base), (void)(__i), \ + (old_crtc_state) = intel_atomic_get_old_crtc_state((__state), (crtc)))) + +#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ + for_each_intel_crtc((__state)->base.dev, (crtc)) \ + for_each_if(((__i) = drm_crtc_index(&(crtc)->base), (void)(__i), \ + (new_crtc_state) = intel_atomic_get_new_crtc_state((__state), (crtc)))) + +#define for_each_new_intel_crtc_in_state_reverse(__state, crtc, new_crtc_state, __i) \ + for_each_intel_crtc_reverse((__state)->base.dev, (crtc)) \ + for_each_if(((__i) = drm_crtc_index(&(crtc)->base), (void)(__i), \ + (new_crtc_state) = intel_atomic_get_new_crtc_state((__state), (crtc)))) + #define for_each_oldnew_intel_crtc_in_state(__state, crtc, old_crtc_state, new_crtc_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_crtc && \ - ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ - (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), \ - (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ - (__i)++) \ - for_each_if(crtc) + for_each_intel_crtc((__state)->base.dev, (crtc)) \ + for_each_if(((__i) = drm_crtc_index(&(crtc)->base), (void)(__i), \ + (old_crtc_state) = intel_atomic_get_old_crtc_state((__state), (crtc)), \ + (new_crtc_state) = intel_atomic_get_new_crtc_state((__state), (crtc)))) #define for_each_oldnew_intel_crtc_in_state_reverse(__state, crtc, old_crtc_state, new_crtc_state, __i) \ - for ((__i) = (__state)->base.dev->mode_config.num_crtc - 1; \ - (__i) >= 0 && \ - ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ - (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), \ - (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ - (__i)--) \ - for_each_if(crtc) + for_each_intel_crtc_reverse((__state)->base.dev, (crtc)) \ + for_each_if(((__i) = drm_crtc_index(&(crtc)->base), (void)(__i), \ + (old_crtc_state) = intel_atomic_get_old_crtc_state((__state), (crtc)), \ + (new_crtc_state) = intel_atomic_get_new_crtc_state((__state), (crtc)))) #define intel_atomic_crtc_state_for_each_plane_state( \ plane, plane_state, \ diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index d708d322aa85..d9baca2d5aaf 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -294,6 +294,9 @@ struct intel_display { /* Parent, or core, driver functions exposed to 
display */
 	const struct intel_display_parent_interface *parent;
 
+	/* list of all intel_crtcs sorted by pipe */
+	struct list_head pipe_list;
+
 	/* Display functions */
 	struct {
 		/* Top level crtc-ish functions */
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 23bfecc983e8..9c2f7ad6c7b7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -117,6 +117,7 @@ static void intel_mode_config_init(struct intel_display *display)
 	drm_mode_config_init(display->drm);
 	INIT_LIST_HEAD(&display->global.obj_list);
+	INIT_LIST_HEAD(&display->pipe_list);
 
 	mode_config->min_width = 0;
 	mode_config->min_height = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index e2496db1642a..f6cd0a062090 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1484,6 +1484,7 @@ struct intel_flipq {
 struct intel_crtc {
 	struct drm_crtc base;
+	struct list_head pipe_head;
 	enum pipe pipe;
 	/*
 	 * Whether the crtc and the connected output pipeline is active. Implies
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index d0c76632a946..a8d56ebf06a2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -615,8 +615,13 @@ check_if_vesa_backlight_possible(struct intel_dp *intel_dp)
 	int ret;
 	u8 bit_min, bit_max;
 
-	if (!(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP))
-		return true;
+	/*
+	 * Since we only support the fully AUX-based VESA backlight interface,
+	 * make sure that both backlight enabling and brightness adjustment
+	 * are possible via AUX.
+	 */
+	if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP &&
+	      intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP))
+		return false;
 
 	ret = drm_dp_dpcd_read_byte(&intel_dp->aux, DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN, &bit_min);
 	if (ret < 0)
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 998c3faf5f2e..53c10ae76ab5 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -2981,7 +2981,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 		return ret;
 
 	do {
-		bool cursor_in_su_area;
+		bool cursor_in_su_area = false;
 
 		/*
 		 * Adjust su area to cover cursor fully as necessary
diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c
index d45b3bcc6ef0..e0ac4e2ce4dc 100644
--- a/drivers/gpu/drm/i915/display/skl_watermark.c
+++ b/drivers/gpu/drm/i915/display/skl_watermark.c
@@ -4028,8 +4028,8 @@ void intel_wm_state_verify(struct intel_atomic_state *state,
 		}
 
 		/* DDB */
-		hw_ddb_entry = &hw->ddb[PLANE_CURSOR];
-		sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb[PLANE_CURSOR];
+		hw_ddb_entry = &hw->ddb[plane->id];
+		sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb[plane->id];
 
 		if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) {
 			drm_err(display->drm,
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 385a634c3ed0..d9be7a5a239c 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -750,9 +750,8 @@ static bool has_auxccs(struct drm_device *drm)
 {
 	struct drm_i915_private *i915 = to_i915(drm);
 
-	return IS_GRAPHICS_VER(i915, 9, 
12) || - IS_ALDERLAKE_P(i915) || - IS_METEORLAKE(i915); + return IS_GRAPHICS_VER(i915, 9, 12) && + !HAS_FLAT_CCS(i915); } static bool has_fenced_regions(struct drm_device *drm) diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c index e154cb35f604..6193811ef7be 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_trace.c +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -558,6 +558,6 @@ pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir) &pvr_fw_trace_fops); } - debugfs_create_file("trace_mask", 0600, dir, fw_trace, + debugfs_create_file("trace_mask", 0600, dir, pvr_dev, &pvr_fw_trace_mask_fops); } diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 5d8475e4895e..517ff2c31dce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -875,7 +875,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev, /* Remove conflicting drivers (vesafb, efifb etc). */ ret = aperture_remove_conflicting_pci_devices(pdev, driver_pci.name); if (ret) - return ret; + goto fail_nvkm; pci_set_master(pdev); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 82621ede42e1..20dba02d6175 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -686,7 +686,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, } nvbo = (void *)(unsigned long)bo[r->reloc_bo_index].user_priv; - if (unlikely(r->reloc_bo_offset + 4 > + if (unlikely((u64)r->reloc_bo_offset + 4 > nvbo->bo.base.size)) { NV_PRINTK(err, cli, "reloc outside of bo\n"); ret = -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 72848ed80df7..b101e14f841e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2513,6 +2513,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bar = { 0x00000001, tu102_bar_new }, + .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga100_fb_new }, @@ -2529,7 +2530,6 @@ nv170_chipset = { .vfn = { 0x00000001, ga100_vfn_new }, .ce = { 0x000003ff, ga100_ce_new }, .fifo = { 0x00000001, ga100_fifo_new }, - .sec2 = { 0x00000001, tu102_sec2_new }, }; static const struct nvkm_device_chip @@ -3341,7 +3341,6 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; - case 0x170: device->chip = &nv170_chipset; break; case 0x172: device->chip = &nv172_chipset; break; case 0x173: device->chip = &nv173_chipset; break; case 0x174: device->chip = &nv174_chipset; break; @@ -3361,6 +3360,14 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x1b6: device->chip = &nv1b6_chipset; break; case 0x1b7: device->chip = &nv1b7_chipset; break; default: + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + if (!device->chip) { nvdev_error(device, "unknown chipset (%08x)\n", boot0); ret = -ENODEV; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c index fdd820eeef81..27a13aeccd3c 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c @@ -41,11 +41,15 @@ ga100_gsp_flcn = { static const struct nvkm_gsp_func ga100_gsp = { .flcn = &ga100_gsp_flcn, + .fwsec = &tu102_gsp_fwsec, .sig_section = ".fwsignature_ga100", .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c index dd82c76b8b9a..19cb269e7a26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c @@ -318,13 +318,8 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* - * Calculate FB layout. FRTS is a memory region created by the FWSEC-FRTS firmware. - * FWSEC comes from VBIOS. So on systems with no VBIOS (e.g. GA100), the FRTS does - * not exist. Therefore, use the existence of VBIOS to determine whether to reserve - * an FRTS region. - */ - gsp->fb.wpr2.frts.size = device->bios ? 0x100000 : 0; + /* Calculate FB layout. */ + gsp->fb.wpr2.frts.size = 0x100000; gsp->fb.wpr2.frts.addr = ALIGN_DOWN(gsp->fb.bios.addr, 0x20000) - gsp->fb.wpr2.frts.size; gsp->fb.wpr2.boot.size = gsp->boot.fw.size; @@ -348,12 +343,9 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* Only boot FWSEC-FRTS if it actually exists */ - if (gsp->fb.wpr2.frts.size) { - ret = nvkm_gsp_fwsec_frts(gsp); - if (WARN_ON(ret)) - return ret; - } + ret = nvkm_gsp_fwsec_frts(gsp); + if (WARN_ON(ret)) + return ret; /* Reset GSP into RISC-V mode. */ ret = gsp->func->reset(gsp); diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d6863b28ddc5..d592f4f4b939 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -208,6 +208,7 @@ config DRM_PANEL_HIMAX_HX83121A depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DSC_HELPER select DRM_KMS_HELPER help Say Y here if you want to enable support for Himax HX83121A-based diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index d5fe105bdbdd..658ce64c71eb 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1324,6 +1324,8 @@ static int boe_panel_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&ctx); + boe->dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&ctx, 150); return ctx.accum_err; diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c index 4f8d6d8c07e4..dbdb7e3cb7b6 100644 --- a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c +++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c @@ -98,9 +98,7 @@ static int feiyang_enable(struct drm_panel *panel) /* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */ msleep(200); - mipi_dsi_dcs_set_display_on(ctx->dsi); - - return 0; + return mipi_dsi_dcs_set_display_on(ctx->dsi); } static int feiyang_disable(struct drm_panel *panel) diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 8b2a68ee851e..a5e5c9ea7a73 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -937,6 +937,8 @@ static int 
hx83102_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&dsi_ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx); + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&dsi_ctx, 150); return dsi_ctx.accum_err; diff --git a/drivers/gpu/drm/panel/panel-himax-hx83121a.c b/drivers/gpu/drm/panel/panel-himax-hx83121a.c index ebe643ba4184..bed79aa06f46 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83121a.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83121a.c @@ -596,8 +596,8 @@ static int himax_probe(struct mipi_dsi_device *dsi) ctx = devm_drm_panel_alloc(dev, struct himax, panel, &himax_panel_funcs, DRM_MODE_CONNECTOR_DSI); - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = devm_regulator_bulk_get_const(&dsi->dev, ARRAY_SIZE(himax_supplies), diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c index e5e688cf98fd..f1430370ff94 100644 --- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c +++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c @@ -376,6 +376,8 @@ static int visionox_rm69299_probe(struct mipi_dsi_device *dsi) return PTR_ERR(ctx->reset_gpio); } + ctx->panel.prepare_prev_first = true; + ctx->panel.backlight = visionox_rm69299_create_backlight(ctx); if (IS_ERR(ctx->panel.backlight)) return dev_err_probe(dev, PTR_ERR(ctx->panel.backlight), diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 6d14b0269574..cd49859da89b 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -157,7 +157,7 @@ void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo) /** * panthor_kernel_bo_create() - Create and map a GEM object to a VM * @ptdev: Device. - * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped. + * @vm: VM to map the GEM to. * @size: Size of the buffer object. * @bo_flags: Combination of drm_panthor_bo_flags flags. * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index f8c41e36afa4..75d98dad7b1d 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1648,6 +1648,25 @@ static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size) start + size <= vm->locked_region.start + vm->locked_region.size) return 0; + /* sm_step_remap() may need a locked region that isn't a strict superset + * of the original one because of having to extend unmap boundaries beyond + * it to deal with partial unmaps of transparent huge pages. What we want + * in those cases is to lock the union of both regions. The new region must + * always overlap with the original one, because the upper and lower unmap + * boundaries in a remap operation can only shift up or down respectively, + * but never otherwise. 
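+	 *
+	 * As an illustration (numbers made up): with [0x200000, 0x400000)
+	 * already locked, a remap whose unmap has to be extended down to
+	 * 0x1c0000 because a transparent huge page straddles the lower
+	 * boundary ends up locking the union [0x1c0000, 0x400000).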
+ */ + if (vm->locked_region.size) { + u64 end = max(vm->locked_region.start + vm->locked_region.size, + start + size); + + drm_WARN_ON_ONCE(&vm->ptdev->base, (start + size <= vm->locked_region.start) || + (start >= vm->locked_region.start + vm->locked_region.size)); + + start = min(start, vm->locked_region.start); + size = end - start; + } + mutex_lock(&ptdev->mmu->as.slots_lock); if (vm->as.id >= 0 && size) { /* Lock the region that needs to be updated */ diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 2bbb1168a3ff..1e6a2392d7c6 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -118,12 +118,13 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Complete initialization. */ ret = drm_dev_register(&qdev->ddev, ent->driver_data); if (ret) - goto modeset_cleanup; + goto poll_fini; drm_client_setup(&qdev->ddev, NULL); return 0; -modeset_cleanup: +poll_fini: + drm_kms_helper_poll_fini(&qdev->ddev); qxl_modeset_fini(qdev); unload: qxl_device_fini(qdev); @@ -154,6 +155,7 @@ qxl_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + drm_kms_helper_poll_fini(dev); drm_dev_unregister(dev); drm_atomic_helper_shutdown(dev); if (pci_is_vga(pdev) && pdev->revision < 5) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 22321eb95b7d..703848fac189 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -2461,7 +2461,8 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { if ((memory_clock > 100000) && (memory_clock <= 125000)) { tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff; *dram_timimg2 &= ~0x00ff0000; @@ -3304,7 +3305,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev) pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; if ((dpm_table->mclk_table.count >= 2) && - ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) { + ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { pi->smc_state_table.MemoryLevel[1].MinVddc = pi->smc_state_table.MemoryLevel[0].MinVddc; pi->smc_state_table.MemoryLevel[1].MinVddcPhases = @@ -4493,7 +4495,8 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { for (i = 0; i < table->last; i++) { if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 5c72aad3dae7..aac6733ddd82 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -686,7 +686,8 @@ static void radeon_crtc_init(struct drm_device *dev, int index) if (radeon_crtc == NULL) return; - radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0); + radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", + WQ_HIGHPRI | WQ_PERCPU, 0); if (!radeon_crtc->flip_queue) { kfree(radeon_crtc); return; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 87fd6255c114..53d06053dec8 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -278,14 +278,13 @@ static bool 
radeon_support_enabled(struct device *dev, case CHIP_BONAIRE: case CHIP_HAWAII: - support_by_default = false; - fallthrough; case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: gen = "CIK"; module_param = radeon_cik_support; amdgpu_support_built &= IS_ENABLED(CONFIG_DRM_AMDGPU_CIK); + support_by_default = false; break; default: diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.c index 7c36c30a75b6..1a246ebbfc61 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.c @@ -513,7 +513,7 @@ static void rcar_du_cmm_setup(struct drm_crtc *crtc) struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc); struct rcar_cmm_config cmm_config = {}; - if (!rcrtc->cmm->dev) + if (!rcrtc->cmm) return; if (drm_lut) @@ -667,7 +667,7 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) if (rcar_du_has(rcrtc->dev, RCAR_DU_FEATURE_VSP1_SOURCE)) rcar_du_vsp_disable(rcrtc); - if (rcrtc->cmm->dev) + if (rcrtc->cmm) rcar_cmm_disable(rcrtc->cmm->dev); /* @@ -726,7 +726,7 @@ static void rcar_du_crtc_atomic_enable(struct drm_crtc *crtc, struct rcar_du_crtc_state *rstate = to_rcar_crtc_state(crtc->state); struct rcar_du_device *rcdu = rcrtc->dev; - if (rcrtc->cmm->dev) + if (rcrtc->cmm) rcar_cmm_enable(rcrtc->cmm->dev); rcar_du_crtc_get(rcrtc); diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index b7397827889c..360a88ca8f0c 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -741,6 +741,7 @@ static int sti_hda_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct sti_hda *hda; struct resource *res; + int ret; DRM_INFO("%s\n", __func__); @@ -779,7 +780,9 @@ static int sti_hda_probe(struct platform_device *pdev) return PTR_ERR(hda->clk_hddac); } - drm_bridge_add(&hda->bridge); + ret = devm_drm_bridge_add(dev, &hda->bridge); + if (ret) + return ret; platform_set_drvdata(pdev, hda); @@ -788,10 +791,7 @@ static int sti_hda_probe(struct platform_device *pdev) static void sti_hda_remove(struct platform_device *pdev) { - struct sti_hda *hda = platform_get_drvdata(pdev); - component_del(&pdev->dev, &sti_hda_ops); - drm_bridge_remove(&hda->bridge); } static const struct of_device_id hda_of_match[] = { diff --git a/drivers/gpu/drm/stm/lvds.c b/drivers/gpu/drm/stm/lvds.c index fe38c0984b2b..25e2ba98f36a 100644 --- a/drivers/gpu/drm/stm/lvds.c +++ b/drivers/gpu/drm/stm/lvds.c @@ -897,14 +897,14 @@ static int lvds_connector_atomic_check(struct drm_connector *connector, if (!conn_state) return -EINVAL; + if (!conn_state->crtc) + return 0; + if (list_empty(&connector->modes)) { drm_dbg(connector->dev, "connector: empty modes list\n"); return -EINVAL; } - if (!conn_state->crtc) - return -EINVAL; - panel_mode = list_first_entry(&connector->modes, struct drm_display_mode, head); diff --git a/drivers/gpu/drm/sysfb/ofdrm.c b/drivers/gpu/drm/sysfb/ofdrm.c index d38ba70f4e0d..247cf13c80a0 100644 --- a/drivers/gpu/drm/sysfb/ofdrm.c +++ b/drivers/gpu/drm/sysfb/ofdrm.c @@ -350,6 +350,7 @@ static void ofdrm_pci_release(void *data) struct pci_dev *pcidev = data; pci_disable_device(pcidev); + pci_dev_put(pcidev); } static int ofdrm_device_init_pci(struct ofdrm_device *odev) @@ -375,6 +376,7 @@ static int ofdrm_device_init_pci(struct ofdrm_device *odev) if (ret) { drm_err(dev, "pci_enable_device(%s) failed: %d\n", dev_name(&pcidev->dev), ret); + pci_dev_put(pcidev); return ret; } ret = devm_add_action_or_reset(&pdev->dev, ofdrm_pci_release, pcidev); diff 
--git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c index 3bae91d7eefe..278bb23fe4c8 100644 --- a/drivers/gpu/drm/tiny/appletbdrm.c +++ b/drivers/gpu/drm/tiny/appletbdrm.c @@ -353,7 +353,7 @@ static int appletbdrm_primary_plane_helper_atomic_check(struct drm_plane *plane, frames_size + sizeof(struct appletbdrm_fb_request_footer), 16); - appletbdrm_state->request = kzalloc(request_size, GFP_KERNEL); + appletbdrm_state->request = kvzalloc(request_size, GFP_KERNEL); if (!appletbdrm_state->request) return -ENOMEM; @@ -543,7 +543,7 @@ static void appletbdrm_primary_plane_destroy_state(struct drm_plane *plane, { struct appletbdrm_plane_state *appletbdrm_state = to_appletbdrm_plane_state(state); - kfree(appletbdrm_state->request); + kvfree(appletbdrm_state->request); kfree(appletbdrm_state->response); __drm_gem_destroy_shadow_plane_state(&appletbdrm_state->base); diff --git a/drivers/gpu/drm/tiny/arcpgu.c b/drivers/gpu/drm/tiny/arcpgu.c index 505888497482..c93d61ac0bb7 100644 --- a/drivers/gpu/drm/tiny/arcpgu.c +++ b/drivers/gpu/drm/tiny/arcpgu.c @@ -250,7 +250,8 @@ DEFINE_DRM_GEM_DMA_FOPS(arcpgu_drm_ops); static int arcpgu_load(struct arcpgu_drm_private *arcpgu) { struct platform_device *pdev = to_platform_device(arcpgu->drm.dev); - struct device_node *encoder_node = NULL, *endpoint_node = NULL; + struct device_node *encoder_node __free(device_node) = NULL; + struct device_node *endpoint_node = NULL; struct drm_connector *connector = NULL; struct drm_device *drm = &arcpgu->drm; int ret; diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 222e4ae1abbd..5d8dc5efec77 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -761,25 +761,21 @@ static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent ret = pcim_enable_device(pdev); if (ret) - goto err_free_dev; + return ret; pci_set_drvdata(pdev, dev); ret = bochs_load(bochs); if (ret) - goto err_free_dev; + return ret; ret = drm_dev_register(dev, 0); if (ret) - goto err_free_dev; + return ret; drm_client_setup(dev, NULL); return ret; - -err_free_dev: - drm_dev_put(dev); - return ret; } static void bochs_pci_remove(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 26a3689e5fd9..278bbe7a11ad 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -206,6 +206,14 @@ error_free: return NULL; } +static void __free_pages_gpu_account(struct page *p, unsigned int order, + bool reclaim) +{ + mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, + -(1 << order)); + __free_pages(p, order); +} + /* Reset the caching and pages of size 1 << order */ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, unsigned int order, struct page *p, bool reclaim) @@ -223,9 +231,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, #endif if (!pool || !ttm_pool_uses_dma_alloc(pool)) { - mod_lruvec_page_state(p, reclaim ? 
NR_GPU_RECLAIM : NR_GPU_ACTIVE, - -(1 << order)); - __free_pages(p, order); + __free_pages_gpu_account(p, order, reclaim); return; } @@ -606,7 +612,7 @@ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, */ ttm_pool_split_for_swap(restore->pool, p); copy_highpage(restore->alloced_page + i, p); - __free_pages(p, 0); + __free_pages_gpu_account(p, 0, false); } restore->restored_pages++; @@ -1068,7 +1074,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, if (flags->purge) { shrunken += num_pages; page->private = 0; - __free_pages(page, order); + __free_pages_gpu_account(page, order, false); memset(tt->pages + i, 0, num_pages * sizeof(*tt->pages)); } @@ -1109,7 +1115,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, } handle = shandle; tt->pages[i] = ttm_backup_handle_to_page_ptr(handle); - put_page(page); + __free_pages_gpu_account(page, 0, false); shrunken++; } diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 08a0e9480d70..17950fe3a0ec 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -285,13 +285,12 @@ static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout) return unode->urb; } -#define GET_URB_TIMEOUT HZ struct urb *udl_get_urb(struct udl_device *udl) { struct urb *urb; spin_lock_irq(&udl->urbs.lock); - urb = udl_get_urb_locked(udl, GET_URB_TIMEOUT); + urb = udl_get_urb_locked(udl, HZ * 2); spin_unlock_irq(&udl->urbs.lock); return urb; } diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 231e829bd709..1ca073a4ecb2 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -21,6 +21,7 @@ #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_modeset_helper_vtables.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -342,8 +343,10 @@ static void udl_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atom return; urb = udl_get_urb(udl); - if (!urb) + if (!urb) { + drm_err_ratelimited(dev, "get urb failed when enabling crtc\n"); goto out; + } buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 8f061b6a05c6..ee4512db294b 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -399,6 +399,11 @@ v3d_get_multisync_submit_deps(struct drm_file *file_priv, if (multisync.pad) return -EINVAL; + if (!multisync.in_sync_count && !multisync.out_sync_count) { + drm_dbg(&v3d->drm, "Empty multisync extension\n"); + return -EINVAL; + } + ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count, multisync.out_syncs); if (ret) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 49de1c22a469..03242e8b3d87 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -88,6 +88,7 @@ xe-y += xe_bb.o \ xe_irq.o \ xe_late_bind_fw.o \ xe_lrc.o \ + xe_mem_pool.o \ xe_migrate.o \ xe_mmio.o \ xe_mmio_gem.o \ diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index a0a4ddf3bb46..00dfa68af29a 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -21,6 +21,7 @@ #include "intel_audio.h" #include "intel_bw.h" #include "intel_display.h" +#include "intel_display_core.h" #include "intel_display_device.h" #include "intel_display_driver.h" #include 
"intel_display_irq.h" diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 29c72aa4b0d2..33494b86205d 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -37,9 +37,17 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm) struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; - struct xe_gsc *gsc = >->uc.gsc; + struct xe_gsc *gsc; + + if (!gt) { + drm_dbg_kms(&xe->drm, + "not checking GSC status for HDCP2.x: media GT not present or disabled\n"); + return false; + } + + gsc = >->uc.gsc; - if (!gsc || !xe_uc_fw_is_available(&gsc->fw)) { + if (!xe_uc_fw_is_available(&gsc->fw)) { drm_dbg_kms(&xe->drm, "GSC Components not ready for HDCP2.x\n"); return false; diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 4ebaa0888a43..9c88ca3ce768 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -583,7 +583,7 @@ #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) #define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32) -#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0) +#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED) #define CPSS_AWARE_DIS REG_BIT(3) #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a7c2dc7f224c..4075edf97421 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, } /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */ - if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) + if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) { + xe_bo_free(bo); return ERR_PTR(-EINVAL); + } if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && @@ -2342,8 +2344,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, alignment = SZ_4K >> PAGE_SHIFT; } - if (type == ttm_bo_type_device && aligned_size != size) + if (type == ttm_bo_type_device && aligned_size != size) { + xe_bo_free(bo); return ERR_PTR(-EINVAL); + } if (!bo) { bo = xe_bo_alloc(); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ff8317bfc1ae..9d19940b8fc0 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -18,6 +18,7 @@ #include "xe_ggtt_types.h" struct xe_device; +struct xe_mem_pool_node; struct xe_vm; #define XE_BO_MAX_PLACEMENTS 3 @@ -88,7 +89,7 @@ struct xe_bo { bool ccs_cleared; /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */ - struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; + struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; /** * @cpu_caching: CPU caching mode. 
Currently only used for userspace diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 844cfafe1ec7..ad2d8f179eb6 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -45,7 +45,7 @@ static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio, u64 residency = 0; int ret; - ret = xe_pmt_telem_read(to_pci_dev(xe->drm.dev), + ret = xe_pmt_telem_read(xe->drm.dev, xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), &residency, offset, sizeof(residency)); if (ret != sizeof(residency)) { diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 7f9602b3363d..b9828da15897 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -258,6 +258,13 @@ out_unlock: return ERR_PTR(ret); } +/* + * Takes ownership of @storage: on success it is transferred to the returned + * drm_gem_object; on failure it is freed before returning the error. + * This matches the contract of xe_bo_init_locked() which frees @storage on + * its error paths, so callers need not (and must not) free @storage after + * this call. + */ static struct drm_gem_object * xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, struct dma_buf *dma_buf) @@ -271,8 +278,10 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, int ret = 0; dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); - if (!dummy_obj) + if (!dummy_obj) { + xe_bo_free(storage); return ERR_PTR(-ENOMEM); + } dummy_obj->resv = resv; xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { @@ -281,6 +290,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, if (ret) break; + /* xe_bo_init_locked() frees storage on error */ bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, 0, /* Will require 1way or 2way for vm_bind */ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); @@ -368,12 +378,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, goto out_err; } - /* Errors here will take care of freeing the bo. */ + /* + * xe_dma_buf_init_obj() takes ownership of bo on both success + * and failure, so we must not touch bo after this call. 
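+	 * The dma-buf attachment, by contrast, is still owned by this
+	 * function at this point, hence the dma_buf_detach() in the error
+	 * path below.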
+	 */
 	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
-	if (IS_ERR(obj))
+	if (IS_ERR(obj)) {
+		dma_buf_detach(dma_buf, attach);
 		return obj;
-
-
+	}
 	get_dma_buf(dma_buf);
 	obj->import_attach = attach;
 	return obj;
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index c34408cfd292..dddcdd0bb7a3 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
 	struct xe_eu_stall_data_stream *stream = file->private_data;
 	struct xe_gt *gt = stream->gt;
 
-	drm_dev_put(&gt->tile->xe->drm);
-
 	mutex_lock(&gt->eu_stall->stream_lock);
 	xe_eu_stall_disable_locked(stream);
 	xe_eu_stall_data_buf_destroy(stream);
 	xe_eu_stall_stream_free(stream);
 	mutex_unlock(&gt->eu_stall->stream_lock);
 
+	drm_dev_put(&gt->tile->xe->drm);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index b287d0e0e60a..071b8c41df43 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		if (q->vm && q->hwe->hw_engine_group) {
 			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
 			if (err)
-				goto put_exec_queue;
+				goto kill_exec_queue;
 		}
 	}
 
@@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	/* user id alloc must always be last in ioctl to prevent UAF */
 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
 	if (err)
-		goto kill_exec_queue;
+		goto del_hw_engine_group;
 
 	args->exec_queue_id = id;
 
 	return 0;
 
+del_hw_engine_group:
+	if (q->vm && q->hwe && q->hwe->hw_engine_group)
+		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
 kill_exec_queue:
 	xe_exec_queue_kill(q);
 delete_queue_group:
@@ -1760,7 +1763,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
 						     unsigned int type)
 {
-	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+	xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
 
 	dma_fence_put(q->tlb_inval[type].last_fence);
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index e5c234f3d795..0d13e357fb43 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc)
 			      &rd_offset);
 	if (err) {
 		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
-		return err;
+		goto out_bo;
 	}
 
 	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index 87a164efcc33..01fe03b9efe8 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf
 
 	if (xe_gt_is_media_type(gt))
 		for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
-			regs[n] = xe_mmio_read32(&gt->mmio, MED_VF_SW_FLAG(n));
+			regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n));
 	else
 		for (n = 0; n < VF_SW_FLAG_COUNT; n++)
-			regs[n] = xe_mmio_read32(&gt->mmio, VF_SW_FLAG(n));
+			regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n));
 
 	return 0;
 }
@@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
 
 	if (xe_gt_is_media_type(gt))
 		for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
-			xe_mmio_write32(&gt->mmio, MED_VF_SW_FLAG(n), regs[n]);
+			xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]);
 	else
 		for (n = 0; n < VF_SW_FLAG_COUNT; n++)
-			xe_mmio_write32(&gt->mmio, VF_SW_FLAG(n), regs[n]);
+			xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 81b5f01b1f65..2b835d48b565 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -512,12 +512,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads)
 		 * that starts after the execlists LRC registers. This is
 		 * required to allow the GuC to restore just the engine state
 		 * when a watchdog reset occurs.
-		 * We calculate the engine state size by removing the size of
-		 * what comes before it in the context image (which is identical
-		 * on all engines).
 		 */
 		ads_blob_write(ads, ads.eng_state_size[guc_class],
-			       real_size - xe_lrc_skip_size(xe));
+			       xe_lrc_engine_state_size(gt, class));
 		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
 			       addr_ggtt);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index a145234f662b..10556156eaad 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -261,22 +261,10 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
 static void guc_submit_fini(void *arg)
 {
 	struct xe_guc *guc = arg;
-
-	/* Forcefully kill any remaining exec queues */
-	xe_guc_ct_stop(&guc->ct);
-	guc_submit_reset_prepare(guc);
-	xe_guc_softreset(guc);
-	xe_guc_submit_stop(guc);
-	xe_uc_fw_sanitize(&guc->fw);
-	xe_guc_submit_pause_abort(guc);
-}
-
-static void guc_submit_wedged_fini(void *arg)
-{
-	struct xe_guc *guc = arg;
 	struct xe_exec_queue *q;
 	unsigned long index;
 
+	/* Drop any wedged queue refs */
 	mutex_lock(&guc->submission_state.lock);
 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
 		if (exec_queue_wedged(q)) {
@@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg)
 		}
 	}
 	mutex_unlock(&guc->submission_state.lock);
+
+	/* Forcefully kill any remaining exec queues */
+	xe_guc_ct_stop(&guc->ct);
+	guc_submit_reset_prepare(guc);
+	xe_guc_softreset(guc);
+	xe_guc_submit_stop(guc);
+	xe_uc_fw_sanitize(&guc->fw);
+	xe_guc_submit_pause_abort(guc);
 }
 
 static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
 void xe_guc_submit_wedge(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_exec_queue *q;
 	unsigned long index;
-	int err;
 
 	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
 
@@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
 		return;
 
 	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
-		err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
-					       guc_submit_wedged_fini, guc);
-		if (err) {
-			xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
-				  "Although device is wedged.\n",
-				  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
-			return;
-		}
-
 		mutex_lock(&guc->submission_state.lock);
 		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
 			if (xe_exec_queue_get_unless_zero(q))
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 0fd4d4f1014a..92e423a339f1 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -506,7 +506,7 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
 
 	if 
(hwmon->xe->info.platform == XE_BATTLEMAGE) { u64 pmt_val; - ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev), + ret = xe_pmt_telem_read(hwmon->xe->drm.dev, xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), &pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET, sizeof(pmt_val)); if (ret != sizeof(pmt_val)) { diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 9d12a0d2f0b5..4af9f0d7c6f3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -746,9 +746,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe) return 80 * sizeof(u32); } -size_t xe_lrc_skip_size(struct xe_device *xe) +/** + * xe_lrc_engine_state_size() - Get size of the engine state within LRC + * @gt: the &xe_gt struct instance + * @class: Hardware engine class + * + * Returns: Size of the engine state + */ +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class) { - return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); + return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt)); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) @@ -1214,7 +1221,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, if (xe_gt_WARN_ON(lrc->gt, max_len < 3)) return -ENOSPC; - *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1); *cmd++ = CS_DEBUG_MODE2(0).addr; *cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e7c975f9e2d9..5440663183f6 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -130,7 +130,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); size_t xe_lrc_reg_size(struct xe_device *xe); -size_t xe_lrc_skip_size(struct xe_device *xe); +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_mem_pool.c b/drivers/gpu/drm/xe/xe_mem_pool.c new file mode 100644 index 000000000000..d5e24d6aa88d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2026 Intel Corporation + */ + +#include <linux/kernel.h> + +#include <drm/drm_managed.h> + +#include "instructions/xe_mi_commands.h" +#include "xe_bo.h" +#include "xe_device_types.h" +#include "xe_map.h" +#include "xe_mem_pool.h" +#include "xe_mem_pool_types.h" +#include "xe_tile_printk.h" + +/** + * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an + * XE tile. + * + * The XE memory pool is a DRM MM manager that provides sub-allocation of memory + * from a backing buffer object (BO) on a specific XE tile. It is designed to + * manage memory for GPU workloads, allowing for efficient allocation and + * deallocation of memory regions within the BO. + * + * The memory pool maintains a primary BO that is pinned in the GGTT and mapped + * into the CPU address space for direct access. Optionally, it can also maintain + * a shadow BO that can be used for atomic updates to the primary BO's contents. + * + * The API provided by the memory pool allows clients to allocate and free memory + * regions, retrieve GPU and CPU addresses, and synchronize data between the + * primary and shadow BOs as needed. 
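+ *
+ * A minimal lifecycle sketch, using only the API documented below (node
+ * allocation/free helpers and swap_guard locking wrappers are omitted):
+ *
+ *	pool = xe_mem_pool_init(tile, size, guard,
+ *				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
+ *	xe_mem_pool_sync(pool);			- seed the shadow copy
+ *	... write new contents into the shadow BO ...
+ *	xe_mem_pool_swap_shadow_locked(pool);	- publish them atomically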
+ */ +struct xe_mem_pool { + /** @base: Range allocator over [0, @size) in bytes */ + struct drm_mm base; + /** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */ + struct xe_bo *bo; + /** @shadow: Shadow BO for atomic command updates. */ + struct xe_bo *shadow; + /** @swap_guard: Mutex serializing swap and sync operations on @bo and @shadow */ + struct mutex swap_guard; + /** @cpu_addr: CPU virtual address of the active BO. */ + void *cpu_addr; + /** @is_iomem: Indicates if the BO mapping is I/O memory. */ + bool is_iomem; +}; + +static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node) +{ + return container_of(node->sa_node.mm, struct xe_mem_pool, base); +} + +static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool) +{ + return pool->bo->tile; +} + +static void fini_pool_action(struct drm_device *drm, void *arg) +{ + struct xe_mem_pool *pool = arg; + + if (pool->is_iomem) + kvfree(pool->cpu_addr); + + drm_mm_takedown(&pool->base); +} + +static int pool_shadow_init(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool->bo->tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_bo *shadow; + int ret; + + xe_assert(xe, !pool->shadow); + + ret = drmm_mutex_init(&xe->drm, &pool->swap_guard); + if (ret) + return ret; + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&pool->swap_guard); + fs_reclaim_release(GFP_KERNEL); + } + shadow = xe_managed_bo_create_pin_map(xe, tile, + xe_bo_size(pool->bo), + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(shadow)) + return PTR_ERR(shadow); + + pool->shadow = shadow; + + return 0; +} + +/** + * xe_mem_pool_init() - Initialize memory pool. + * @tile: the &xe_tile to allocate on. + * @size: number of bytes to allocate. + * @guard: the size of the guard region at the end of the BO that is not + * sub-allocated, in bytes. + * @flags: pool creation flags, e.g. %XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY to + * also create a shadow pool. + * + * Initializes a memory pool for sub-allocating memory from a backing BO on the + * specified XE tile. The backing BO is pinned in the GGTT and mapped into + * the CPU address space for direct access. Optionally, a shadow BO can also be + * initialized for atomic updates to the primary BO's contents. + * + * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
+ */ +struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size, + u32 guard, int flags) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_mem_pool *pool; + struct xe_bo *bo; + u32 managed_size; + int ret; + + xe_tile_assert(tile, size > guard); + managed_size = size - guard; + + pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(bo)) { + xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n", + size / SZ_1K, bo); + return ERR_CAST(bo); + } + pool->bo = bo; + pool->is_iomem = bo->vmap.is_iomem; + + if (pool->is_iomem) { + pool->cpu_addr = kvzalloc(size, GFP_KERNEL); + if (!pool->cpu_addr) + return ERR_PTR(-ENOMEM); + } else { + pool->cpu_addr = bo->vmap.vaddr; + } + + if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) { + ret = pool_shadow_init(pool); + + if (ret) + goto out_err; + } + + drm_mm_init(&pool->base, 0, managed_size); + ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool); + if (ret) + return ERR_PTR(ret); + + return pool; + +out_err: + if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) + xe_tile_err(tile, + "Failed to initialize shadow BO for mem pool (%d)\n", ret); + if (bo->vmap.is_iomem) + kvfree(pool->cpu_addr); + return ERR_PTR(ret); +} + +/** + * xe_mem_pool_sync() - Copy the entire contents of the primary pool to the shadow pool. + * @pool: the memory pool containing the primary and shadow BOs. + * + * Copies the entire contents of the primary pool to the shadow pool. This must + * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY + * flag to ensure that the shadow pool has the same initial contents as the primary + * pool. After this initial synchronization, clients can choose to synchronize the + * shadow pool with the primary pool on a per-node basis using + * xe_mem_pool_sync_shadow_locked() as needed. + * + * Return: None. + */ +void xe_mem_pool_sync(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + + xe_tile_assert(tile, pool->shadow); + + xe_map_memcpy_to(xe, &pool->shadow->vmap, 0, + pool->cpu_addr, xe_bo_size(pool->bo)); +} + +/** + * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO. + * @pool: the memory pool containing the primary and shadow BOs. + * + * Swaps the primary buffer object with the shadow buffer object in the mem + * pool. This allows for atomic updates to the contents of the primary BO + * by first writing to the shadow BO and then swapping it with the primary BO. + * The @swap_guard mutex must be held to ensure synchronization with any + * concurrent swap operations. + * + * Return: None. + */ +void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool_to_tile(pool); + + xe_tile_assert(tile, pool->shadow); + lockdep_assert_held(&pool->swap_guard); + + swap(pool->bo, pool->shadow); + if (!pool->bo->vmap.is_iomem) + pool->cpu_addr = pool->bo->vmap.vaddr; +} + +/** + * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool. + * @node: the node allocated in the memory pool. + * + * Copies the specified node's contents from the primary pool to the shadow pool. + * The @swap_guard mutex must be held to ensure synchronization with any + * concurrent swap operations. + * + * Return: None.
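+ *
+ * A minimal sketch of the shadow update protocol, mirroring the usage in
+ * xe_migrate.c (the pool and node names are assumed; the pool must have
+ * been created with XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY):
+ *
+ *	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(pool)) {
+ *		xe_mem_pool_swap_shadow_locked(pool);
+ *		memset(xe_mem_pool_node_cpu_addr(node), MI_NOOP,
+ *		       node->sa_node.size);
+ *		xe_mem_pool_sync_shadow_locked(node);
+ *	}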
+ */ +void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + xe_tile_assert(tile, pool->shadow); + lockdep_assert_held(&pool->swap_guard); + + xe_map_memcpy_to(xe, &pool->shadow->vmap, + sa_node->start, + pool->cpu_addr + sa_node->start, + sa_node->size); +} + +/** + * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool. + * @pool: the memory pool + * + * Returns: GGTT address of the memory pool. + */ +u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool) +{ + return xe_bo_ggtt_addr(pool->bo); +} + +/** + * xe_mem_pool_cpu_addr() - Retrieve CPU address of the memory pool. + * @pool: the memory pool + * + * Returns: CPU virtual address of memory pool. + */ +void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool) +{ + return pool->cpu_addr; +} + +/** + * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap + * operations on a memory pool. + * @pool: the memory pool + * + * Returns: Swap guard mutex or NULL if shadow pool is not created. + */ +struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool) +{ + if (!pool->shadow) + return NULL; + + return &pool->swap_guard; +} + +/** + * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation + * to the GPU memory. + * @node: the node allocated in the memory pool to flush. + */ +void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + if (!pool->bo->vmap.is_iomem) + return; + + xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start, + pool->cpu_addr + sa_node->start, + sa_node->size); +} + +/** + * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the + * sub-allocation. + * @node: the node allocated in the memory pool to read back. + */ +void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + if (!pool->bo->vmap.is_iomem) + return; + + xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start, + &pool->bo->vmap, sa_node->start, sa_node->size); +} + +/** + * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool. + * + * Returns: node structure or an ERR_PTR(-ENOMEM). + */ +struct xe_mem_pool_node *xe_mem_pool_alloc_node(void) +{ + struct xe_mem_pool_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return ERR_PTR(-ENOMEM); + + return node; +} + +/** + * xe_mem_pool_insert_node() - Insert a node into the memory pool. + * @pool: the memory pool to insert into + * @node: the node to insert + * @size: the size of the node to be allocated in bytes. + * + * Inserts a node into the specified memory pool using drm_mm for + * allocation. + * + * Returns: 0 on success or a negative error code on failure. + */ +int xe_mem_pool_insert_node(struct xe_mem_pool *pool, + struct xe_mem_pool_node *node, u32 size) +{ + if (!pool) + return -EINVAL; + + return drm_mm_insert_node(&pool->base, &node->sa_node, size); +} + +/** + * xe_mem_pool_free_node() - Free a node allocated from the memory pool. + * @node: the node to free + * + * Returns: None.
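+ *
+ * Sketch of a full node lifecycle based on the helpers above (pool, data
+ * and the SZ_4K size are assumptions; error handling abbreviated):
+ *
+ *	struct xe_mem_pool_node *node = xe_mem_pool_alloc_node();
+ *
+ *	if (IS_ERR(node))
+ *		return PTR_ERR(node);
+ *	err = xe_mem_pool_insert_node(pool, node, SZ_4K);
+ *	if (err) {
+ *		kfree(node);
+ *		return err;
+ *	}
+ *	memcpy(xe_mem_pool_node_cpu_addr(node), data, SZ_4K);
+ *	xe_mem_pool_bo_flush_write(node);	// no-op unless the BO is iomem
+ *	...
+ *	xe_mem_pool_free_node(node);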
+ */ +void xe_mem_pool_free_node(struct xe_mem_pool_node *node) +{ + if (!node) + return; + + drm_mm_remove_node(&node->sa_node); + kfree(node); +} + +/** + * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node. + * @node: the node allocated in the memory pool + * + * Returns: CPU virtual address of the node. + */ +void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + + return xe_mem_pool_cpu_addr(pool) + node->sa_node.start; +} + +/** + * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging. + * @pool: the memory pool to be dumped. + * @p: The DRM printer to use for output. + * + * Only the drm_mm managed region is dumped, not the state of the BOs or any + * other pool information. + * + * Returns: None. + */ +void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p) +{ + drm_mm_print(&pool->base, p); +} diff --git a/drivers/gpu/drm/xe/xe_mem_pool.h b/drivers/gpu/drm/xe/xe_mem_pool.h new file mode 100644 index 000000000000..89cd2555fe91 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ +#ifndef _XE_MEM_POOL_H_ +#define _XE_MEM_POOL_H_ + +#include <linux/sizes.h> +#include <linux/types.h> + +#include <drm/drm_mm.h> +#include "xe_mem_pool_types.h" + +struct drm_printer; +struct xe_mem_pool; +struct xe_tile; + +struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size, + u32 guard, int flags); +void xe_mem_pool_sync(struct xe_mem_pool *pool); +void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool); +void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node); +u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool); +void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool); +struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool); +void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node); +void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node); +struct xe_mem_pool_node *xe_mem_pool_alloc_node(void); +int xe_mem_pool_insert_node(struct xe_mem_pool *pool, + struct xe_mem_pool_node *node, u32 size); +void xe_mem_pool_free_node(struct xe_mem_pool_node *node); +void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node); +void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_mem_pool_types.h b/drivers/gpu/drm/xe/xe_mem_pool_types.h new file mode 100644 index 000000000000..d5e926c93351 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_MEM_POOL_TYPES_H_ +#define _XE_MEM_POOL_TYPES_H_ + +#include <linux/bits.h> +#include <drm/drm_mm.h> + +#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY BIT(0) + +/** + * struct xe_mem_pool_node - Sub-range allocations from mem pool. + */ +struct xe_mem_pool_node { + /** @sa_node: drm_mm_node for this allocation.
*/ + struct drm_mm_node sa_node; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index fc918b4fba54..5fdc89ed5256 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -29,6 +29,7 @@ #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" +#include "xe_mem_pool.h" #include "xe_mocs.h" #include "xe_printk.h" #include "xe_pt.h" @@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, u32 batch_size, batch_size_allocated; struct xe_device *xe = gt_to_xe(gt); struct xe_res_cursor src_it, ccs_it; + struct xe_mem_pool *bb_pool; struct xe_sriov_vf_ccs_ctx *ctx; - struct xe_sa_manager *bb_pool; u64 size = xe_bo_size(src_bo); - struct xe_bb *bb = NULL; + struct xe_mem_pool_node *bb; u64 src_L0, src_L0_ofs; + struct xe_bb xe_bb_tmp; u32 src_L0_pt; int err; @@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size -= src_L0; } - bb = xe_bb_alloc(gt); + bb = xe_mem_pool_alloc_node(); if (IS_ERR(bb)) return PTR_ERR(bb); bb_pool = ctx->mem.ccs_bb_pool; - scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) { - xe_sa_bo_swap_shadow(bb_pool); + scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) { + xe_mem_pool_swap_shadow_locked(bb_pool); - err = xe_bb_init(bb, bb_pool, batch_size); + err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32)); if (err) { xe_gt_err(gt, "BB allocation failed.\n"); - xe_bb_free(bb, NULL); + kfree(bb); return err; } @@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size = xe_bo_size(src_bo); batch_size = 0; + xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 }; /* * Emit PTE and copy commands here. * The CCS copy command can only support limited size. 
If the size to be @@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); batch_size += EMIT_COPY_CCS_DW; - emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src); - emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len, + flush_flags); + flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt, src_L0_ofs, dst_is_pltt, src_L0, ccs_ofs, true); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len, + flush_flags); size -= src_L0; } - xe_assert(xe, (batch_size_allocated == bb->len)); + xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len)); + xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32)); src_bo->bb_ccs[read_write] = bb; xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_sa_bo_sync_shadow(bb->bo); + xe_mem_pool_sync_shadow_locked(bb); } return 0; @@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, enum xe_sriov_vf_ccs_rw_ctxs read_write) { - struct xe_bb *bb = src_bo->bb_ccs[read_write]; + struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write]; struct xe_device *xe = xe_bo_device(src_bo); + struct xe_mem_pool *bb_pool; struct xe_sriov_vf_ccs_ctx *ctx; - struct xe_sa_manager *bb_pool; u32 *cs; xe_assert(xe, IS_SRIOV_VF(xe)); @@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, ctx = &xe->sriov.vf.ccs.contexts[read_write]; bb_pool = ctx->mem.ccs_bb_pool; - guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); - xe_sa_bo_swap_shadow(bb_pool); - - cs = xe_sa_bo_cpu_addr(bb->bo); - memset(cs, MI_NOOP, bb->len * sizeof(u32)); - xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) { + xe_mem_pool_swap_shadow_locked(bb_pool); - xe_sa_bo_sync_shadow(bb->bo); + cs = xe_mem_pool_node_cpu_addr(bb); + memset(cs, MI_NOOP, bb->sa_node.size); + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_bb_free(bb, NULL); - src_bo->bb_ccs[read_write] = NULL; + xe_mem_pool_sync_shadow_locked(bb); + xe_mem_pool_free_node(bb); + src_bo->bb_ccs[read_write] = NULL; + } } /** diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 01673d2b2464..9f98d0334164 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = { static const struct xe_graphics_desc graphics_xe3p_lpg = { XE2_GFX_FEATURES, + .has_indirect_ring_state = 1, .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE), .num_geometry_xecore_fuse_regs = 3, .num_compute_xecore_fuse_regs = 3, diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 80577e4b7437..8cc313182968 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, } range_start = reg & REG_GENMASK(25, range_bit); - range_end = range_start | REG_GENMASK(range_bit, 0); + 
range_end = range_start | REG_GENMASK(range_bit - 1, 0); switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) { case RING_FORCE_TO_NONPRIV_ACCESS_RW: diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c index 6c4b16409cc9..150a241110fb 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c @@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid) for_each_gt(gt, xe, gt_id) { data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); - if (data && PTR_ERR(data) != EAGAIN) + if (!data) + continue; + if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN) return data; - if (PTR_ERR(data) == -EAGAIN) - more_data = true; + more_data = true; } if (!more_data) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index db023fb66a27..09b99fb2608b 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -14,9 +14,9 @@ #include "xe_guc.h" #include "xe_guc_submit.h" #include "xe_lrc.h" +#include "xe_mem_pool.h" #include "xe_migrate.h" #include "xe_pm.h" -#include "xe_sa.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" #include "xe_sriov_vf_ccs.h" @@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe) static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) { + struct xe_mem_pool *pool; struct xe_device *xe = tile_to_xe(tile); - struct xe_sa_manager *sa_manager; + u32 *pool_cpu_addr, *last_dw_addr; u64 bb_pool_size; - int offset, err; + int err; bb_pool_size = get_ccs_bb_pool_size(xe); xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M); - sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16, - XE_SA_BO_MANAGER_FLAG_SHADOW); - - if (IS_ERR(sa_manager)) { - xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", - sa_manager); - err = PTR_ERR(sa_manager); + pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32), + XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY); + if (IS_ERR(pool)) { + xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n", + pool); + err = PTR_ERR(pool); return err; } - offset = 0; - xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP, - bb_pool_size); - xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP, - bb_pool_size); + pool_cpu_addr = xe_mem_pool_cpu_addr(pool); + memset(pool_cpu_addr, 0, bb_pool_size); - offset = bb_pool_size - sizeof(u32); - xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END); - xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END); + last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1; + *last_dw_addr = MI_BATCH_BUFFER_END; - ctx->mem.ccs_bb_pool = sa_manager; + /* + * Sync the main copy and the shadow copy so that the shadow starts as + * a replica of the main pool. After this init step only individual BBs + * are synced, so the two copies must already match here. This is + * needed because the GuC may read the BB commands from the shadow + * copy.
+ */ + xe_mem_pool_sync(pool); + ctx->mem.ccs_bb_pool = pool; return 0; } static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) { - u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); u32 dw[10], i = 0; @@ -388,7 +392,7 @@ err_ret: #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32)) void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx) { - u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); struct xe_device *xe = gt_to_xe(ctx->mig_q->gt); @@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) struct xe_device *xe = xe_bo_device(bo); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_mem_pool_node *bb; struct xe_tile *tile; - struct xe_bb *bb; int err = 0; xe_assert(xe, IS_VF_CCS_READY(xe)); @@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; - struct xe_bb *bb; + struct xe_mem_pool_node *bb; xe_assert(xe, IS_VF_CCS_READY(xe)); @@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) */ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) { - struct xe_sa_manager *bb_pool; enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_mem_pool *bb_pool; if (!IS_VF_CCS_READY(xe)) return; @@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read"); drm_printf(p, "-------------------------\n"); - drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); + xe_mem_pool_dump(bb_pool, p); drm_puts(p, "\n"); } } diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h index 22c499943d2a..6fc8f97ef3f4 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h @@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs { XE_SRIOV_VF_CCS_CTX_COUNT }; -struct xe_migrate; -struct xe_sa_manager; - /** * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data. */ @@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx { /** @mem: memory data */ struct { /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. 
*/ - struct xe_sa_manager *ccs_bb_pool; + struct xe_mem_pool *ccs_bb_pool; } mem; }; diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index f8de6a4bf189..0b78ec2bc6a4 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { { XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED), IS_INTEGRATED), - XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE, + XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE, BANK_HASH_4KB_MODE)) }, }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 56e2db50bb36..a717a2b8dea3 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3658,6 +3658,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE && + is_cpu_addr_mirror) || XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) && (op == DRM_XE_VM_BIND_OP_MAP_USERPTR || is_cpu_addr_mirror) && @@ -4156,7 +4158,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data, int ret = 0; if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] || - args->reserved[2]))) + args->reserved[2] || args->extensions || + args->pad))) return -EINVAL; vm = xe_vm_lookup(xef, args->vm_id); diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 66f00d3f5c07..c78906dea82b 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -621,6 +621,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details return 0; } +static bool check_pat_args_are_sane(struct xe_device *xe, + struct xe_vmas_in_madvise_range *madvise_range, + u16 pat_index) +{ + u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + int i; + + /* + * Using coh_none with CPU cached buffers is not allowed on iGPU. + * On iGPU the GPU shares the LLC with the CPU, so with coh_none + * the GPU bypasses CPU caches and reads directly from DRAM, + * potentially seeing stale sensitive data from previously freed + * pages. On dGPU this restriction does not apply, because the + * platform does not provide a non-coherent system memory access + * path that would violate the DMA coherency contract. 
+ */ + if (coh_mode != XE_COH_NONE || IS_DGFX(xe)) + return true; + + for (i = 0; i < madvise_range->num_vmas; i++) { + struct xe_vma *vma = madvise_range->vmas[i]; + struct xe_bo *bo = xe_vma_bo(vma); + + if (bo) { + /* BO with WB caching + COH_NONE is not allowed */ + if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) + return false; + /* Imported dma-buf without caching info, assume cached */ + if (XE_IOCTL_DBG(xe, !bo->cpu_caching)) + return false; + } else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) || + xe_vma_is_userptr(vma))) + /* System memory (userptr/SVM) is always CPU cached */ + return false; + } + + return true; +} + static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, u32 atomic_val) { @@ -750,6 +789,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil } } + if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) { + if (!check_pat_args_are_sane(xe, &madvise_range, + args->pat_index.val)) { + err = -EINVAL; + goto free_vmas; + } + } + if (madvise_range.has_bo_vmas) { if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { if (!check_bo_args_are_sane(vm, madvise_range.vmas, diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c index 4ebb4dbe1c9b..a9baf0bfe572 100644 --- a/drivers/gpu/drm/xe/xe_vsec.c +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -140,10 +140,10 @@ static int xe_guid_decode(u32 guid, int *index, u32 *offset) return 0; } -int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, +int xe_pmt_telem_read(struct device *dev, u32 guid, u64 *data, loff_t user_offset, u32 count) { - struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_device *xe = kdev_to_xe_device(dev); void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; u32 mem_region; u32 offset; @@ -198,7 +198,6 @@ void xe_vsec_init(struct xe_device *xe) { struct intel_vsec_platform_info *info; struct device *dev = xe->drm.dev; - struct pci_dev *pdev = to_pci_dev(dev); enum xe_vsec platform; platform = get_platform_info(xe); @@ -221,6 +220,6 @@ void xe_vsec_init(struct xe_device *xe) * Register a VSEC. Cleanup is handled using device managed * resources. 
*/ - intel_vsec_register(pdev, info); + intel_vsec_register(dev, info); } MODULE_IMPORT_NS("INTEL_VSEC"); diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h index dabfb4e02d70..a25b4e6e681b 100644 --- a/drivers/gpu/drm/xe/xe_vsec.h +++ b/drivers/gpu/drm/xe/xe_vsec.h @@ -6,10 +6,10 @@ #include <linux/types.h> -struct pci_dev; +struct device; struct xe_device; void xe_vsec_init(struct xe_device *xe); -int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, u32 count); +int xe_pmt_telem_read(struct device *dev, u32 guid, u64 *data, loff_t user_offset, u32 count); #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 546296f0220b..4b1cbced06be 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, - { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) - }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, { XE_RTP_NAME("14021490052"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_MODE, |
