diff options
206 files changed, 7742 insertions, 3942 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index eee3efca40bd..f437f42b73ad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14071,7 +14071,6 @@ F: drivers/net/wireless/quantenna RADEON and AMDGPU DRM DRIVERS M: Alex Deucher <alexander.deucher@amd.com> M: Christian König <christian.koenig@amd.com> -M: David (ChunMing) Zhou <David1.Zhou@amd.com> L: amd-gfx@lists.freedesktop.org S: Supported T: git git://people.freedesktop.org/~agd5f/linux diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 3d42fc4290bc..4b34a5195c65 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1407,13 +1407,16 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, dev_info(&bridge_pdev->dev, "Intel %s Chipset\n", intel_gtt_chipsets[i].name); - mask = intel_private.driver->dma_mask_size; - if (pci_set_dma_mask(intel_private.pcidev, DMA_BIT_MASK(mask))) - dev_err(&intel_private.pcidev->dev, - "set gfx device dma mask %d-bit failed!\n", mask); - else - pci_set_consistent_dma_mask(intel_private.pcidev, - DMA_BIT_MASK(mask)); + if (bridge) { + mask = intel_private.driver->dma_mask_size; + if (pci_set_dma_mask(intel_private.pcidev, DMA_BIT_MASK(mask))) + dev_err(&intel_private.pcidev->dev, + "set gfx device dma mask %d-bit failed!\n", + mask); + else + pci_set_consistent_dma_mask(intel_private.pcidev, + DMA_BIT_MASK(mask)); + } if (intel_gtt_init() != 0) { intel_gmch_remove(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 99e5f474505d..4a03a24348f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -765,7 +765,6 @@ struct amdgpu_device { uint8_t *bios; uint32_t bios_size; struct amdgpu_bo *stolen_vga_memory; - struct amdgpu_bo *discovery_memory; uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; @@ -918,7 +917,9 @@ struct amdgpu_device { struct amdgpu_display_manager dm; /* discovery */ - uint8_t *discovery; + uint8_t *discovery_bin; + uint32_t discovery_tmr_size; + struct amdgpu_bo *discovery_memory; /* mes */ bool enable_mes; @@ -957,6 +958,7 @@ struct amdgpu_device { /* s3/s4 mask */ bool in_suspend; + bool in_hibernate; bool in_gpu_reset; enum pp_mp1_state mp1_state; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 1e41367ef74e..956cbbda4793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -444,7 +444,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count); - /* todo: add DC handling */ if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) && !amdgpu_device_has_dc_support(adev)) { struct amdgpu_encoder *enc = atif->encoder_for_bl; @@ -463,6 +462,27 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, #endif } } +#if defined(CONFIG_DRM_AMD_DC) +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) + if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) && + amdgpu_device_has_dc_support(adev)) { + struct amdgpu_display_manager *dm = &adev->dm; + struct backlight_device *bd = dm->backlight_dev; + + if (bd) { + DRM_DEBUG_DRIVER("Changing brightness to %d\n", + req.backlight_level); + + /* + * XXX backlight_device_set_brightness() is + * hardwired to post BACKLIGHT_UPDATE_SYSFS. + * It probably should accept 'reason' parameter. + */ + backlight_device_set_brightness(bd, req.backlight_level); + } + } +#endif +#endif if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { if (adev->flags & AMD_IS_PX) { pm_runtime_get_sync(adev->ddev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index d065c50582eb..3f2b695cf19e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -65,6 +65,7 @@ struct kgd_mem { struct amdgpu_sync sync; bool aql_queue; + bool is_imported; }; /* KFD Memory Eviction */ @@ -148,6 +149,9 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); +int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, + int queue_bit); + /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -219,7 +223,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem); + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0768b7eb7683..da8b31a53291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1277,7 +1277,7 @@ err: } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem) + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = mem->bo->tbo.mem.size; @@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct ttm_validate_buffer *bo_list_entry; unsigned int mapped_to_gpu_memory; int ret; + bool is_imported = 0; mutex_lock(&mem->lock); mapped_to_gpu_memory = mem->mapped_to_gpu_memory; + is_imported = mem->is_imported; mutex_unlock(&mem->lock); /* lock is not needed after this, since mem is unused and will * be freed anyway @@ -1340,8 +1342,19 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( kfree(mem->bo->tbo.sg); } + /* Update the size of the BO being freed if it was allocated from + * VRAM and is not imported. + */ + if (size) { + if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && + (!is_imported)) + *size = bo_size; + else + *size = 0; + } + /* Free the BO*/ - amdgpu_bo_unref(&mem->bo); + drm_gem_object_put_unlocked(&mem->bo->tbo.base); mutex_destroy(&mem->lock); kfree(mem); @@ -1686,7 +1699,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; - (*mem)->bo = amdgpu_bo_ref(bo); + drm_gem_object_get(&bo->tbo.base); + (*mem)->bo = bo; (*mem)->va = va; (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; @@ -1694,6 +1708,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, (*mem)->process_info = avm->process_info; add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); amdgpu_sync_create(&(*mem)->sync); + (*mem)->is_imported = true; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 7653f62b1b2d..19070226a945 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1208,7 +1208,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct drm_sched_entity *entity = p->entity; - enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; @@ -1258,7 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, trace_amdgpu_cs_ioctl(job); amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket); - priority = job->base.s_priority; drm_sched_entity_push_job(&job->base, entity); amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b038ddbb2ece..cc41e8f5ad14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -68,6 +68,7 @@ #include <linux/suspend.h> #include <drm/task_barrier.h> +#include <linux/pm_runtime.h> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); @@ -254,6 +255,32 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, uint32_t hi = ~0; uint64_t last; + +#ifdef CONFIG_64BIT + last = min(pos + size, adev->gmc.visible_vram_size); + if (last > pos) { + void __iomem *addr = adev->mman.aper_base_kaddr + pos; + size_t count = last - pos; + + if (write) { + memcpy_toio(addr, buf, count); + mb(); + amdgpu_asic_flush_hdp(adev, NULL); + } else { + amdgpu_asic_invalidate_hdp(adev, NULL); + mb(); + memcpy_fromio(buf, addr, count); + } + + if (count == size) + return; + + pos += count; + buf += count / 4; + size -= count; + } +#endif + spin_lock_irqsave(&adev->mmio_idx_lock, flags); for (last = pos + size; pos < last; pos += 4) { uint32_t tmp = pos >> 31; @@ -2891,6 +2918,14 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) return ret; } +static const struct attribute *amdgpu_dev_attributes[] = { + &dev_attr_product_name.attr, + &dev_attr_product_number.attr, + &dev_attr_serial_number.attr, + &dev_attr_pcie_replay_count.attr, + NULL +}; + /** * amdgpu_device_init - initialize the driver * @@ -3240,27 +3275,9 @@ fence_driver_init: queue_delayed_work(system_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); - r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); + r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); if (r) { - dev_err(adev->dev, "Could not create pcie_replay_count"); - return r; - } - - r = device_create_file(adev->dev, &dev_attr_product_name); - if (r) { - dev_err(adev->dev, "Could not create product_name"); - return r; - } - - r = device_create_file(adev->dev, &dev_attr_product_number); - if (r) { - dev_err(adev->dev, "Could not create product_number"); - return r; - } - - r = device_create_file(adev->dev, &dev_attr_serial_number); - if (r) { - dev_err(adev->dev, "Could not create serial_number"); + dev_err(adev->dev, "Could not create amdgpu device attr\n"); return r; } @@ -3343,12 +3360,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); - device_remove_file(adev->dev, &dev_attr_pcie_replay_count); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); - device_remove_file(adev->dev, &dev_attr_product_name); - device_remove_file(adev->dev, &dev_attr_product_number); - device_remove_file(adev->dev, &dev_attr_serial_number); + + sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) @@ -4116,6 +4131,64 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) mutex_unlock(&adev->lock_reset); } +static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) +{ + struct pci_dev *p = NULL; + + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, 1); + if (p) { + pm_runtime_enable(&(p->dev)); + pm_runtime_resume(&(p->dev)); + } +} + +static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) +{ + enum amd_reset_method reset_method; + struct pci_dev *p = NULL; + u64 expires; + + /* + * For now, only BACO and mode1 reset are confirmed + * to suffer the audio issue without proper suspended. + */ + reset_method = amdgpu_asic_reset_method(adev); + if ((reset_method != AMD_RESET_METHOD_BACO) && + (reset_method != AMD_RESET_METHOD_MODE1)) + return -EINVAL; + + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, 1); + if (!p) + return -ENODEV; + + expires = pm_runtime_autosuspend_expiration(&(p->dev)); + if (!expires) + /* + * If we cannot get the audio device autosuspend delay, + * a fixed 4S interval will be used. Considering 3S is + * the audio controller default autosuspend delay setting. + * 4S used here is guaranteed to cover that. + */ + expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; + + while (!pm_runtime_status_suspended(&(p->dev))) { + if (!pm_runtime_suspend(&(p->dev))) + break; + + if (expires < ktime_get_mono_fast_ns()) { + dev_warn(adev->dev, "failed to suspend display audio\n"); + /* TODO: abort the succeeding gpu reset? */ + return -ETIMEDOUT; + } + } + + pm_runtime_disable(&(p->dev)); + + return 0; +} + /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -4140,6 +4213,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, bool use_baco = (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? true : false; + bool audio_suspended = false; /* * Flush RAM to disk so that after reboot @@ -4197,6 +4271,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, return 0; } + /* + * Try to put the audio codec into suspend state + * before gpu reset started. + * + * Due to the power domain of the graphics device + * is shared with AZ power domain. Without this, + * we may change the audio hardware from behind + * the audio driver's back. That will trigger + * some audio codec errors. + */ + if (!amdgpu_device_suspend_display_audio(tmp_adev)) + audio_suspended = true; + amdgpu_ras_set_error_query_ready(tmp_adev, false); cancel_delayed_work_sync(&tmp_adev->delayed_init_work); @@ -4309,6 +4396,8 @@ skip_sched_resume: /*unlock kfd: SRIOV would do it separately */ if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); + if (audio_suspended) + amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 057f6ea645d7..61a26c15c8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -52,9 +52,6 @@ struct amdgpu_df_funcs { uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, uint32_t ficadl_val, uint32_t ficadh_val); - uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev, - uint32_t df_inst); - uint32_t (*get_df_inst_id)(struct amdgpu_device *adev); }; struct amdgpu_df { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 43bb22ad8add..b5d6274952a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -133,9 +133,10 @@ static int hw_id_map[MAX_HWIP] = { static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) { uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - uint64_t pos = vram_size - DISCOVERY_TMR_SIZE; + uint64_t pos = vram_size - adev->discovery_tmr_size; - amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false); + amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, + adev->discovery_tmr_size, false); return 0; } @@ -167,17 +168,18 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) uint16_t checksum; int r; - adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL); - if (!adev->discovery) + adev->discovery_tmr_size = DISCOVERY_TMR_SIZE; + adev->discovery_bin = kzalloc(adev->discovery_tmr_size, GFP_KERNEL); + if (!adev->discovery_bin) return -ENOMEM; - r = amdgpu_discovery_read_binary(adev, adev->discovery); + r = amdgpu_discovery_read_binary(adev, adev->discovery_bin); if (r) { DRM_ERROR("failed to read ip discovery binary\n"); goto out; } - bhdr = (struct binary_header *)adev->discovery; + bhdr = (struct binary_header *)adev->discovery_bin; if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) { DRM_ERROR("invalid ip discovery binary signature\n"); @@ -190,7 +192,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) size = bhdr->binary_size - offset; checksum = bhdr->binary_checksum; - if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + if (!amdgpu_discovery_verify_checksum(adev->discovery_bin + offset, size, checksum)) { DRM_ERROR("invalid ip discovery binary checksum\n"); r = -EINVAL; @@ -200,7 +202,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) info = &bhdr->table_list[IP_DISCOVERY]; offset = le16_to_cpu(info->offset); checksum = le16_to_cpu(info->checksum); - ihdr = (struct ip_discovery_header *)(adev->discovery + offset); + ihdr = (struct ip_discovery_header *)(adev->discovery_bin + offset); if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { DRM_ERROR("invalid ip discovery data table signature\n"); @@ -208,7 +210,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) goto out; } - if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + if (!amdgpu_discovery_verify_checksum(adev->discovery_bin + offset, ihdr->size, checksum)) { DRM_ERROR("invalid ip discovery data table checksum\n"); r = -EINVAL; @@ -218,9 +220,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) info = &bhdr->table_list[GC]; offset = le16_to_cpu(info->offset); checksum = le16_to_cpu(info->checksum); - ghdr = (struct gpu_info_header *)(adev->discovery + offset); + ghdr = (struct gpu_info_header *)(adev->discovery_bin + offset); - if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + if (!amdgpu_discovery_verify_checksum(adev->discovery_bin + offset, ghdr->size, checksum)) { DRM_ERROR("invalid gc data table checksum\n"); r = -EINVAL; @@ -230,16 +232,16 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) return 0; out: - kfree(adev->discovery); - adev->discovery = NULL; + kfree(adev->discovery_bin); + adev->discovery_bin = NULL; return r; } void amdgpu_discovery_fini(struct amdgpu_device *adev) { - kfree(adev->discovery); - adev->discovery = NULL; + kfree(adev->discovery_bin); + adev->discovery_bin = NULL; } int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) @@ -263,8 +265,8 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) return r; } - bhdr = (struct binary_header *)adev->discovery; - ihdr = (struct ip_discovery_header *)(adev->discovery + + bhdr = (struct binary_header *)adev->discovery_bin; + ihdr = (struct ip_discovery_header *)(adev->discovery_bin + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); num_dies = le16_to_cpu(ihdr->num_dies); @@ -272,7 +274,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) for (i = 0; i < num_dies; i++) { die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); - dhdr = (struct die_header *)(adev->discovery + die_offset); + dhdr = (struct die_header *)(adev->discovery_bin + die_offset); num_ips = le16_to_cpu(dhdr->num_ips); ip_offset = die_offset + sizeof(*dhdr); @@ -286,7 +288,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) le16_to_cpu(dhdr->die_id), num_ips); for (j = 0; j < num_ips; j++) { - ip = (struct ip *)(adev->discovery + ip_offset); + ip = (struct ip *)(adev->discovery_bin + ip_offset); num_base_address = ip->num_base_address; DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n", @@ -335,24 +337,24 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, uint16_t num_ips; int i, j; - if (!adev->discovery) { + if (!adev->discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); return -EINVAL; } - bhdr = (struct binary_header *)adev->discovery; - ihdr = (struct ip_discovery_header *)(adev->discovery + + bhdr = (struct binary_header *)adev->discovery_bin; + ihdr = (struct ip_discovery_header *)(adev->discovery_bin + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); num_dies = le16_to_cpu(ihdr->num_dies); for (i = 0; i < num_dies; i++) { die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); - dhdr = (struct die_header *)(adev->discovery + die_offset); + dhdr = (struct die_header *)(adev->discovery_bin + die_offset); num_ips = le16_to_cpu(dhdr->num_ips); ip_offset = die_offset + sizeof(*dhdr); for (j = 0; j < num_ips; j++) { - ip = (struct ip *)(adev->discovery + ip_offset); + ip = (struct ip *)(adev->discovery_bin + ip_offset); if (le16_to_cpu(ip->hw_id) == hw_id) { if (major) @@ -375,13 +377,13 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) struct binary_header *bhdr; struct gc_info_v1_0 *gc_info; - if (!adev->discovery) { + if (!adev->discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); return -EINVAL; } - bhdr = (struct binary_header *)adev->discovery; - gc_info = (struct gc_info_v1_0 *)(adev->discovery + + bhdr = (struct binary_header *)adev->discovery_bin; + gc_info = (struct gc_info_v1_0 *)(adev->discovery_bin + le16_to_cpu(bhdr->table_list[GC].offset)); adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 719a963c31a2..beb35dd12964 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1174,14 +1174,6 @@ static int amdgpu_pmops_resume(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - /* GPU comes up enabled by the bios on resume */ - if (amdgpu_device_supports_boco(drm_dev) || - amdgpu_device_supports_baco(drm_dev)) { - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - } - return amdgpu_device_resume(drm_dev, true); } @@ -1191,7 +1183,9 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_dev->dev_private; int r; + adev->in_hibernate = true; r = amdgpu_device_suspend(drm_dev, true); + adev->in_hibernate = false; if (r) return r; return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 9ae7b61f696a..25ddb482466a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, u32 cpp; u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; + AMDGPU_GEM_CREATE_VRAM_CLEARED; info = drm_get_format_info(adev->ddev, mode_cmd); cpp = info->cpp[0]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 0103acc57474..d612033a23ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -48,7 +48,7 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, return bit; } -void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit, +void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, int *mec, int *pipe, int *queue) { *queue = bit % adev->gfx.mec.num_queue_per_pipe; @@ -274,7 +274,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; - amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); + amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); /* * 1. Using pipes 2/3 from MEC 2 seems cause problems. @@ -485,6 +485,19 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) return amdgpu_ring_test_helper(kiq_ring); } +int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, + int queue_bit) +{ + int mec, pipe, queue; + int set_resource_bit = 0; + + amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); + + set_resource_bit = mec * 4 * 8 + pipe * 8 + queue; + + return set_resource_bit; +} + int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -507,7 +520,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) break; } - queue_mask |= (1ull << i); + queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i)); } DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ee698f0246d8..d43c11671a38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -364,7 +364,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev); int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, int pipe, int queue); -void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit, +void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, int *mec, int *pipe, int *queue); bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index ea7e72ecaefa..682a514f1794 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -189,10 +189,12 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) dev_dbg(&dev->pdev->dev, "Error during ACPI methods call\n"); if (adev->runpm) { - dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP); + /* only need to skip on ATPX */ + if (amdgpu_device_supports_boco(dev) && + !amdgpu_is_atpx_hybrid()) + dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP); pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); - pm_runtime_set_active(dev->dev); pm_runtime_allow(dev->dev); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 730f98aab11b..ddb4af0cc702 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -664,6 +664,121 @@ int psp_xgmi_initialize(struct psp_context *psp) return ret; } +int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; + + /* Invoke xgmi ta to get hive id */ + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + if (ret) + return ret; + + *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id; + + return 0; +} + +int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; + + /* Invoke xgmi ta to get the node id */ + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + if (ret) + return ret; + + *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id; + + return 0; +} + +int psp_xgmi_get_topology_info(struct psp_context *psp, + int number_devices, + struct psp_xgmi_topology_info *topology) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; + struct ta_xgmi_cmd_get_topology_info_output *topology_info_output; + int i; + int ret; + + if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) + return -EINVAL; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + /* Fill in the shared memory with topology information as input */ + topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; + topology_info_input->num_nodes = number_devices; + + for (i = 0; i < topology_info_input->num_nodes; i++) { + topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; + topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; + topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; + topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; + } + + /* Invoke xgmi ta to get the topology information */ + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); + if (ret) + return ret; + + /* Read the output topology information from the shared memory */ + topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info; + topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes; + for (i = 0; i < topology->num_nodes; i++) { + topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; + topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; + topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled; + topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; + } + + return 0; +} + +int psp_xgmi_set_topology_info(struct psp_context *psp, + int number_devices, + struct psp_xgmi_topology_info *topology) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; + int i; + + if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) + return -EINVAL; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO; + topology_info_input->num_nodes = number_devices; + + for (i = 0; i < topology_info_input->num_nodes; i++) { + topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; + topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; + topology_info_input->nodes[i].is_sharing_enabled = 1; + topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; + } + + /* Invoke xgmi ta to set topology information */ + return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO); +} + // ras begin static int psp_ras_init_shared_buf(struct psp_context *psp) { @@ -746,13 +861,40 @@ static int psp_ras_unload(struct psp_context *psp) int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { + struct ta_ras_shared_memory *ras_cmd; + int ret; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + /* * TODO: bypass the loading in sriov for now */ if (amdgpu_sriov_vf(psp->adev)) return 0; - return psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id); + ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id); + + if (amdgpu_ras_intr_triggered()) + return ret; + + if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) + { + DRM_WARN("RAS: Unsupported Interface"); + return -EINVAL; + } + + if (!ret) { + if (ras_cmd->ras_out_message.flags.err_inject_switch_disable_flag) { + dev_warn(psp->adev->dev, "ECC switch disabled\n"); + + ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE; + } + else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag) + dev_warn(psp->adev->dev, + "RAS internal register access blocked\n"); + } + + return ret; } int psp_ras_enable_features(struct psp_context *psp, @@ -836,6 +978,33 @@ static int psp_ras_initialize(struct psp_context *psp) return 0; } + +int psp_ras_trigger_error(struct psp_context *psp, + struct ta_ras_trigger_error_input *info) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras.ras_initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; + ras_cmd->ras_in_message.trigger_error = *info; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret) + return -EINVAL; + + /* If err_event_athub occurs error inject was successful, however + return status from TA is no long reliable */ + if (amdgpu_ras_intr_triggered()) + return 0; + + return ras_cmd->ras_status; +} // ras end // HDCP start @@ -1477,7 +1646,7 @@ static int psp_np_fw_load(struct psp_context *psp) /* Start rlc autoload after psp recieved all the gfx firmware */ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) { - ret = psp_rlc_autoload(psp); + ret = psp_rlc_autoload_start(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 7fcd63d5432c..2a56ad996d83 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -95,16 +95,6 @@ struct psp_funcs enum psp_ring_type ring_type); bool (*smu_reload_quirk)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp); - int (*xgmi_get_node_id)(struct psp_context *psp, uint64_t *node_id); - int (*xgmi_get_hive_id)(struct psp_context *psp, uint64_t *hive_id); - int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology); - int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology); - int (*ras_trigger_error)(struct psp_context *psp, - struct ta_ras_trigger_error_input *info); - int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); - int (*rlc_autoload_start)(struct psp_context *psp); int (*mem_training_init)(struct psp_context *psp); void (*mem_training_fini)(struct psp_context *psp); int (*mem_training)(struct psp_context *psp, uint32_t ops); @@ -316,18 +306,6 @@ struct amdgpu_psp_funcs { ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) #define psp_mode1_reset(psp) \ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) -#define psp_xgmi_get_node_id(psp, node_id) \ - ((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp), (node_id)) : -EINVAL) -#define psp_xgmi_get_hive_id(psp, hive_id) \ - ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp), (hive_id)) : -EINVAL) -#define psp_xgmi_get_topology_info(psp, num_device, topology) \ - ((psp)->funcs->xgmi_get_topology_info ? \ - (psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL) -#define psp_xgmi_set_topology_info(psp, num_device, topology) \ - ((psp)->funcs->xgmi_set_topology_info ? \ - (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL) -#define psp_rlc_autoload(psp) \ - ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0) #define psp_mem_training_init(psp) \ ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0) #define psp_mem_training_fini(psp) \ @@ -335,13 +313,6 @@ struct amdgpu_psp_funcs { #define psp_mem_training(psp, ops) \ ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0) -#define psp_ras_trigger_error(psp, info) \ - ((psp)->funcs->ras_trigger_error ? \ - (psp)->funcs->ras_trigger_error((psp), (info)) : -EINVAL) -#define psp_ras_cure_posion(psp, addr) \ - ((psp)->funcs->ras_cure_posion ? \ - (psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL) - #define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp)) #define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value)) @@ -369,10 +340,21 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, int psp_xgmi_initialize(struct psp_context *psp); int psp_xgmi_terminate(struct psp_context *psp); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id); +int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id); +int psp_xgmi_get_topology_info(struct psp_context *psp, + int number_devices, + struct psp_xgmi_topology_info *topology); +int psp_xgmi_set_topology_info(struct psp_context *psp, + int number_devices, + struct psp_xgmi_topology_info *topology); int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); +int psp_ras_trigger_error(struct psp_context *psp, + struct ta_ras_trigger_error_input *info); + int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8b14aee370c8..7348619253c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -502,6 +502,29 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, } /* obj end */ +void amdgpu_ras_parse_status_code(struct amdgpu_device* adev, + const char* invoke_type, + const char* block_name, + enum ta_ras_status ret) +{ + switch (ret) { + case TA_RAS_STATUS__SUCCESS: + return; + case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE: + dev_warn(adev->dev, + "RAS WARN: %s %s currently unavailable\n", + invoke_type, + block_name); + break; + default: + dev_err(adev->dev, + "RAS ERROR: %s %s error failed ret 0x%X\n", + invoke_type, + block_name, + ret); + } +} + /* feature ctl begin */ static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, struct ras_common_if *head) @@ -565,19 +588,23 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - union ta_ras_cmd_input info; + union ta_ras_cmd_input *info; int ret; if (!con) return -EINVAL; + info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); + if (!info) + return -ENOMEM; + if (!enable) { - info.disable_features = (struct ta_ras_disable_features_input) { + info->disable_features = (struct ta_ras_disable_features_input) { .block_id = amdgpu_ras_block_to_ta(head->block), .error_type = amdgpu_ras_error_to_ta(head->type), }; } else { - info.enable_features = (struct ta_ras_enable_features_input) { + info->enable_features = (struct ta_ras_enable_features_input) { .block_id = amdgpu_ras_block_to_ta(head->block), .error_type = amdgpu_ras_error_to_ta(head->type), }; @@ -586,27 +613,33 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); /* Are we alerady in that state we are going to set? */ - if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) - return 0; + if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) { + ret = 0; + goto out; + } if (!amdgpu_ras_intr_triggered()) { - ret = psp_ras_enable_features(&adev->psp, &info, enable); + ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { - dev_err(adev->dev, "RAS ERROR: %s %s feature " - "failed ret %d\n", - enable ? "enable":"disable", - ras_block_str(head->block), - ret); + amdgpu_ras_parse_status_code(adev, + enable ? "enable":"disable", + ras_block_str(head->block), + (enum ta_ras_status)ret); if (ret == TA_RAS_STATUS__RESET_NEEDED) - return -EAGAIN; - return -EINVAL; + ret = -EAGAIN; + else + ret = -EINVAL; + + goto out; } } /* setup the obj */ __amdgpu_ras_feature_enable(adev, head, enable); - - return 0; + ret = 0; +out: + kfree(info); + return ret; } /* Only used in device probe stage and called only once. */ @@ -821,10 +854,10 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, ret = -EINVAL; } - if (ret) - dev_err(adev->dev, "RAS ERROR: inject %s error failed ret %d\n", - ras_block_str(info->head.block), - ret); + amdgpu_ras_parse_status_code(adev, + "inject", + ras_block_str(info->head.block), + (enum ta_ras_status)ret); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5543c25f3c7..eff1f73302de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1957,17 +1957,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; /* - * reserve one TMR (64K) memory at the top of VRAM which holds + * reserve TMR memory at the top of VRAM which holds * IP Discovery data and is protected by PSP. */ - r = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE, - DISCOVERY_TMR_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->discovery_memory, - NULL); - if (r) - return r; + if (adev->discovery_tmr_size > 0) { + r = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - adev->discovery_tmr_size, + adev->discovery_tmr_size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->discovery_memory, + NULL); + if (r) + return r; + } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 128a667ed8fa..d399e5893170 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -149,6 +149,15 @@ static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO, amdgpu_mem_info_vram_vendor, NULL); +static const struct attribute *amdgpu_vram_mgr_attributes[] = { + &dev_attr_mem_info_vram_total.attr, + &dev_attr_mem_info_vis_vram_total.attr, + &dev_attr_mem_info_vram_used.attr, + &dev_attr_mem_info_vis_vram_used.attr, + &dev_attr_mem_info_vram_vendor.attr, + NULL +}; + /** * amdgpu_vram_mgr_init - init VRAM manager and DRM MM * @@ -173,31 +182,9 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, man->priv = mgr; /* Add the two VRAM-related sysfs files */ - ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_total); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_vram_total\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_total); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_vis_vram_total\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_used); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_vram_used\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_used); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_vram_vendor\n"); - return ret; - } + ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); + if (ret) + DRM_ERROR("Failed to register sysfs\n"); return 0; } @@ -220,11 +207,7 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) spin_unlock(&mgr->lock); kfree(mgr); man->priv = NULL; - device_remove_file(adev->dev, &dev_attr_mem_info_vram_total); - device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total); - device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); - device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); - device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor); + sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 48c0ce13f68e..90610b4f2c75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -649,31 +649,8 @@ void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) { - uint32_t df_inst_id; - uint64_t dram_base_addr = 0; - const struct amdgpu_df_funcs *df_funcs = adev->df.funcs; - - if ((!df_funcs) || - (!df_funcs->get_df_inst_id) || - (!df_funcs->get_dram_base_addr)) { - dev_warn(adev->dev, - "XGMI: relative phy_addr algorithm is not supported\n"); - return addr; - } - - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) { - dev_warn(adev->dev, - "failed to disable DF-Cstate, DF register may not be accessible\n"); - return addr; - } - - df_inst_id = df_funcs->get_df_inst_id(adev); - dram_base_addr = df_funcs->get_dram_base_addr(adev, df_inst_id); - - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) - dev_warn(adev->dev, "failed to enable DF-Cstate\n"); - - return addr + dram_base_addr; + struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi; + return (addr + xgmi->physical_node_id * xgmi->node_segment_size); } static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 5a1bd8ed1a6c..a7b8292cefee 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -686,58 +686,6 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, } } -static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev, - uint32_t df_inst) -{ - uint32_t base_addr_reg_val = 0; - uint64_t base_addr = 0; - - base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 + - df_inst * DF_3_6_SMN_REG_INST_DIST); - - if (REG_GET_FIELD(base_addr_reg_val, - DF_CS_UMC_AON0_DramBaseAddress0, - AddrRngVal) == 0) { - DRM_WARN("address range not valid"); - return 0; - } - - base_addr = REG_GET_FIELD(base_addr_reg_val, - DF_CS_UMC_AON0_DramBaseAddress0, - DramBaseAddr); - - return base_addr << 28; -} - -static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev) -{ - uint32_t xgmi_node_id = 0; - uint32_t df_inst_id = 0; - - /* Walk through DF dst nodes to find current XGMI node */ - for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) { - - xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 + - df_inst_id * DF_3_6_SMN_REG_INST_DIST); - xgmi_node_id = REG_GET_FIELD(xgmi_node_id, - DF_CS_UMC_AON0_DramLimitAddress0, - DstFabricID); - - /* TODO: establish reason dest fabric id is offset by 7 */ - xgmi_node_id = xgmi_node_id >> 7; - - if (adev->gmc.xgmi.physical_node_id == xgmi_node_id) - break; - } - - if (df_inst_id == DF_3_6_INST_CNT) { - DRM_WARN("cant match df dst id with gpu node"); - return 0; - } - - return df_inst_id; -} - const struct amdgpu_df_funcs df_v3_6_funcs = { .sw_init = df_v3_6_sw_init, .sw_fini = df_v3_6_sw_fini, @@ -752,6 +700,4 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .pmc_get_count = df_v3_6_pmc_get_count, .get_fica = df_v3_6_get_fica, .set_fica = df_v3_6_set_fica, - .get_dram_base_addr = df_v3_6_get_dram_base_addr, - .get_df_inst_id = df_v3_6_get_df_inst_id }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9fe20b782e88..64080d209223 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4577,11 +4577,13 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, - adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, - adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + /* amdgpu_mm_wreg_mmio_rlc will fall back to mmio if doesn't support rlcg_write */ + amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + adev->gfx.rlc.clear_state_gpu_addr >> 32, 0); + amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc, 0); + amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + adev->gfx.rlc.clear_state_size, 0); return 0; } @@ -5190,7 +5192,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); - WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); + amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp, 0); for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) @@ -6540,14 +6542,16 @@ static int gfx_v10_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. */ - r = smu_load_microcode(&adev->smu); - if (r) - return r; + if (adev->smu.ppt_funcs != NULL) { + r = smu_load_microcode(&adev->smu); + if (r) + return r; - r = smu_check_fw_status(&adev->smu); - if (r) { - pr_err("SMC firmware status is not correct\n"); - return r; + r = smu_check_fw_status(&adev->smu); + if (r) { + pr_err("SMC firmware status is not correct\n"); + return r; + } } } @@ -7011,7 +7015,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === CGCG /CGLS for GFX 3D Only === */ gfx_v10_0_update_3d_clock_gating(adev, enable); /* === MGCG + MGLS === */ - /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */ + gfx_v10_0_update_medium_grain_clock_gating(adev, enable); } if (adev->cg_flags & @@ -7086,6 +7090,20 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { .reset = gfx_v10_0_rlc_reset, .start = gfx_v10_0_rlc_start, .update_spm_vmid = gfx_v10_0_update_spm_vmid, +}; + +static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { + .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, + .set_safe_mode = gfx_v10_0_set_safe_mode, + .unset_safe_mode = gfx_v10_0_unset_safe_mode, + .init = gfx_v10_0_rlc_init, + .get_csb_size = gfx_v10_0_get_csb_size, + .get_csb_buffer = gfx_v10_0_get_csb_buffer, + .resume = gfx_v10_0_rlc_resume, + .stop = gfx_v10_0_rlc_stop, + .reset = gfx_v10_0_rlc_reset, + .start = gfx_v10_0_rlc_start, + .update_spm_vmid = gfx_v10_0_update_spm_vmid, .rlcg_wreg = gfx_v10_rlcg_wreg, .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; @@ -7102,11 +7120,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: - if (!enable) { - amdgpu_gfx_off_ctrl(adev, false); - cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); - } else - amdgpu_gfx_off_ctrl(adev, true); + amdgpu_gfx_off_ctrl(adev, enable); break; default: break; @@ -7672,6 +7686,19 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32_SOC15(GC, 0, mmSQ_CMD, value); +} + static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -8063,6 +8090,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v10_0_ring_soft_recovery, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -8183,9 +8211,11 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: - case CHIP_NAVI12: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; + case CHIP_NAVI12: + adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs_sriov; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index eedb92218ba5..a0988634aeaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -507,8 +507,8 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), @@ -1233,6 +1233,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, + /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, { 0, 0, 0, 0, 0 }, }; @@ -5039,10 +5041,9 @@ static int gfx_v9_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_RAVEN: case CHIP_RENOIR: - if (!enable) { + if (!enable) amdgpu_gfx_off_ctrl(adev, false); - cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); - } + if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); @@ -5066,12 +5067,7 @@ static int gfx_v9_0_set_powergating_state(void *handle, amdgpu_gfx_off_ctrl(adev, true); break; case CHIP_VEGA12: - if (!enable) { - amdgpu_gfx_off_ctrl(adev, false); - cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); - } else { - amdgpu_gfx_off_ctrl(adev, true); - } + amdgpu_gfx_off_ctrl(adev, enable); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index c1a530dbe162..a75e472b4a81 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -61,17 +61,6 @@ MODULE_FIRMWARE("amdgpu/si58_mc.bin"); #define MC_SEQ_MISC0__MT__HBM 0x60000000 #define MC_SEQ_MISC0__MT__DDR3 0xB0000000 - -static const u32 crtc_offsets[6] = -{ - SI_CRTC0_REGISTER_OFFSET, - SI_CRTC1_REGISTER_OFFSET, - SI_CRTC2_REGISTER_OFFSET, - SI_CRTC3_REGISTER_OFFSET, - SI_CRTC4_REGISTER_OFFSET, - SI_CRTC5_REGISTER_OFFSET -}; - static void gmc_v6_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index e8529e244a2b..bcd4baecfe11 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -762,6 +762,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value + * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value * * Print human readable fault information (CIK). */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 0aa5b82808d1..26976e50e2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1005,6 +1005,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value + * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value * * Print human readable fault information (VI). */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 97c80f1d5731..1de89cc3c355 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -524,181 +524,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) return 0; } -/* TODO: Fill in follow functions once PSP firmware interface for XGMI is ready. - * For now, return success and hack the hive_id so high level code can - * start testing - */ -static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp, - int number_devices, struct psp_xgmi_topology_info *topology) -{ - struct ta_xgmi_shared_memory *xgmi_cmd; - struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; - struct ta_xgmi_cmd_get_topology_info_output *topology_info_output; - int i; - int ret; - - if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) - return -EINVAL; - - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); - - /* Fill in the shared memory with topology information as input */ - topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; - topology_info_input->num_nodes = number_devices; - - for (i = 0; i < topology_info_input->num_nodes; i++) { - topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; - topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; - topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; - topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; - } - - /* Invoke xgmi ta to get the topology information */ - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); - if (ret) - return ret; - - /* Read the output topology information from the shared memory */ - topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info; - topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes; - for (i = 0; i < topology->num_nodes; i++) { - topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; - topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; - topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled; - topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; - } - - return 0; -} - -static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, - int number_devices, struct psp_xgmi_topology_info *topology) -{ - struct ta_xgmi_shared_memory *xgmi_cmd; - struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; - int i; - - if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) - return -EINVAL; - - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); - - topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO; - topology_info_input->num_nodes = number_devices; - - for (i = 0; i < topology_info_input->num_nodes; i++) { - topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; - topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; - topology_info_input->nodes[i].is_sharing_enabled = 1; - topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; - } - - /* Invoke xgmi ta to set topology information */ - return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO); -} - -static int psp_v11_0_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) -{ - struct ta_xgmi_shared_memory *xgmi_cmd; - int ret; - - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); - - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; - - /* Invoke xgmi ta to get hive id */ - ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); - if (ret) - return ret; - - *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id; - - return 0; -} - -static int psp_v11_0_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) -{ - struct ta_xgmi_shared_memory *xgmi_cmd; - int ret; - - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); - - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; - - /* Invoke xgmi ta to get the node id */ - ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); - if (ret) - return ret; - - *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id; - - return 0; -} - -static int psp_v11_0_ras_trigger_error(struct psp_context *psp, - struct ta_ras_trigger_error_input *info) -{ - struct ta_ras_shared_memory *ras_cmd; - int ret; - - if (!psp->ras.ras_initialized) - return -EINVAL; - - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; - ras_cmd->ras_in_message.trigger_error = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); - if (ret) - return -EINVAL; - - /* If err_event_athub occurs error inject was successful, however - return status from TA is no long reliable */ - if (amdgpu_ras_intr_triggered()) - return 0; - - return ras_cmd->ras_status; -} - -static int psp_v11_0_ras_cure_posion(struct psp_context *psp, uint64_t *mode_ptr) -{ -#if 0 - // not support yet. - struct ta_ras_shared_memory *ras_cmd; - int ret; - - if (!psp->ras.ras_initialized) - return -EINVAL; - - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__CURE_POISON; - ras_cmd->ras_in_message.cure_poison.mode_ptr = mode_ptr; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); - if (ret) - return -EINVAL; - - return ras_cmd->ras_status; -#else - return -EINVAL; -#endif -} - -static int psp_v11_0_rlc_autoload_start(struct psp_context *psp) -{ - return psp_rlc_autoload_start(psp); -} - static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) { int ret; @@ -995,13 +820,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_stop = psp_v11_0_ring_stop, .ring_destroy = psp_v11_0_ring_destroy, .mode1_reset = psp_v11_0_mode1_reset, - .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, - .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info, - .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, - .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id, - .ras_trigger_error = psp_v11_0_ras_trigger_error, - .ras_cure_posion = psp_v11_0_ras_cure_posion, - .rlc_autoload_start = psp_v11_0_rlc_autoload_start, .mem_training_init = psp_v11_0_memory_training_init, .mem_training_fini = psp_v11_0_memory_training_fini, .mem_training = psp_v11_0_memory_training, diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index ca7d05993ca2..745ed0fba1ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -24,6 +24,8 @@ #ifndef _TA_RAS_IF_H #define _TA_RAS_IF_H +#define RAS_TA_HOST_IF_VER 0 + /* Responses have bit 31 set */ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) @@ -36,18 +38,24 @@ enum ras_command { TA_RAS_COMMAND__TRIGGER_ERROR, }; -enum ta_ras_status { - TA_RAS_STATUS__SUCCESS = 0x00, - TA_RAS_STATUS__RESET_NEEDED = 0x01, - TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0x02, - TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0x03, - TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0x04, - TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05, - TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0x06, - TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0x07, - TA_RAS_STATUS__ERROR_TIMEOUT = 0x08, - TA_RAS_STATUS__ERROR_BLOCK_DISABLED = 0x09, - TA_RAS_STATUS__ERROR_GENERIC = 0x10, +enum ta_ras_status +{ + TA_RAS_STATUS__SUCCESS = 0x00, + TA_RAS_STATUS__RESET_NEEDED = 0xA001, + TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0xA002, + TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0xA003, + TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0xA004, + TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0xA005, + TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0xA006, + TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0xA007, + TA_RAS_STATUS__ERROR_TIMEOUT = 0xA008, + TA_RAS_STATUS__ERROR_BLOCK_DISABLED = 0XA009, + TA_RAS_STATUS__ERROR_GENERIC = 0xA00A, + TA_RAS_STATUS__ERROR_RAS_MMHUB_INIT = 0xA00B, + TA_RAS_STATUS__ERROR_GET_DEV_INFO = 0xA00C, + TA_RAS_STATUS__ERROR_UNSUPPORTED_DEV = 0xA00D, + TA_RAS_STATUS__ERROR_NOT_INITIALIZED = 0xA00E, + TA_RAS_STATUS__ERROR_TEE_INTERNAL = 0xA00F }; enum ta_ras_block { @@ -97,22 +105,39 @@ struct ta_ras_trigger_error_input { uint64_t value; // method if error injection. i.e persistent, coherent etc. }; +struct ta_ras_output_flags +{ + uint8_t ras_init_success_flag; + uint8_t err_inject_switch_disable_flag; + uint8_t reg_access_failure_flag; +}; + /* Common input structure for RAS callbacks */ /**********************************************************/ union ta_ras_cmd_input { struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; + + uint32_t reserve_pad[256]; +}; + +union ta_ras_cmd_output +{ + struct ta_ras_output_flags flags; + + uint32_t reserve_pad[256]; }; /* Shared Memory structures */ /**********************************************************/ struct ta_ras_shared_memory { - uint32_t cmd_id; - uint32_t resp_id; - enum ta_ras_status ras_status; - uint32_t reserved; - union ta_ras_cmd_input ras_in_message; + uint32_t cmd_id; + uint32_t resp_id; + uint32_t ras_status; + uint32_t if_version; + union ta_ras_cmd_input ras_in_message; + union ta_ras_cmd_output ras_out_message; }; #endif // TL_RAS_IF_H_ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 349da7bf7c68..90ed773695ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1845,7 +1845,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) uint32_t table_size = 0; struct mmsch_v2_0_cmd_direct_write direct_wt = { {0} }; struct mmsch_v2_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} }; - struct mmsch_v2_0_cmd_direct_polling direct_poll = { {0} }; struct mmsch_v2_0_cmd_end end = { {0} }; struct mmsch_v2_0_init_header *header; uint32_t *init_table = adev->virt.mm_table.cpu_addr; @@ -1855,8 +1854,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; - direct_poll.cmd_header.command_type = - MMSCH_COMMAND__DIRECT_REG_POLLING; end.cmd_header.command_type = MMSCH_COMMAND__END; if (header->vcn_table_offset == 0 && header->vcn_table_size == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 38ca4a712f12..34ed906645c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -86,7 +86,7 @@ static int vcn_v2_5_early_init(void *handle) adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING); + harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) adev->vcn.harvest_config |= 1 << i; } @@ -177,15 +177,15 @@ static int vcn_v2_5_sw_init(void *handle) adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; - adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); + adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9); adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; - adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0); adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; - adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1); adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; - adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD); adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; - adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); + adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP); ring = &adev->vcn.inst[j].ring_dec; ring->use_doorbell = true; @@ -399,46 +399,46 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) continue; /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; } else { - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst[i].gpu_addr)); - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst[i].gpu_addr)); offset = size; - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); } - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size); /* cache window 1: stack */ - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); /* non-cache window */ - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); - WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); - WREG32_SOC15(UVD, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); - WREG32_SOC15(UVD, i, mmUVD_VCPU_NONCACHE_SIZE0, + WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); } } @@ -452,91 +452,91 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); offset = size; WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), + VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0), AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } /** @@ -690,19 +690,19 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__MMSCH_MODE_MASK); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); + VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); + VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } /** @@ -774,13 +774,13 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo uint32_t rb_bufsz, tmp; /* disable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1, + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1, ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* enable dynamic power gating mode */ - tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS); + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS); tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; - WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp); + WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp); if (indirect) adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; @@ -793,11 +793,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); + VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); + VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | @@ -809,28 +809,28 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); + VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_MPC_CNTL), + VCN, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_MPC_SET_MUXA0), + VCN, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_MPC_SET_MUXB0), + VCN, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_MPC_SET_MUX), + VCN, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); @@ -838,26 +838,26 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* enable LMI MC and UMC channels */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); + VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); /* unblock VCPU register access */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); + VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); + VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* enable master interrupt */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_MASTINT_EN), + VCN, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) @@ -873,39 +873,39 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp); /* Stall DPG before WPTR/RPTR reset */ - WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; /* set the write pointer delay */ - WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); /* set the wb address */ - WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); /* programm the RB_BASE for ring buffer */ - WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0); - WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0); + WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0); - ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR); - WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET; /* Unstall DPG */ - WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); return 0; @@ -929,12 +929,12 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) } /* disable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* set uvd status busy */ - tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); + tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp); } if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) @@ -947,44 +947,44 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0, ~UVD_MASTINT_EN__VCPU_EN_MASK); /* setup mmUVD_LMI_CTRL */ - tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL); tmp &= ~0xff; - WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8| UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | UVD_LMI_CTRL__MASK_MC_URGENT_MASK | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); /* setup mmUVD_MPC_CNTL */ - tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); + tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL); tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0, ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0, ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); /* setup mmUVD_MPC_SET_MUX */ - WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX, ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); @@ -997,27 +997,27 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; /* VCN global tiling registers */ - WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, adev->gfx.config.gb_addr_config); /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0, ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, ~UVD_VCPU_CNTL__BLK_RST_MASK); for (k = 0; k < 10; ++k) { uint32_t status; for (j = 0; j < 100; ++j) { - status = RREG32_SOC15(UVD, i, mmUVD_STATUS); + status = RREG32_SOC15(VCN, i, mmUVD_STATUS); if (status & 2) break; if (amdgpu_emu_mode == 1) @@ -1030,11 +1030,11 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) break; DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), UVD_VCPU_CNTL__BLK_RST_MASK, ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); @@ -1047,15 +1047,15 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) } /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, ~UVD_MASTINT_EN__VCPU_EN_MASK); /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0); + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0); ring = &adev->vcn.inst[i].ring_dec; /* force RBC into idle state */ @@ -1065,39 +1065,39 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; /* programm the RB_BASE for ring buffer */ - WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0); - ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); - WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, + ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET; fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET; ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET; fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET; ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; } @@ -1118,33 +1118,33 @@ static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev, * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of * memory descriptor location */ - WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr)); - WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr)); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr)); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr)); /* 2, update vmid of descriptor */ - data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID); + data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID); data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; /* use domain0 for MM scheduler */ data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); - WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data); /* 3, notify mmsch about the size of this descriptor */ - WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size); /* 4, set resp to zero */ - WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0); /* * 5, kick off the initialization and wait until * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ - WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001); - data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP); + data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP); loop = 10; while ((data & 0x10000002) != 0x10000002) { udelay(100); - data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP); + data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP); loop--; if (!loop) break; @@ -1167,14 +1167,12 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) uint32_t table_size = 0; struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } }; struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } }; - struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } }; struct mmsch_v1_0_cmd_end end = { { 0 } }; uint32_t *init_table = adev->virt.mm_table.cpu_addr; struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; - direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING; end.cmd_header.command_type = MMSCH_COMMAND__END; header->version = MMSCH_VERSION; @@ -1189,93 +1187,93 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) table_size = 0; MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), + SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); offset = 0; MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0); + SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0); } else { MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[i].gpu_addr)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[i].gpu_addr)); offset = size; MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), + SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3); } MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), + SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0), size); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), + SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1), 0); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), + SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2), 0); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), + SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); ring = &adev->vcn.inst[i].ring_enc[0]; ring->wptr = 0; MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), + SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO), lower_32_bits(ring->gpu_addr)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), + SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), + SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE), ring->ring_size / 4); ring = &adev->vcn.inst[i].ring_dec; ring->wptr = 0; MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), lower_32_bits(ring->gpu_addr)); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, + SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), upper_32_bits(ring->gpu_addr)); @@ -1287,7 +1285,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); MMSCH_V1_0_INSERT_DIRECT_WT( - SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp); + SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp); /* add end packet */ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); @@ -1308,24 +1306,24 @@ static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) uint32_t tmp; /* Wait for power status to be 1 */ - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); /* wait for read ptr to be equal to write ptr */ - tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR); - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); - tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2); - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); - tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); /* disable dynamic power gating mode */ - WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); return 0; @@ -1369,17 +1367,17 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) return r; /* block VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), UVD_RB_ARB_CTRL__VCPU_DIS_MASK, ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); /* reset VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), UVD_VCPU_CNTL__BLK_RST_MASK, ~UVD_VCPU_CNTL__BLK_RST_MASK); /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, ~(UVD_VCPU_CNTL__CLK_EN_MASK)); /* clear status */ @@ -1388,7 +1386,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) vcn_v2_5_enable_clock_gating(adev); /* enable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } @@ -1410,11 +1408,11 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { DRM_DEBUG("dpg pause state changed %d -> %d", adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); - reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) & + reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) & (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); if (!ret_code) { @@ -1422,15 +1420,15 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data); /* wait for ACK */ - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE, + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); /* Stall DPG before WPTR/RPTR reset */ - WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); @@ -1438,39 +1436,39 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET; ring = &adev->vcn.inst[inst_idx].ring_enc[0]; ring->wptr = 0; - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET; fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET; ring = &adev->vcn.inst[inst_idx].ring_enc[1]; ring->wptr = 0; - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; - WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET; /* Unstall DPG */ - WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); } } else { reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); } adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; @@ -1490,7 +1488,7 @@ static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); + return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR); } /** @@ -1507,7 +1505,7 @@ static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); + return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR); } /** @@ -1522,14 +1520,14 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2, + WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2, lower_32_bits(ring->wptr) | 0x80000000); if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } } @@ -1575,9 +1573,9 @@ static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) - return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR); else - return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2); } /** @@ -1595,12 +1593,12 @@ static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR); } else { if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2); } } @@ -1620,14 +1618,14 @@ static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); } } else { if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ff47b1f69b68..cf0017f4d9d5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1323,6 +1323,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_free; } + /* Update the VRAM usage count */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size); + mutex_unlock(&p->mutex); args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); @@ -1338,7 +1342,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); err_unlock: mutex_unlock(&p->mutex); return err; @@ -1352,6 +1356,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, void *mem; struct kfd_dev *dev; int ret; + uint64_t size = 0; dev = kfd_device_by_id(GET_GPU_ID(args->handle)); if (!dev) @@ -1374,7 +1379,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, } ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, - (struct kgd_mem *)mem); + (struct kgd_mem *)mem, &size); /* If freeing the buffer failed, leave the handle in place for * clean-up during process tear-down. @@ -1383,6 +1388,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); + WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); + err_unlock: mutex_unlock(&p->mutex); return ret; @@ -1727,7 +1734,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); err_unlock: mutex_unlock(&p->mutex); return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ae9547791813..e9c4867abeff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1089,7 +1089,9 @@ static int set_sched_resources(struct device_queue_manager *dqm) break; } - res.queue_mask |= (1ull << i); + res.queue_mask |= 1ull + << amdgpu_queue_mask_bit_to_set_resource_bit( + (struct amdgpu_device *)dqm->dev->kgd, i); } res.gws_mask = ~0ull; res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d48b33449267..cde5e4c7caa1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -629,6 +629,8 @@ enum kfd_pdd_bound { PDD_BOUND_SUSPENDED, }; +#define MAX_VRAM_FILENAME_LEN 11 + /* Data that is per-process-per device. */ struct kfd_process_device { /* @@ -671,6 +673,11 @@ struct kfd_process_device { /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ enum kfd_pdd_bound bound; + + /* VRAM usage */ + uint64_t vram_usage; + struct attribute attr_vram; + char vram_filename[MAX_VRAM_FILENAME_LEN]; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 82b4c5a9382a..d27221ddcdeb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs; static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, char *buffer) { - int val = 0; - if (strcmp(attr->name, "pasid") == 0) { struct kfd_process *p = container_of(attr, struct kfd_process, attr_pasid); - val = p->pasid; + + return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid); + } else if (strncmp(attr->name, "vram_", 5) == 0) { + struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, + attr_vram); + if (pdd) + return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); } else { pr_err("Invalid attribute"); return -EINVAL; } - return snprintf(buffer, PAGE_SIZE, "%d\n", val); + return 0; } static void kfd_procfs_kobj_release(struct kobject *kobj) @@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q) return 0; } +int kfd_procfs_add_vram_usage(struct kfd_process *p) +{ + int ret = 0; + struct kfd_process_device *pdd; + + if (!p) + return -EINVAL; + + if (!p->kobj) + return -EFAULT; + + /* Create proc/<pid>/vram_<gpuid> file for each GPU */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u", + pdd->dev->id); + pdd->attr_vram.name = pdd->vram_filename; + pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&pdd->attr_vram); + ret = sysfs_create_file(p->kobj, &pdd->attr_vram); + if (ret) + pr_warn("Creating vram usage for gpu id %d failed", + (int)pdd->dev->id); + } + + return ret; +} + + void kfd_procfs_del_queue(struct queue *q) { if (!q) @@ -248,7 +280,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, struct kfd_dev *dev = pdd->dev; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL); } /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process @@ -312,7 +344,7 @@ sync_memory_failed: return err; err_map_mem: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL); err_alloc_mem: *kptr = NULL; return err; @@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file *filep) process->kobj); if (!process->kobj_queues) pr_warn("Creating KFD proc/queues folder failed"); + + ret = kfd_procfs_add_vram_usage(process); + if (ret) + pr_warn("Creating vram usage file for pid %d failed", + (int)process->lead_thread->pid); } out: if (!IS_ERR(process)) @@ -488,7 +525,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) peer_pdd->dev->kgd, mem, peer_pdd->vm); } - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL); kfd_process_device_remove_obj_handle(pdd, id); } } @@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); + struct kfd_process_device *pdd; /* Remove the procfs files */ if (p->kobj) { @@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct *work) kobject_del(p->kobj_queues); kobject_put(p->kobj_queues); p->kobj_queues = NULL; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + sysfs_remove_file(p->kobj, &pdd->attr_vram); + kobject_del(p->kobj); kobject_put(p->kobj); p->kobj = NULL; @@ -863,6 +905,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; pdd->runtime_inuse = false; + pdd->vram_usage = 0; list_add(&pdd->per_device_list, &p->per_device_data); /* Init idr used for memory handle translation */ @@ -1079,7 +1122,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) return p; } -/* process_evict_queues - Evict all user queues of a process +/* kfd_process_evict_queues - Evict all user queues of a process * * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. @@ -1119,7 +1162,7 @@ fail: return r; } -/* process_restore_queues - Restore all user queues of a process */ +/* kfd_process_restore_queues - Restore all user queues of a process */ int kfd_process_restore_queues(struct kfd_process *p) { struct kfd_process_device *pdd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index bc4a22df12d7..bb77f7af2b6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -478,6 +478,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.device_id); sysfs_show_32bit_prop(buffer, "location_id", dev->node_props.location_id); + sysfs_show_32bit_prop(buffer, "domain", + dev->node_props.domain); sysfs_show_32bit_prop(buffer, "drm_render_minor", dev->node_props.drm_render_minor); sysfs_show_64bit_prop(buffer, "hive_id", @@ -1306,6 +1308,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->pdev); + dev->node_props.domain = pci_domain_nr(gpu->pdev->bus); dev->node_props.max_engine_clk_fcompute = amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); dev->node_props.max_engine_clk_ccompute = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 0c51bd3dcd59..326d9b26b7aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -80,6 +80,7 @@ struct kfd_node_properties { uint32_t vendor_id; uint32_t device_id; uint32_t location_id; + uint32_t domain; uint32_t max_engine_clk_fcompute; uint32_t max_engine_clk_ccompute; int32_t drm_render_minor; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 71309ee3aca3..48f2b3710e7c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -30,7 +30,7 @@ #include "dc.h" #include "dc/inc/core_types.h" #include "dal_asic_id.h" -#include "dmub/inc/dmub_srv.h" +#include "dmub/dmub_srv.h" #include "dc/inc/hw/dmcu.h" #include "dc/inc/hw/abm.h" #include "dc/dc_dmub_srv.h" @@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params) /** * dm_crtc_high_irq() - Handles CRTC interrupt - * @interrupt_params: ignored + * @interrupt_params: used for determining the CRTC instance * * Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK * event handler. @@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params) unsigned long flags; acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK); - - if (acrtc) { - acrtc_state = to_dm_crtc_state(acrtc->base.state); - - DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n", - acrtc->crtc_id, - amdgpu_dm_vrr_active(acrtc_state)); - - /* Core vblank handling at start of front-porch is only possible - * in non-vrr mode, as only there vblank timestamping will give - * valid results while done in front-porch. Otherwise defer it - * to dm_vupdate_high_irq after end of front-porch. - */ - if (!amdgpu_dm_vrr_active(acrtc_state)) - drm_crtc_handle_vblank(&acrtc->base); - - /* Following stuff must happen at start of vblank, for crc - * computation and below-the-range btr support in vrr mode. - */ - amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); - - if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI && - acrtc_state->vrr_params.supported && - acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) { - spin_lock_irqsave(&adev->ddev->event_lock, flags); - mod_freesync_handle_v_update( - adev->dm.freesync_module, - acrtc_state->stream, - &acrtc_state->vrr_params); - - dc_stream_adjust_vmin_vmax( - adev->dm.dc, - acrtc_state->stream, - &acrtc_state->vrr_params.adjust); - spin_unlock_irqrestore(&adev->ddev->event_lock, flags); - } - } -} - -#if defined(CONFIG_DRM_AMD_DC_DCN) -/** - * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs - * @interrupt params - interrupt parameters - * - * Notify DRM's vblank event handler at VSTARTUP - * - * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which: - * * We are close enough to VUPDATE - the point of no return for hw - * * We are in the fixed portion of variable front porch when vrr is enabled - * * We are before VUPDATE, where double-buffered vrr registers are swapped - * - * It is therefore the correct place to signal vblank, send user flip events, - * and update VRR. - */ -static void dm_dcn_crtc_high_irq(void *interrupt_params) -{ - struct common_irq_params *irq_params = interrupt_params; - struct amdgpu_device *adev = irq_params->adev; - struct amdgpu_crtc *acrtc; - struct dm_crtc_state *acrtc_state; - unsigned long flags; - - acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK); - if (!acrtc) return; @@ -528,22 +464,35 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) amdgpu_dm_vrr_active(acrtc_state), acrtc_state->active_planes); + /** + * Core vblank handling at start of front-porch is only possible + * in non-vrr mode, as only there vblank timestamping will give + * valid results while done in front-porch. Otherwise defer it + * to dm_vupdate_high_irq after end of front-porch. + */ + if (!amdgpu_dm_vrr_active(acrtc_state)) + drm_crtc_handle_vblank(&acrtc->base); + + /** + * Following stuff must happen at start of vblank, for crc + * computation and below-the-range btr support in vrr mode. + */ amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); - drm_crtc_handle_vblank(&acrtc->base); + + /* BTR updates need to happen before VUPDATE on Vega and above. */ + if (adev->family < AMDGPU_FAMILY_AI) + return; spin_lock_irqsave(&adev->ddev->event_lock, flags); - if (acrtc_state->vrr_params.supported && + if (acrtc_state->stream && acrtc_state->vrr_params.supported && acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) { - mod_freesync_handle_v_update( - adev->dm.freesync_module, - acrtc_state->stream, - &acrtc_state->vrr_params); + mod_freesync_handle_v_update(adev->dm.freesync_module, + acrtc_state->stream, + &acrtc_state->vrr_params); - dc_stream_adjust_vmin_vmax( - adev->dm.dc, - acrtc_state->stream, - &acrtc_state->vrr_params.adjust); + dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc_state->stream, + &acrtc_state->vrr_params.adjust); } /* @@ -556,7 +505,8 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) * avoid race conditions between flip programming and completion, * which could cause too early flip completion events. */ - if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED && + if (adev->family >= AMDGPU_FAMILY_RV && + acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED && acrtc_state->active_planes == 0) { if (acrtc->event) { drm_crtc_send_vblank_event(&acrtc->base, acrtc->event); @@ -568,7 +518,6 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) spin_unlock_irqrestore(&adev->ddev->event_lock, flags); } -#endif static int dm_set_clockgating_state(void *handle, enum amd_clockgating_state state) @@ -1389,9 +1338,14 @@ static int dm_late_init(void *handle) struct dmcu_iram_parameters params; unsigned int linear_lut[16]; int i; - struct dmcu *dmcu = adev->dm.dc->res_pool->dmcu; + struct dmcu *dmcu = NULL; bool ret = false; + if (!adev->dm.fw_dmcu) + return detect_mst_link_for_all_connectors(adev->ddev); + + dmcu = adev->dm.dc->res_pool->dmcu; + for (i = 0; i < 16; i++) linear_lut[i] = 0xFFFF * i / 15; @@ -1571,7 +1525,6 @@ static int dm_suspend(void *handle) { struct amdgpu_device *adev = handle; struct amdgpu_display_manager *dm = &adev->dm; - int ret = 0; WARN_ON(adev->dm.cached_state); adev->dm.cached_state = drm_atomic_helper_suspend(adev->ddev); @@ -1583,7 +1536,7 @@ static int dm_suspend(void *handle) dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); - return ret; + return 0; } static struct amdgpu_dm_connector * @@ -2013,17 +1966,22 @@ void amdgpu_dm_update_connector_after_detect( dc_sink_retain(aconnector->dc_sink); if (sink->dc_edid.length == 0) { aconnector->edid = NULL; - drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); + if (aconnector->dc_link->aux_mode) { + drm_dp_cec_unset_edid( + &aconnector->dm_dp_aux.aux); + } } else { aconnector->edid = - (struct edid *) sink->dc_edid.raw_edid; - + (struct edid *)sink->dc_edid.raw_edid; drm_connector_update_edid_property(connector, - aconnector->edid); - drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, - aconnector->edid); + aconnector->edid); + + if (aconnector->dc_link->aux_mode) + drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, + aconnector->edid); } + amdgpu_dm_update_freesync_caps(connector, aconnector->edid); update_connector_ext_caps(aconnector); } else { @@ -2445,8 +2403,36 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) c_irq_params->adev = adev; c_irq_params->irq_src = int_params.irq_source; + amdgpu_dm_irq_register_interrupt( + adev, &int_params, dm_crtc_high_irq, c_irq_params); + } + + /* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to + * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx + * to trigger at end of each vblank, regardless of state of the lock, + * matching DCE behaviour. + */ + for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT; + i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1; + i++) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq); + + if (r) { + DRM_ERROR("Failed to add vupdate irq id!\n"); + return r; + } + + int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT; + int_params.irq_source = + dc_interrupt_to_irq_source(dc, i, 0); + + c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1]; + + c_irq_params->adev = adev; + c_irq_params->irq_src = int_params.irq_source; + amdgpu_dm_irq_register_interrupt(adev, &int_params, - dm_dcn_crtc_high_irq, c_irq_params); + dm_vupdate_high_irq, c_irq_params); } /* Use GRPH_PFLIP interrupt */ @@ -3661,6 +3647,10 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, case DRM_FORMAT_P010: plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb; break; + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F; + break; default: DRM_ERROR( "Unsupported screen format %s\n", @@ -4458,10 +4448,6 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) struct amdgpu_device *adev = crtc->dev->dev_private; int rc; - /* Do not set vupdate for DCN hardware */ - if (adev->family > AMDGPU_FAMILY_AI) - return 0; - irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; @@ -5577,6 +5563,10 @@ static int get_plane_formats(const struct drm_plane *plane, formats[num_formats++] = DRM_FORMAT_NV12; if (plane_cap && plane_cap->pixel_format_support.p010) formats[num_formats++] = DRM_FORMAT_P010; + if (plane_cap && plane_cap->pixel_format_support.fp16) { + formats[num_formats++] = DRM_FORMAT_XRGB16161616F; + formats[num_formats++] = DRM_FORMAT_ARGB16161616F; + } break; case DRM_PLANE_TYPE_OVERLAY: @@ -6865,7 +6855,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, dc_state); if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && - acrtc_state->stream->link->psr_settings.psr_version != PSR_VERSION_UNSUPPORTED && + acrtc_state->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED && !acrtc_state->stream->link->psr_settings.psr_feature_enabled) amdgpu_dm_link_setup_psr(acrtc_state->stream); else if ((acrtc_state->update_type == UPDATE_TYPE_FAST) && @@ -7895,6 +7885,7 @@ static int dm_update_plane_state(struct dc *dc, struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state; struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state; + struct amdgpu_crtc *new_acrtc; bool needs_reset; int ret = 0; @@ -7904,9 +7895,30 @@ static int dm_update_plane_state(struct dc *dc, dm_new_plane_state = to_dm_plane_state(new_plane_state); dm_old_plane_state = to_dm_plane_state(old_plane_state); - /*TODO Implement atomic check for cursor plane */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) + /*TODO Implement better atomic check for cursor plane */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) { + if (!enable || !new_plane_crtc || + drm_atomic_plane_disabling(plane->state, new_plane_state)) + return 0; + + new_acrtc = to_amdgpu_crtc(new_plane_crtc); + + if ((new_plane_state->crtc_w > new_acrtc->max_cursor_width) || + (new_plane_state->crtc_h > new_acrtc->max_cursor_height)) { + DRM_DEBUG_ATOMIC("Bad cursor size %d x %d\n", + new_plane_state->crtc_w, new_plane_state->crtc_h); + return -EINVAL; + } + + if (new_plane_state->crtc_x <= -new_acrtc->max_cursor_width || + new_plane_state->crtc_y <= -new_acrtc->max_cursor_height) { + DRM_DEBUG_ATOMIC("Bad cursor position %d, %d\n", + new_plane_state->crtc_x, new_plane_state->crtc_y); + return -EINVAL; + } + return 0; + } needs_reset = should_reset_plane(state, plane, old_plane_state, new_plane_state); @@ -8640,10 +8652,10 @@ static void amdgpu_dm_set_psr_caps(struct dc_link *link) link->dpcd_caps.psr_caps.psr_version = dpcd_data[0]; if (dpcd_data[0] == 0) { - link->psr_settings.psr_version = PSR_VERSION_UNSUPPORTED; + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; link->psr_settings.psr_feature_enabled = false; } else { - link->psr_settings.psr_version = PSR_VERSION_1; + link->psr_settings.psr_version = DC_PSR_VERSION_1; link->psr_settings.psr_feature_enabled = true; } @@ -8662,14 +8674,12 @@ static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) struct dc_link *link = NULL; struct psr_config psr_config = {0}; struct psr_context psr_context = {0}; - struct dc *dc = NULL; bool ret = false; if (stream == NULL) return false; link = stream->link; - dc = link->ctx->dc; psr_config.psr_version = link->dpcd_caps.psr_caps.psr_version; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index b3b7efd973ca..076af267b488 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -32,7 +32,7 @@ #include "amdgpu_dm.h" #include "amdgpu_dm_debugfs.h" #include "dm_helpers.h" -#include "dmub/inc/dmub_srv.h" +#include "dmub/dmub_srv.h" struct dmub_debugfs_trace_header { uint32_t entry_count; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 78e1c11d4ae5..dcf84a61de37 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -398,15 +398,15 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; - memset(display, 0, sizeof(*display)); - memset(link, 0, sizeof(*link)); - - display->index = aconnector->base.index; - if (config->dpms_off) { hdcp_remove_display(hdcp_work, link_index, aconnector); return; } + + memset(display, 0, sizeof(*display)); + memset(link, 0, sizeof(*link)); + + display->index = aconnector->base.index; display->state = MOD_HDCP_DISPLAY_ACTIVE; if (aconnector->dc_sink != NULL) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index c407f06cd1f5..b086d5c906e0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -554,6 +554,7 @@ enum dc_edid_status dm_helpers_read_local_edid( struct dc_sink *sink) { struct amdgpu_dm_connector *aconnector = link->priv; + struct drm_connector *connector = &aconnector->base; struct i2c_adapter *ddc; int retry = 3; enum dc_edid_status edid_status; @@ -571,6 +572,15 @@ enum dc_edid_status dm_helpers_read_local_edid( edid = drm_get_edid(&aconnector->base, ddc); + /* DP Compliance Test 4.2.2.6 */ + if (link->aux_mode && connector->edid_corrupt) + drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, connector->real_edid_checksum); + + if (!edid && connector->edid_corrupt) { + connector->edid_corrupt = false; + return EDID_BAD_CHECKSUM; + } + if (!edid) return EDID_NO_RESPONSE; @@ -605,34 +615,10 @@ enum dc_edid_status dm_helpers_read_local_edid( DRM_ERROR("EDID err: %d, on connector: %s", edid_status, aconnector->base.name); - if (link->aux_mode) { - union test_request test_request = { {0} }; - union test_response test_response = { {0} }; - - dm_helpers_dp_read_dpcd(ctx, - link, - DP_TEST_REQUEST, - &test_request.raw, - sizeof(union test_request)); - - if (!test_request.bits.EDID_READ) - return edid_status; - test_response.bits.EDID_CHECKSUM_WRITE = 1; - - dm_helpers_dp_write_dpcd(ctx, - link, - DP_TEST_EDID_CHECKSUM, - &sink->dc_edid.raw_edid[sink->dc_edid.length-1], - 1); - - dm_helpers_dp_write_dpcd(ctx, - link, - DP_TEST_RESPONSE, - &test_response.raw, - sizeof(test_response)); - - } + /* DP Compliance Test 4.2.2.3 */ + if (link->aux_mode) + drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, sink->dc_edid.raw_edid[sink->dc_edid.length-1]); return edid_status; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ad817bd74586..45cfb7c45566 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -66,7 +66,7 @@ #include "dce/dce_i2c.h" -#include "dmub/inc/dmub_cmd_dal.h" +#include "dmub/dmub_srv.h" #define CTX \ dc->ctx @@ -839,11 +839,10 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) { int i; - int count = 0; - struct pipe_ctx *pipe; PERF_TRACE(); for (i = 0; i < MAX_PIPES; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; + int count = 0; + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (!pipe->plane_state) continue; @@ -2210,9 +2209,7 @@ static void commit_planes_do_stream_update(struct dc *dc, if (should_program_abm) { if (*stream_update->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) { - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable( - pipe_ctx->stream_res.abm, - pipe_ctx->stream->link->panel_cntl->inst); + dc->hwss.set_abm_immediate_disable(pipe_ctx); } else { pipe_ctx->stream_res.abm->funcs->set_abm_level( pipe_ctx->stream_res.abm, stream->abm_level); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 67c5342cf89a..c08de6823db4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -26,7 +26,7 @@ #include <linux/slab.h> #include "dm_services.h" -#include "atom.h" +#include "atomfirmware.h" #include "dm_helpers.h" #include "dc.h" #include "grph_object_id.h" @@ -46,7 +46,7 @@ #include "dmcu.h" #include "hw/clk_mgr.h" #include "dce/dmub_psr.h" -#include "dmub/inc/dmub_cmd_dal.h" +#include "dmub/dmub_srv.h" #include "inc/hw/panel_cntl.h" #define DC_LOGGER_INIT(logger) @@ -1552,7 +1552,7 @@ static bool dc_link_construct(struct dc_link *link, */ program_hpd_filter(link); - link->psr_settings.psr_version = PSR_VERSION_UNSUPPORTED; + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; return true; device_tag_fail: @@ -2504,59 +2504,56 @@ int dc_link_get_target_backlight_pwm(const struct dc_link *link) return (int) abm->funcs->get_target_backlight(abm); } +static struct pipe_ctx *get_pipe_from_link(const struct dc_link *link) +{ + int i; + struct dc *dc = link->ctx->dc; + struct pipe_ctx *pipe_ctx = NULL; + + for (i = 0; i < MAX_PIPES; i++) { + if (dc->current_state->res_ctx.pipe_ctx[i].stream) { + if (dc->current_state->res_ctx.pipe_ctx[i].stream->link == link) { + pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + break; + } + } + } + + return pipe_ctx; +} + bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t backlight_pwm_u16_16, uint32_t frame_ramp) { struct dc *dc = link->ctx->dc; - int i; DC_LOGGER_INIT(link->ctx->logger); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); if (dc_is_embedded_signal(link->connector_signal)) { - struct pipe_ctx *pipe_ctx = NULL; - - for (i = 0; i < MAX_PIPES; i++) { - if (dc->current_state->res_ctx.pipe_ctx[i].stream) { - if (dc->current_state->res_ctx. - pipe_ctx[i].stream->link - == link) { - pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; - - /* Disable brightness ramping when the display is blanked - * as it can hang the DMCU - */ - if (dc->current_state->res_ctx.pipe_ctx[i].plane_state == NULL) - frame_ramp = 0; - } - } - } + struct pipe_ctx *pipe_ctx = get_pipe_from_link(link); - if (pipe_ctx == NULL) + if (pipe_ctx) { + /* Disable brightness ramping when the display is blanked + * as it can hang the DMCU + */ + if (pipe_ctx->plane_state == NULL) + frame_ramp = 0; + } else { ASSERT(false); + return false; + } dc->hwss.set_backlight_level( pipe_ctx, backlight_pwm_u16_16, frame_ramp); } - return true; } -bool dc_link_set_abm_disable(const struct dc_link *link) -{ - struct abm *abm = get_abm_from_stream_res(link); - bool success = false; - - if (abm) - success = abm->funcs->set_abm_immediate_disable(abm, link->panel_cntl->inst); - - return success; -} - bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool wait) { struct dc *dc = link->ctx->dc; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 9ef9e50a34fa..1db592372435 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -13,7 +13,6 @@ #include "core_status.h" #include "dpcd_defs.h" -#include "resource.h" #define DC_LOGGER \ link->ctx->logger diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1a01c038632b..cb5d11f11cad 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1547,35 +1547,6 @@ bool dc_add_all_planes_for_stream( return add_all_planes_for_stream(dc, stream, &set, 1, context); } - -static bool is_hdr_static_meta_changed(struct dc_stream_state *cur_stream, - struct dc_stream_state *new_stream) -{ - if (cur_stream == NULL) - return true; - - if (memcmp(&cur_stream->hdr_static_metadata, - &new_stream->hdr_static_metadata, - sizeof(struct dc_info_packet)) != 0) - return true; - - return false; -} - -static bool is_vsc_info_packet_changed(struct dc_stream_state *cur_stream, - struct dc_stream_state *new_stream) -{ - if (cur_stream == NULL) - return true; - - if (memcmp(&cur_stream->vsc_infopacket, - &new_stream->vsc_infopacket, - sizeof(struct dc_info_packet)) != 0) - return true; - - return false; -} - static bool is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream) { @@ -1610,15 +1581,9 @@ static bool are_stream_backends_same( if (is_timing_changed(stream_a, stream_b)) return false; - if (is_hdr_static_meta_changed(stream_a, stream_b)) - return false; - if (stream_a->dpms_off != stream_b->dpms_off) return false; - if (is_vsc_info_packet_changed(stream_a, stream_b)) - return false; - return true; } @@ -1758,21 +1723,6 @@ static struct audio *find_first_free_audio( return 0; } -bool resource_is_stream_unchanged( - struct dc_state *old_context, struct dc_stream_state *stream) -{ - int i; - - for (i = 0; i < old_context->stream_count; i++) { - struct dc_stream_state *old_stream = old_context->streams[i]; - - if (are_stream_backends_same(old_stream, stream)) - return true; - } - - return false; -} - /** * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state. */ @@ -2027,17 +1977,6 @@ enum dc_status resource_map_pool_resources( int pipe_idx = -1; struct dc_bios *dcb = dc->ctx->dc_bios; - /* TODO Check if this is needed */ - /*if (!resource_is_stream_unchanged(old_context, stream)) { - if (stream != NULL && old_context->streams[i] != NULL) { - stream->bit_depth_params = - old_context->streams[i]->bit_depth_params; - stream->clamping = old_context->streams[i]->clamping; - continue; - } - } - */ - calculate_phy_pix_clks(stream); /* TODO: Check Linux */ @@ -2720,15 +2659,9 @@ bool pipe_need_reprogram( if (is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream)) return true; - if (is_hdr_static_meta_changed(pipe_ctx_old->stream, pipe_ctx->stream)) - return true; - if (pipe_ctx_old->stream->dpms_off != pipe_ctx->stream->dpms_off) return true; - if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream)) - return true; - if (false == pipe_ctx_old->stream->link->link_state_valid && false == pipe_ctx_old->stream->dpms_off) return true; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 17075f99bc54..85908561c741 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.83.1" +#define DC_VER "3.2.84" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -277,6 +277,7 @@ struct dc_config { bool disable_extended_timeout_support; // Used to disable extended timeout and lttpr feature as well bool multi_mon_pp_mclk_switch; bool disable_dmcu; + bool enable_4to1MPC; }; enum visual_confirm { @@ -476,6 +477,7 @@ struct dc_debug_options { bool enable_dmcub_surface_flip; bool usbc_combo_phy_reset_wa; bool disable_dsc; + bool enable_dram_clock_change_one_display_vactive; }; struct dc_debug_data { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 907e0c5374bb..eea2429ac67d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -25,7 +25,7 @@ #include "dc.h" #include "dc_dmub_srv.h" -#include "../dmub/inc/dmub_srv.h" +#include "../dmub/dmub_srv.h" static void dc_dmub_srv_construct(struct dc_dmub_srv *dc_srv, struct dc *dc, struct dmub_srv *dmub) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 6689ae33dee8..a3a09ccb6d26 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -27,7 +27,7 @@ #define _DMUB_DC_SRV_H_ #include "os_types.h" -#include "dmub/inc/dmub_cmd.h" +#include "dmub/dmub_srv.h" struct dmub_srv; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 80fb4149f36a..f63fc25aa6c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -29,7 +29,6 @@ #include "dc.h" #include "dc_types.h" #include "grph_object_defs.h" -#include "dmub/inc/dmub_cmd_dal.h" enum dc_link_fec_state { dc_link_fec_not_ready, @@ -72,7 +71,7 @@ struct link_trace { struct psr_settings { bool psr_feature_enabled; // PSR is supported by sink bool psr_allow_active; // PSR is currently active - enum psr_version psr_version; // Internal PSR version, determined based on DPCD + enum dc_psr_version psr_version; // Internal PSR version, determined based on DPCD /* These parameters are calculated in Driver, * based on display timing and Sink capabilities. @@ -220,8 +219,6 @@ int dc_link_get_backlight_level(const struct dc_link *dc_link); int dc_link_get_target_backlight_pwm(const struct dc_link *link); -bool dc_link_set_abm_disable(const struct dc_link *dc_link); - bool dc_link_set_psr_allow_active(struct dc_link *dc_link, bool enable, bool wait); bool dc_link_get_psr_state(const struct dc_link *dc_link, uint32_t *psr_state); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 0d210104ba0a..f236da1c1859 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -862,4 +862,9 @@ struct dsc_dec_dpcd_caps { uint32_t branch_max_line_width; }; +enum dc_psr_version { + DC_PSR_VERSION_1 = 0, + DC_PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF, +}; + #endif /* DC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index c15e60fb5ebc..4e87e70237e3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -83,120 +83,6 @@ static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id, uint32_t p return true; } -static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_abm *abm_dce) -{ - uint64_t current_backlight; - uint32_t round_result; - uint32_t pwm_period_cntl, bl_period, bl_int_count; - uint32_t bl_pwm_cntl, bl_pwm, fractional_duty_cycle_en; - uint32_t bl_period_mask, bl_pwm_mask; - - pwm_period_cntl = REG_READ(BL_PWM_PERIOD_CNTL); - REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, &bl_period); - REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, &bl_int_count); - - bl_pwm_cntl = REG_READ(BL_PWM_CNTL); - REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, (uint32_t *)(&bl_pwm)); - REG_GET(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, &fractional_duty_cycle_en); - - if (bl_int_count == 0) - bl_int_count = 16; - - bl_period_mask = (1 << bl_int_count) - 1; - bl_period &= bl_period_mask; - - bl_pwm_mask = bl_period_mask << (16 - bl_int_count); - - if (fractional_duty_cycle_en == 0) - bl_pwm &= bl_pwm_mask; - else - bl_pwm &= 0xFFFF; - - current_backlight = bl_pwm << (1 + bl_int_count); - - if (bl_period == 0) - bl_period = 0xFFFF; - - current_backlight = div_u64(current_backlight, bl_period); - current_backlight = (current_backlight + 1) >> 1; - - current_backlight = (uint64_t)(current_backlight) * bl_period; - - round_result = (uint32_t)(current_backlight & 0xFFFFFFFF); - - round_result = (round_result >> (bl_int_count-1)) & 1; - - current_backlight >>= bl_int_count; - current_backlight += round_result; - - return (uint32_t)(current_backlight); -} - -static void driver_set_backlight_level(struct dce_abm *abm_dce, - uint32_t backlight_pwm_u16_16) -{ - uint32_t backlight_16bit; - uint32_t masked_pwm_period; - uint8_t bit_count; - uint64_t active_duty_cycle; - uint32_t pwm_period_bitcnt; - - /* - * 1. Find 16 bit backlight active duty cycle, where 0 <= backlight - * active duty cycle <= backlight period - */ - - /* 1.1 Apply bitmask for backlight period value based on value of BITCNT - */ - REG_GET_2(BL_PWM_PERIOD_CNTL, - BL_PWM_PERIOD_BITCNT, &pwm_period_bitcnt, - BL_PWM_PERIOD, &masked_pwm_period); - - if (pwm_period_bitcnt == 0) - bit_count = 16; - else - bit_count = pwm_period_bitcnt; - - /* e.g. maskedPwmPeriod = 0x24 when bitCount is 6 */ - masked_pwm_period = masked_pwm_period & ((1 << bit_count) - 1); - - /* 1.2 Calculate integer active duty cycle required upper 16 bits - * contain integer component, lower 16 bits contain fractional component - * of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24 - */ - active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period; - - /* 1.3 Calculate 16 bit active duty cycle from integer and fractional - * components shift by bitCount then mask 16 bits and add rounding bit - * from MSB of fraction e.g. 0x86F7 = ((0x21BDC0 >> 6) & 0xFFF) + 0 - */ - backlight_16bit = active_duty_cycle >> bit_count; - backlight_16bit &= 0xFFFF; - backlight_16bit += (active_duty_cycle >> (bit_count - 1)) & 0x1; - - /* - * 2. Program register with updated value - */ - - /* 2.1 Lock group 2 backlight registers */ - - REG_UPDATE_2(BL_PWM_GRP1_REG_LOCK, - BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, 1, - BL_PWM_GRP1_REG_LOCK, 1); - - // 2.2 Write new active duty cycle - REG_UPDATE(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, backlight_16bit); - - /* 2.3 Unlock group 2 backlight registers */ - REG_UPDATE(BL_PWM_GRP1_REG_LOCK, - BL_PWM_GRP1_REG_LOCK, 0); - - /* 3 Wait for pending bit to be cleared */ - REG_WAIT(BL_PWM_GRP1_REG_LOCK, - BL_PWM_GRP1_REG_UPDATE_PENDING, 0, - 1, 10000); -} - static void dmcu_set_backlight_level( struct dce_abm *abm_dce, uint32_t backlight_pwm_u16_16, @@ -249,10 +135,9 @@ static void dmcu_set_backlight_level( 0, 1, 80000); } -static void dce_abm_init(struct abm *abm) +static void dce_abm_init(struct abm *abm, uint32_t backlight) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); - unsigned int backlight = calculate_16_bit_backlight_from_pwm(abm_dce); REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x103); REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x101); @@ -334,85 +219,11 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t level) static bool dce_abm_immediate_disable(struct abm *abm, uint32_t panel_inst) { - struct dce_abm *abm_dce = TO_DCE_ABM(abm); - if (abm->dmcu_is_running == false) return true; dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY, panel_inst); - abm->stored_backlight_registers.BL_PWM_CNTL = - REG_READ(BL_PWM_CNTL); - abm->stored_backlight_registers.BL_PWM_CNTL2 = - REG_READ(BL_PWM_CNTL2); - abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL = - REG_READ(BL_PWM_PERIOD_CNTL); - - REG_GET(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, - &abm->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); - return true; -} - -static bool dce_abm_init_backlight(struct abm *abm) -{ - struct dce_abm *abm_dce = TO_DCE_ABM(abm); - uint32_t value; - - /* It must not be 0, so we have to restore them - * Bios bug w/a - period resets to zero, - * restoring to cache values which is always correct - */ - REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, &value); - if (value == 0 || value == 1) { - if (abm->stored_backlight_registers.BL_PWM_CNTL != 0) { - REG_WRITE(BL_PWM_CNTL, - abm->stored_backlight_registers.BL_PWM_CNTL); - REG_WRITE(BL_PWM_CNTL2, - abm->stored_backlight_registers.BL_PWM_CNTL2); - REG_WRITE(BL_PWM_PERIOD_CNTL, - abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL); - REG_UPDATE(LVTMA_PWRSEQ_REF_DIV, - BL_PWM_REF_DIV, - abm->stored_backlight_registers. - LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); - } else { - /* TODO: Note: This should not really happen since VBIOS - * should have initialized PWM registers on boot. - */ - REG_WRITE(BL_PWM_CNTL, 0xC000FA00); - REG_WRITE(BL_PWM_PERIOD_CNTL, 0x000C0FA0); - } - } else { - abm->stored_backlight_registers.BL_PWM_CNTL = - REG_READ(BL_PWM_CNTL); - abm->stored_backlight_registers.BL_PWM_CNTL2 = - REG_READ(BL_PWM_CNTL2); - abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL = - REG_READ(BL_PWM_PERIOD_CNTL); - - REG_GET(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, - &abm->stored_backlight_registers. - LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); - } - - /* Have driver take backlight control - * TakeBacklightControl(true) - */ - value = REG_READ(BIOS_SCRATCH_2); - value |= ATOM_S2_VRI_BRIGHT_ENABLE; - REG_WRITE(BIOS_SCRATCH_2, value); - - /* Enable the backlight output */ - REG_UPDATE(BL_PWM_CNTL, BL_PWM_EN, 1); - - /* Disable fractional pwm if configured */ - REG_UPDATE(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, - abm->ctx->dc->config.disable_fractional_pwm ? 0 : 1); - - /* Unlock group 2 backlight registers */ - REG_UPDATE(BL_PWM_GRP1_REG_LOCK, - BL_PWM_GRP1_REG_LOCK, 0); - return true; } @@ -421,23 +232,18 @@ static bool dce_abm_set_backlight_level_pwm( unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int controller_id, - unsigned int panel_inst, - bool fw_set_brightness) + unsigned int panel_inst) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); - /* If DMCU is in reset state, DMCU is uninitialized */ - if (fw_set_brightness) - dmcu_set_backlight_level(abm_dce, - backlight_pwm_u16_16, - frame_ramp, - controller_id, - panel_inst); - else - driver_set_backlight_level(abm_dce, backlight_pwm_u16_16); + dmcu_set_backlight_level(abm_dce, + backlight_pwm_u16_16, + frame_ramp, + controller_id, + panel_inst); return true; } @@ -445,13 +251,12 @@ static bool dce_abm_set_backlight_level_pwm( static const struct abm_funcs dce_funcs = { .abm_init = dce_abm_init, .set_abm_level = dce_abm_set_level, - .init_backlight = dce_abm_init_backlight, .set_pipe = dce_abm_set_pipe, .set_backlight_level_pwm = dce_abm_set_backlight_level_pwm, .get_current_backlight = dce_abm_get_current_backlight, .get_target_backlight = dce_abm_get_target_backlight, .init_abm_config = NULL, - .set_abm_immediate_disable = dce_abm_immediate_disable + .set_abm_immediate_disable = dce_abm_immediate_disable, }; static void dce_abm_construct( @@ -465,10 +270,6 @@ static void dce_abm_construct( base->ctx = ctx; base->funcs = &dce_funcs; - base->stored_backlight_registers.BL_PWM_CNTL = 0; - base->stored_backlight_registers.BL_PWM_CNTL2 = 0; - base->stored_backlight_registers.BL_PWM_PERIOD_CNTL = 0; - base->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV = 0; base->dmcu_is_running = false; abm_dce->regs = regs; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h index ba0caaffa24b..9718a4823372 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h @@ -30,11 +30,6 @@ #include "abm.h" #define ABM_COMMON_REG_LIST_DCE_BASE() \ - SR(BL_PWM_PERIOD_CNTL), \ - SR(BL_PWM_CNTL), \ - SR(BL_PWM_CNTL2), \ - SR(BL_PWM_GRP1_REG_LOCK), \ - SR(LVTMA_PWRSEQ_REF_DIV), \ SR(MASTER_COMM_CNTL_REG), \ SR(MASTER_COMM_CMD_REG), \ SR(MASTER_COMM_DATA_REG1) @@ -85,15 +80,6 @@ .field_name = reg_name ## __ ## field_name ## post_fix #define ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh) \ - ABM_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, mask_sh), \ - ABM_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, mask_sh), \ - ABM_SF(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, mask_sh), \ - ABM_SF(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, mask_sh), \ - ABM_SF(BL_PWM_CNTL, BL_PWM_EN, mask_sh), \ - ABM_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, mask_sh), \ - ABM_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_LOCK, mask_sh), \ - ABM_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_UPDATE_PENDING, mask_sh), \ - ABM_SF(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, mask_sh), \ ABM_SF(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, mask_sh), \ ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, mask_sh), \ ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE1, mask_sh), \ @@ -178,19 +164,10 @@ type ABM1_HG_REG_READ_MISSED_FRAME_CLEAR; \ type ABM1_LS_REG_READ_MISSED_FRAME_CLEAR; \ type ABM1_BL_REG_READ_MISSED_FRAME_CLEAR; \ - type BL_PWM_PERIOD; \ - type BL_PWM_PERIOD_BITCNT; \ - type BL_ACTIVE_INT_FRAC_CNT; \ - type BL_PWM_FRACTIONAL_EN; \ type MASTER_COMM_INTERRUPT; \ type MASTER_COMM_CMD_REG_BYTE0; \ type MASTER_COMM_CMD_REG_BYTE1; \ - type MASTER_COMM_CMD_REG_BYTE2; \ - type BL_PWM_REF_DIV; \ - type BL_PWM_EN; \ - type BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN; \ - type BL_PWM_GRP1_REG_LOCK; \ - type BL_PWM_GRP1_REG_UPDATE_PENDING + type MASTER_COMM_CMD_REG_BYTE2 struct dce_abm_shift { ABM_REG_FIELD_LIST(uint8_t); @@ -201,10 +178,6 @@ struct dce_abm_mask { }; struct dce_abm_registers { - uint32_t BL_PWM_PERIOD_CNTL; - uint32_t BL_PWM_CNTL; - uint32_t BL_PWM_CNTL2; - uint32_t LVTMA_PWRSEQ_REF_DIV; uint32_t DC_ABM1_HG_SAMPLE_RATE; uint32_t DC_ABM1_LS_SAMPLE_RATE; uint32_t BL1_PWM_BL_UPDATE_SAMPLE_RATE; @@ -219,7 +192,6 @@ struct dce_abm_registers { uint32_t MASTER_COMM_CMD_REG; uint32_t MASTER_COMM_DATA_REG1; uint32_t BIOS_SCRATCH_2; - uint32_t BL_PWM_GRP1_REG_LOCK; }; struct dce_abm { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c index d9b0ff7eb2a4..ebff9b1e312e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c @@ -28,6 +28,7 @@ #include "dc_dmub_srv.h" #include "panel_cntl.h" #include "dce_panel_cntl.h" +#include "atom.h" #define TO_DCE_PANEL_CNTL(panel_cntl)\ container_of(panel_cntl, struct dce_panel_cntl, base) @@ -45,9 +46,113 @@ #define FN(reg_name, field_name) \ dce_panel_cntl->shift->field_name, dce_panel_cntl->mask->field_name -void dce_panel_cntl_hw_init(struct panel_cntl *panel_cntl) +static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_panel_cntl *dce_panel_cntl) { + uint64_t current_backlight; + uint32_t round_result; + uint32_t pwm_period_cntl, bl_period, bl_int_count; + uint32_t bl_pwm_cntl, bl_pwm, fractional_duty_cycle_en; + uint32_t bl_period_mask, bl_pwm_mask; + pwm_period_cntl = REG_READ(BL_PWM_PERIOD_CNTL); + REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, &bl_period); + REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, &bl_int_count); + + bl_pwm_cntl = REG_READ(BL_PWM_CNTL); + REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, (uint32_t *)(&bl_pwm)); + REG_GET(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, &fractional_duty_cycle_en); + + if (bl_int_count == 0) + bl_int_count = 16; + + bl_period_mask = (1 << bl_int_count) - 1; + bl_period &= bl_period_mask; + + bl_pwm_mask = bl_period_mask << (16 - bl_int_count); + + if (fractional_duty_cycle_en == 0) + bl_pwm &= bl_pwm_mask; + else + bl_pwm &= 0xFFFF; + + current_backlight = bl_pwm << (1 + bl_int_count); + + if (bl_period == 0) + bl_period = 0xFFFF; + + current_backlight = div_u64(current_backlight, bl_period); + current_backlight = (current_backlight + 1) >> 1; + + current_backlight = (uint64_t)(current_backlight) * bl_period; + + round_result = (uint32_t)(current_backlight & 0xFFFFFFFF); + + round_result = (round_result >> (bl_int_count-1)) & 1; + + current_backlight >>= bl_int_count; + current_backlight += round_result; + + return (uint32_t)(current_backlight); +} + +uint32_t dce_panel_cntl_hw_init(struct panel_cntl *panel_cntl) +{ + struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl); + uint32_t value; + uint32_t current_backlight; + + /* It must not be 0, so we have to restore them + * Bios bug w/a - period resets to zero, + * restoring to cache values which is always correct + */ + REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, &value); + + if (value == 0 || value == 1) { + if (panel_cntl->stored_backlight_registers.BL_PWM_CNTL != 0) { + REG_WRITE(BL_PWM_CNTL, + panel_cntl->stored_backlight_registers.BL_PWM_CNTL); + REG_WRITE(BL_PWM_CNTL2, + panel_cntl->stored_backlight_registers.BL_PWM_CNTL2); + REG_WRITE(BL_PWM_PERIOD_CNTL, + panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL); + REG_UPDATE(PWRSEQ_REF_DIV, + BL_PWM_REF_DIV, + panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); + } else { + /* TODO: Note: This should not really happen since VBIOS + * should have initialized PWM registers on boot. + */ + REG_WRITE(BL_PWM_CNTL, 0xC000FA00); + REG_WRITE(BL_PWM_PERIOD_CNTL, 0x000C0FA0); + } + } else { + panel_cntl->stored_backlight_registers.BL_PWM_CNTL = + REG_READ(BL_PWM_CNTL); + panel_cntl->stored_backlight_registers.BL_PWM_CNTL2 = + REG_READ(BL_PWM_CNTL2); + panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL = + REG_READ(BL_PWM_PERIOD_CNTL); + + REG_GET(PWRSEQ_REF_DIV, BL_PWM_REF_DIV, + &panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); + } + + // Have driver take backlight control + // TakeBacklightControl(true) + value = REG_READ(BIOS_SCRATCH_2); + value |= ATOM_S2_VRI_BRIGHT_ENABLE; + REG_WRITE(BIOS_SCRATCH_2, value); + + // Enable the backlight output + REG_UPDATE(BL_PWM_CNTL, BL_PWM_EN, 1); + + // Unlock group 2 backlight registers + REG_UPDATE(BL_PWM_GRP1_REG_LOCK, + BL_PWM_GRP1_REG_LOCK, 0); + + current_backlight = calculate_16_bit_backlight_from_pwm(dce_panel_cntl); + + return current_backlight; } bool dce_is_panel_backlight_on(struct panel_cntl *panel_cntl) @@ -55,7 +160,7 @@ bool dce_is_panel_backlight_on(struct panel_cntl *panel_cntl) struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl); uint32_t value; - REG_GET(PWRSEQ_CNTL, BLON, &value); + REG_GET(PWRSEQ_CNTL, LVTMA_BLON, &value); return value; } @@ -65,13 +170,94 @@ bool dce_is_panel_powered_on(struct panel_cntl *panel_cntl) struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl); uint32_t pwr_seq_state, dig_on, dig_on_ovrd; - REG_GET(PWRSEQ_STATE, PWRSEQ_TARGET_STATE_R, &pwr_seq_state); + REG_GET(PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, &pwr_seq_state); - REG_GET_2(PWRSEQ_CNTL, DIGON, &dig_on, DIGON_OVRD, &dig_on_ovrd); + REG_GET_2(PWRSEQ_CNTL, LVTMA_DIGON, &dig_on, LVTMA_DIGON_OVRD, &dig_on_ovrd); return (pwr_seq_state == 1) || (dig_on == 1 && dig_on_ovrd == 1); } +void dce_store_backlight_level(struct panel_cntl *panel_cntl) +{ + struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl); + + panel_cntl->stored_backlight_registers.BL_PWM_CNTL = + REG_READ(BL_PWM_CNTL); + panel_cntl->stored_backlight_registers.BL_PWM_CNTL2 = + REG_READ(BL_PWM_CNTL2); + panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL = + REG_READ(BL_PWM_PERIOD_CNTL); + + REG_GET(PWRSEQ_REF_DIV, BL_PWM_REF_DIV, + &panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); +} + +void dce_driver_set_backlight(struct panel_cntl *panel_cntl, + uint32_t backlight_pwm_u16_16) +{ + uint32_t backlight_16bit; + uint32_t masked_pwm_period; + uint8_t bit_count; + uint64_t active_duty_cycle; + uint32_t pwm_period_bitcnt; + struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl); + + /* + * 1. Find 16 bit backlight active duty cycle, where 0 <= backlight + * active duty cycle <= backlight period + */ + + /* 1.1 Apply bitmask for backlight period value based on value of BITCNT + */ + REG_GET_2(BL_PWM_PERIOD_CNTL, + BL_PWM_PERIOD_BITCNT, &pwm_period_bitcnt, + BL_PWM_PERIOD, &masked_pwm_period); + + if (pwm_period_bitcnt == 0) + bit_count = 16; + else + bit_count = pwm_period_bitcnt; + + /* e.g. maskedPwmPeriod = 0x24 when bitCount is 6 */ + masked_pwm_period = masked_pwm_period & ((1 << bit_count) - 1); + + /* 1.2 Calculate integer active duty cycle required upper 16 bits + * contain integer component, lower 16 bits contain fractional component + * of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24 + */ + active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period; + + /* 1.3 Calculate 16 bit active duty cycle from integer and fractional + * components shift by bitCount then mask 16 bits and add rounding bit + * from MSB of fraction e.g. 0x86F7 = ((0x21BDC0 >> 6) & 0xFFF) + 0 + */ + backlight_16bit = active_duty_cycle >> bit_count; + backlight_16bit &= 0xFFFF; + backlight_16bit += (active_duty_cycle >> (bit_count - 1)) & 0x1; + + /* + * 2. Program register with updated value + */ + + /* 2.1 Lock group 2 backlight registers */ + + REG_UPDATE_2(BL_PWM_GRP1_REG_LOCK, + BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, 1, + BL_PWM_GRP1_REG_LOCK, 1); + + // 2.2 Write new active duty cycle + REG_UPDATE(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, backlight_16bit); + + /* 2.3 Unlock group 2 backlight registers */ + REG_UPDATE(BL_PWM_GRP1_REG_LOCK, + BL_PWM_GRP1_REG_LOCK, 0); + + /* 3 Wait for pending bit to be cleared */ + REG_WAIT(BL_PWM_GRP1_REG_LOCK, + BL_PWM_GRP1_REG_UPDATE_PENDING, 0, + 1, 10000); +} + static void dce_panel_cntl_destroy(struct panel_cntl **panel_cntl) { struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(*panel_cntl); @@ -85,7 +271,8 @@ static const struct panel_cntl_funcs dce_link_panel_cntl_funcs = { .hw_init = dce_panel_cntl_hw_init, .is_panel_backlight_on = dce_is_panel_backlight_on, .is_panel_powered_on = dce_is_panel_powered_on, - + .store_backlight_level = dce_store_backlight_level, + .driver_set_backlight = dce_driver_set_backlight, }; void dce_panel_cntl_construct( @@ -95,6 +282,13 @@ void dce_panel_cntl_construct( const struct dce_panel_cntl_shift *shift, const struct dce_panel_cntl_mask *mask) { + struct panel_cntl *base = &dce_panel_cntl->base; + + base->stored_backlight_registers.BL_PWM_CNTL = 0; + base->stored_backlight_registers.BL_PWM_CNTL2 = 0; + base->stored_backlight_registers.BL_PWM_PERIOD_CNTL = 0; + base->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV = 0; + dce_panel_cntl->regs = regs; dce_panel_cntl->shift = shift; dce_panel_cntl->mask = mask; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h index 6dc6984f9248..70ec691e14d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h @@ -35,10 +35,12 @@ #define DCE_PANEL_CNTL_REG_LIST()\ DCE_PANEL_CNTL_SR(PWRSEQ_CNTL, LVTMA), \ DCE_PANEL_CNTL_SR(PWRSEQ_STATE, LVTMA), \ + DCE_PANEL_CNTL_SR(PWRSEQ_REF_DIV, LVTMA), \ SR(BL_PWM_CNTL), \ SR(BL_PWM_CNTL2), \ SR(BL_PWM_PERIOD_CNTL), \ - SR(BL_PWM_GRP1_REG_LOCK) + SR(BL_PWM_GRP1_REG_LOCK), \ + SR(BIOS_SCRATCH_2) #define DCN_PANEL_CNTL_SR(reg_name, block)\ .reg_name = BASE(mm ## block ## _ ## reg_name ## _BASE_IDX) + \ @@ -47,33 +49,37 @@ #define DCN_PANEL_CNTL_REG_LIST()\ DCN_PANEL_CNTL_SR(PWRSEQ_CNTL, LVTMA), \ DCN_PANEL_CNTL_SR(PWRSEQ_STATE, LVTMA), \ + DCE_PANEL_CNTL_SR(PWRSEQ_REF_DIV, LVTMA), \ SR(BL_PWM_CNTL), \ SR(BL_PWM_CNTL2), \ SR(BL_PWM_PERIOD_CNTL), \ - SR(BL_PWM_GRP1_REG_LOCK) + SR(BL_PWM_GRP1_REG_LOCK), \ + SR(BIOS_SCRATCH_2) -#define DCE_PANEL_CNTL_SF(block, reg_name, field_name, post_fix)\ - .field_name = block ## reg_name ## __ ## block ## field_name ## post_fix +#define DCE_PANEL_CNTL_SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix #define DCE_PANEL_CNTL_MASK_SH_LIST(mask_sh) \ - DCE_PANEL_CNTL_SF(LVTMA_, PWRSEQ_CNTL, BLON, mask_sh),\ - DCE_PANEL_CNTL_SF(LVTMA_, PWRSEQ_CNTL, DIGON, mask_sh),\ - DCE_PANEL_CNTL_SF(LVTMA_, PWRSEQ_CNTL, DIGON_OVRD, mask_sh),\ - DCE_PANEL_CNTL_SF(LVTMA_, PWRSEQ_STATE, PWRSEQ_TARGET_STATE_R, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_CNTL, BL_PWM_EN, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_LOCK, mask_sh), \ - DCE_PANEL_CNTL_SF(, BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_UPDATE_PENDING, mask_sh) + DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh),\ + DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_DIGON, mask_sh),\ + DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_DIGON_OVRD, mask_sh),\ + DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh), \ + DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_CNTL, BL_PWM_EN, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_LOCK, mask_sh), \ + DCE_PANEL_CNTL_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_UPDATE_PENDING, mask_sh) #define DCE_PANEL_CNTL_REG_FIELD_LIST(type) \ - type BLON;\ - type DIGON;\ - type DIGON_OVRD;\ - type PWRSEQ_TARGET_STATE_R; \ + type LVTMA_BLON;\ + type LVTMA_DIGON;\ + type LVTMA_DIGON_OVRD;\ + type LVTMA_PWRSEQ_TARGET_STATE_R; \ + type BL_PWM_REF_DIV; \ type BL_PWM_EN; \ type BL_ACTIVE_INT_FRAC_CNT; \ type BL_PWM_FRACTIONAL_EN; \ @@ -98,6 +104,8 @@ struct dce_panel_cntl_registers { uint32_t BL_PWM_CNTL2; uint32_t BL_PWM_PERIOD_CNTL; uint32_t BL_PWM_GRP1_REG_LOCK; + uint32_t PWRSEQ_REF_DIV; + uint32_t BIOS_SCRATCH_2; }; struct dce_panel_cntl { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 451574971b96..4cdaaf4d881c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -1336,7 +1336,6 @@ static void dce110_se_audio_setup( { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t speakers = 0; uint32_t channels = 0; ASSERT(audio_info); @@ -1344,7 +1343,6 @@ static void dce110_se_audio_setup( /* This should not happen.it does so we don't get BSOD*/ return; - speakers = audio_info->flags.info.ALLSPEAKERS; channels = speakers_to_channels(audio_info->flags.speaker_flags).all; /* setup the audio stream source select (audio -> dig mapping) */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index 06d39d529c09..da0b29abfbda 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -27,7 +27,7 @@ #include "dce_abm.h" #include "dc.h" #include "dc_dmub_srv.h" -#include "dmub/inc/dmub_srv.h" +#include "dmub/dmub_srv.h" #include "core_types.h" #include "dm_services.h" #include "reg_helper.h" @@ -70,53 +70,6 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t panel return true; } -static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_abm *dce_abm) -{ - uint64_t current_backlight; - uint32_t round_result; - uint32_t bl_period, bl_int_count; - uint32_t bl_pwm, fractional_duty_cycle_en; - uint32_t bl_period_mask, bl_pwm_mask; - - REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, &bl_period); - REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, &bl_int_count); - - REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, &bl_pwm); - REG_GET(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, &fractional_duty_cycle_en); - - if (bl_int_count == 0) - bl_int_count = 16; - - bl_period_mask = (1 << bl_int_count) - 1; - bl_period &= bl_period_mask; - - bl_pwm_mask = bl_period_mask << (16 - bl_int_count); - - if (fractional_duty_cycle_en == 0) - bl_pwm &= bl_pwm_mask; - else - bl_pwm &= 0xFFFF; - - current_backlight = (uint64_t)bl_pwm << (1 + bl_int_count); - - if (bl_period == 0) - bl_period = 0xFFFF; - - current_backlight = div_u64(current_backlight, bl_period); - current_backlight = (current_backlight + 1) >> 1; - - current_backlight = (uint64_t)(current_backlight) * bl_period; - - round_result = (uint32_t)(current_backlight & 0xFFFFFFFF); - - round_result = (round_result >> (bl_int_count-1)) & 1; - - current_backlight >>= bl_int_count; - current_backlight += round_result; - - return (uint32_t)(current_backlight); -} - static void dmcub_set_backlight_level( struct dce_abm *dce_abm, uint32_t backlight_pwm_u16_16, @@ -178,10 +131,9 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc) dc_dmub_srv_wait_idle(dc->dmub_srv); } -static void dmub_abm_init(struct abm *abm) +static void dmub_abm_init(struct abm *abm, uint32_t backlight) { struct dce_abm *dce_abm = TO_DMUB_ABM(abm); - unsigned int backlight = calculate_16_bit_backlight_from_pwm(dce_abm); REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x103); REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x101); @@ -261,77 +213,8 @@ static bool dmub_abm_set_level(struct abm *abm, uint32_t level) static bool dmub_abm_immediate_disable(struct abm *abm, uint32_t panel_inst) { - struct dce_abm *dce_abm = TO_DMUB_ABM(abm); - dmub_abm_set_pipe(abm, DISABLE_ABM_IMMEDIATELY, panel_inst); - abm->stored_backlight_registers.BL_PWM_CNTL = - REG_READ(BL_PWM_CNTL); - abm->stored_backlight_registers.BL_PWM_CNTL2 = - REG_READ(BL_PWM_CNTL2); - abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL = - REG_READ(BL_PWM_PERIOD_CNTL); - - REG_GET(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, - &abm->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); - - return true; -} - -static bool dmub_abm_init_backlight(struct abm *abm) -{ - struct dce_abm *dce_abm = TO_DMUB_ABM(abm); - uint32_t value; - - /* It must not be 0, so we have to restore them - * Bios bug w/a - period resets to zero, - * restoring to cache values which is always correct - */ - REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, &value); - - if (value == 0 || value == 1) { - if (abm->stored_backlight_registers.BL_PWM_CNTL != 0) { - REG_WRITE(BL_PWM_CNTL, - abm->stored_backlight_registers.BL_PWM_CNTL); - REG_WRITE(BL_PWM_CNTL2, - abm->stored_backlight_registers.BL_PWM_CNTL2); - REG_WRITE(BL_PWM_PERIOD_CNTL, - abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL); - REG_UPDATE(LVTMA_PWRSEQ_REF_DIV, - BL_PWM_REF_DIV, - abm->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); - } else { - /* TODO: Note: This should not really happen since VBIOS - * should have initialized PWM registers on boot. - */ - REG_WRITE(BL_PWM_CNTL, 0xC000FA00); - REG_WRITE(BL_PWM_PERIOD_CNTL, 0x000C0FA0); - } - } else { - abm->stored_backlight_registers.BL_PWM_CNTL = - REG_READ(BL_PWM_CNTL); - abm->stored_backlight_registers.BL_PWM_CNTL2 = - REG_READ(BL_PWM_CNTL2); - abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL = - REG_READ(BL_PWM_PERIOD_CNTL); - - REG_GET(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, - &abm->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); - } - - // Have driver take backlight control - // TakeBacklightControl(true) - value = REG_READ(BIOS_SCRATCH_2); - value |= ATOM_S2_VRI_BRIGHT_ENABLE; - REG_WRITE(BIOS_SCRATCH_2, value); - - // Enable the backlight output - REG_UPDATE(BL_PWM_CNTL, BL_PWM_EN, 1); - - // Unlock group 2 backlight registers - REG_UPDATE(BL_PWM_GRP1_REG_LOCK, - BL_PWM_GRP1_REG_LOCK, 0); - return true; } @@ -340,8 +223,7 @@ static bool dmub_abm_set_backlight_level_pwm( unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int otg_inst, - uint32_t panel_inst, - bool fw_set_brightness) + uint32_t panel_inst) { struct dce_abm *dce_abm = TO_DMUB_ABM(abm); @@ -384,7 +266,6 @@ static bool dmub_abm_init_config(struct abm *abm, static const struct abm_funcs abm_funcs = { .abm_init = dmub_abm_init, .set_abm_level = dmub_abm_set_level, - .init_backlight = dmub_abm_init_backlight, .set_pipe = dmub_abm_set_pipe, .set_backlight_level_pwm = dmub_abm_set_backlight_level_pwm, .get_current_backlight = dmub_abm_get_current_backlight, @@ -404,10 +285,6 @@ static void dmub_abm_construct( base->ctx = ctx; base->funcs = &abm_funcs; - base->stored_backlight_registers.BL_PWM_CNTL = 0; - base->stored_backlight_registers.BL_PWM_CNTL2 = 0; - base->stored_backlight_registers.BL_PWM_PERIOD_CNTL = 0; - base->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV = 0; base->dmcu_is_running = false; abm_dce->regs = regs; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 9f12c76f21ab..044a0133ebb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -26,8 +26,7 @@ #include "dmub_psr.h" #include "dc.h" #include "dc_dmub_srv.h" -#include "dmub/inc/dmub_srv.h" -#include "dmub/inc/dmub_gpint_cmd.h" +#include "dmub/dmub_srv.h" #include "core_types.h" #define MAX_PIPES 6 @@ -94,12 +93,20 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state * union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; - if (stream->link->psr_settings.psr_version == PSR_VERSION_UNSUPPORTED) + if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) return false; cmd.psr_set_version.header.type = DMUB_CMD__PSR; cmd.psr_set_version.header.sub_type = DMUB_CMD__PSR_SET_VERSION; - cmd.psr_set_version.psr_set_version_data.version = stream->link->psr_settings.psr_version; + switch (stream->link->psr_settings.psr_version) { + case DC_PSR_VERSION_1: + cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_1; + break; + case DC_PSR_VERSION_UNSUPPORTED: + default: + cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_UNSUPPORTED; + break; + } cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data); dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 6bd8d4e1c294..b77e9dc16086 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1066,7 +1066,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { hws->funcs.edp_backlight_control(link, false); - dc_link_set_abm_disable(link); + link->dc->hwss.set_abm_immediate_disable(pipe_ctx); } if (dc_is_dp_signal(pipe_ctx->stream->signal)) @@ -2355,6 +2355,7 @@ static void init_hw(struct dc *dc) struct abm *abm; struct dmcu *dmcu; struct dce_hwseq *hws = dc->hwseq; + uint32_t backlight = MAX_BACKLIGHT_LEVEL; bp = dc->ctx->dc_bios; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2401,12 +2402,17 @@ static void init_hw(struct dc *dc) audio->funcs->hw_init(audio); } - abm = dc->res_pool->abm; - if (abm != NULL) { - abm->funcs->init_backlight(abm); - abm->funcs->abm_init(abm); + for (i = 0; i < dc->link_count; i++) { + struct dc_link *link = dc->links[i]; + + if (link->panel_cntl) + backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); } + abm = dc->res_pool->abm; + if (abm != NULL) + abm->funcs->abm_init(abm, backlight); + dmcu = dc->res_pool->dmcu; if (dmcu != NULL && abm != NULL) abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu); @@ -2721,6 +2727,7 @@ bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, struct dc_link *link = pipe_ctx->stream->link; struct dc *dc = link->ctx->dc; struct abm *abm = pipe_ctx->stream_res.abm; + struct panel_cntl *panel_cntl = link->panel_cntl; struct dmcu *dmcu = dc->res_pool->dmcu; bool fw_set_brightness = true; /* DMCU -1 for all controller id values, @@ -2728,23 +2735,38 @@ bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, */ uint32_t controller_id = pipe_ctx->stream_res.tg->inst + 1; - if (abm == NULL || (abm->funcs->set_backlight_level_pwm == NULL)) + if (abm == NULL || panel_cntl == NULL || (abm->funcs->set_backlight_level_pwm == NULL)) return false; if (dmcu) fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); - abm->funcs->set_backlight_level_pwm( - abm, - backlight_pwm_u16_16, - frame_ramp, - controller_id, - link->panel_cntl->inst, - fw_set_brightness); + if (!fw_set_brightness && panel_cntl->funcs->driver_set_backlight) + panel_cntl->funcs->driver_set_backlight(panel_cntl, backlight_pwm_u16_16); + else + abm->funcs->set_backlight_level_pwm( + abm, + backlight_pwm_u16_16, + frame_ramp, + controller_id, + link->panel_cntl->inst); return true; } +void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) +{ + struct abm *abm = pipe_ctx->stream_res.abm; + struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; + + if (abm) + abm->funcs->set_abm_immediate_disable(abm, + pipe_ctx->stream->link->panel_cntl->inst); + + if (panel_cntl) + panel_cntl->funcs->store_backlight_level(panel_cntl); +} + static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, .program_output_csc = program_output_csc, @@ -2781,6 +2803,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .set_cursor_position = dce110_set_cursor_position, .set_cursor_attribute = dce110_set_cursor_attribute, .set_backlight_level = dce110_set_backlight_level, + .set_abm_immediate_disable = dce110_set_abm_immediate_disable, }; static const struct hwseq_private_funcs dce110_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h index e609a72b4634..fe5326df00f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h @@ -88,6 +88,7 @@ void dce110_edp_wait_for_hpd_ready( bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, uint32_t backlight_pwm_u16_16, uint32_t frame_ramp); +void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx); #endif /* __DC_HWSS_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 0e682b5aa3eb..7f8456b9988b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -134,13 +134,6 @@ bool dpp1_get_optimal_number_of_taps( struct scaler_data *scl_data, const struct scaling_taps *in_taps) { - uint32_t pixel_width; - - if (scl_data->viewport.width > scl_data->recout.width) - pixel_width = scl_data->recout.width; - else - pixel_width = scl_data->viewport.width; - /* Some ASICs does not support FP16 scaling, so we reject modes require this*/ if (scl_data->format == PIXEL_FORMAT_FP16 && dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 2eb5d0e3e425..f36d1f57b846 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -826,6 +826,14 @@ enum dc_status dcn10_enable_stream_timing( color_space = stream->output_color_space; color_space_to_black_color(dc, color_space, &black_color); + /* + * The way 420 is packed, 2 channels carry Y component, 1 channel + * alternate between Cb and Cr, so both channels need the pixel + * value for Y + */ + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + black_color.color_r_cr = black_color.color_g_y; + if (pipe_ctx->stream_res.tg->funcs->set_blank_color) pipe_ctx->stream_res.tg->funcs->set_blank_color( pipe_ctx->stream_res.tg, @@ -903,8 +911,7 @@ static void dcn10_reset_back_end_for_pipe( if (pipe_ctx->top_pipe == NULL) { if (pipe_ctx->stream_res.abm) - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm, - pipe_ctx->stream->link->panel_cntl->inst); + dc->hwss.set_abm_immediate_disable(pipe_ctx); pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); @@ -1245,6 +1252,7 @@ void dcn10_init_hw(struct dc *dc) struct dce_hwseq *hws = dc->hwseq; struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; + uint32_t backlight = MAX_BACKLIGHT_LEVEL; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -1411,11 +1419,16 @@ void dcn10_init_hw(struct dc *dc) audio->funcs->hw_init(audio); } - if (abm != NULL) { - abm->funcs->init_backlight(abm); - abm->funcs->abm_init(abm); + for (i = 0; i < dc->link_count; i++) { + struct dc_link *link = dc->links[i]; + + if (link->panel_cntl) + backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); } + if (abm != NULL) + abm->funcs->abm_init(abm, backlight); + if (dmcu != NULL && !dmcu->auto_load_dmcu) dmcu->funcs->dmcu_init(dmcu); @@ -2249,6 +2262,14 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) &blnd_cfg.black_color); } + /* + * The way 420 is packed, 2 channels carry Y component, 1 channel + * alternate between Cb and Cr, so both channels need the pixel + * value for Y + */ + if (pipe_ctx->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + blnd_cfg.black_color.color_r_cr = blnd_cfg.black_color.color_g_y; + if (per_pixel_alpha) blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; else @@ -2490,9 +2511,7 @@ void dcn10_blank_pixel_data( stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level); } } else if (blank) { - if (stream_res->abm) - stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm, - stream->link->panel_cntl->inst); + dc->hwss.set_abm_immediate_disable(pipe_ctx); if (stream_res->tg->funcs->set_blank) stream_res->tg->funcs->set_blank(stream_res->tg, blank); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index 6ff7e2bd1d4a..897a3d25685a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -73,6 +73,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .set_backlight_level = dce110_set_backlight_level, + .set_abm_immediate_disable = dce110_set_abm_immediate_disable, }; static const struct hwseq_private_funcs dcn10_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index bacf865f55c0..17d5cb422025 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1121,24 +1121,6 @@ static enum dc_status build_mapped_resource( { struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); - /*TODO Seems unneeded anymore */ - /* if (old_context && resource_is_stream_unchanged(old_context, stream)) { - if (stream != NULL && old_context->streams[i] != NULL) { - todo: shouldn't have to copy missing parameter here - resource_build_bit_depth_reduction_params(stream, - &stream->bit_depth_params); - stream->clamping.pixel_encoding = - stream->timing.pixel_encoding; - - resource_build_bit_depth_reduction_params(stream, - &stream->bit_depth_params); - build_clamping_params(stream); - - continue; - } - } - */ - if (!pipe_ctx) return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 7eba9333c328..07b2f9399671 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -1274,7 +1274,6 @@ static void enc1_se_audio_setup( { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); - uint32_t speakers = 0; uint32_t channels = 0; ASSERT(audio_info); @@ -1282,7 +1281,6 @@ static void enc1_se_audio_setup( /* This should not happen.it does so we don't get BSOD*/ return; - speakers = audio_info->flags.info.ALLSPEAKERS; channels = speakers_to_channels(audio_info->flags.speaker_flags).all; /* setup the audio stream source select (audio -> dig mapping) */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 3a290ccaeb82..da5333d165ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -961,9 +961,7 @@ void dcn20_blank_pixel_data( width = width / odm_cnt; if (blank) { - if (stream_res->abm) - stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm, - stream->link->panel_cntl->inst); + dc->hwss.set_abm_immediate_disable(pipe_ctx); if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) { test_pattern = CONTROLLER_DP_TEST_PATTERN_COLORSQUARES; @@ -2042,9 +2040,7 @@ static void dcn20_reset_back_end_for_pipe( */ if (pipe_ctx->top_pipe == NULL) { - if (pipe_ctx->stream_res.abm) - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm, - pipe_ctx->stream->link->panel_cntl->inst); + dc->hwss.set_abm_immediate_disable(pipe_ctx); pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index c83d98e0d211..a8bcd747d7ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -84,6 +84,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .set_backlight_level = dce110_set_backlight_level, + .set_abm_immediate_disable = dce110_set_abm_immediate_disable, }; static const struct hwseq_private_funcs dcn20_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 60ea499c1ca8..778e2e8fd2c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1653,24 +1653,6 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state enum dc_status status = DC_OK; struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); - /*TODO Seems unneeded anymore */ - /* if (old_context && resource_is_stream_unchanged(old_context, stream)) { - if (stream != NULL && old_context->streams[i] != NULL) { - todo: shouldn't have to copy missing parameter here - resource_build_bit_depth_reduction_params(stream, - &stream->bit_depth_params); - stream->clamping.pixel_encoding = - stream->timing.pixel_encoding; - - resource_build_bit_depth_reduction_params(stream, - &stream->bit_depth_params); - build_clamping_params(stream); - - continue; - } - } - */ - if (!pipe_ctx) return DC_ERROR_UNEXPECTED; @@ -1940,7 +1922,7 @@ bool dcn20_split_stream_for_odm( return true; } -bool dcn20_split_stream_for_mpc( +void dcn20_split_stream_for_mpc( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *primary_pipe, @@ -1969,11 +1951,6 @@ bool dcn20_split_stream_for_mpc( secondary_pipe->top_pipe = primary_pipe; ASSERT(primary_pipe->plane_state); - if (!resource_build_scaling_params(primary_pipe) || - !resource_build_scaling_params(secondary_pipe)) - return false; - - return true; } void dcn20_populate_dml_writeback_from_context( @@ -2599,11 +2576,32 @@ static void dcn20_merge_pipes_for_validate( } } +int dcn20_find_previous_split_count(struct pipe_ctx *pipe) +{ + int previous_split = 1; + struct pipe_ctx *current_pipe = pipe; + + while (current_pipe->bottom_pipe) { + if (current_pipe->plane_state != current_pipe->bottom_pipe->plane_state) + break; + previous_split++; + current_pipe = current_pipe->bottom_pipe; + } + current_pipe = pipe; + while (current_pipe->top_pipe) { + if (current_pipe->plane_state != current_pipe->top_pipe->plane_state) + break; + previous_split++; + current_pipe = current_pipe->top_pipe; + } + return previous_split; +} + int dcn20_validate_apply_pipe_split_flags( struct dc *dc, struct dc_state *context, int vlevel, - bool *split, + int *split, bool *merge) { int i, pipe_idx, vlevel_split; @@ -2658,8 +2656,14 @@ int dcn20_validate_apply_pipe_split_flags( if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (force_split || context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_plane] > 1) - split[i] = true; + if (force_split + || context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_plane] > 1) { + if (context->stream_count == 1 && plane_count == 1 + && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4) + split[i] = 4; + else + split[i] = 2; + } if ((pipe->stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE || pipe->stream->view_format == @@ -2668,9 +2672,9 @@ int dcn20_validate_apply_pipe_split_flags( TIMING_3D_FORMAT_TOP_AND_BOTTOM || pipe->stream->timing.timing_3d_format == TIMING_3D_FORMAT_SIDE_BY_SIDE)) - split[i] = true; + split[i] = 2; if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { - split[i] = true; + split[i] = 2; context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; } context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_plane] = @@ -2678,39 +2682,58 @@ int dcn20_validate_apply_pipe_split_flags( if (pipe->prev_odm_pipe && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_plane] != dm_odm_combine_mode_disabled) { /*Already split odm pipe tree, don't try to split again*/ - split[i] = false; - split[pipe->prev_odm_pipe->pipe_idx] = false; + split[i] = 0; + split[pipe->prev_odm_pipe->pipe_idx] = 0; } else if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { - /*Already split mpc tree, don't try to split again, assumes only 2x mpc combine*/ - split[i] = false; - split[pipe->top_pipe->pipe_idx] = false; - } else if (pipe->prev_odm_pipe || (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state)) { - if (split[i] == false) { + /*If 2 way split but can support 4 way split, then split each pipe again*/ + if (context->stream_count == 1 && plane_count == 1 + && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4) { + split[i] = 2; + } else { + split[i] = 0; + split[pipe->top_pipe->pipe_idx] = 0; + } + } else if (pipe->prev_odm_pipe || (dcn20_find_previous_split_count(pipe) == 2 && pipe->top_pipe)) { + if (split[i] == 0) { /*Exiting mpc/odm combine*/ merge[i] = true; - if (pipe->prev_odm_pipe) { - ASSERT(0); /*should not actually happen yet*/ - merge[pipe->prev_odm_pipe->pipe_idx] = true; - } else - merge[pipe->top_pipe->pipe_idx] = true; } else { /*Transition from mpc combine to odm combine or vice versa*/ ASSERT(0); /*should not actually happen yet*/ - split[i] = true; + split[i] = 2; merge[i] = true; if (pipe->prev_odm_pipe) { - split[pipe->prev_odm_pipe->pipe_idx] = true; + split[pipe->prev_odm_pipe->pipe_idx] = 2; merge[pipe->prev_odm_pipe->pipe_idx] = true; } else { - split[pipe->top_pipe->pipe_idx] = true; + split[pipe->top_pipe->pipe_idx] = 2; merge[pipe->top_pipe->pipe_idx] = true; } } + } else if (dcn20_find_previous_split_count(pipe) == 3) { + if (split[i] == 0 && !pipe->top_pipe) { + merge[pipe->bottom_pipe->pipe_idx] = true; + merge[pipe->bottom_pipe->bottom_pipe->pipe_idx] = true; + } else if (split[i] == 2 && !pipe->top_pipe) { + merge[pipe->bottom_pipe->bottom_pipe->pipe_idx] = true; + split[i] = 0; + } + } else if (dcn20_find_previous_split_count(pipe) == 4) { + if (split[i] == 0 && !pipe->top_pipe) { + merge[pipe->bottom_pipe->pipe_idx] = true; + merge[pipe->bottom_pipe->bottom_pipe->pipe_idx] = true; + merge[pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx] = true; + } else if (split[i] == 2 && !pipe->top_pipe) { + merge[pipe->bottom_pipe->bottom_pipe->pipe_idx] = true; + merge[pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx] = true; + split[i] = 0; + } } /* Adjust dppclk when split is forced, do not bother with dispclk */ - if (split[i] && context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 1) + if (split[i] != 0 + && context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 1) context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] /= 2; pipe_idx++; } @@ -2727,7 +2750,7 @@ bool dcn20_fast_validate_bw( int *vlevel_out) { bool out = false; - bool split[MAX_PIPES] = { false }; + int split[MAX_PIPES] = { 0 }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -2787,7 +2810,7 @@ bool dcn20_fast_validate_bw( && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) goto validate_fail; - if (split[i]) { + if (split[i] == 2) { if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) { /* pipe not split previously needs split */ hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); @@ -2802,11 +2825,13 @@ bool dcn20_fast_validate_bw( pipe, hsplit_pipe)) goto validate_fail; dcn20_build_mapped_resource(dc, context, pipe->stream); - } else - if (!dcn20_split_stream_for_mpc( + } else { + dcn20_split_stream_for_mpc( &context->res_ctx, dc->res_pool, - pipe, hsplit_pipe)) + pipe, hsplit_pipe); + if (!resource_build_scaling_params(pipe) || !resource_build_scaling_params(hsplit_pipe)) goto validate_fail; + } pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; } } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { @@ -3107,25 +3132,34 @@ validate_out: return out; } - -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) +/* + * This must be noinline to ensure anything that deals with FP registers + * is contained within this call; previously our compiling with hard-float + * would result in fp instructions being emitted outside of the boundaries + * of the DC_FP_START/END macros, which makes sense as the compiler has no + * idea about what is wrapped and what is not + * + * This is largely just a workaround to avoid breakage introduced with 5.6, + * ideally all fp-using code should be moved into its own file, only that + * should be compiled with hard-float, and all code exported from there + * should be strictly wrapped with DC_FP_START/END + */ +static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc, + struct dc_state *context, bool fast_validate) { bool voltage_supported = false; bool full_pstate_supported = false; bool dummy_pstate_supported = false; double p_state_latency_us; - DC_FP_START(); p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support = dc->debug.disable_dram_clock_change_vactive_support; + context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive = + dc->debug.enable_dram_clock_change_one_display_vactive; if (fast_validate) { - voltage_supported = dcn20_validate_bandwidth_internal(dc, context, true); - - DC_FP_END(); - return voltage_supported; + return dcn20_validate_bandwidth_internal(dc, context, true); } // Best case, we support full UCLK switch latency @@ -3154,7 +3188,15 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, restore_dml_state: context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; + return voltage_supported; +} +bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported = false; + DC_FP_START(); + voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate); DC_FP_END(); return voltage_supported; } @@ -3213,8 +3255,6 @@ static struct dc_cap_funcs cap_funcs = { enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state) { - enum dc_status result = DC_OK; - enum surface_pixel_format surf_pix_format = plane_state->format; unsigned int bpp = resource_pixel_format_to_bpp(surf_pix_format); @@ -3226,7 +3266,7 @@ enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_stat swizzle = DC_SW_64KB_S; plane_state->tiling_info.gfx9.swizzle = swizzle; - return result; + return DC_OK; } static struct resource_funcs dcn20_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index 578265ccbf5b..d5448c9b0e15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -119,17 +119,18 @@ void dcn20_set_mcif_arb_params( display_e2e_pipe_params_st *pipes, int pipe_cnt); bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +int dcn20_find_previous_split_count(struct pipe_ctx *pipe); int dcn20_validate_apply_pipe_split_flags( struct dc *dc, struct dc_state *context, int vlevel, - bool *split, + int *split, bool *merge); void dcn20_release_dsc(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx); -bool dcn20_split_stream_for_mpc( +void dcn20_split_stream_for_mpc( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *primary_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index ae05a8bfdae1..e97dfaa656e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -86,13 +86,9 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .optimize_pwr_state = dcn21_optimize_pwr_state, - .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, + .set_abm_immediate_disable = dce110_set_abm_immediate_disable, }; static const struct hwseq_private_funcs dcn21_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index ceaf70a934c3..419cdde624f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1384,7 +1384,8 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - unsigned int i, j, closest_clk_lvl; + unsigned int i, closest_clk_lvl; + int j; // Default clock levels are used for diags, which may lead to overclocking. if (!IS_DIAG_DC(dc->ctx->dce_environment)) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 922ab7169e52..80170f9721ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -2599,18 +2599,40 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP } } + { + float SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; + int PlaneWithMinActiveDRAMClockChangeMargin = -1; + mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) { mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + PlaneWithMinActiveDRAMClockChangeMargin = k; + } else { + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[k] == j) { + PlaneWithMinActiveDRAMClockChangeMargin = j; + } + } + } } } mode_lib->vba.MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + mode_lib->vba.DRAMClockChangeLatency; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (mode_lib->vba.BlendingAndTiming[k] == k)) + && !(mode_lib->vba.BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) + && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + } + } if (mode_lib->vba.DRAMClockChangeSupportsVActive && mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) { @@ -2629,7 +2651,11 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; } else { - if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + if ((mode_lib->vba.SynchronizedVBlank + || mode_lib->vba.NumberOfActivePlanes == 1 + || (SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0 && + mode_lib->vba.AllowDramClockChangeOneDisplayVactive)) + && mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank; for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) { @@ -2641,6 +2667,7 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported; } } + } for (k = 0; k <= mode_lib->vba.soc.num_states; k++) for (j = 0; j < 2; j++) mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0]; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 5bc80b6084da..a576eed94d9b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -3190,6 +3190,7 @@ static void CalculateFlipSchedule( double TimeForFetchingRowInVBlankImmediateFlip; double ImmediateFlipBW; double HostVMInefficiencyFactor; + double VRatioClamped; if (GPUVMEnable == true && HostVMEnable == true) { HostVMInefficiencyFactor = @@ -3222,31 +3223,32 @@ static void CalculateFlipSchedule( *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); + VRatioClamped = (VRatio < 1.0) ? 1.0 : VRatio; if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { if (GPUVMEnable == true && DCCEnable != true) { min_row_time = dml_min( - dpte_row_height * LineTime / VRatio, - dpte_row_height_chroma * LineTime / (VRatio / 2)); + dpte_row_height * LineTime / VRatioClamped, + dpte_row_height_chroma * LineTime / (VRatioClamped / 2)); } else if (GPUVMEnable != true && DCCEnable == true) { min_row_time = dml_min( - meta_row_height * LineTime / VRatio, - meta_row_height_chroma * LineTime / (VRatio / 2)); + meta_row_height * LineTime / VRatioClamped, + meta_row_height_chroma * LineTime / (VRatioClamped / 2)); } else { min_row_time = dml_min4( - dpte_row_height * LineTime / VRatio, - meta_row_height * LineTime / VRatio, - dpte_row_height_chroma * LineTime / (VRatio / 2), - meta_row_height_chroma * LineTime / (VRatio / 2)); + dpte_row_height * LineTime / VRatioClamped, + meta_row_height * LineTime / VRatioClamped, + dpte_row_height_chroma * LineTime / (VRatioClamped / 2), + meta_row_height_chroma * LineTime / (VRatioClamped / 2)); } } else { if (GPUVMEnable == true && DCCEnable != true) { - min_row_time = dpte_row_height * LineTime / VRatio; + min_row_time = dpte_row_height * LineTime / VRatioClamped; } else if (GPUVMEnable != true && DCCEnable == true) { - min_row_time = meta_row_height * LineTime / VRatio; + min_row_time = meta_row_height * LineTime / VRatioClamped; } else { min_row_time = dml_min( - dpte_row_height * LineTime / VRatio, - meta_row_height * LineTime / VRatio); + dpte_row_height * LineTime / VRatioClamped, + meta_row_height * LineTime / VRatioClamped); } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 193f31b8ac4a..90a5fefef05b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -1200,7 +1200,7 @@ static void dml_rq_dlg_get_dlg_params( min_hratio_fact_l = 1.0; min_hratio_fact_c = 1.0; - if (htaps_l <= 1) + if (hratio_l <= 1) min_hratio_fact_l = 2.0; else if (htaps_l <= 6) { if ((hratio_l * 2.0) > 4.0) @@ -1216,7 +1216,7 @@ static void dml_rq_dlg_get_dlg_params( hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; - if (htaps_c <= 1) + if (hratio_c <= 1) min_hratio_fact_c = 2.0; else if (htaps_c <= 6) { if ((hratio_c * 2.0) > 4.0) @@ -1533,8 +1533,8 @@ static void dml_rq_dlg_get_dlg_params( disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; - disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; - disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; + disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); + disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // Clamp to max for now if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index b2ecb174a93f..439ffd04be34 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -118,6 +118,7 @@ struct _vcs_dpi_soc_bounding_box_st { double urgent_latency_adjustment_fabric_clock_component_us; double urgent_latency_adjustment_fabric_clock_reference_mhz; bool disable_dram_clock_change_vactive_support; + bool allow_dram_clock_one_display_vactive; }; struct _vcs_dpi_ip_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 6e4e8a452e66..b19988f54721 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -224,6 +224,7 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib) mode_lib->vba.DummyPStateCheck = soc->dram_clock_change_latency_us == soc->dummy_pstate_latency_us; mode_lib->vba.DRAMClockChangeSupportsVActive = !soc->disable_dram_clock_change_vactive_support || mode_lib->vba.DummyPStateCheck; + mode_lib->vba.AllowDramClockChangeOneDisplayVactive = soc->allow_dram_clock_one_display_vactive; mode_lib->vba.Downspreading = soc->downspread_percent; mode_lib->vba.DRAMChannelWidth = soc->dram_channel_width_bytes; // new! diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index a1884ffe63ae..6a7b20927a6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -899,6 +899,7 @@ struct vba_vars_st { double BPP; enum odm_combine_policy ODMCombinePolicy; bool UseMinimumRequiredDCFCLK; + bool AllowDramClockChangeOneDisplayVactive; }; bool CalculateMinAndMaxPrefetchMode( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index 91a42f0f1fc4..e8ce8c85adf1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -27,27 +27,17 @@ #include "dm_services_types.h" -struct abm_backlight_registers { - unsigned int BL_PWM_CNTL; - unsigned int BL_PWM_CNTL2; - unsigned int BL_PWM_PERIOD_CNTL; - unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV; -}; - struct abm { struct dc_context *ctx; const struct abm_funcs *funcs; bool dmcu_is_running; - /* registers setting needs to be saved and restored at InitBacklight */ - struct abm_backlight_registers stored_backlight_registers; }; struct abm_funcs { - void (*abm_init)(struct abm *abm); + void (*abm_init)(struct abm *abm, uint32_t back_light); bool (*set_abm_level)(struct abm *abm, unsigned int abm_level); bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst); bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst); - bool (*init_backlight)(struct abm *abm); /* backlight_pwm_u16_16 is unsigned 32 bit, * 16 bit integer + 16 fractional, where 1.0 is max backlight value. @@ -56,8 +46,7 @@ struct abm_funcs { unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int controller_id, - unsigned int panel_inst, - bool fw_set_brightness); + unsigned int panel_inst); unsigned int (*get_current_backlight)(struct abm *abm); unsigned int (*get_target_backlight)(struct abm *abm); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h index 5a75d035f1fa..f9ab5abb6462 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h @@ -32,11 +32,23 @@ #include "dc_types.h" +#define MAX_BACKLIGHT_LEVEL 0xFFFF + +struct panel_cntl_backlight_registers { + unsigned int BL_PWM_CNTL; + unsigned int BL_PWM_CNTL2; + unsigned int BL_PWM_PERIOD_CNTL; + unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV; +}; + struct panel_cntl_funcs { void (*destroy)(struct panel_cntl **panel_cntl); - void (*hw_init)(struct panel_cntl *panel_cntl); + uint32_t (*hw_init)(struct panel_cntl *panel_cntl); bool (*is_panel_backlight_on)(struct panel_cntl *panel_cntl); bool (*is_panel_powered_on)(struct panel_cntl *panel_cntl); + void (*store_backlight_level)(struct panel_cntl *panel_cntl); + void (*driver_set_backlight)(struct panel_cntl *panel_cntl, + uint32_t backlight_pwm_u16_16); }; struct panel_cntl_init_data { @@ -48,6 +60,8 @@ struct panel_cntl { const struct panel_cntl_funcs *funcs; struct dc_context *ctx; uint32_t inst; + /* registers setting needs to be saved and restored at InitBacklight */ + struct panel_cntl_backlight_registers stored_backlight_registers; }; #endif /* DC_PANEL_CNTL_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 1570fed20de0..3b2ea9bdb62c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -196,6 +196,8 @@ struct hw_sequencer_funcs { uint32_t backlight_pwm_u16_16, uint32_t frame_ramp); + void (*set_abm_immediate_disable)(struct pipe_ctx *pipe_ctx); + }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index ca4c36c0c9bc..109c589eb97c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -138,9 +138,6 @@ struct pipe_ctx *find_idle_secondary_pipe( const struct resource_pool *pool, const struct pipe_ctx *primary_pipe); -bool resource_is_stream_unchanged( - struct dc_state *old_context, struct dc_stream_state *stream); - bool resource_validate_attach_surfaces( const struct dc_validation_set set[], int set_count, diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index c34eba19860a..6d7bca562eec 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -108,7 +108,7 @@ #define ASSERT(expr) ASSERT_CRITICAL(expr) #else -#define ASSERT(expr) WARN_ON(!(expr)) +#define ASSERT(expr) WARN_ON_ONCE(!(expr)) #endif #define BREAK_TO_DEBUGGER() ASSERT(0) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 9f10bfe3d2df..26d94eb5ab58 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -64,10 +64,11 @@ * other component within DAL. */ -#include "dmub_types.h" -#include "dmub_cmd.h" -#include "dmub_gpint_cmd.h" -#include "dmub_rb.h" +#include "inc/dmub_types.h" +#include "inc/dmub_cmd.h" +#include "inc/dmub_gpint_cmd.h" +#include "inc/dmub_cmd_dal.h" +#include "inc/dmub_rb.h" #if defined(__cplusplus) extern "C" { diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index eb10518dc058..599bf2055bcb 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -262,6 +262,7 @@ struct dmub_cmd_abm_set_pipe_data { uint32_t ramping_boundary; uint32_t otg_inst; uint32_t panel_inst; + uint32_t set_pipe_option; }; struct dmub_rb_cmd_abm_set_pipe { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index 45638d61b73d..edc73d6d7ba2 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -23,7 +23,7 @@ * */ -#include "../inc/dmub_srv.h" +#include "../dmub_srv.h" #include "dmub_reg.h" #include "dmub_dcn20.h" diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c index 5bed9fcd6b5c..e8f488232e34 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c @@ -23,7 +23,7 @@ * */ -#include "../inc/dmub_srv.h" +#include "../dmub_srv.h" #include "dmub_reg.h" #include "dmub_dcn21.h" diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c index 4094eca212f0..ca0c8a54b635 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c @@ -24,7 +24,7 @@ */ #include "dmub_reg.h" -#include "../inc/dmub_srv.h" +#include "../dmub_srv.h" struct dmub_reg_value_masks { uint32_t value; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 66ba0b2d80d2..0e3751d94cb0 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -23,7 +23,7 @@ * */ -#include "../inc/dmub_srv.h" +#include "../dmub_srv.h" #include "dmub_dcn20.h" #include "dmub_dcn21.h" #include "dmub_fw_meta.h" diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 22a5484d9f28..7cd8a43d1889 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -29,7 +29,6 @@ #include "mod_shared.h" #include "mod_freesync.h" #include "dc.h" -#include "dmub/inc/dmub_cmd_dal.h" enum vsc_packet_revision { vsc_packet_undefined = 0, @@ -145,7 +144,7 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } /*VSC packet set to 2 when DP revision >= 1.2*/ - if (stream->link->psr_settings.psr_version != PSR_VERSION_UNSUPPORTED) + if (stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) vsc_packet_revision = vsc_packet_rev2; /* Update to revision 5 for extended colorimetry support */ diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c deleted file mode 100644 index 03121ca64fe4..000000000000 --- a/drivers/gpu/drm/amd/display/modules/stats/stats.c +++ /dev/null @@ -1,448 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "mod_stats.h" -#include "dm_services.h" -#include "dc.h" -#include "core_types.h" - -#define DAL_STATS_ENABLE_REGKEY "DalStatsEnable" -#define DAL_STATS_ENABLE_REGKEY_DEFAULT 0x00000000 -#define DAL_STATS_ENABLE_REGKEY_ENABLED 0x00000001 - -#define DAL_STATS_ENTRIES_REGKEY "DalStatsEntries" -#define DAL_STATS_ENTRIES_REGKEY_DEFAULT 0x00350000 -#define DAL_STATS_ENTRIES_REGKEY_MAX 0x01000000 - -#define DAL_STATS_EVENT_ENTRIES_DEFAULT 0x00000100 - -#define MOD_STATS_NUM_VSYNCS 5 -#define MOD_STATS_EVENT_STRING_MAX 512 - -struct stats_time_cache { - unsigned int entry_id; - - unsigned long flip_timestamp_in_ns; - unsigned long vupdate_timestamp_in_ns; - - unsigned int render_time_in_us; - unsigned int avg_render_time_in_us_last_ten; - unsigned int v_sync_time_in_us[MOD_STATS_NUM_VSYNCS]; - unsigned int num_vsync_between_flips; - - unsigned int flip_to_vsync_time_in_us; - unsigned int vsync_to_flip_time_in_us; - - unsigned int min_window; - unsigned int max_window; - unsigned int v_total_min; - unsigned int v_total_max; - unsigned int event_triggers; - - unsigned int lfc_mid_point_in_us; - unsigned int num_frames_inserted; - unsigned int inserted_duration_in_us; - - unsigned int flags; -}; - -struct stats_event_cache { - unsigned int entry_id; - char event_string[MOD_STATS_EVENT_STRING_MAX]; -}; - -struct core_stats { - struct mod_stats public; - struct dc *dc; - - bool enabled; - unsigned int entries; - unsigned int event_entries; - unsigned int entry_id; - - struct stats_time_cache *time; - unsigned int index; - - struct stats_event_cache *events; - unsigned int event_index; - -}; - -#define MOD_STATS_TO_CORE(mod_stats)\ - container_of(mod_stats, struct core_stats, public) - -bool mod_stats_init(struct mod_stats *mod_stats) -{ - bool result = false; - struct core_stats *core_stats = NULL; - struct dc *dc = NULL; - - if (mod_stats == NULL) - return false; - - core_stats = MOD_STATS_TO_CORE(mod_stats); - dc = core_stats->dc; - - return result; -} - -struct mod_stats *mod_stats_create(struct dc *dc) -{ - struct core_stats *core_stats = NULL; - struct persistent_data_flag flag; - unsigned int reg_data; - int i = 0; - - if (dc == NULL) - goto fail_construct; - - core_stats = kzalloc(sizeof(struct core_stats), GFP_KERNEL); - - if (core_stats == NULL) - goto fail_construct; - - core_stats->dc = dc; - - core_stats->enabled = DAL_STATS_ENABLE_REGKEY_DEFAULT; - if (dm_read_persistent_data(dc->ctx, NULL, NULL, - DAL_STATS_ENABLE_REGKEY, - ®_data, sizeof(unsigned int), &flag)) - core_stats->enabled = reg_data; - - if (core_stats->enabled) { - core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT; - if (dm_read_persistent_data(dc->ctx, NULL, NULL, - DAL_STATS_ENTRIES_REGKEY, - ®_data, sizeof(unsigned int), &flag)) { - if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX) - core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX; - else - core_stats->entries = reg_data; - } - core_stats->time = kcalloc(core_stats->entries, - sizeof(struct stats_time_cache), - GFP_KERNEL); - - if (core_stats->time == NULL) - goto fail_construct_time; - - core_stats->event_entries = DAL_STATS_EVENT_ENTRIES_DEFAULT; - core_stats->events = kcalloc(core_stats->event_entries, - sizeof(struct stats_event_cache), - GFP_KERNEL); - - if (core_stats->events == NULL) - goto fail_construct_events; - - } else { - core_stats->entries = 0; - } - - /* Purposely leave index 0 unused so we don't need special logic to - * handle calculation cases that depend on previous flip data. - */ - core_stats->index = 1; - core_stats->event_index = 0; - - // Keeps track of ordering within the different stats structures - core_stats->entry_id = 0; - - return &core_stats->public; - -fail_construct_events: - kfree(core_stats->time); - -fail_construct_time: - kfree(core_stats); - -fail_construct: - return NULL; -} - -void mod_stats_destroy(struct mod_stats *mod_stats) -{ - if (mod_stats != NULL) { - struct core_stats *core_stats = MOD_STATS_TO_CORE(mod_stats); - - kfree(core_stats->time); - kfree(core_stats->events); - kfree(core_stats); - } -} - -void mod_stats_dump(struct mod_stats *mod_stats) -{ - struct dc *dc = NULL; - struct dal_logger *logger = NULL; - struct core_stats *core_stats = NULL; - struct stats_time_cache *time = NULL; - struct stats_event_cache *events = NULL; - unsigned int time_index = 1; - unsigned int event_index = 0; - unsigned int index = 0; - struct log_entry log_entry; - - if (mod_stats == NULL) - return; - - core_stats = MOD_STATS_TO_CORE(mod_stats); - dc = core_stats->dc; - logger = dc->ctx->logger; - time = core_stats->time; - events = core_stats->events; - - DISPLAY_STATS_BEGIN(log_entry); - - DISPLAY_STATS("==Display Caps==\n"); - - DISPLAY_STATS("==Display Stats==\n"); - - DISPLAY_STATS("%10s %10s %10s %10s %10s" - " %11s %11s %17s %10s %14s" - " %10s %10s %10s %10s %10s" - " %10s %10s %10s %10s\n", - "render", "avgRender", - "minWindow", "midPoint", "maxWindow", - "vsyncToFlip", "flipToVsync", "vsyncsBetweenFlip", - "numFrame", "insertDuration", - "vTotalMin", "vTotalMax", "eventTrigs", - "vSyncTime1", "vSyncTime2", "vSyncTime3", - "vSyncTime4", "vSyncTime5", "flags"); - - for (int i = 0; i < core_stats->entry_id; i++) { - if (event_index < core_stats->event_index && - i == events[event_index].entry_id) { - DISPLAY_STATS("==Event==%s\n", events[event_index].event_string); - event_index++; - } else if (time_index < core_stats->index && - i == time[time_index].entry_id) { - DISPLAY_STATS("%10u %10u %10u %10u %10u" - " %11u %11u %17u %10u %14u" - " %10u %10u %10u %10u %10u" - " %10u %10u %10u %10u\n", - time[time_index].render_time_in_us, - time[time_index].avg_render_time_in_us_last_ten, - time[time_index].min_window, - time[time_index].lfc_mid_point_in_us, - time[time_index].max_window, - time[time_index].vsync_to_flip_time_in_us, - time[time_index].flip_to_vsync_time_in_us, - time[time_index].num_vsync_between_flips, - time[time_index].num_frames_inserted, - time[time_index].inserted_duration_in_us, - time[time_index].v_total_min, - time[time_index].v_total_max, - time[time_index].event_triggers, - time[time_index].v_sync_time_in_us[0], - time[time_index].v_sync_time_in_us[1], - time[time_index].v_sync_time_in_us[2], - time[time_index].v_sync_time_in_us[3], - time[time_index].v_sync_time_in_us[4], - time[time_index].flags); - - time_index++; - } - } - - DISPLAY_STATS_END(log_entry); -} - -void mod_stats_reset_data(struct mod_stats *mod_stats) -{ - struct core_stats *core_stats = NULL; - struct stats_time_cache *time = NULL; - unsigned int index = 0; - - if (mod_stats == NULL) - return; - - core_stats = MOD_STATS_TO_CORE(mod_stats); - - memset(core_stats->time, 0, - sizeof(struct stats_time_cache) * core_stats->entries); - - memset(core_stats->events, 0, - sizeof(struct stats_event_cache) * core_stats->event_entries); - - core_stats->index = 1; - core_stats->event_index = 0; - - // Keeps track of ordering within the different stats structures - core_stats->entry_id = 0; -} - -void mod_stats_update_event(struct mod_stats *mod_stats, - char *event_string, - unsigned int length) -{ - struct core_stats *core_stats = NULL; - struct stats_event_cache *events = NULL; - unsigned int index = 0; - unsigned int copy_length = 0; - - if (mod_stats == NULL) - return; - - core_stats = MOD_STATS_TO_CORE(mod_stats); - - if (core_stats->event_index >= core_stats->event_entries) - return; - - events = core_stats->events; - index = core_stats->event_index; - - copy_length = length; - if (length > MOD_STATS_EVENT_STRING_MAX) - copy_length = MOD_STATS_EVENT_STRING_MAX; - - memcpy(&events[index].event_string, event_string, copy_length); - events[index].event_string[copy_length - 1] = '\0'; - - events[index].entry_id = core_stats->entry_id; - core_stats->event_index++; - core_stats->entry_id++; -} - -void mod_stats_update_flip(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns) -{ - struct core_stats *core_stats = NULL; - struct stats_time_cache *time = NULL; - unsigned int index = 0; - - if (mod_stats == NULL) - return; - - core_stats = MOD_STATS_TO_CORE(mod_stats); - - if (core_stats->index >= core_stats->entries) - return; - - time = core_stats->time; - index = core_stats->index; - - time[index].flip_timestamp_in_ns = timestamp_in_ns; - time[index].render_time_in_us = - (timestamp_in_ns - time[index - 1].flip_timestamp_in_ns) / 1000; - - if (index >= 10) { - for (unsigned int i = 0; i < 10; i++) - time[index].avg_render_time_in_us_last_ten += - time[index - i].render_time_in_us; - time[index].avg_render_time_in_us_last_ten /= 10; - } - - if (time[index].num_vsync_between_flips > 0) - time[index].vsync_to_flip_time_in_us = - (timestamp_in_ns - - time[index].vupdate_timestamp_in_ns) / 1000; - else - time[index].vsync_to_flip_time_in_us = - (timestamp_in_ns - - time[index - 1].vupdate_timestamp_in_ns) / 1000; - - time[index].entry_id = core_stats->entry_id; - core_stats->index++; - core_stats->entry_id++; -} - -void mod_stats_update_vupdate(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns) -{ - struct core_stats *core_stats = NULL; - struct stats_time_cache *time = NULL; - unsigned int index = 0; - unsigned int num_vsyncs = 0; - unsigned int prev_vsync_in_ns = 0; - - if (mod_stats == NULL) - return; - - core_stats = MOD_STATS_TO_CORE(mod_stats); - - if (core_stats->index >= core_stats->entries) - return; - - time = core_stats->time; - index = core_stats->index; - num_vsyncs = time[index].num_vsync_between_flips; - - if (num_vsyncs < MOD_STATS_NUM_VSYNCS) { - if (num_vsyncs == 0) { - prev_vsync_in_ns = - time[index - 1].vupdate_timestamp_in_ns; - - time[index].flip_to_vsync_time_in_us = - (timestamp_in_ns - - time[index - 1].flip_timestamp_in_ns) / - 1000; - } else { - prev_vsync_in_ns = - time[index].vupdate_timestamp_in_ns; - } - - time[index].v_sync_time_in_us[num_vsyncs] = - (timestamp_in_ns - prev_vsync_in_ns) / 1000; - } - - time[index].vupdate_timestamp_in_ns = timestamp_in_ns; - time[index].num_vsync_between_flips++; -} - -void mod_stats_update_freesync(struct mod_stats *mod_stats, - unsigned int v_total_min, - unsigned int v_total_max, - unsigned int event_triggers, - unsigned int window_min, - unsigned int window_max, - unsigned int lfc_mid_point_in_us, - unsigned int inserted_frames, - unsigned int inserted_duration_in_us) -{ - struct core_stats *core_stats = NULL; - struct stats_time_cache *time = NULL; - unsigned int index = 0; - - if (mod_stats == NULL) - return; - - core_stats = MOD_STATS_TO_CORE(mod_stats); - - if (core_stats->index >= core_stats->entries) - return; - - time = core_stats->time; - index = core_stats->index; - - time[index].v_total_min = v_total_min; - time[index].v_total_max = v_total_max; - time[index].event_triggers = event_triggers; - time[index].min_window = window_min; - time[index].max_window = window_max; - time[index].lfc_mid_point_in_us = lfc_mid_point_in_us; - time[index].num_frames_inserted = inserted_frames; - time[index].inserted_duration_in_us = inserted_duration_in_us; -} - diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h index e7db6f9f9c86..8b0b9a2a8fed 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h @@ -5599,6 +5599,7 @@ #define GRBM_PWR_CNTL__ALL_REQ_EN_MASK 0x00008000L //GRBM_STATUS #define GRBM_STATUS__ME0PIPE0_CMDFIFO_AVAIL__SHIFT 0x0 +#define GRBM_STATUS__RSMU_RQ_PENDING__SHIFT 0x5 #define GRBM_STATUS__ME0PIPE0_CF_RQ_PENDING__SHIFT 0x7 #define GRBM_STATUS__ME0PIPE0_PF_RQ_PENDING__SHIFT 0x8 #define GRBM_STATUS__GDS_DMA_RQ_PENDING__SHIFT 0x9 @@ -5619,6 +5620,7 @@ #define GRBM_STATUS__CB_BUSY__SHIFT 0x1e #define GRBM_STATUS__GUI_ACTIVE__SHIFT 0x1f #define GRBM_STATUS__ME0PIPE0_CMDFIFO_AVAIL_MASK 0x0000000FL +#define GRBM_STATUS__RSMU_RQ_PENDING_MASK 0x00000020L #define GRBM_STATUS__ME0PIPE0_CF_RQ_PENDING_MASK 0x00000080L #define GRBM_STATUS__ME0PIPE0_PF_RQ_PENDING_MASK 0x00000100L #define GRBM_STATUS__GDS_DMA_RQ_PENDING_MASK 0x00000200L @@ -5832,6 +5834,7 @@ #define GRBM_READ_ERROR__READ_ERROR_MASK 0x80000000L //GRBM_READ_ERROR2 #define GRBM_READ_ERROR2__READ_REQUESTER_CPF__SHIFT 0x10 +#define GRBM_READ_ERROR2__READ_REQUESTER_RSMU__SHIFT 0x11 #define GRBM_READ_ERROR2__READ_REQUESTER_RLC__SHIFT 0x12 #define GRBM_READ_ERROR2__READ_REQUESTER_GDS_DMA__SHIFT 0x13 #define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_CF__SHIFT 0x14 @@ -5847,6 +5850,7 @@ #define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE2__SHIFT 0x1e #define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE3__SHIFT 0x1f #define GRBM_READ_ERROR2__READ_REQUESTER_CPF_MASK 0x00010000L +#define GRBM_READ_ERROR2__READ_REQUESTER_RSMU_MASK 0x00020000L #define GRBM_READ_ERROR2__READ_REQUESTER_RLC_MASK 0x00040000L #define GRBM_READ_ERROR2__READ_REQUESTER_GDS_DMA_MASK 0x00080000L #define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_CF_MASK 0x00100000L diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index a6f6e6bf5992..b36ea8340afa 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -972,11 +972,13 @@ struct atom_ext_display_path }; //usCaps -enum ext_display_path_cap_def -{ - EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE =0x0001, - EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN =0x0002, - EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK =0x007C, +enum ext_display_path_cap_def { + EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE = 0x0001, + EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = 0x0002, + EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007C, + EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x01 << 2), //PI redriver chip + EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x02 << 2), //TI retimer chip + EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x03 << 2) //Parade DP->HDMI recoverter chip }; struct atom_external_display_connection_info diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index fc31499c2e5c..7e6dcdf7df73 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -322,12 +322,12 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr, if (*level & profile_mode_mask) { hwmgr->saved_dpm_level = hwmgr->dpm_level; hwmgr->en_umd_pstate = true; - amdgpu_device_ip_set_clockgating_state(hwmgr->adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_UNGATE); amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); } } else { /* exit umd pstate, restore level, enable gfx cg*/ diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 5964d6323a13..de14542de775 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1498,7 +1498,7 @@ static int smu_disable_dpm(struct smu_context *smu) bool use_baco = !smu->is_apu && ((adev->in_gpu_reset && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || - (adev->in_runpm && amdgpu_asic_supports_baco(adev))); + ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev))); ret = smu_get_smc_version(smu, NULL, &smu_version); if (ret) { @@ -1784,12 +1784,12 @@ static int smu_enable_umd_pstate(void *handle, if (*level & profile_mode_mask) { smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level; smu_dpm_ctx->enable_umd_pstate = true; - amdgpu_device_ip_set_clockgating_state(smu->adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_UNGATE); amdgpu_device_ip_set_powergating_state(smu->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(smu->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); } } else { /* exit umd pstate, restore level, enable gfx cg*/ diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 1c66b7d7139c..cfae4bcaf32e 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -2251,7 +2251,7 @@ static bool arcturus_is_baco_supported(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t val; - if (!smu_v11_0_baco_is_support(smu)) + if (!smu_v11_0_baco_is_support(smu) || amdgpu_sriov_vf(adev)) return false; val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 1cc30f750c26..4f8c1b85e688 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1357,7 +1357,6 @@ static int smu10_asic_reset(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mod static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .backend_init = smu10_hwmgr_backend_init, .backend_fini = smu10_hwmgr_backend_fini, - .asic_setup = NULL, .apply_state_adjust_rules = smu10_apply_state_adjust_rules, .force_dpm_level = smu10_dpm_force_dpm_level, .get_power_state_size = smu10_get_power_state_size, diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 2184d247a9f7..0c9be864d072 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -1293,8 +1293,6 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u } if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size < 0) - return -EINVAL; ret = smu_update_table(smu, SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 44c506b7e117..caf00d92ea9d 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, sign-compare) subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-y += $(call cc-disable-warning, uninitialized) +subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable @@ -91,6 +92,7 @@ gt-y += \ gt/intel_ggtt.o \ gt/intel_ggtt_fencing.o \ gt/intel_gt.o \ + gt/intel_gt_clock_utils.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ @@ -109,6 +111,7 @@ gt-y += \ gt/intel_sseu.o \ gt/intel_timeline.o \ gt/intel_workarounds.o \ + gt/shmem_utils.o \ gt/sysfs_engines.o # autogenerated null render state gt-y += \ @@ -257,7 +260,8 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/igt_live_test.o \ selftests/igt_mmap.o \ selftests/igt_reset.o \ - selftests/igt_spinner.o + selftests/igt_spinner.o \ + selftests/librapl.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 99a25c0bb08f..4fec5bd64920 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -36,15 +36,15 @@ #include "intel_panel.h" #include "intel_vdsc.h" -static inline int header_credits_available(struct drm_i915_private *dev_priv, - enum transcoder dsi_trans) +static int header_credits_available(struct drm_i915_private *dev_priv, + enum transcoder dsi_trans) { return (intel_de_read(dev_priv, DSI_CMD_TXCTL(dsi_trans)) & FREE_HEADER_CREDIT_MASK) >> FREE_HEADER_CREDIT_SHIFT; } -static inline int payload_credits_available(struct drm_i915_private *dev_priv, - enum transcoder dsi_trans) +static int payload_credits_available(struct drm_i915_private *dev_priv, + enum transcoder dsi_trans) { return (intel_de_read(dev_priv, DSI_CMD_TXCTL(dsi_trans)) & FREE_PLOAD_CREDIT_MASK) >> FREE_PLOAD_CREDIT_SHIFT; @@ -1195,7 +1195,7 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - WARN_ON(crtc_state->has_pch_encoder); + drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder); /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); @@ -1525,15 +1525,18 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, base); struct intel_connector *intel_connector = intel_dsi->attached_connector; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); const struct drm_display_mode *fixed_mode = - intel_connector->panel.fixed_mode; + intel_connector->panel.fixed_mode; struct drm_display_mode *adjusted_mode = - &pipe_config->hw.adjusted_mode; + &pipe_config->hw.adjusted_mode; + int ret; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; intel_fixed_panel_mode(fixed_mode, adjusted_mode); - intel_pch_panel_fitting(crtc, pipe_config, conn_state->scaling_mode); + + ret = intel_pch_panel_fitting(pipe_config, conn_state); + if (ret) + return ret; adjusted_mode->flags = 0; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 25dfeb3197aa..79032701873a 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -125,7 +125,7 @@ intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { struct intel_plane_state *plane_state = to_intel_plane_state(state); - WARN_ON(plane_state->vma); + drm_WARN_ON(plane->dev, plane_state->vma); __drm_atomic_helper_plane_destroy_state(&plane_state->uapi); if (plane_state->hw.fb) @@ -396,7 +396,7 @@ skl_next_plane_to_commit(struct intel_atomic_state *state, } /* should never happen */ - WARN_ON(1); + drm_WARN_ON(state->base.dev, 1); return NULL; } diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 57b80971ae78..36aaee8536f1 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -514,6 +514,143 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder, mutex_unlock(&dev_priv->av_mutex); } +/* Add a factor to take care of rounding and truncations */ +#define ROUNDING_FACTOR 10000 + +static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + unsigned int link_clks_available, link_clks_required; + unsigned int tu_data, tu_line, link_clks_active; + unsigned int hblank_rise, hblank_early_prog; + unsigned int h_active, h_total, hblank_delta, pixel_clk, v_total; + unsigned int fec_coeff, refresh_rate, cdclk, vdsc_bpp; + + h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; + h_total = crtc_state->hw.adjusted_mode.crtc_htotal; + v_total = crtc_state->hw.adjusted_mode.crtc_vtotal; + pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock; + refresh_rate = crtc_state->hw.adjusted_mode.vrefresh; + vdsc_bpp = crtc_state->dsc.compressed_bpp; + cdclk = i915->cdclk.hw.cdclk; + /* fec= 0.972261, using rounding multiplier of 1000000 */ + fec_coeff = 972261; + + drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :" + "lanes = %u vdsc_bpp = %u cdclk = %u\n", + h_active, crtc_state->port_clock, crtc_state->lane_count, + vdsc_bpp, cdclk); + + if (WARN_ON(!crtc_state->port_clock || !crtc_state->lane_count || + !crtc_state->dsc.compressed_bpp || !i915->cdclk.hw.cdclk)) + return 0; + + link_clks_available = ((((h_total - h_active) * + ((crtc_state->port_clock * ROUNDING_FACTOR) / + pixel_clk)) / ROUNDING_FACTOR) - 28); + + link_clks_required = DIV_ROUND_UP(192000, (refresh_rate * + v_total)) * ((48 / + crtc_state->lane_count) + 2); + + if (link_clks_available > link_clks_required) + hblank_delta = 32; + else + hblank_delta = DIV_ROUND_UP(((((5 * ROUNDING_FACTOR) / + crtc_state->port_clock) + ((5 * + ROUNDING_FACTOR) / + cdclk)) * pixel_clk), + ROUNDING_FACTOR); + + tu_data = (pixel_clk * vdsc_bpp * 8) / ((crtc_state->port_clock * + crtc_state->lane_count * fec_coeff) / 1000000); + tu_line = (((h_active * crtc_state->port_clock * fec_coeff) / + 1000000) / (64 * pixel_clk)); + link_clks_active = (tu_line - 1) * 64 + tu_data; + + hblank_rise = ((link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, + 250) + 4) * ((pixel_clk * ROUNDING_FACTOR) / + crtc_state->port_clock)) / ROUNDING_FACTOR; + + hblank_early_prog = h_active - hblank_rise + hblank_delta; + + return hblank_early_prog; +} + +static unsigned int get_sample_room_req_config(const struct intel_crtc_state *crtc_state) +{ + unsigned int h_active, h_total, pixel_clk; + unsigned int samples_room; + + h_active = crtc_state->hw.adjusted_mode.hdisplay; + h_total = crtc_state->hw.adjusted_mode.htotal; + pixel_clk = crtc_state->hw.adjusted_mode.clock; + + samples_room = ((((h_total - h_active) * ((crtc_state->port_clock * + ROUNDING_FACTOR) / pixel_clk)) / + ROUNDING_FACTOR) - 12) / ((48 / + crtc_state->lane_count) + 2); + + return samples_room; +} + +static void enable_audio_dsc_wa(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum pipe pipe = crtc->pipe; + unsigned int hblank_early_prog, samples_room; + unsigned int val; + + if (INTEL_GEN(i915) < 11) + return; + + val = intel_de_read(i915, AUD_CONFIG_BE); + + if (INTEL_GEN(i915) == 11) + val |= HBLANK_EARLY_ENABLE_ICL(pipe); + else if (INTEL_GEN(i915) >= 12) + val |= HBLANK_EARLY_ENABLE_TGL(pipe); + + if (crtc_state->dsc.compression_enable && + (crtc_state->hw.adjusted_mode.hdisplay >= 3840 && + crtc_state->hw.adjusted_mode.vdisplay >= 2160)) { + /* Get hblank early enable value required */ + hblank_early_prog = get_hblank_early_enable_config(encoder, + crtc_state); + if (hblank_early_prog < 32) { + val &= ~HBLANK_START_COUNT_MASK(pipe); + val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_32); + } else if (hblank_early_prog < 64) { + val &= ~HBLANK_START_COUNT_MASK(pipe); + val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_64); + } else if (hblank_early_prog < 96) { + val &= ~HBLANK_START_COUNT_MASK(pipe); + val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_96); + } else { + val &= ~HBLANK_START_COUNT_MASK(pipe); + val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_128); + } + + /* Get samples room value required */ + samples_room = get_sample_room_req_config(crtc_state); + if (samples_room < 3) { + val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe); + val |= NUMBER_SAMPLES_PER_LINE(pipe, samples_room); + } else { + /* Program 0 i.e "All Samples available in buffer" */ + val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe); + val |= NUMBER_SAMPLES_PER_LINE(pipe, 0x0); + } + } + + intel_de_write(i915, AUD_CONFIG_BE, val); +} + +#undef ROUNDING_FACTOR + static void hsw_audio_codec_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) @@ -531,6 +668,10 @@ static void hsw_audio_codec_enable(struct intel_encoder *encoder, mutex_lock(&dev_priv->av_mutex); + /* Enable Audio WA for 4k DSC usecases */ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP)) + enable_audio_dsc_wa(encoder, crtc_state); + /* Enable audio presence detect, invalidate ELD */ tmp = intel_de_read(dev_priv, HSW_AUD_PIN_ELD_CP_VLD); tmp |= AUDIO_OUTPUT_ENABLE(cpu_transcoder); @@ -1138,6 +1279,10 @@ static void i915_audio_component_unbind(struct device *i915_kdev, drm_modeset_unlock_all(&dev_priv->drm); device_link_remove(hda_kdev, i915_kdev); + + if (dev_priv->audio_power_refcount) + drm_err(&dev_priv->drm, "audio power refcount %d after unbind\n", + dev_priv->audio_power_refcount); } static const struct component_ops i915_audio_component_bind_ops = { diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 88f367eb28ea..4aa54fcb0629 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -375,7 +375,29 @@ static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv, return data_rate; } -static struct intel_bw_state * +struct intel_bw_state * +intel_atomic_get_old_bw_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_global_state *bw_state; + + bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj); + + return to_intel_bw_state(bw_state); +} + +struct intel_bw_state * +intel_atomic_get_new_bw_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_global_state *bw_state; + + bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj); + + return to_intel_bw_state(bw_state); +} + +struct intel_bw_state * intel_atomic_get_bw_state(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index a8aa7624c5aa..ac004d6f4276 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -24,6 +24,15 @@ struct intel_bw_state { #define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base) +struct intel_bw_state * +intel_atomic_get_old_bw_state(struct intel_atomic_state *state); + +struct intel_bw_state * +intel_atomic_get_new_bw_state(struct intel_atomic_state *state); + +struct intel_bw_state * +intel_atomic_get_bw_state(struct intel_atomic_state *state); + void intel_bw_init_hw(struct drm_i915_private *dev_priv); int intel_bw_init(struct drm_i915_private *dev_priv); int intel_bw_atomic_check(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index 98ec2ea86c7c..406e96785c76 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -33,6 +33,7 @@ #include "i915_drv.h" #include "intel_connector.h" +#include "intel_display_debugfs.h" #include "intel_display_types.h" #include "intel_hdcp.h" @@ -123,6 +124,8 @@ int intel_connector_register(struct drm_connector *connector) goto err_backlight; } + intel_connector_debugfs_add(connector); + return 0; err_backlight: diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index a59ecbed0004..2f5b9a4baafd 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -294,7 +294,7 @@ static void hsw_pre_enable_crt(struct intel_atomic_state *state, hsw_fdi_link_train(encoder, crtc_state); - intel_ddi_enable_pipe_clock(crtc_state); + intel_ddi_enable_pipe_clock(encoder, crtc_state); } static void hsw_enable_crt(struct intel_atomic_state *state, @@ -308,6 +308,8 @@ static void hsw_enable_crt(struct intel_atomic_state *state, drm_WARN_ON(&dev_priv->drm, !crtc_state->has_pch_encoder); + intel_ddi_enable_transcoder_func(encoder, crtc_state); + intel_enable_pipe(crtc_state); lpt_pch_enable(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index be6c61bcbc9c..5601673c3f30 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1261,7 +1261,10 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, for (i = 0; i < ARRAY_SIZE(hsw_ddi_translations_fdi) * 2; i++) { /* Configure DP_TP_CTL with auto-training */ intel_de_write(dev_priv, DP_TP_CTL(PORT_E), - DP_TP_CTL_FDI_AUTOTRAIN | DP_TP_CTL_ENHANCED_FRAME_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_ENABLE); + DP_TP_CTL_FDI_AUTOTRAIN | + DP_TP_CTL_ENHANCED_FRAME_ENABLE | + DP_TP_CTL_LINK_TRAIN_PAT1 | + DP_TP_CTL_ENABLE); /* Configure and enable DDI_BUF_CTL for DDI E with next voltage. * DDI E does not support port reversal, the functionality is @@ -1337,7 +1340,10 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, /* Enable normal pixel sending for FDI */ intel_de_write(dev_priv, DP_TP_CTL(PORT_E), - DP_TP_CTL_FDI_AUTOTRAIN | DP_TP_CTL_LINK_TRAIN_NORMAL | DP_TP_CTL_ENHANCED_FRAME_ENABLE | DP_TP_CTL_ENABLE); + DP_TP_CTL_FDI_AUTOTRAIN | + DP_TP_CTL_LINK_TRAIN_NORMAL | + DP_TP_CTL_ENHANCED_FRAME_ENABLE | + DP_TP_CTL_ENABLE); } static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) @@ -1351,27 +1357,6 @@ static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count); } -static struct intel_encoder * -intel_ddi_get_crtc_encoder(struct intel_crtc *crtc) -{ - struct drm_device *dev = crtc->base.dev; - struct intel_encoder *encoder, *ret = NULL; - int num_encoders = 0; - - for_each_encoder_on_crtc(dev, &crtc->base, encoder) { - ret = encoder; - num_encoders++; - } - - if (num_encoders != 1) - drm_WARN(dev, 1, "%d encoders on crtc for pipe %c\n", - num_encoders, - pipe_name(crtc->pipe)); - - BUG_ON(ret == NULL); - return ret; -} - static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv, enum port port) { @@ -1512,10 +1497,10 @@ static u32 bdw_trans_port_sync_master_select(enum transcoder master_transcoder) * intel_ddi_config_transcoder_func(). */ static u32 -intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) +intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; @@ -1617,7 +1602,8 @@ intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) return temp; } -void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) +void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1640,7 +1626,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) TRANS_DDI_FUNC_CTL2(cpu_transcoder), ctl2); } - ctl = intel_ddi_transcoder_func_reg_val_get(crtc_state); + ctl = intel_ddi_transcoder_func_reg_val_get(encoder, crtc_state); if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) ctl |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), ctl); @@ -1651,14 +1637,15 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) * bit. */ static void -intel_ddi_config_transcoder_func(const struct intel_crtc_state *crtc_state) +intel_ddi_config_transcoder_func(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 ctl; - ctl = intel_ddi_transcoder_func_reg_val_get(crtc_state); + ctl = intel_ddi_transcoder_func_reg_val_get(encoder, crtc_state); ctl &= ~TRANS_DDI_FUNC_ENABLE; intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), ctl); } @@ -1927,7 +1914,7 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, return true; } -static inline enum intel_display_power_domain +static enum intel_display_power_domain intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port) { /* CNL+ HW requires corresponding AUX IOs to be powered up for PSR with @@ -1986,11 +1973,11 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, intel_dsc_power_domain(crtc_state)); } -void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) +void intel_ddi_enable_pipe_clock(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); enum port port = encoder->port; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; @@ -2654,8 +2641,9 @@ static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder, tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); } -static u32 translate_signal_level(int signal_levels) +static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels) { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); int i; for (i = 0; i < ARRAY_SIZE(index_to_dp_signal_levels); i++) { @@ -2663,8 +2651,9 @@ static u32 translate_signal_level(int signal_levels) return i; } - WARN(1, "Unsupported voltage swing/pre-emphasis level: 0x%x\n", - signal_levels); + drm_WARN(&i915->drm, 1, + "Unsupported voltage swing/pre-emphasis level: 0x%x\n", + signal_levels); return 0; } @@ -2675,46 +2664,73 @@ static u32 intel_ddi_dp_level(struct intel_dp *intel_dp) int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); - return translate_signal_level(signal_levels); + return translate_signal_level(intel_dp, signal_levels); } -u32 bxt_signal_levels(struct intel_dp *intel_dp) +static void +tgl_set_signal_levels(struct intel_dp *intel_dp) { - struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - struct intel_encoder *encoder = &dport->base; + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int level = intel_ddi_dp_level(intel_dp); - if (INTEL_GEN(dev_priv) >= 12) - tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate, - level, encoder->type); - else if (INTEL_GEN(dev_priv) >= 11) - icl_ddi_vswing_sequence(encoder, intel_dp->link_rate, - level, encoder->type); - else if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level, encoder->type); - else - bxt_ddi_vswing_sequence(encoder, level, encoder->type); + tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate, + level, encoder->type); +} - return 0; +static void +icl_set_signal_levels(struct intel_dp *intel_dp) +{ + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int level = intel_ddi_dp_level(intel_dp); + + icl_ddi_vswing_sequence(encoder, intel_dp->link_rate, + level, encoder->type); } -u32 ddi_signal_levels(struct intel_dp *intel_dp) +static void +cnl_set_signal_levels(struct intel_dp *intel_dp) { - struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - struct intel_encoder *encoder = &dport->base; + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int level = intel_ddi_dp_level(intel_dp); + + cnl_ddi_vswing_sequence(encoder, level, encoder->type); +} + +static void +bxt_set_signal_levels(struct intel_dp *intel_dp) +{ + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int level = intel_ddi_dp_level(intel_dp); + + bxt_ddi_vswing_sequence(encoder, level, encoder->type); +} + +static void +hsw_set_signal_levels(struct intel_dp *intel_dp) +{ + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int level = intel_ddi_dp_level(intel_dp); + enum port port = encoder->port; + u32 signal_levels; + + signal_levels = DDI_BUF_TRANS_SELECT(level); + + drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n", + signal_levels); + + intel_dp->DP &= ~DDI_BUF_EMP_MASK; + intel_dp->DP |= signal_levels; if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level, encoder->type); - return DDI_BUF_TRANS_SELECT(level); + intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); + intel_de_posting_read(dev_priv, DDI_BUF_CTL(port)); } -static inline -u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, - enum phy phy) +static u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, + enum phy phy) { if (intel_phy_is_combo(dev_priv, phy)) { return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); @@ -3158,13 +3174,13 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, * 7.a Configure Transcoder Clock Select to direct the Port clock to the * Transcoder. */ - intel_ddi_enable_pipe_clock(crtc_state); + intel_ddi_enable_pipe_clock(encoder, crtc_state); /* * 7.b Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST * Transport Select */ - intel_ddi_config_transcoder_func(crtc_state); + intel_ddi_config_transcoder_func(encoder, crtc_state); /* * 7.c Configure & enable DP_TP_CTL with link training pattern 1 @@ -3252,9 +3268,6 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); - intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); - intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); - intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, crtc_state); @@ -3299,7 +3312,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_fec(encoder, crtc_state); if (!is_mst) - intel_ddi_enable_pipe_clock(crtc_state); + intel_ddi_enable_pipe_clock(encoder, crtc_state); intel_dsc_enable(encoder, crtc_state); } @@ -3360,7 +3373,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_atomic_state *state, if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); - intel_ddi_enable_pipe_clock(crtc_state); + intel_ddi_enable_pipe_clock(encoder, crtc_state); intel_dig_port->set_infoframes(encoder, crtc_state->has_infoframe, @@ -3766,7 +3779,9 @@ static void intel_enable_ddi(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - WARN_ON(crtc_state->has_pch_encoder); + drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder); + + intel_ddi_enable_transcoder_func(encoder, crtc_state); intel_enable_pipe(crtc_state); @@ -3877,7 +3892,7 @@ intel_ddi_update_prepare(struct intel_atomic_state *state, crtc ? intel_atomic_get_new_crtc_state(state, crtc) : NULL; int required_lanes = crtc_state ? crtc_state->lane_count : 1; - WARN_ON(crtc && crtc->active); + drm_WARN_ON(state->base.dev, crtc && crtc->active); intel_tc_port_get_link(enc_to_dig_port(encoder), required_lanes); @@ -3969,6 +3984,74 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) udelay(600); } +static void intel_ddi_set_link_train(struct intel_dp *intel_dp, + u8 dp_train_pat) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); + enum port port = dp_to_dig_port(intel_dp)->base.port; + u32 temp; + + temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl); + + if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) + temp |= DP_TP_CTL_SCRAMBLE_DISABLE; + else + temp &= ~DP_TP_CTL_SCRAMBLE_DISABLE; + + temp &= ~DP_TP_CTL_LINK_TRAIN_MASK; + switch (dp_train_pat & train_pat_mask) { + case DP_TRAINING_PATTERN_DISABLE: + temp |= DP_TP_CTL_LINK_TRAIN_NORMAL; + break; + case DP_TRAINING_PATTERN_1: + temp |= DP_TP_CTL_LINK_TRAIN_PAT1; + break; + case DP_TRAINING_PATTERN_2: + temp |= DP_TP_CTL_LINK_TRAIN_PAT2; + break; + case DP_TRAINING_PATTERN_3: + temp |= DP_TP_CTL_LINK_TRAIN_PAT3; + break; + case DP_TRAINING_PATTERN_4: + temp |= DP_TP_CTL_LINK_TRAIN_PAT4; + break; + } + + intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, temp); + + intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); + intel_de_posting_read(dev_priv, DDI_BUF_CTL(port)); +} + +static void intel_ddi_set_idle_link_train(struct intel_dp *intel_dp) +{ + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; + u32 val; + + val = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl); + val &= ~DP_TP_CTL_LINK_TRAIN_MASK; + val |= DP_TP_CTL_LINK_TRAIN_IDLE; + intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, val); + + /* + * Until TGL on PORT_A we can have only eDP in SST mode. There the only + * reason we need to set idle transmission mode is to work around a HW + * issue where we enable the pipe while not in idle link-training mode. + * In this case there is requirement to wait for a minimum number of + * idle patterns to be sent. + */ + if (port == PORT_A && INTEL_GEN(dev_priv) < 12) + return; + + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, + DP_TP_STATUS_IDLE_DONE, 1)) + drm_err(&dev_priv->drm, + "Timed out waiting for DP idle patterns\n"); +} + static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder) { @@ -4061,12 +4144,18 @@ void intel_ddi_get_config(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); u32 temp, flags = 0; /* XXX: DSI transcoder paranoia */ if (drm_WARN_ON(&dev_priv->drm, transcoder_is_dsi(cpu_transcoder))) return; + if (INTEL_GEN(dev_priv) >= 12) { + intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(cpu_transcoder); + intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(cpu_transcoder); + } + intel_dsc_get_config(encoder, pipe_config); temp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder)); @@ -4396,6 +4485,7 @@ static const struct drm_encoder_funcs intel_ddi_funcs = { static struct intel_connector * intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) { + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); struct intel_connector *connector; enum port port = intel_dig_port->base.port; @@ -4406,6 +4496,24 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) intel_dig_port->dp.output_reg = DDI_BUF_CTL(port); intel_dig_port->dp.prepare_link_retrain = intel_ddi_prepare_link_retrain; + intel_dig_port->dp.set_link_train = intel_ddi_set_link_train; + intel_dig_port->dp.set_idle_link_train = intel_ddi_set_idle_link_train; + + if (INTEL_GEN(dev_priv) >= 12) + intel_dig_port->dp.set_signal_levels = tgl_set_signal_levels; + else if (INTEL_GEN(dev_priv) >= 11) + intel_dig_port->dp.set_signal_levels = icl_set_signal_levels; + else if (IS_CANNONLAKE(dev_priv)) + intel_dig_port->dp.set_signal_levels = cnl_set_signal_levels; + else if (IS_GEN9_LP(dev_priv)) + intel_dig_port->dp.set_signal_levels = bxt_set_signal_levels; + else + intel_dig_port->dp.set_signal_levels = hsw_set_signal_levels; + + if (INTEL_GEN(dev_priv) < 12) { + intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dig_port->dp.regs.dp_tp_status = DP_TP_STATUS(port); + } if (!intel_dp_init_connector(intel_dig_port, connector)) { kfree(connector); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index de4cd877c002..fbdf8ddde486 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -25,9 +25,11 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); +void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state); -void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); +void intel_ddi_enable_pipe_clock(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3afcfedb8627..2a17cf38d3dc 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -238,9 +238,9 @@ static void intel_update_czclk(struct drm_i915_private *dev_priv) dev_priv->czclk_freq); } -static inline u32 /* units of 100MHz */ -intel_fdi_link_freq(struct drm_i915_private *dev_priv, - const struct intel_crtc_state *pipe_config) +/* units of 100MHz */ +static u32 intel_fdi_link_freq(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *pipe_config) { if (HAS_DDI(dev_priv)) return pipe_config->port_clock; /* SPLL */ @@ -1973,16 +1973,16 @@ static bool is_aux_plane(const struct drm_framebuffer *fb, int plane) static int main_to_ccs_plane(const struct drm_framebuffer *fb, int main_plane) { - WARN_ON(!is_ccs_modifier(fb->modifier) || - (main_plane && main_plane >= fb->format->num_planes / 2)); + drm_WARN_ON(fb->dev, !is_ccs_modifier(fb->modifier) || + (main_plane && main_plane >= fb->format->num_planes / 2)); return fb->format->num_planes / 2 + main_plane; } static int ccs_to_main_plane(const struct drm_framebuffer *fb, int ccs_plane) { - WARN_ON(!is_ccs_modifier(fb->modifier) || - ccs_plane < fb->format->num_planes / 2); + drm_WARN_ON(fb->dev, !is_ccs_modifier(fb->modifier) || + ccs_plane < fb->format->num_planes / 2); return ccs_plane - fb->format->num_planes / 2; } @@ -2992,7 +2992,7 @@ setup_fb_rotation(int plane, const struct intel_remapped_plane_info *plane_info, fb->modifier != I915_FORMAT_MOD_Yf_TILED) return 0; - if (WARN_ON(plane >= ARRAY_SIZE(rot_info->plane))) + if (drm_WARN_ON(fb->dev, plane >= ARRAY_SIZE(rot_info->plane))) return 0; rot_info->plane[plane] = *plane_info; @@ -6089,30 +6089,26 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, return 0; } -/** - * skl_update_scaler_crtc - Stages update to scaler state for a given crtc. - * - * @state: crtc's scaler state - * - * Return - * 0 - scaler_usage updated successfully - * error - requested scaling cannot be supported or other error condition - */ -int skl_update_scaler_crtc(struct intel_crtc_state *state) +static int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state) { - const struct drm_display_mode *adjusted_mode = &state->hw.adjusted_mode; - bool need_scaler = false; + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + int width, height; - if (state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 || - state->pch_pfit.enabled) - need_scaler = true; + if (crtc_state->pch_pfit.enabled) { + width = drm_rect_width(&crtc_state->pch_pfit.dst); + height = drm_rect_height(&crtc_state->pch_pfit.dst); + } else { + width = adjusted_mode->crtc_hdisplay; + height = adjusted_mode->crtc_vdisplay; + } - return skl_update_scaler(state, !state->hw.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, - state->pipe_src_w, state->pipe_src_h, - adjusted_mode->crtc_hdisplay, - adjusted_mode->crtc_vdisplay, NULL, 0, - need_scaler); + return skl_update_scaler(crtc_state, !crtc_state->hw.active, + SKL_CRTC_INDEX, + &crtc_state->scaler_state.scaler_id, + crtc_state->pipe_src_w, crtc_state->pipe_src_h, + width, height, NULL, 0, + crtc_state->pch_pfit.enabled); } /** @@ -6221,70 +6217,80 @@ static void skl_pfit_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum pipe pipe = crtc->pipe; const struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; + struct drm_rect src = { + .x2 = crtc_state->pipe_src_w << 16, + .y2 = crtc_state->pipe_src_h << 16, + }; + const struct drm_rect *dst = &crtc_state->pch_pfit.dst; + u16 uv_rgb_hphase, uv_rgb_vphase; + enum pipe pipe = crtc->pipe; + int width = drm_rect_width(dst); + int height = drm_rect_height(dst); + int x = dst->x1; + int y = dst->y1; + int hscale, vscale; + unsigned long irqflags; + int id; - if (crtc_state->pch_pfit.enabled) { - u16 uv_rgb_hphase, uv_rgb_vphase; - int pfit_w, pfit_h, hscale, vscale; - unsigned long irqflags; - int id; - - if (drm_WARN_ON(&dev_priv->drm, - crtc_state->scaler_state.scaler_id < 0)) - return; + if (!crtc_state->pch_pfit.enabled) + return; - pfit_w = (crtc_state->pch_pfit.size >> 16) & 0xFFFF; - pfit_h = crtc_state->pch_pfit.size & 0xFFFF; + if (drm_WARN_ON(&dev_priv->drm, + crtc_state->scaler_state.scaler_id < 0)) + return; - hscale = (crtc_state->pipe_src_w << 16) / pfit_w; - vscale = (crtc_state->pipe_src_h << 16) / pfit_h; + hscale = drm_rect_calc_hscale(&src, dst, 0, INT_MAX); + vscale = drm_rect_calc_vscale(&src, dst, 0, INT_MAX); - uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false); - uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false); + uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false); + uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false); - id = scaler_state->scaler_id; + id = scaler_state->scaler_id; - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - intel_de_write_fw(dev_priv, SKL_PS_CTRL(pipe, id), PS_SCALER_EN | - PS_FILTER_MEDIUM | scaler_state->scalers[id].mode); - intel_de_write_fw(dev_priv, SKL_PS_VPHASE(pipe, id), - PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_vphase)); - intel_de_write_fw(dev_priv, SKL_PS_HPHASE(pipe, id), - PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_hphase)); - intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(pipe, id), - crtc_state->pch_pfit.pos); - intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(pipe, id), - crtc_state->pch_pfit.size); + intel_de_write_fw(dev_priv, SKL_PS_CTRL(pipe, id), PS_SCALER_EN | + PS_FILTER_MEDIUM | scaler_state->scalers[id].mode); + intel_de_write_fw(dev_priv, SKL_PS_VPHASE(pipe, id), + PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_vphase)); + intel_de_write_fw(dev_priv, SKL_PS_HPHASE(pipe, id), + PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_hphase)); + intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(pipe, id), + x << 16 | y); + intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(pipe, id), + width << 16 | height); - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); - } + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_rect *dst = &crtc_state->pch_pfit.dst; enum pipe pipe = crtc->pipe; + int width = drm_rect_width(dst); + int height = drm_rect_height(dst); + int x = dst->x1; + int y = dst->y1; - if (crtc_state->pch_pfit.enabled) { - /* Force use of hard-coded filter coefficients - * as some pre-programmed values are broken, - * e.g. x201. - */ - if (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) - intel_de_write(dev_priv, PF_CTL(pipe), - PF_ENABLE | PF_FILTER_MED_3x3 | PF_PIPE_SEL_IVB(pipe)); - else - intel_de_write(dev_priv, PF_CTL(pipe), - PF_ENABLE | PF_FILTER_MED_3x3); - intel_de_write(dev_priv, PF_WIN_POS(pipe), - crtc_state->pch_pfit.pos); - intel_de_write(dev_priv, PF_WIN_SZ(pipe), - crtc_state->pch_pfit.size); - } + if (!crtc_state->pch_pfit.enabled) + return; + + /* Force use of hard-coded filter coefficients + * as some pre-programmed values are broken, + * e.g. x201. + */ + if (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) + intel_de_write(dev_priv, PF_CTL(pipe), PF_ENABLE | + PF_FILTER_MED_3x3 | PF_PIPE_SEL_IVB(pipe)); + else + intel_de_write(dev_priv, PF_CTL(pipe), PF_ENABLE | + PF_FILTER_MED_3x3); + intel_de_write(dev_priv, PF_WIN_POS(pipe), x << 16 | y); + intel_de_write(dev_priv, PF_WIN_SZ(pipe), width << 16 | height); } void hsw_enable_ips(const struct intel_crtc_state *crtc_state) @@ -6626,7 +6632,7 @@ intel_connector_primary_encoder(struct intel_connector *connector) return &dp_to_dig_port(connector->mst_port)->base; encoder = intel_attached_encoder(connector); - WARN_ON(!encoder); + drm_WARN_ON(connector->base.dev, !encoder); return encoder; } @@ -7071,9 +7077,6 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, if (INTEL_GEN(dev_priv) >= 11) icl_set_pipe_chicken(crtc); - if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(new_crtc_state); - if (dev_priv->display.initial_watermarks) dev_priv->display.initial_watermarks(state, crtc); @@ -7104,11 +7107,12 @@ void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state) /* To avoid upsetting the power well on haswell only disable the pfit if * it's in use. The hw state code will make sure we get this right. */ - if (old_crtc_state->pch_pfit.enabled) { - intel_de_write(dev_priv, PF_CTL(pipe), 0); - intel_de_write(dev_priv, PF_WIN_POS(pipe), 0); - intel_de_write(dev_priv, PF_WIN_SZ(pipe), 0); - } + if (!old_crtc_state->pch_pfit.enabled) + return; + + intel_de_write(dev_priv, PF_CTL(pipe), 0); + intel_de_write(dev_priv, PF_WIN_POS(pipe), 0); + intel_de_write(dev_priv, PF_WIN_SZ(pipe), 0); } static void ilk_crtc_disable(struct intel_atomic_state *state, @@ -7296,7 +7300,17 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) } } - switch (dig_port->aux_ch) { + return intel_legacy_aux_to_power_domain(dig_port->aux_ch); +} + +/* + * Converts aux_ch to power_domain without caring about TBT ports for that use + * intel_aux_power_domain() + */ +enum intel_display_power_domain +intel_legacy_aux_to_power_domain(enum aux_ch aux_ch) +{ + switch (aux_ch) { case AUX_CH_A: return POWER_DOMAIN_AUX_A; case AUX_CH_B: @@ -7312,7 +7326,7 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) case AUX_CH_G: return POWER_DOMAIN_AUX_G; default: - MISSING_CASE(dig_port->aux_ch); + MISSING_CASE(aux_ch); return POWER_DOMAIN_AUX_A; } } @@ -7926,39 +7940,36 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } -static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *crtc_state) { - u32 pixel_rate; - - pixel_rate = pipe_config->hw.adjusted_mode.crtc_clock; + u32 pixel_rate = crtc_state->hw.adjusted_mode.crtc_clock; + unsigned int pipe_w, pipe_h, pfit_w, pfit_h; /* * We only use IF-ID interlacing. If we ever use * PF-ID we'll need to adjust the pixel_rate here. */ - if (pipe_config->pch_pfit.enabled) { - u64 pipe_w, pipe_h, pfit_w, pfit_h; - u32 pfit_size = pipe_config->pch_pfit.size; + if (!crtc_state->pch_pfit.enabled) + return pixel_rate; - pipe_w = pipe_config->pipe_src_w; - pipe_h = pipe_config->pipe_src_h; + pipe_w = crtc_state->pipe_src_w; + pipe_h = crtc_state->pipe_src_h; - pfit_w = (pfit_size >> 16) & 0xFFFF; - pfit_h = pfit_size & 0xFFFF; - if (pipe_w < pfit_w) - pipe_w = pfit_w; - if (pipe_h < pfit_h) - pipe_h = pfit_h; + pfit_w = drm_rect_width(&crtc_state->pch_pfit.dst); + pfit_h = drm_rect_height(&crtc_state->pch_pfit.dst); - if (WARN_ON(!pfit_w || !pfit_h)) - return pixel_rate; + if (pipe_w < pfit_w) + pipe_w = pfit_w; + if (pipe_h < pfit_h) + pipe_h = pfit_h; - pixel_rate = div_u64(mul_u32_u32(pixel_rate, pipe_w * pipe_h), - pfit_w * pfit_h); - } + if (drm_WARN_ON(crtc_state->uapi.crtc->dev, + !pfit_w || !pfit_h)) + return pixel_rate; - return pixel_rate; + return div_u64(mul_u32_u32(pixel_rate, pipe_w * pipe_h), + pfit_w * pfit_h); } static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) @@ -8127,7 +8138,7 @@ static void intel_panel_sanitize_ssc(struct drm_i915_private *dev_priv) } } -static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) +static bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { if (i915_modparams.panel_use_ssc >= 0) return i915_modparams.panel_use_ssc != 0; @@ -9151,9 +9162,9 @@ static bool i9xx_has_pfit(struct drm_i915_private *dev_priv) IS_PINEVIEW(dev_priv) || IS_MOBILE(dev_priv); } -static void i9xx_get_pfit_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void i9xx_get_pfit_config(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 tmp; @@ -9173,9 +9184,9 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc, return; } - pipe_config->gmch_pfit.control = tmp; - pipe_config->gmch_pfit.pgm_ratios = intel_de_read(dev_priv, - PFIT_PGM_RATIOS); + crtc_state->gmch_pfit.control = tmp; + crtc_state->gmch_pfit.pgm_ratios = + intel_de_read(dev_priv, PFIT_PGM_RATIOS); } static void vlv_crtc_clock_get(struct intel_crtc *crtc, @@ -9425,7 +9436,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, intel_get_pipe_timings(crtc, pipe_config); intel_get_pipe_src_size(crtc, pipe_config); - i9xx_get_pfit_config(crtc, pipe_config); + i9xx_get_pfit_config(pipe_config); if (INTEL_GEN(dev_priv) >= 4) { /* No way to read it out on pipes B and C */ @@ -10395,37 +10406,47 @@ static void ilk_get_fdi_m_n_config(struct intel_crtc *crtc, &pipe_config->fdi_m_n, NULL); } -static void skl_get_pfit_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_get_pfit_pos_size(struct intel_crtc_state *crtc_state, + u32 pos, u32 size) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc_scaler_state *scaler_state = &pipe_config->scaler_state; - u32 ps_ctrl = 0; + drm_rect_init(&crtc_state->pch_pfit.dst, + pos >> 16, pos & 0xffff, + size >> 16, size & 0xffff); +} + +static void skl_get_pfit_config(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; int id = -1; int i; /* find scaler attached to this pipe */ for (i = 0; i < crtc->num_scalers; i++) { - ps_ctrl = intel_de_read(dev_priv, SKL_PS_CTRL(crtc->pipe, i)); - if (ps_ctrl & PS_SCALER_EN && !(ps_ctrl & PS_PLANE_SEL_MASK)) { - id = i; - pipe_config->pch_pfit.enabled = true; - pipe_config->pch_pfit.pos = intel_de_read(dev_priv, - SKL_PS_WIN_POS(crtc->pipe, i)); - pipe_config->pch_pfit.size = intel_de_read(dev_priv, - SKL_PS_WIN_SZ(crtc->pipe, i)); - scaler_state->scalers[i].in_use = true; - break; - } + u32 ctl, pos, size; + + ctl = intel_de_read(dev_priv, SKL_PS_CTRL(crtc->pipe, i)); + if ((ctl & (PS_SCALER_EN | PS_PLANE_SEL_MASK)) != PS_SCALER_EN) + continue; + + id = i; + crtc_state->pch_pfit.enabled = true; + + pos = intel_de_read(dev_priv, SKL_PS_WIN_POS(crtc->pipe, i)); + size = intel_de_read(dev_priv, SKL_PS_WIN_SZ(crtc->pipe, i)); + + ilk_get_pfit_pos_size(crtc_state, pos, size); + + scaler_state->scalers[i].in_use = true; + break; } scaler_state->scaler_id = id; - if (id >= 0) { + if (id >= 0) scaler_state->scaler_users |= (1 << SKL_CRTC_INDEX); - } else { + else scaler_state->scaler_users &= ~(1 << SKL_CRTC_INDEX); - } } static void @@ -10561,30 +10582,30 @@ error: kfree(intel_fb); } -static void ilk_get_pfit_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_get_pfit_config(struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - u32 tmp; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 ctl, pos, size; - tmp = intel_de_read(dev_priv, PF_CTL(crtc->pipe)); - - if (tmp & PF_ENABLE) { - pipe_config->pch_pfit.enabled = true; - pipe_config->pch_pfit.pos = intel_de_read(dev_priv, - PF_WIN_POS(crtc->pipe)); - pipe_config->pch_pfit.size = intel_de_read(dev_priv, - PF_WIN_SZ(crtc->pipe)); - - /* We currently do not free assignements of panel fitters on - * ivb/hsw (since we don't use the higher upscaling modes which - * differentiates them) so just WARN about this case for now. */ - if (IS_GEN(dev_priv, 7)) { - drm_WARN_ON(dev, (tmp & PF_PIPE_SEL_MASK_IVB) != - PF_PIPE_SEL_IVB(crtc->pipe)); - } - } + ctl = intel_de_read(dev_priv, PF_CTL(crtc->pipe)); + if ((ctl & PF_ENABLE) == 0) + return; + + crtc_state->pch_pfit.enabled = true; + + pos = intel_de_read(dev_priv, PF_WIN_POS(crtc->pipe)); + size = intel_de_read(dev_priv, PF_WIN_SZ(crtc->pipe)); + + ilk_get_pfit_pos_size(crtc_state, pos, size); + + /* + * We currently do not free assignements of panel fitters on + * ivb/hsw (since we don't use the higher upscaling modes which + * differentiates them) so just WARN about this case for now. + */ + drm_WARN_ON(&dev_priv->drm, IS_GEN(dev_priv, 7) && + (ctl & PF_PIPE_SEL_MASK_IVB) != PF_PIPE_SEL_IVB(crtc->pipe)); } static bool ilk_get_pipe_config(struct intel_crtc *crtc, @@ -10695,7 +10716,7 @@ static bool ilk_get_pipe_config(struct intel_crtc *crtc, intel_get_pipe_timings(crtc, pipe_config); intel_get_pipe_src_size(crtc, pipe_config); - ilk_get_pfit_config(crtc, pipe_config); + ilk_get_pfit_config(pipe_config); ret = true; @@ -11169,9 +11190,9 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) - skl_get_pfit_config(crtc, pipe_config); + skl_get_pfit_config(pipe_config); else - ilk_get_pfit_config(crtc, pipe_config); + ilk_get_pfit_config(pipe_config); } if (hsw_crtc_supports_ips(crtc)) { @@ -12430,8 +12451,10 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) if (IS_ERR(linked_plane_state)) return PTR_ERR(linked_plane_state); - WARN_ON(linked_plane_state->planar_linked_plane != plane); - WARN_ON(linked_plane_state->planar_slave == plane_state->planar_slave); + drm_WARN_ON(state->base.dev, + linked_plane_state->planar_linked_plane != plane); + drm_WARN_ON(state->base.dev, + linked_plane_state->planar_slave == plane_state->planar_slave); } return 0; @@ -12819,7 +12842,7 @@ static void intel_dump_crtc_timings(struct drm_i915_private *i915, mode->type, mode->flags); } -static inline void +static void intel_dump_m_n_config(const struct intel_crtc_state *pipe_config, const char *id, unsigned int lane_count, const struct intel_link_m_n *m_n) @@ -13030,9 +13053,8 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, pipe_config->gmch_pfit.lvds_border_bits); else drm_dbg_kms(&dev_priv->drm, - "pch pfit: pos: 0x%08x, size: 0x%08x, %s, force thru: %s\n", - pipe_config->pch_pfit.pos, - pipe_config->pch_pfit.size, + "pch pfit: " DRM_RECT_FMT ", %s, force thru: %s\n", + DRM_RECT_ARG(&pipe_config->pch_pfit.dst), enableddisabled(pipe_config->pch_pfit.enabled), yesno(pipe_config->pch_pfit.force_thru)); @@ -13154,7 +13176,8 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state { crtc_state->uapi.enable = crtc_state->hw.enable; crtc_state->uapi.active = crtc_state->hw.active; - WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->uapi, &crtc_state->hw.mode) < 0); + drm_WARN_ON(crtc_state->uapi.crtc->dev, + drm_atomic_set_mode_for_crtc(&crtc_state->uapi, &crtc_state->hw.mode) < 0); crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode; @@ -13773,8 +13796,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(pch_pfit.enabled); if (current_config->pch_pfit.enabled) { - PIPE_CONF_CHECK_X(pch_pfit.pos); - PIPE_CONF_CHECK_X(pch_pfit.size); + PIPE_CONF_CHECK_I(pch_pfit.dst.x1); + PIPE_CONF_CHECK_I(pch_pfit.dst.y1); + PIPE_CONF_CHECK_I(pch_pfit.dst.x2); + PIPE_CONF_CHECK_I(pch_pfit.dst.y2); } PIPE_CONF_CHECK_I(scaler_state.scaler_id); @@ -15353,12 +15378,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_set_cdclk_pre_plane_update(state); - /* - * SKL workaround: bspec recommends we disable the SAGV when we - * have more then one pipe enabled - */ - if (!intel_can_enable_sagv(state)) - intel_disable_sagv(dev_priv); + intel_sagv_pre_plane_update(state); intel_modeset_verify_disabled(dev_priv, state); } @@ -15455,11 +15475,11 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_check_cpu_fifo_underruns(dev_priv); intel_check_pch_fifo_underruns(dev_priv); - if (state->modeset) + if (state->modeset) { intel_verify_planes(state); - if (state->modeset && intel_can_enable_sagv(state)) - intel_enable_sagv(dev_priv); + intel_sagv_post_plane_update(state); + } drm_atomic_helper_commit_hw_done(&state->base); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index cc7f287804d7..efb4da205ea2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -583,13 +583,14 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_port_to_power_domain(enum port port); enum intel_display_power_domain intel_aux_power_domain(struct intel_digital_port *dig_port); +enum intel_display_power_domain +intel_legacy_aux_to_power_domain(enum aux_ch aux_ch); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); -int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state); void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state); u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index bdeea2e02642..70525623bcdf 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -9,6 +9,7 @@ #include "i915_debugfs.h" #include "intel_csr.h" #include "intel_display_debugfs.h" +#include "intel_display_power.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_fbc.h" @@ -1143,6 +1144,51 @@ static int i915_drrs_status(struct seq_file *m, void *unused) return 0; } +#define LPSP_STATUS(COND) (COND ? seq_puts(m, "LPSP: enabled\n") : \ + seq_puts(m, "LPSP: disabled\n")) + +static bool +intel_lpsp_power_well_enabled(struct drm_i915_private *i915, + enum i915_power_well_id power_well_id) +{ + intel_wakeref_t wakeref; + bool is_enabled; + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + is_enabled = intel_display_power_well_is_enabled(i915, + power_well_id); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + + return is_enabled; +} + +static int i915_lpsp_status(struct seq_file *m, void *unused) +{ + struct drm_i915_private *i915 = node_to_i915(m->private); + + switch (INTEL_GEN(i915)) { + case 12: + case 11: + LPSP_STATUS(!intel_lpsp_power_well_enabled(i915, ICL_DISP_PW_3)); + break; + case 10: + case 9: + LPSP_STATUS(!intel_lpsp_power_well_enabled(i915, SKL_DISP_PW_2)); + break; + default: + /* + * Apart from HASWELL/BROADWELL other legacy platform doesn't + * support lpsp. + */ + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + LPSP_STATUS(!intel_lpsp_power_well_enabled(i915, HSW_DISP_PW_GLOBAL)); + else + seq_puts(m, "LPSP: not supported\n"); + } + + return 0; +} + static int i915_dp_mst_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -1910,6 +1956,7 @@ static const struct drm_info_list intel_display_debugfs_list[] = { {"i915_dp_mst_info", i915_dp_mst_info, 0}, {"i915_ddb_info", i915_ddb_info, 0}, {"i915_drrs_status", i915_drrs_status, 0}, + {"i915_lpsp_status", i915_lpsp_status, 0}, }; static const struct { @@ -1991,6 +2038,48 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) } DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability); +#define LPSP_CAPABLE(COND) (COND ? seq_puts(m, "LPSP: capable\n") : \ + seq_puts(m, "LPSP: incapable\n")) + +static int i915_lpsp_capability_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct intel_encoder *encoder = + intel_attached_encoder(to_intel_connector(connector)); + struct drm_i915_private *i915 = to_i915(connector->dev); + + if (connector->status != connector_status_connected) + return -ENODEV; + + switch (INTEL_GEN(i915)) { + case 12: + /* + * Actually TGL can drive LPSP on port till DDI_C + * but there is no physical connected DDI_C on TGL sku's, + * even driver is not initilizing DDI_C port for gen12. + */ + LPSP_CAPABLE(encoder->port <= PORT_B); + break; + case 11: + LPSP_CAPABLE(connector->connector_type == DRM_MODE_CONNECTOR_DSI || + connector->connector_type == DRM_MODE_CONNECTOR_eDP); + break; + case 10: + case 9: + LPSP_CAPABLE(encoder->port == PORT_A && + (connector->connector_type == DRM_MODE_CONNECTOR_DSI || + connector->connector_type == DRM_MODE_CONNECTOR_eDP || + connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)); + break; + default: + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + LPSP_CAPABLE(connector->connector_type == DRM_MODE_CONNECTOR_eDP); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(i915_lpsp_capability); + static int i915_dsc_fec_support_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; @@ -2134,5 +2223,16 @@ int intel_connector_debugfs_add(struct drm_connector *connector) debugfs_create_file("i915_dsc_fec_support", S_IRUGO, root, connector, &i915_dsc_fec_support_fops); + /* Legacy panels doesn't lpsp on any platform */ + if ((INTEL_GEN(dev_priv) >= 9 || IS_HASWELL(dev_priv) || + IS_BROADWELL(dev_priv)) && + (connector->connector_type == DRM_MODE_CONNECTOR_DSI || + connector->connector_type == DRM_MODE_CONNECTOR_eDP || + connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)) + debugfs_create_file("i915_lpsp_capability", 0444, root, + connector, &i915_lpsp_capability_fops); + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 03bdde19c8c9..49998906cc61 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -151,6 +151,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "GT_IRQ"; case POWER_DOMAIN_DPLL_DC_OFF: return "DPLL_DC_OFF"; + case POWER_DOMAIN_TC_COLD_OFF: + return "TC_COLD_OFF"; default: MISSING_CASE(domain); return "?"; @@ -282,8 +284,51 @@ static void hsw_power_well_pre_disable(struct drm_i915_private *dev_priv, gen8_irq_power_well_pre_disable(dev_priv, irq_pipe_mask); } +#define ICL_AUX_PW_TO_CH(pw_idx) \ + ((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A) + +#define ICL_TBT_AUX_PW_TO_CH(pw_idx) \ + ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C) + +static enum aux_ch icl_tc_phy_aux_ch(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + int pw_idx = power_well->desc->hsw.idx; + + return power_well->desc->hsw.is_tc_tbt ? ICL_TBT_AUX_PW_TO_CH(pw_idx) : + ICL_AUX_PW_TO_CH(pw_idx); +} + +static struct intel_digital_port * +aux_ch_to_digital_port(struct drm_i915_private *dev_priv, + enum aux_ch aux_ch) +{ + struct intel_digital_port *dig_port = NULL; + struct intel_encoder *encoder; + + for_each_intel_encoder(&dev_priv->drm, encoder) { + /* We'll check the MST primary port */ + if (encoder->type == INTEL_OUTPUT_DP_MST) + continue; + + dig_port = enc_to_dig_port(encoder); + if (!dig_port) + continue; + + if (dig_port->aux_ch != aux_ch) { + dig_port = NULL; + continue; + } + + break; + } + + return dig_port; +} + static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) + struct i915_power_well *power_well, + bool timeout_expected) { const struct i915_power_well_regs *regs = power_well->desc->hsw.regs; int pw_idx = power_well->desc->hsw.idx; @@ -294,8 +339,8 @@ static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, drm_dbg_kms(&dev_priv->drm, "%s power well enable timeout\n", power_well->desc->name); - /* An AUX timeout is expected if the TBT DP tunnel is down. */ - drm_WARN_ON(&dev_priv->drm, !power_well->desc->hsw.is_tc_tbt); + drm_WARN_ON(&dev_priv->drm, !timeout_expected); + } } @@ -358,11 +403,11 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, { const struct i915_power_well_regs *regs = power_well->desc->hsw.regs; int pw_idx = power_well->desc->hsw.idx; - bool wait_fuses = power_well->desc->hsw.has_fuses; - enum skl_power_gate uninitialized_var(pg); u32 val; - if (wait_fuses) { + if (power_well->desc->hsw.has_fuses) { + enum skl_power_gate pg; + pg = INTEL_GEN(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) : SKL_PW_CTL_IDX_TO_PG(pw_idx); /* @@ -379,19 +424,27 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, val = intel_de_read(dev_priv, regs->driver); intel_de_write(dev_priv, regs->driver, val | HSW_PWR_WELL_CTL_REQ(pw_idx)); - hsw_wait_for_power_well_enable(dev_priv, power_well); + + hsw_wait_for_power_well_enable(dev_priv, power_well, false); /* Display WA #1178: cnl */ if (IS_CANNONLAKE(dev_priv) && pw_idx >= GLK_PW_CTL_IDX_AUX_B && pw_idx <= CNL_PW_CTL_IDX_AUX_F) { + u32 val; + val = intel_de_read(dev_priv, CNL_AUX_ANAOVRD1(pw_idx)); val |= CNL_AUX_ANAOVRD1_ENABLE | CNL_AUX_ANAOVRD1_LDO_BYPASS; intel_de_write(dev_priv, CNL_AUX_ANAOVRD1(pw_idx), val); } - if (wait_fuses) + if (power_well->desc->hsw.has_fuses) { + enum skl_power_gate pg; + + pg = INTEL_GEN(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) : + SKL_PW_CTL_IDX_TO_PG(pw_idx); gen9_wait_for_power_well_fuses(dev_priv, pg); + } hsw_power_well_post_enable(dev_priv, power_well->desc->hsw.irq_pipe_mask, @@ -437,7 +490,7 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, val | ICL_LANE_ENABLE_AUX); } - hsw_wait_for_power_well_enable(dev_priv, power_well); + hsw_wait_for_power_well_enable(dev_priv, power_well, false); /* Display WA #1178: icl */ if (pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B && @@ -470,21 +523,6 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, hsw_wait_for_power_well_disable(dev_priv, power_well); } -#define ICL_AUX_PW_TO_CH(pw_idx) \ - ((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A) - -#define ICL_TBT_AUX_PW_TO_CH(pw_idx) \ - ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C) - -static enum aux_ch icl_tc_phy_aux_ch(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - int pw_idx = power_well->desc->hsw.idx; - - return power_well->desc->hsw.is_tc_tbt ? ICL_TBT_AUX_PW_TO_CH(pw_idx) : - ICL_AUX_PW_TO_CH(pw_idx); -} - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) static u64 async_put_domains_mask(struct i915_power_domains *power_domains); @@ -501,51 +539,28 @@ static int power_well_async_ref_count(struct drm_i915_private *dev_priv, } static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) + struct i915_power_well *power_well, + struct intel_digital_port *dig_port) { - enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well); - struct intel_digital_port *dig_port = NULL; - struct intel_encoder *encoder; - /* Bypass the check if all references are released asynchronously */ if (power_well_async_ref_count(dev_priv, power_well) == power_well->count) return; - aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well); - - for_each_intel_encoder(&dev_priv->drm, encoder) { - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - - if (!intel_phy_is_tc(dev_priv, phy)) - continue; - - /* We'll check the MST primary port */ - if (encoder->type == INTEL_OUTPUT_DP_MST) - continue; - - dig_port = enc_to_dig_port(encoder); - if (drm_WARN_ON(&dev_priv->drm, !dig_port)) - continue; - - if (dig_port->aux_ch != aux_ch) { - dig_port = NULL; - continue; - } - - break; - } - if (drm_WARN_ON(&dev_priv->drm, !dig_port)) return; + if (INTEL_GEN(dev_priv) == 11 && dig_port->tc_legacy_port) + return; + drm_WARN_ON(&dev_priv->drm, !intel_tc_port_ref_held(dig_port)); } #else static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) + struct i915_power_well *power_well, + struct intel_digital_port *dig_port) { } @@ -553,24 +568,65 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, #define TGL_AUX_PW_TO_TC_PORT(pw_idx) ((pw_idx) - TGL_PW_CTL_IDX_AUX_TC1) +static void icl_tc_cold_exit(struct drm_i915_private *i915) +{ + int ret, tries = 0; + + while (1) { + ret = sandybridge_pcode_write_timeout(i915, + ICL_PCODE_EXIT_TCCOLD, + 0, 250, 1); + if (ret != -EAGAIN || ++tries == 3) + break; + msleep(1); + } + + /* Spec states that TC cold exit can take up to 1ms to complete */ + if (!ret) + msleep(1); + + /* TODO: turn failure into a error as soon i915 CI updates ICL IFWI */ + drm_dbg_kms(&i915->drm, "TC cold block %s\n", ret ? "failed" : + "succeeded"); +} + static void icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well); + struct intel_digital_port *dig_port = aux_ch_to_digital_port(dev_priv, aux_ch); + const struct i915_power_well_regs *regs = power_well->desc->hsw.regs; + bool is_tbt = power_well->desc->hsw.is_tc_tbt; + bool timeout_expected; u32 val; - icl_tc_port_assert_ref_held(dev_priv, power_well); + icl_tc_port_assert_ref_held(dev_priv, power_well, dig_port); val = intel_de_read(dev_priv, DP_AUX_CH_CTL(aux_ch)); val &= ~DP_AUX_CH_CTL_TBT_IO; - if (power_well->desc->hsw.is_tc_tbt) + if (is_tbt) val |= DP_AUX_CH_CTL_TBT_IO; intel_de_write(dev_priv, DP_AUX_CH_CTL(aux_ch), val); - hsw_power_well_enable(dev_priv, power_well); + val = intel_de_read(dev_priv, regs->driver); + intel_de_write(dev_priv, regs->driver, + val | HSW_PWR_WELL_CTL_REQ(power_well->desc->hsw.idx)); + + /* + * An AUX timeout is expected if the TBT DP tunnel is down, + * or need to enable AUX on a legacy TypeC port as part of the TC-cold + * exit sequence. + */ + timeout_expected = is_tbt; + if (INTEL_GEN(dev_priv) == 11 && dig_port->tc_legacy_port) { + icl_tc_cold_exit(dev_priv); + timeout_expected = true; + } + + hsw_wait_for_power_well_enable(dev_priv, power_well, timeout_expected); - if (INTEL_GEN(dev_priv) >= 12 && !power_well->desc->hsw.is_tc_tbt) { + if (INTEL_GEN(dev_priv) >= 12 && !is_tbt) { enum tc_port tc_port; tc_port = TGL_AUX_PW_TO_TC_PORT(power_well->desc->hsw.idx); @@ -588,11 +644,48 @@ static void icl_tc_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - icl_tc_port_assert_ref_held(dev_priv, power_well); + enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well); + struct intel_digital_port *dig_port = aux_ch_to_digital_port(dev_priv, aux_ch); + + icl_tc_port_assert_ref_held(dev_priv, power_well, dig_port); hsw_power_well_disable(dev_priv, power_well); } +static void +icl_aux_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + int pw_idx = power_well->desc->hsw.idx; + enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx); /* non-TBT only */ + bool is_tbt = power_well->desc->hsw.is_tc_tbt; + + if (is_tbt || intel_phy_is_tc(dev_priv, phy)) + return icl_tc_phy_aux_power_well_enable(dev_priv, power_well); + else if (IS_ICELAKE(dev_priv)) + return icl_combo_phy_aux_power_well_enable(dev_priv, + power_well); + else + return hsw_power_well_enable(dev_priv, power_well); +} + +static void +icl_aux_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + int pw_idx = power_well->desc->hsw.idx; + enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx); /* non-TBT only */ + bool is_tbt = power_well->desc->hsw.is_tc_tbt; + + if (is_tbt || intel_phy_is_tc(dev_priv, phy)) + return icl_tc_phy_aux_power_well_disable(dev_priv, power_well); + else if (IS_ICELAKE(dev_priv)) + return icl_combo_phy_aux_power_well_disable(dev_priv, + power_well); + else + return hsw_power_well_disable(dev_priv, power_well); +} + /* * We should only use the power well if we explicitly asked the hardware to * enable it, so check if it's enabled and also check if we've requested it to @@ -943,7 +1036,7 @@ static void assert_can_enable_dc5(struct drm_i915_private *dev_priv) /* Power wells at this level and above must be disabled for DC5 entry */ if (INTEL_GEN(dev_priv) >= 12) - high_pg = TGL_DISP_PW_3; + high_pg = ICL_DISP_PW_3; else high_pg = SKL_DISP_PW_2; @@ -2805,6 +2898,21 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, #define TGL_AUX_I_TBT6_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_I_TBT)) +#define TGL_TC_COLD_OFF_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUX_E) | \ + BIT_ULL(POWER_DOMAIN_AUX_F) | \ + BIT_ULL(POWER_DOMAIN_AUX_G) | \ + BIT_ULL(POWER_DOMAIN_AUX_H) | \ + BIT_ULL(POWER_DOMAIN_AUX_I) | \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_G_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_H_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_I_TBT) | \ + BIT_ULL(POWER_DOMAIN_TC_COLD_OFF)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, @@ -3503,17 +3611,10 @@ static const struct i915_power_well_desc cnl_power_wells[] = { }, }; -static const struct i915_power_well_ops icl_combo_phy_aux_power_well_ops = { - .sync_hw = hsw_power_well_sync_hw, - .enable = icl_combo_phy_aux_power_well_enable, - .disable = icl_combo_phy_aux_power_well_disable, - .is_enabled = hsw_power_well_enabled, -}; - -static const struct i915_power_well_ops icl_tc_phy_aux_power_well_ops = { +static const struct i915_power_well_ops icl_aux_power_well_ops = { .sync_hw = hsw_power_well_sync_hw, - .enable = icl_tc_phy_aux_power_well_enable, - .disable = icl_tc_phy_aux_power_well_disable, + .enable = icl_aux_power_well_enable, + .disable = icl_aux_power_well_disable, .is_enabled = hsw_power_well_enabled, }; @@ -3571,7 +3672,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { .name = "power well 3", .domains = ICL_PW_3_POWER_DOMAINS, .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = ICL_DISP_PW_3, { .hsw.regs = &hsw_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_PW_3, @@ -3643,7 +3744,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX A", .domains = ICL_AUX_A_IO_POWER_DOMAINS, - .ops = &icl_combo_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3653,7 +3754,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX B", .domains = ICL_AUX_B_IO_POWER_DOMAINS, - .ops = &icl_combo_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3663,7 +3764,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX C TC1", .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3674,7 +3775,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX D TC2", .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3685,7 +3786,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX E TC3", .domains = ICL_AUX_E_TC3_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3696,7 +3797,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX F TC4", .domains = ICL_AUX_F_TC4_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3707,7 +3808,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX C TBT1", .domains = ICL_AUX_C_TBT1_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3718,7 +3819,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX D TBT2", .domains = ICL_AUX_D_TBT2_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3729,7 +3830,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX E TBT3", .domains = ICL_AUX_E_TBT3_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3740,7 +3841,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { { .name = "AUX F TBT4", .domains = ICL_AUX_F_TBT4_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3762,149 +3863,89 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }; -static const struct i915_power_well_desc ehl_power_wells[] = { - { - .name = "always-on", - .always_on = true, - .domains = POWER_DOMAIN_MASK, - .ops = &i9xx_always_on_power_well_ops, - .id = DISP_PW_ID_NONE, - }, - { - .name = "power well 1", - /* Handled by the DMC firmware */ - .always_on = true, - .domains = 0, - .ops = &hsw_power_well_ops, - .id = SKL_DISP_PW_1, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_PW_1, - .hsw.has_fuses = true, - }, - }, - { - .name = "DC off", - .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, - .ops = &gen9_dc_off_power_well_ops, - .id = SKL_DISP_DC_OFF, - }, - { - .name = "power well 2", - .domains = ICL_PW_2_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = SKL_DISP_PW_2, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_PW_2, - .hsw.has_fuses = true, - }, - }, - { - .name = "power well 3", - .domains = ICL_PW_3_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_PW_3, - .hsw.irq_pipe_mask = BIT(PIPE_B), - .hsw.has_vga = true, - .hsw.has_fuses = true, - }, - }, - { - .name = "DDI A IO", - .domains = ICL_DDI_IO_A_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_ddi_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_DDI_A, - }, - }, - { - .name = "DDI B IO", - .domains = ICL_DDI_IO_B_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_ddi_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_DDI_B, - }, - }, - { - .name = "DDI C IO", - .domains = ICL_DDI_IO_C_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_ddi_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_DDI_C, - }, - }, - { - .name = "DDI D IO", - .domains = ICL_DDI_IO_D_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_ddi_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_DDI_D, - }, - }, - { - .name = "AUX A", - .domains = ICL_AUX_A_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_aux_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_AUX_A, - }, - }, - { - .name = "AUX B", - .domains = ICL_AUX_B_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_aux_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_AUX_B, - }, - }, - { - .name = "AUX C", - .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_aux_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_AUX_C, - }, - }, - { - .name = "AUX D", - .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &icl_aux_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_AUX_D, - }, - }, - { - .name = "power well 4", - .domains = ICL_PW_4_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = ICL_PW_CTL_IDX_PW_4, - .hsw.has_fuses = true, - .hsw.irq_pipe_mask = BIT(PIPE_C), - }, - }, +static void +tgl_tc_cold_request(struct drm_i915_private *i915, bool block) +{ + u8 tries = 0; + int ret; + + while (1) { + u32 low_val = 0, high_val; + + if (block) + high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ; + else + high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ; + + /* + * Spec states that we should timeout the request after 200us + * but the function below will timeout after 500us + */ + ret = sandybridge_pcode_read(i915, TGL_PCODE_TCCOLD, &low_val, + &high_val); + if (ret == 0) { + if (block && + (low_val & TGL_PCODE_EXIT_TCCOLD_DATA_L_EXIT_FAILED)) + ret = -EIO; + else + break; + } + + if (++tries == 3) + break; + + if (ret == -EAGAIN) + msleep(1); + } + + if (ret) + drm_err(&i915->drm, "TC cold %sblock failed\n", + block ? "" : "un"); + else + drm_dbg_kms(&i915->drm, "TC cold %sblock succeeded\n", + block ? "" : "un"); +} + +static void +tgl_tc_cold_off_power_well_enable(struct drm_i915_private *i915, + struct i915_power_well *power_well) +{ + tgl_tc_cold_request(i915, true); +} + +static void +tgl_tc_cold_off_power_well_disable(struct drm_i915_private *i915, + struct i915_power_well *power_well) +{ + tgl_tc_cold_request(i915, false); +} + +static void +tgl_tc_cold_off_power_well_sync_hw(struct drm_i915_private *i915, + struct i915_power_well *power_well) +{ + if (power_well->count > 0) + tgl_tc_cold_off_power_well_enable(i915, power_well); + else + tgl_tc_cold_off_power_well_disable(i915, power_well); +} + +static bool +tgl_tc_cold_off_power_well_is_enabled(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + /* + * Not the correctly implementation but there is no way to just read it + * from PCODE, so returning count to avoid state mismatch errors + */ + return power_well->count; +} + +static const struct i915_power_well_ops tgl_tc_cold_off_ops = { + .sync_hw = tgl_tc_cold_off_power_well_sync_hw, + .enable = tgl_tc_cold_off_power_well_enable, + .disable = tgl_tc_cold_off_power_well_disable, + .is_enabled = tgl_tc_cold_off_power_well_is_enabled, }; static const struct i915_power_well_desc tgl_power_wells[] = { @@ -3949,7 +3990,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .name = "power well 3", .domains = TGL_PW_3_POWER_DOMAINS, .ops = &hsw_power_well_ops, - .id = TGL_DISP_PW_3, + .id = ICL_DISP_PW_3, { .hsw.regs = &hsw_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_PW_3, @@ -4051,7 +4092,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX A", .domains = TGL_AUX_A_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4061,7 +4102,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX B", .domains = TGL_AUX_B_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4071,7 +4112,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX C", .domains = TGL_AUX_C_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4081,7 +4122,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX D TC1", .domains = TGL_AUX_D_TC1_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4092,7 +4133,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX E TC2", .domains = TGL_AUX_E_TC2_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4103,7 +4144,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX F TC3", .domains = TGL_AUX_F_TC3_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4114,7 +4155,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX G TC4", .domains = TGL_AUX_G_TC4_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4125,7 +4166,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX H TC5", .domains = TGL_AUX_H_TC5_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4136,7 +4177,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX I TC6", .domains = TGL_AUX_I_TC6_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4147,7 +4188,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX D TBT1", .domains = TGL_AUX_D_TBT1_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4158,7 +4199,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX E TBT2", .domains = TGL_AUX_E_TBT2_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4169,7 +4210,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX F TBT3", .domains = TGL_AUX_F_TBT3_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4180,7 +4221,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX G TBT4", .domains = TGL_AUX_G_TBT4_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4191,7 +4232,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX H TBT5", .domains = TGL_AUX_H_TBT5_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4202,7 +4243,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX I TBT6", .domains = TGL_AUX_I_TBT6_IO_POWER_DOMAINS, - .ops = &icl_tc_phy_aux_power_well_ops, + .ops = &icl_aux_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4234,6 +4275,12 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .hsw.irq_pipe_mask = BIT(PIPE_D), }, }, + { + .name = "TC cold off", + .domains = TGL_TC_COLD_OFF_POWER_DOMAINS, + .ops = &tgl_tc_cold_off_ops, + .id = DISP_PW_ID_NONE, + }, }; static int @@ -4383,8 +4430,6 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) */ if (IS_GEN(dev_priv, 12)) { err = set_power_wells(power_domains, tgl_power_wells); - } else if (IS_ELKHARTLAKE(dev_priv)) { - err = set_power_wells(power_domains, ehl_power_wells); } else if (IS_GEN(dev_priv, 11)) { err = set_power_wells(power_domains, icl_power_wells); } else if (IS_CANNONLAKE(dev_priv)) { @@ -4446,9 +4491,8 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -static inline -bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, - i915_reg_t reg, bool enable) +static bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, + i915_reg_t reg, bool enable) { u32 val, status; diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index da64a5edae7a..6c917699293b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -76,6 +76,7 @@ enum intel_display_power_domain { POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, POWER_DOMAIN_DPLL_DC_OFF, + POWER_DOMAIN_TC_COLD_OFF, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, @@ -100,7 +101,7 @@ enum i915_power_well_id { SKL_DISP_PW_MISC_IO, SKL_DISP_PW_1, SKL_DISP_PW_2, - TGL_DISP_PW_3, + ICL_DISP_PW_3, SKL_DISP_DC_OFF, }; @@ -266,6 +267,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain); bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); +bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, + enum i915_power_well_id power_well_id); bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ba8c08145c88..9488449e4b94 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -974,8 +974,7 @@ struct intel_crtc_state { /* Panel fitter placement and size for Ironlake+ */ struct { - u32 pos; - u32 size; + struct drm_rect dst; bool enabled; bool force_thru; } pch_pfit; @@ -1368,6 +1367,9 @@ struct intel_dp { /* This is called before a link training is starterd */ void (*prepare_link_retrain)(struct intel_dp *intel_dp); + void (*set_link_train)(struct intel_dp *intel_dp, u8 dp_train_pat); + void (*set_idle_link_train)(struct intel_dp *intel_dp); + void (*set_signal_levels)(struct intel_dp *intel_dp); /* Displayport compliance testing */ struct intel_dp_compliance compliance; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d4fcc9583869..6952b0295096 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -48,7 +48,6 @@ #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" -#include "intel_display_debugfs.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" @@ -2340,15 +2339,13 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, static int intel_dp_ycbcr420_config(struct intel_dp *intel_dp, - struct drm_connector *connector, - struct intel_crtc_state *crtc_state) + struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct drm_connector *connector = conn_state->connector; const struct drm_display_info *info = &connector->display_info; const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - int ret; if (!drm_mode_is_420_only(info, adjusted_mode) || !intel_dp_get_colorimetry_status(intel_dp) || @@ -2357,17 +2354,7 @@ intel_dp_ycbcr420_config(struct intel_dp *intel_dp, crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; - /* YCBCR 420 output conversion needs a scaler */ - ret = skl_update_scaler_crtc(crtc_state); - if (ret) { - drm_dbg_kms(&i915->drm, - "Scaler allocation for output failed\n"); - return ret; - } - - intel_pch_panel_fitting(crtc, crtc_state, DRM_MODE_SCALE_FULLSCREEN); - - return 0; + return intel_pch_panel_fitting(crtc_state, conn_state); } bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, @@ -2546,7 +2533,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); enum port port = encoder->port; - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); @@ -2562,9 +2548,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (lspcon->active) lspcon_ycbcr420_config(&intel_connector->base, pipe_config); else - ret = intel_dp_ycbcr420_config(intel_dp, &intel_connector->base, - pipe_config); - + ret = intel_dp_ycbcr420_config(intel_dp, pipe_config, + conn_state); if (ret) return ret; @@ -2580,18 +2565,12 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_fixed_panel_mode(intel_connector->panel.fixed_mode, adjusted_mode); - if (INTEL_GEN(dev_priv) >= 9) { - ret = skl_update_scaler_crtc(pipe_config); - if (ret) - return ret; - } - if (HAS_GMCH(dev_priv)) - intel_gmch_panel_fitting(intel_crtc, pipe_config, - conn_state->scaling_mode); + ret = intel_gmch_panel_fitting(pipe_config, conn_state); else - intel_pch_panel_fitting(intel_crtc, pipe_config, - conn_state->scaling_mode); + ret = intel_pch_panel_fitting(pipe_config, conn_state); + if (ret) + return ret; } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -2671,9 +2650,6 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); - intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); - intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); - /* * There are four kinds of DP registers: * @@ -3642,90 +3618,63 @@ static void chv_post_disable_dp(struct intel_atomic_state *state, } static void -_intel_dp_set_link_train(struct intel_dp *intel_dp, - u32 *DP, - u8 dp_train_pat) +cpt_set_link_train(struct intel_dp *intel_dp, + u8 dp_train_pat) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->base.port; - u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); + u32 *DP = &intel_dp->DP; - if (dp_train_pat & train_pat_mask) - drm_dbg_kms(&dev_priv->drm, - "Using DP training pattern TPS%d\n", - dp_train_pat & train_pat_mask); - - if (HAS_DDI(dev_priv)) { - u32 temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl); + *DP &= ~DP_LINK_TRAIN_MASK_CPT; - if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) - temp |= DP_TP_CTL_SCRAMBLE_DISABLE; - else - temp &= ~DP_TP_CTL_SCRAMBLE_DISABLE; - - temp &= ~DP_TP_CTL_LINK_TRAIN_MASK; - switch (dp_train_pat & train_pat_mask) { - case DP_TRAINING_PATTERN_DISABLE: - temp |= DP_TP_CTL_LINK_TRAIN_NORMAL; - - break; - case DP_TRAINING_PATTERN_1: - temp |= DP_TP_CTL_LINK_TRAIN_PAT1; - break; - case DP_TRAINING_PATTERN_2: - temp |= DP_TP_CTL_LINK_TRAIN_PAT2; - break; - case DP_TRAINING_PATTERN_3: - temp |= DP_TP_CTL_LINK_TRAIN_PAT3; - break; - case DP_TRAINING_PATTERN_4: - temp |= DP_TP_CTL_LINK_TRAIN_PAT4; - break; - } - intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, temp); + switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { + case DP_TRAINING_PATTERN_DISABLE: + *DP |= DP_LINK_TRAIN_OFF_CPT; + break; + case DP_TRAINING_PATTERN_1: + *DP |= DP_LINK_TRAIN_PAT_1_CPT; + break; + case DP_TRAINING_PATTERN_2: + *DP |= DP_LINK_TRAIN_PAT_2_CPT; + break; + case DP_TRAINING_PATTERN_3: + drm_dbg_kms(&dev_priv->drm, + "TPS3 not supported, using TPS2 instead\n"); + *DP |= DP_LINK_TRAIN_PAT_2_CPT; + break; + } - } else if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) || - (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { - *DP &= ~DP_LINK_TRAIN_MASK_CPT; + intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(dev_priv, intel_dp->output_reg); +} - switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { - case DP_TRAINING_PATTERN_DISABLE: - *DP |= DP_LINK_TRAIN_OFF_CPT; - break; - case DP_TRAINING_PATTERN_1: - *DP |= DP_LINK_TRAIN_PAT_1_CPT; - break; - case DP_TRAINING_PATTERN_2: - *DP |= DP_LINK_TRAIN_PAT_2_CPT; - break; - case DP_TRAINING_PATTERN_3: - drm_dbg_kms(&dev_priv->drm, - "TPS3 not supported, using TPS2 instead\n"); - *DP |= DP_LINK_TRAIN_PAT_2_CPT; - break; - } +static void +g4x_set_link_train(struct intel_dp *intel_dp, + u8 dp_train_pat) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u32 *DP = &intel_dp->DP; - } else { - *DP &= ~DP_LINK_TRAIN_MASK; + *DP &= ~DP_LINK_TRAIN_MASK; - switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { - case DP_TRAINING_PATTERN_DISABLE: - *DP |= DP_LINK_TRAIN_OFF; - break; - case DP_TRAINING_PATTERN_1: - *DP |= DP_LINK_TRAIN_PAT_1; - break; - case DP_TRAINING_PATTERN_2: - *DP |= DP_LINK_TRAIN_PAT_2; - break; - case DP_TRAINING_PATTERN_3: - drm_dbg_kms(&dev_priv->drm, - "TPS3 not supported, using TPS2 instead\n"); - *DP |= DP_LINK_TRAIN_PAT_2; - break; - } + switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { + case DP_TRAINING_PATTERN_DISABLE: + *DP |= DP_LINK_TRAIN_OFF; + break; + case DP_TRAINING_PATTERN_1: + *DP |= DP_LINK_TRAIN_PAT_1; + break; + case DP_TRAINING_PATTERN_2: + *DP |= DP_LINK_TRAIN_PAT_2; + break; + case DP_TRAINING_PATTERN_3: + drm_dbg_kms(&dev_priv->drm, + "TPS3 not supported, using TPS2 instead\n"); + *DP |= DP_LINK_TRAIN_PAT_2; + break; } + + intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(dev_priv, intel_dp->output_reg); } static void intel_dp_enable_port(struct intel_dp *intel_dp, @@ -4064,7 +4013,7 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing) } } -static u32 vlv_signal_levels(struct intel_dp *intel_dp) +static void vlv_set_signal_levels(struct intel_dp *intel_dp) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; unsigned long demph_reg_value, preemph_reg_value, @@ -4092,7 +4041,7 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp) uniqtranscale_reg_value = 0x5598DA3A; break; default: - return 0; + return; } break; case DP_TRAIN_PRE_EMPH_LEVEL_1: @@ -4111,7 +4060,7 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp) uniqtranscale_reg_value = 0x55ADDA3A; break; default: - return 0; + return; } break; case DP_TRAIN_PRE_EMPH_LEVEL_2: @@ -4126,7 +4075,7 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp) uniqtranscale_reg_value = 0x55ADDA3A; break; default: - return 0; + return; } break; case DP_TRAIN_PRE_EMPH_LEVEL_3: @@ -4137,20 +4086,18 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp) uniqtranscale_reg_value = 0x55ADDA3A; break; default: - return 0; + return; } break; default: - return 0; + return; } vlv_set_phy_signal_level(encoder, demph_reg_value, preemph_reg_value, uniqtranscale_reg_value, 0); - - return 0; } -static u32 chv_signal_levels(struct intel_dp *intel_dp) +static void chv_set_signal_levels(struct intel_dp *intel_dp) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; u32 deemph_reg_value, margin_reg_value; @@ -4178,7 +4125,7 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp) uniq_trans_scale = true; break; default: - return 0; + return; } break; case DP_TRAIN_PRE_EMPH_LEVEL_1: @@ -4196,7 +4143,7 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp) margin_reg_value = 154; break; default: - return 0; + return; } break; case DP_TRAIN_PRE_EMPH_LEVEL_2: @@ -4210,7 +4157,7 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp) margin_reg_value = 154; break; default: - return 0; + return; } break; case DP_TRAIN_PRE_EMPH_LEVEL_3: @@ -4220,21 +4167,18 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp) margin_reg_value = 154; break; default: - return 0; + return; } break; default: - return 0; + return; } chv_set_phy_signal_level(encoder, deemph_reg_value, margin_reg_value, uniq_trans_scale); - - return 0; } -static u32 -g4x_signal_levels(u8 train_set) +static u32 g4x_signal_levels(u8 train_set) { u32 signal_levels = 0; @@ -4271,12 +4215,31 @@ g4x_signal_levels(u8 train_set) return signal_levels; } +static void +g4x_set_signal_levels(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u8 train_set = intel_dp->train_set[0]; + u32 signal_levels; + + signal_levels = g4x_signal_levels(train_set); + + drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n", + signal_levels); + + intel_dp->DP &= ~(DP_VOLTAGE_MASK | DP_PRE_EMPHASIS_MASK); + intel_dp->DP |= signal_levels; + + intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(dev_priv, intel_dp->output_reg); +} + /* SNB CPU eDP voltage swing and pre-emphasis control */ -static u32 -snb_cpu_edp_signal_levels(u8 train_set) +static u32 snb_cpu_edp_signal_levels(u8 train_set) { - int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | - DP_TRAIN_PRE_EMPHASIS_MASK); + u8 signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | + DP_TRAIN_PRE_EMPHASIS_MASK); + switch (signal_levels) { case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0: case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0: @@ -4299,12 +4262,31 @@ snb_cpu_edp_signal_levels(u8 train_set) } } +static void +snb_cpu_edp_set_signal_levels(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u8 train_set = intel_dp->train_set[0]; + u32 signal_levels; + + signal_levels = snb_cpu_edp_signal_levels(train_set); + + drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n", + signal_levels); + + intel_dp->DP &= ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB; + intel_dp->DP |= signal_levels; + + intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(dev_priv, intel_dp->output_reg); +} + /* IVB CPU eDP voltage swing and pre-emphasis control */ -static u32 -ivb_cpu_edp_signal_levels(u8 train_set) +static u32 ivb_cpu_edp_signal_levels(u8 train_set) { - int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | - DP_TRAIN_PRE_EMPHASIS_MASK); + u8 signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | + DP_TRAIN_PRE_EMPHASIS_MASK); + switch (signal_levels) { case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0: return EDP_LINK_TRAIN_400MV_0DB_IVB; @@ -4330,38 +4312,29 @@ ivb_cpu_edp_signal_levels(u8 train_set) } } -void -intel_dp_set_signal_levels(struct intel_dp *intel_dp) +static void +ivb_cpu_edp_set_signal_levels(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->base.port; - u32 signal_levels, mask = 0; u8 train_set = intel_dp->train_set[0]; + u32 signal_levels; - if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - signal_levels = bxt_signal_levels(intel_dp); - } else if (HAS_DDI(dev_priv)) { - signal_levels = ddi_signal_levels(intel_dp); - mask = DDI_BUF_EMP_MASK; - } else if (IS_CHERRYVIEW(dev_priv)) { - signal_levels = chv_signal_levels(intel_dp); - } else if (IS_VALLEYVIEW(dev_priv)) { - signal_levels = vlv_signal_levels(intel_dp); - } else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A) { - signal_levels = ivb_cpu_edp_signal_levels(train_set); - mask = EDP_LINK_TRAIN_VOL_EMP_MASK_IVB; - } else if (IS_GEN(dev_priv, 6) && port == PORT_A) { - signal_levels = snb_cpu_edp_signal_levels(train_set); - mask = EDP_LINK_TRAIN_VOL_EMP_MASK_SNB; - } else { - signal_levels = g4x_signal_levels(train_set); - mask = DP_VOLTAGE_MASK | DP_PRE_EMPHASIS_MASK; - } + signal_levels = ivb_cpu_edp_signal_levels(train_set); - if (mask) - drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n", - signal_levels); + drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n", + signal_levels); + + intel_dp->DP &= ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB; + intel_dp->DP |= signal_levels; + + intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(dev_priv, intel_dp->output_reg); +} + +void intel_dp_set_signal_levels(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u8 train_set = intel_dp->train_set[0]; drm_dbg_kms(&dev_priv->drm, "Using vswing level %d%s\n", train_set & DP_TRAIN_VOLTAGE_SWING_MASK, @@ -4372,55 +4345,28 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp) train_set & DP_TRAIN_MAX_PRE_EMPHASIS_REACHED ? " (max)" : ""); - intel_dp->DP = (intel_dp->DP & ~mask) | signal_levels; - - intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); - intel_de_posting_read(dev_priv, intel_dp->output_reg); + intel_dp->set_signal_levels(intel_dp); } void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, u8 dp_train_pat) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = - to_i915(intel_dig_port->base.base.dev); + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); - _intel_dp_set_link_train(intel_dp, &intel_dp->DP, dp_train_pat); + if (dp_train_pat & train_pat_mask) + drm_dbg_kms(&dev_priv->drm, + "Using DP training pattern TPS%d\n", + dp_train_pat & train_pat_mask); - intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); - intel_de_posting_read(dev_priv, intel_dp->output_reg); + intel_dp->set_link_train(intel_dp, dp_train_pat); } void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->base.port; - u32 val; - - if (!HAS_DDI(dev_priv)) - return; - - val = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl); - val &= ~DP_TP_CTL_LINK_TRAIN_MASK; - val |= DP_TP_CTL_LINK_TRAIN_IDLE; - intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, val); - - /* - * Until TGL on PORT_A we can have only eDP in SST mode. There the only - * reason we need to set idle transmission mode is to work around a HW - * issue where we enable the pipe while not in idle link-training mode. - * In this case there is requirement to wait for a minimum number of - * idle patterns to be sent. - */ - if (port == PORT_A && INTEL_GEN(dev_priv) < 12) - return; - - if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, - DP_TP_STATUS_IDLE_DONE, 1)) - drm_err(&dev_priv->drm, - "Timed out waiting for DP idle patterns\n"); + if (intel_dp->set_idle_link_train) + intel_dp->set_idle_link_train(intel_dp); } static void @@ -5567,7 +5513,7 @@ void intel_dp_process_phy_request(struct intel_dp *intel_dp) static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) { - u8 test_result = DP_TEST_NAK; + u8 test_result; test_result = intel_dp_prepare_phytest(intel_dp); if (test_result != DP_TEST_ACK) @@ -5629,61 +5575,51 @@ static int intel_dp_check_mst_status(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); - bool bret; + bool need_retrain = false; - if (intel_dp->is_mst) { - u8 esi[DP_DPRX_ESI_LEN] = { 0 }; - int ret = 0; + if (!intel_dp->is_mst) + return -EINVAL; + + WARN_ON_ONCE(intel_dp->active_mst_links < 0); + + for (;;) { + u8 esi[DP_DPRX_ESI_LEN] = {}; + bool bret, handled; int retry; - bool handled; - WARN_ON_ONCE(intel_dp->active_mst_links < 0); bret = intel_dp_get_sink_irq_esi(intel_dp, esi); -go_again: - if (bret == true) { - - /* check link status - esi[10] = 0x200c */ - if (intel_dp->active_mst_links > 0 && - !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { - drm_dbg_kms(&i915->drm, - "channel EQ not ok, retraining\n"); - intel_dp_start_link_train(intel_dp); - intel_dp_stop_link_train(intel_dp); - } + if (!bret) { + drm_dbg_kms(&i915->drm, + "failed to get ESI - device may have failed\n"); + return -EINVAL; + } - drm_dbg_kms(&i915->drm, "got esi %3ph\n", esi); - ret = drm_dp_mst_hpd_irq(&intel_dp->mst_mgr, esi, &handled); - - if (handled) { - for (retry = 0; retry < 3; retry++) { - int wret; - wret = drm_dp_dpcd_write(&intel_dp->aux, - DP_SINK_COUNT_ESI+1, - &esi[1], 3); - if (wret == 3) { - break; - } - } + /* check link status - esi[10] = 0x200c */ + if (intel_dp->active_mst_links > 0 && !need_retrain && + !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { + drm_dbg_kms(&i915->drm, + "channel EQ not ok, retraining\n"); + need_retrain = true; + } - bret = intel_dp_get_sink_irq_esi(intel_dp, esi); - if (bret == true) { - drm_dbg_kms(&i915->drm, - "got esi2 %3ph\n", esi); - goto go_again; - } - } else - ret = 0; + drm_dbg_kms(&i915->drm, "got esi %3ph\n", esi); - return ret; - } else { - drm_dbg_kms(&i915->drm, - "failed to get ESI - device may have failed\n"); - intel_dp->is_mst = false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, - intel_dp->is_mst); + drm_dp_mst_hpd_irq(&intel_dp->mst_mgr, esi, &handled); + if (!handled) + break; + + for (retry = 0; retry < 3; retry++) { + int wret; + + wret = drm_dp_dpcd_write(&intel_dp->aux, + DP_SINK_COUNT_ESI+1, + &esi[1], 3); + if (wret == 3) + break; } } - return -EINVAL; + + return need_retrain; } static bool @@ -5720,20 +5656,102 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp) return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count); } +static bool intel_dp_has_connector(struct intel_dp *intel_dp, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_encoder *encoder; + enum pipe pipe; + + if (!conn_state->best_encoder) + return false; + + /* SST */ + encoder = &dp_to_dig_port(intel_dp)->base; + if (conn_state->best_encoder == &encoder->base) + return true; + + /* MST */ + for_each_pipe(i915, pipe) { + encoder = &intel_dp->mst_encoders[pipe]->base; + if (conn_state->best_encoder == &encoder->base) + return true; + } + + return false; +} + +static int intel_dp_prep_link_retrain(struct intel_dp *intel_dp, + struct drm_modeset_acquire_ctx *ctx, + u32 *crtc_mask) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct drm_connector_list_iter conn_iter; + struct intel_connector *connector; + int ret = 0; + + *crtc_mask = 0; + + if (!intel_dp_needs_link_retrain(intel_dp)) + return 0; + + drm_connector_list_iter_begin(&i915->drm, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { + struct drm_connector_state *conn_state = + connector->base.state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + + if (!intel_dp_has_connector(intel_dp, conn_state)) + continue; + + crtc = to_intel_crtc(conn_state->crtc); + if (!crtc) + continue; + + ret = drm_modeset_lock(&crtc->base.mutex, ctx); + if (ret) + break; + + crtc_state = to_intel_crtc_state(crtc->base.state); + + drm_WARN_ON(&i915->drm, !intel_crtc_has_dp_encoder(crtc_state)); + + if (!crtc_state->hw.active) + continue; + + if (conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) + continue; + + *crtc_mask |= drm_crtc_mask(&crtc->base); + } + drm_connector_list_iter_end(&conn_iter); + + if (!intel_dp_needs_link_retrain(intel_dp)) + *crtc_mask = 0; + + return ret; +} + +static bool intel_dp_is_connected(struct intel_dp *intel_dp) +{ + struct intel_connector *connector = intel_dp->attached_connector; + + return connector->base.status == connector_status_connected || + intel_dp->is_mst; +} + int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - struct intel_connector *connector = intel_dp->attached_connector; - struct drm_connector_state *conn_state; - struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; + u32 crtc_mask; int ret; - /* FIXME handle the MST connectors as well */ - - if (!connector || connector->base.status != connector_status_connected) + if (!intel_dp_is_connected(intel_dp)) return 0; ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, @@ -5741,46 +5759,42 @@ int intel_dp_retrain_link(struct intel_encoder *encoder, if (ret) return ret; - conn_state = connector->base.state; - - crtc = to_intel_crtc(conn_state->crtc); - if (!crtc) - return 0; - - ret = drm_modeset_lock(&crtc->base.mutex, ctx); + ret = intel_dp_prep_link_retrain(intel_dp, ctx, &crtc_mask); if (ret) return ret; - crtc_state = to_intel_crtc_state(crtc->base.state); - - drm_WARN_ON(&dev_priv->drm, !intel_crtc_has_dp_encoder(crtc_state)); - - if (!crtc_state->hw.active) + if (crtc_mask == 0) return 0; - if (conn_state->commit && - !try_wait_for_completion(&conn_state->commit->hw_done)) - return 0; + drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] retraining link\n", + encoder->base.base.id, encoder->base.name); - if (!intel_dp_needs_link_retrain(intel_dp)) - return 0; + for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); - /* Suppress underruns caused by re-training */ - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); - if (crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, - intel_crtc_pch_transcoder(crtc), false); + /* Suppress underruns caused by re-training */ + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); + if (crtc_state->has_pch_encoder) + intel_set_pch_fifo_underrun_reporting(dev_priv, + intel_crtc_pch_transcoder(crtc), false); + } intel_dp_start_link_train(intel_dp); intel_dp_stop_link_train(intel_dp); - /* Keep underrun reporting disabled until things are stable */ - intel_wait_for_vblank(dev_priv, crtc->pipe); + for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); - if (crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, - intel_crtc_pch_transcoder(crtc), true); + /* Keep underrun reporting disabled until things are stable */ + intel_wait_for_vblank(dev_priv, crtc->pipe); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); + if (crtc_state->has_pch_encoder) + intel_set_pch_fifo_underrun_reporting(dev_priv, + intel_crtc_pch_transcoder(crtc), true); + } return 0; } @@ -6451,8 +6465,6 @@ intel_dp_connector_register(struct drm_connector *connector) if (ret) return ret; - intel_connector_debugfs_add(connector); - drm_dbg_kms(&i915->drm, "registering %s bus for %s\n", intel_dp->aux.name, connector->kdev->kobj.name); @@ -6833,9 +6845,9 @@ static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { 0, 0 }, }; -static inline -int intel_dp_hdcp2_read_rx_status(struct intel_digital_port *intel_dig_port, - u8 *rx_status) +static int +intel_dp_hdcp2_read_rx_status(struct intel_digital_port *intel_dig_port, + u8 *rx_status) { struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); ssize_t ret; @@ -7424,7 +7436,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (intel_dp->is_mst) { - if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { + switch (intel_dp_check_mst_status(intel_dp)) { + case -EINVAL: /* * If we were in MST mode, and device is not * there, get out of MST mode @@ -7438,6 +7451,10 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) intel_dp->is_mst); return IRQ_NONE; + case 1: + return IRQ_NONE; + default: + break; } } @@ -8468,8 +8485,27 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->post_disable = g4x_post_disable_dp; } + if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) || + (HAS_PCH_CPT(dev_priv) && port != PORT_A)) + intel_dig_port->dp.set_link_train = cpt_set_link_train; + else + intel_dig_port->dp.set_link_train = g4x_set_link_train; + + if (IS_CHERRYVIEW(dev_priv)) + intel_dig_port->dp.set_signal_levels = chv_set_signal_levels; + else if (IS_VALLEYVIEW(dev_priv)) + intel_dig_port->dp.set_signal_levels = vlv_set_signal_levels; + else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A) + intel_dig_port->dp.set_signal_levels = ivb_cpu_edp_set_signal_levels; + else if (IS_GEN(dev_priv, 6) && port == PORT_A) + intel_dig_port->dp.set_signal_levels = snb_cpu_edp_set_signal_levels; + else + intel_dig_port->dp.set_signal_levels = g4x_set_signal_levels; + intel_dig_port->dp.output_reg = output_reg; intel_dig_port->max_lanes = 4; + intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dig_port->dp.regs.dp_tp_status = DP_TP_STATUS(port); intel_encoder->type = INTEL_OUTPUT_DP; intel_encoder->power_domain = intel_port_to_power_domain(port); diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 4b916468540f..0722540d64ad 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -358,6 +358,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector) */ if (i915->vbt.backlight.type != INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE && + i915_modparams.enable_dpcd_backlight != 1 && !drm_dp_has_quirk(&intel_dp->desc, intel_dp->edid_quirks, DP_QUIRK_FORCE_DPCD_BACKLIGHT)) { drm_info(&i915->drm, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a83f910d8e15..4d2384650383 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -489,7 +489,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, * here for the following ones. */ if (INTEL_GEN(dev_priv) < 12 || !first_mst_stream) - intel_ddi_enable_pipe_clock(pipe_config); + intel_ddi_enable_pipe_clock(encoder, pipe_config); intel_ddi_set_dp_msa(pipe_config, conn_state); @@ -508,6 +508,8 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); + intel_ddi_enable_transcoder_func(encoder, pipe_config); + intel_enable_pipe(pipe_config); intel_crtc_vblank_on(pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 2d47f1f756a2..b45185b80bec 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -80,7 +80,7 @@ intel_atomic_get_shared_dpll_state(struct drm_atomic_state *s) { struct intel_atomic_state *state = to_intel_atomic_state(s); - WARN_ON(!drm_modeset_is_locked(&s->dev->mode_config.connection_mutex)); + drm_WARN_ON(s->dev, !drm_modeset_is_locked(&s->dev->mode_config.connection_mutex)); if (!state->dpll_set) { state->dpll_set = true; @@ -979,7 +979,7 @@ hsw_ddi_spll_get_dpll(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - if (WARN_ON(crtc_state->port_clock / 2 != 135000)) + if (drm_WARN_ON(crtc->base.dev, crtc_state->port_clock / 2 != 135000)) return NULL; crtc_state->dpll_hw_state.spll = SPLL_PLL_ENABLE | SPLL_FREQ_1350MHz | @@ -1616,7 +1616,7 @@ static int skl_ddi_wrpll_get_freq(struct drm_i915_private *i915, dco_freq += ((pll_state->cfgcr1 & DPLL_CFGCR1_DCO_FRACTION_MASK) >> 9) * ref_clock / 0x8000; - if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0)) + if (drm_WARN_ON(&i915->drm, p0 == 0 || p1 == 0 || p2 == 0)) return 0; return dco_freq / (p0 * p1 * p2 * 5); @@ -2074,7 +2074,7 @@ bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state, clk_div->p1 = best_clock.p1; clk_div->p2 = best_clock.p2; - WARN_ON(best_clock.m1 != 2); + drm_WARN_ON(&i915->drm, best_clock.m1 != 2); clk_div->n = best_clock.n; clk_div->m2_int = best_clock.m2 >> 22; clk_div->m2_frac = best_clock.m2 & ((1 << 22) - 1); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index d7a6bf2277df..29fec6a92d17 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -34,7 +34,7 @@ #define DSB_BYTE_EN_SHIFT 20 #define DSB_REG_VALUE_MASK 0xfffff -static inline bool is_dsb_busy(struct intel_dsb *dsb) +static bool is_dsb_busy(struct intel_dsb *dsb) { struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -43,7 +43,7 @@ static inline bool is_dsb_busy(struct intel_dsb *dsb) return DSB_STATUS & intel_de_read(dev_priv, DSB_CTRL(pipe, dsb->id)); } -static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb) +static bool intel_dsb_enable_engine(struct intel_dsb *dsb) { struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -63,7 +63,7 @@ static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb) return true; } -static inline bool intel_dsb_disable_engine(struct intel_dsb *dsb) +static bool intel_dsb_disable_engine(struct intel_dsb *dsb) { struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 3c9c05478a03..eed037ec0b29 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -121,7 +121,7 @@ struct i2c_adapter_lookup { #define ICL_GPIO_DDPA_CTRLCLK_2 8 #define ICL_GPIO_DDPA_CTRLDATA_2 9 -static inline enum port intel_dsi_seq_port_to_port(u8 port) +static enum port intel_dsi_seq_port_to_port(u8 port) { return port ? PORT_C : PORT_A; } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 56bcd6c52a02..c6afa10e814c 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -485,9 +485,8 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, if (!ret) goto err_llb; else if (ret > 1) { - drm_info(&dev_priv->drm, - "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); - + drm_info_once(&dev_priv->drm, + "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); } fbc->threshold = ret; diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 6cb02c912acc..2979ed2588eb 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -302,12 +302,14 @@ void intel_frontbuffer_track(struct intel_frontbuffer *old, BITS_PER_TYPE(atomic_t)); if (old) { - WARN_ON(!(atomic_read(&old->bits) & frontbuffer_bits)); + drm_WARN_ON(old->obj->base.dev, + !(atomic_read(&old->bits) & frontbuffer_bits)); atomic_andnot(frontbuffer_bits, &old->bits); } if (new) { - WARN_ON(atomic_read(&new->bits) & frontbuffer_bits); + drm_WARN_ON(new->obj->base.dev, + atomic_read(&new->bits) & frontbuffer_bits); atomic_or(frontbuffer_bits, &new->bits); } } diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 6f72feb14f3e..212d4ee68205 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -64,7 +64,7 @@ static void assert_global_state_read_locked(struct intel_atomic_state *state) return; } - WARN(1, "Global state not read locked\n"); + drm_WARN(&dev_priv->drm, 1, "Global state not read locked\n"); } struct intel_global_state * @@ -148,7 +148,7 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state) for_each_oldnew_global_obj_in_state(state, obj, old_obj_state, new_obj_state, i) { - WARN_ON(obj->state != old_obj_state); + drm_WARN_ON(&dev_priv->drm, obj->state != old_obj_state); /* * If the new state wasn't modified (and properly diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 1fd3a5a6296b..a8d119b6b45c 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -379,8 +379,7 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv) return ret; } -static inline -unsigned int gmbus_max_xfer_size(struct drm_i915_private *dev_priv) +static unsigned int gmbus_max_xfer_size(struct drm_i915_private *dev_priv) { return INTEL_GEN(dev_priv) >= 9 ? GEN9_GMBUS_BYTE_COUNT_MAX : GMBUS_BYTE_COUNT_MAX; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index d3ad10653b2e..2cbc4619b4ce 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -109,18 +109,16 @@ bool intel_hdcp2_capable(struct intel_connector *connector) return capable; } -static inline -bool intel_hdcp_in_use(struct drm_i915_private *dev_priv, - enum transcoder cpu_transcoder, enum port port) +static bool intel_hdcp_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { return intel_de_read(dev_priv, HDCP_STATUS(dev_priv, cpu_transcoder, port)) & HDCP_STATUS_ENC; } -static inline -bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv, - enum transcoder cpu_transcoder, enum port port) +static bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { return intel_de_read(dev_priv, HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & @@ -853,8 +851,7 @@ static int _intel_hdcp_enable(struct intel_connector *connector) return ret; } -static inline -struct intel_connector *intel_hdcp_to_connector(struct intel_hdcp *hdcp) +static struct intel_connector *intel_hdcp_to_connector(struct intel_hdcp *hdcp) { return container_of(hdcp, struct intel_connector, hdcp); } @@ -1856,8 +1853,7 @@ static const struct component_ops i915_hdcp_component_ops = { .unbind = i915_hdcp_component_unbind, }; -static inline -enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port) +static enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port) { switch (port) { case PORT_A: @@ -1869,8 +1865,7 @@ enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port) } } -static inline -enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) +static enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) { switch (cpu_transcoder) { case TRANSCODER_A ... TRANSCODER_D: @@ -1880,8 +1875,8 @@ enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) } } -static inline int initialize_hdcp_port_data(struct intel_connector *connector, - const struct intel_hdcp_shim *shim) +static int initialize_hdcp_port_data(struct intel_connector *connector, + const struct intel_hdcp_shim *shim) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 6b1bc955124c..010f37240710 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -44,7 +44,6 @@ #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" -#include "intel_display_debugfs.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dpio_phy.h" @@ -1615,10 +1614,10 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired) return -EINVAL; } -static inline -int hdcp2_detect_msg_availability(struct intel_digital_port *intel_dig_port, - u8 msg_id, bool *msg_ready, - ssize_t *msg_sz) +static int +hdcp2_detect_msg_availability(struct intel_digital_port *intel_dig_port, + u8 msg_id, bool *msg_ready, + ssize_t *msg_sz) { struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); u8 rx_status[HDCP_2_2_HDMI_RXSTATUS_LEN]; @@ -1751,12 +1750,6 @@ int intel_hdmi_hdcp2_capable(struct intel_digital_port *intel_dig_port, return ret; } -static inline -enum hdcp_wired_protocol intel_hdmi_hdcp2_protocol(void) -{ - return HDCP_PROTOCOL_HDMI; -} - static const struct intel_hdcp_shim intel_hdmi_hdcp_shim = { .write_an_aksv = intel_hdmi_hdcp_write_an_aksv, .read_bksv = intel_hdmi_hdcp_read_bksv, @@ -2328,32 +2321,27 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, return true; } -static bool -intel_hdmi_ycbcr420_config(struct drm_connector *connector, - struct intel_crtc_state *config) +static int +intel_hdmi_ycbcr420_config(struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct drm_connector *connector = conn_state->connector; struct drm_i915_private *i915 = to_i915(connector->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(config->uapi.crtc); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + if (!drm_mode_is_420_only(&connector->display_info, adjusted_mode)) + return 0; if (!connector->ycbcr_420_allowed) { drm_err(&i915->drm, "Platform doesn't support YCBCR420 output\n"); - return false; - } - - config->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; - - /* YCBCR 420 output conversion needs a scaler */ - if (skl_update_scaler_crtc(config)) { - drm_dbg_kms(&i915->drm, - "Scaler allocation for output failed\n"); - return false; + return -EINVAL; } - intel_pch_panel_fitting(intel_crtc, config, - DRM_MODE_SCALE_FULLSCREEN); + crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; - return true; + return intel_pch_panel_fitting(crtc_state, conn_state); } static int intel_hdmi_port_clock(int clock, int bpc) @@ -2481,13 +2469,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; - if (drm_mode_is_420_only(&connector->display_info, adjusted_mode)) { - if (!intel_hdmi_ycbcr420_config(connector, pipe_config)) { - drm_err(&dev_priv->drm, - "Can't support YCBCR420 output\n"); - return -EINVAL; - } - } + ret = intel_hdmi_ycbcr420_config(pipe_config, conn_state); + if (ret) + return ret; pipe_config->limited_color_range = intel_hdmi_limited_color_range(pipe_config, conn_state); @@ -2878,8 +2862,6 @@ intel_hdmi_connector_register(struct drm_connector *connector) if (ret) return ret; - intel_connector_debugfs_add(connector); - intel_hdmi_create_i2c_symlink(connector); return ret; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index fe591f82163e..872f2a489339 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -403,6 +403,7 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); unsigned int lvds_bpp; + int ret; /* Should never happen!! */ if (INTEL_GEN(dev_priv) < 4 && intel_crtc->pipe == 0) { @@ -436,16 +437,15 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(dev_priv)) pipe_config->has_pch_encoder = true; - intel_pch_panel_fitting(intel_crtc, pipe_config, - conn_state->scaling_mode); - } else { - intel_gmch_panel_fitting(intel_crtc, pipe_config, - conn_state->scaling_mode); - - } + if (HAS_GMCH(dev_priv)) + ret = intel_gmch_panel_fitting(pipe_config, conn_state); + else + ret = intel_pch_panel_fitting(pipe_config, conn_state); + if (ret) + return ret; /* * XXX: It would be nice to support lower refresh rates on the diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 6e1d66323223..66711e62fa71 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -281,7 +281,7 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, enum pipe pipe = overlay->crtc->pipe; struct intel_frontbuffer *from = NULL, *to = NULL; - WARN_ON(overlay->old_vma); + drm_WARN_ON(&overlay->i915->drm, overlay->old_vma); if (overlay->vma) from = intel_frontbuffer_get(overlay->vma->obj); @@ -350,7 +350,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) struct i915_vma *vma; vma = fetch_and_zero(&overlay->old_vma); - if (WARN_ON(!vma)) + if (drm_WARN_ON(&overlay->i915->drm, !vma)) return; intel_frontbuffer_flip_complete(overlay->i915, @@ -396,7 +396,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) struct i915_request *rq; u32 *cs, flip_addr = overlay->flip_addr; - WARN_ON(!overlay->active); + drm_WARN_ON(&overlay->i915->drm, !overlay->active); /* According to intel docs the overlay hw may hang (when switching * off) without loading the filter coeffs. It is however unclear whether diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 08bfecfbe681..aa931f9f0d6a 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -176,24 +176,23 @@ intel_panel_vbt_fixed_mode(struct intel_connector *connector) } /* adjusted_mode has been preset to be the panel's fixed mode */ -void -intel_pch_panel_fitting(struct intel_crtc *intel_crtc, - struct intel_crtc_state *pipe_config, - int fitting_mode) +int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - int x = 0, y = 0, width = 0, height = 0; + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + int x, y, width, height; /* Native modes don't need fitting */ - if (adjusted_mode->crtc_hdisplay == pipe_config->pipe_src_w && - adjusted_mode->crtc_vdisplay == pipe_config->pipe_src_h && - pipe_config->output_format != INTEL_OUTPUT_FORMAT_YCBCR420) - goto done; + if (adjusted_mode->crtc_hdisplay == crtc_state->pipe_src_w && + adjusted_mode->crtc_vdisplay == crtc_state->pipe_src_h && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420) + return 0; - switch (fitting_mode) { + switch (conn_state->scaling_mode) { case DRM_MODE_SCALE_CENTER: - width = pipe_config->pipe_src_w; - height = pipe_config->pipe_src_h; + width = crtc_state->pipe_src_w; + height = crtc_state->pipe_src_h; x = (adjusted_mode->crtc_hdisplay - width + 1)/2; y = (adjusted_mode->crtc_vdisplay - height + 1)/2; break; @@ -202,18 +201,18 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc, /* Scale but preserve the aspect ratio */ { u32 scaled_width = adjusted_mode->crtc_hdisplay - * pipe_config->pipe_src_h; - u32 scaled_height = pipe_config->pipe_src_w + * crtc_state->pipe_src_h; + u32 scaled_height = crtc_state->pipe_src_w * adjusted_mode->crtc_vdisplay; if (scaled_width > scaled_height) { /* pillar */ - width = scaled_height / pipe_config->pipe_src_h; + width = scaled_height / crtc_state->pipe_src_h; if (width & 1) width++; x = (adjusted_mode->crtc_hdisplay - width + 1) / 2; y = 0; height = adjusted_mode->crtc_vdisplay; } else if (scaled_width < scaled_height) { /* letter */ - height = scaled_width / pipe_config->pipe_src_w; + height = scaled_width / crtc_state->pipe_src_w; if (height & 1) height++; y = (adjusted_mode->crtc_vdisplay - height + 1) / 2; @@ -227,6 +226,10 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc, } break; + case DRM_MODE_SCALE_NONE: + WARN_ON(adjusted_mode->crtc_hdisplay != crtc_state->pipe_src_w); + WARN_ON(adjusted_mode->crtc_vdisplay != crtc_state->pipe_src_h); + /* fall through */ case DRM_MODE_SCALE_FULLSCREEN: x = y = 0; width = adjusted_mode->crtc_hdisplay; @@ -234,14 +237,15 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc, break; default: - WARN(1, "bad panel fit mode: %d\n", fitting_mode); - return; + MISSING_CASE(conn_state->scaling_mode); + return -EINVAL; } -done: - pipe_config->pch_pfit.pos = (x << 16) | y; - pipe_config->pch_pfit.size = (width << 16) | height; - pipe_config->pch_pfit.enabled = pipe_config->pch_pfit.size != 0; + drm_rect_init(&crtc_state->pch_pfit.dst, + x, y, width, height); + crtc_state->pch_pfit.enabled = true; + + return 0; } static void @@ -287,7 +291,7 @@ centre_vertically(struct drm_display_mode *adjusted_mode, adjusted_mode->crtc_vsync_end = adjusted_mode->crtc_vsync_start + sync_width; } -static inline u32 panel_fitter_scaling(u32 source, u32 target) +static u32 panel_fitter_scaling(u32 source, u32 target) { /* * Floating point operation is not supported. So the FACTOR @@ -300,13 +304,14 @@ static inline u32 panel_fitter_scaling(u32 source, u32 target) return (FACTOR * ratio + FACTOR/2) / FACTOR; } -static void i965_scale_aspect(struct intel_crtc_state *pipe_config, +static void i965_scale_aspect(struct intel_crtc_state *crtc_state, u32 *pfit_control) { - const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; u32 scaled_width = adjusted_mode->crtc_hdisplay * - pipe_config->pipe_src_h; - u32 scaled_height = pipe_config->pipe_src_w * + crtc_state->pipe_src_h; + u32 scaled_height = crtc_state->pipe_src_w * adjusted_mode->crtc_vdisplay; /* 965+ is easy, it does everything in hw */ @@ -316,18 +321,18 @@ static void i965_scale_aspect(struct intel_crtc_state *pipe_config, else if (scaled_width < scaled_height) *pfit_control |= PFIT_ENABLE | PFIT_SCALING_LETTER; - else if (adjusted_mode->crtc_hdisplay != pipe_config->pipe_src_w) + else if (adjusted_mode->crtc_hdisplay != crtc_state->pipe_src_w) *pfit_control |= PFIT_ENABLE | PFIT_SCALING_AUTO; } -static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config, +static void i9xx_scale_aspect(struct intel_crtc_state *crtc_state, u32 *pfit_control, u32 *pfit_pgm_ratios, u32 *border) { - struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; u32 scaled_width = adjusted_mode->crtc_hdisplay * - pipe_config->pipe_src_h; - u32 scaled_height = pipe_config->pipe_src_w * + crtc_state->pipe_src_h; + u32 scaled_height = crtc_state->pipe_src_w * adjusted_mode->crtc_vdisplay; u32 bits; @@ -339,11 +344,11 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config, if (scaled_width > scaled_height) { /* pillar */ centre_horizontally(adjusted_mode, scaled_height / - pipe_config->pipe_src_h); + crtc_state->pipe_src_h); *border = LVDS_BORDER_ENABLE; - if (pipe_config->pipe_src_h != adjusted_mode->crtc_vdisplay) { - bits = panel_fitter_scaling(pipe_config->pipe_src_h, + if (crtc_state->pipe_src_h != adjusted_mode->crtc_vdisplay) { + bits = panel_fitter_scaling(crtc_state->pipe_src_h, adjusted_mode->crtc_vdisplay); *pfit_pgm_ratios |= (bits << PFIT_HORIZ_SCALE_SHIFT | @@ -355,11 +360,11 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config, } else if (scaled_width < scaled_height) { /* letter */ centre_vertically(adjusted_mode, scaled_width / - pipe_config->pipe_src_w); + crtc_state->pipe_src_w); *border = LVDS_BORDER_ENABLE; - if (pipe_config->pipe_src_w != adjusted_mode->crtc_hdisplay) { - bits = panel_fitter_scaling(pipe_config->pipe_src_w, + if (crtc_state->pipe_src_w != adjusted_mode->crtc_hdisplay) { + bits = panel_fitter_scaling(crtc_state->pipe_src_w, adjusted_mode->crtc_hdisplay); *pfit_pgm_ratios |= (bits << PFIT_HORIZ_SCALE_SHIFT | @@ -377,35 +382,35 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config, } } -void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, - struct intel_crtc_state *pipe_config, - int fitting_mode) +int intel_gmch_panel_fitting(struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 pfit_control = 0, pfit_pgm_ratios = 0, border = 0; - struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; /* Native modes don't need fitting */ - if (adjusted_mode->crtc_hdisplay == pipe_config->pipe_src_w && - adjusted_mode->crtc_vdisplay == pipe_config->pipe_src_h) + if (adjusted_mode->crtc_hdisplay == crtc_state->pipe_src_w && + adjusted_mode->crtc_vdisplay == crtc_state->pipe_src_h) goto out; - switch (fitting_mode) { + switch (conn_state->scaling_mode) { case DRM_MODE_SCALE_CENTER: /* * For centered modes, we have to calculate border widths & * heights and modify the values programmed into the CRTC. */ - centre_horizontally(adjusted_mode, pipe_config->pipe_src_w); - centre_vertically(adjusted_mode, pipe_config->pipe_src_h); + centre_horizontally(adjusted_mode, crtc_state->pipe_src_w); + centre_vertically(adjusted_mode, crtc_state->pipe_src_h); border = LVDS_BORDER_ENABLE; break; case DRM_MODE_SCALE_ASPECT: /* Scale but preserve the aspect ratio */ if (INTEL_GEN(dev_priv) >= 4) - i965_scale_aspect(pipe_config, &pfit_control); + i965_scale_aspect(crtc_state, &pfit_control); else - i9xx_scale_aspect(pipe_config, &pfit_control, + i9xx_scale_aspect(crtc_state, &pfit_control, &pfit_pgm_ratios, &border); break; case DRM_MODE_SCALE_FULLSCREEN: @@ -413,8 +418,8 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, * Full scaling, even if it changes the aspect ratio. * Fortunately this is all done for us in hw. */ - if (pipe_config->pipe_src_h != adjusted_mode->crtc_vdisplay || - pipe_config->pipe_src_w != adjusted_mode->crtc_hdisplay) { + if (crtc_state->pipe_src_h != adjusted_mode->crtc_vdisplay || + crtc_state->pipe_src_w != adjusted_mode->crtc_hdisplay) { pfit_control |= PFIT_ENABLE; if (INTEL_GEN(dev_priv) >= 4) pfit_control |= PFIT_SCALING_AUTO; @@ -426,15 +431,14 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, } break; default: - drm_WARN(&dev_priv->drm, 1, "bad panel fit mode: %d\n", - fitting_mode); - return; + MISSING_CASE(conn_state->scaling_mode); + return -EINVAL; } /* 965+ wants fuzzy fitting */ /* FIXME: handle multiple panels by failing gracefully */ if (INTEL_GEN(dev_priv) >= 4) - pfit_control |= PFIT_PIPE(intel_crtc->pipe) | PFIT_FILTER_FUZZY; + pfit_control |= PFIT_PIPE(crtc->pipe) | PFIT_FILTER_FUZZY; out: if ((pfit_control & PFIT_ENABLE) == 0) { @@ -443,12 +447,14 @@ out: } /* Make sure pre-965 set dither correctly for 18bpp panels. */ - if (INTEL_GEN(dev_priv) < 4 && pipe_config->pipe_bpp == 18) + if (INTEL_GEN(dev_priv) < 4 && crtc_state->pipe_bpp == 18) pfit_control |= PANEL_8TO6_DITHER_ENABLE; - pipe_config->gmch_pfit.control = pfit_control; - pipe_config->gmch_pfit.pgm_ratios = pfit_pgm_ratios; - pipe_config->gmch_pfit.lvds_border_bits = border; + crtc_state->gmch_pfit.control = pfit_control; + crtc_state->gmch_pfit.pgm_ratios = pfit_pgm_ratios; + crtc_state->gmch_pfit.lvds_border_bits = border; + + return 0; } /** @@ -484,8 +490,8 @@ static u32 scale(u32 source_val, } /* Scale user_level in range [0..user_max] to [hw_min..hw_max]. */ -static inline u32 scale_user_to_hw(struct intel_connector *connector, - u32 user_level, u32 user_max) +static u32 scale_user_to_hw(struct intel_connector *connector, + u32 user_level, u32 user_max) { struct intel_panel *panel = &connector->panel; @@ -495,8 +501,8 @@ static inline u32 scale_user_to_hw(struct intel_connector *connector, /* Scale user_level in range [0..user_max] to [0..hw_max], clamping the result * to [hw_min..hw_max]. */ -static inline u32 clamp_user_to_hw(struct intel_connector *connector, - u32 user_level, u32 user_max) +static u32 clamp_user_to_hw(struct intel_connector *connector, + u32 user_level, u32 user_max) { struct intel_panel *panel = &connector->panel; u32 hw_level; @@ -508,8 +514,8 @@ static inline u32 clamp_user_to_hw(struct intel_connector *connector, } /* Scale hw_level in range [hw_min..hw_max] to [0..user_max]. */ -static inline u32 scale_hw_to_user(struct intel_connector *connector, - u32 hw_level, u32 user_max) +static u32 scale_hw_to_user(struct intel_connector *connector, + u32 hw_level, u32 user_max) { struct intel_panel *panel = &connector->panel; diff --git a/drivers/gpu/drm/i915/display/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h index 11f2f6b628d8..968b95281cb4 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.h +++ b/drivers/gpu/drm/i915/display/intel_panel.h @@ -25,12 +25,10 @@ int intel_panel_init(struct intel_panel *panel, void intel_panel_fini(struct intel_panel *panel); void intel_fixed_panel_mode(const struct drm_display_mode *fixed_mode, struct drm_display_mode *adjusted_mode); -void intel_pch_panel_fitting(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config, - int fitting_mode); -void intel_gmch_panel_fitting(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config, - int fitting_mode); +int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); +int intel_gmch_panel_fitting(struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state, u32 level, u32 max); int intel_panel_setup_backlight(struct drm_connector *connector, diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 275618bedf32..d3bd5e798fbc 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -34,6 +34,7 @@ tc_port_load_fia_params(struct drm_i915_private *i915, if (INTEL_INFO(i915)->display.has_modular_fia) { modular_fia = intel_uncore_read(&i915->uncore, PORT_TX_DFLEXDPSP(FIA1)); + drm_WARN_ON(&i915->drm, modular_fia == 0xffffffff); modular_fia &= MODULAR_FIA_MASK; } else { modular_fia = 0; @@ -52,6 +53,62 @@ tc_port_load_fia_params(struct drm_i915_private *i915, } } +static enum intel_display_power_domain +tc_cold_get_power_domain(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + + if (INTEL_GEN(i915) == 11) + return intel_legacy_aux_to_power_domain(dig_port->aux_ch); + else + return POWER_DOMAIN_TC_COLD_OFF; +} + +static intel_wakeref_t +tc_cold_block(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum intel_display_power_domain domain; + + if (INTEL_GEN(i915) == 11 && !dig_port->tc_legacy_port) + return 0; + + domain = tc_cold_get_power_domain(dig_port); + return intel_display_power_get(i915, domain); +} + +static void +tc_cold_unblock(struct intel_digital_port *dig_port, intel_wakeref_t wakeref) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum intel_display_power_domain domain; + + /* + * wakeref == -1, means some error happened saving save_depot_stack but + * power should still be put down and 0 is a invalid save_depot_stack + * id so can be used to skip it for non TC legacy ports. + */ + if (wakeref == 0) + return; + + domain = tc_cold_get_power_domain(dig_port); + intel_display_power_put_async(i915, domain, wakeref); +} + +static void +assert_tc_cold_blocked(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + bool enabled; + + if (INTEL_GEN(i915) == 11 && !dig_port->tc_legacy_port) + return; + + enabled = intel_display_power_is_enabled(i915, + tc_cold_get_power_domain(dig_port)); + drm_WARN_ON(&i915->drm, !enabled); +} + u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); @@ -62,6 +119,7 @@ u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) PORT_TX_DFLEXDPSP(dig_port->tc_phy_fia)); drm_WARN_ON(&i915->drm, lane_mask == 0xffffffff); + assert_tc_cold_blocked(dig_port); lane_mask &= DP_LANE_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx); return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); @@ -77,6 +135,7 @@ u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port) PORT_TX_DFLEXPA1(dig_port->tc_phy_fia)); drm_WARN_ON(&i915->drm, pin_mask == 0xffffffff); + assert_tc_cold_blocked(dig_port); return (pin_mask & DP_PIN_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx)) >> DP_PIN_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); @@ -91,6 +150,8 @@ int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port) if (dig_port->tc_mode != TC_PORT_DP_ALT) return 4; + assert_tc_cold_blocked(dig_port); + lane_mask = 0; with_intel_display_power(i915, POWER_DOMAIN_DISPLAY_CORE, wakeref) lane_mask = intel_tc_port_get_lane_mask(dig_port); @@ -123,6 +184,8 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, drm_WARN_ON(&i915->drm, lane_reversal && dig_port->tc_mode != TC_PORT_LEGACY); + assert_tc_cold_blocked(dig_port); + val = intel_uncore_read(uncore, PORT_TX_DFLEXDPMLE1(dig_port->tc_phy_fia)); val &= ~DFLEXDPMLE1_DPMLETC_MASK(dig_port->tc_phy_fia_idx); @@ -420,9 +483,14 @@ static void intel_tc_port_reset_mode(struct intel_digital_port *dig_port, enum tc_port_mode old_tc_mode = dig_port->tc_mode; intel_display_power_flush_work(i915); - drm_WARN_ON(&i915->drm, - intel_display_power_is_enabled(i915, - intel_aux_power_domain(dig_port))); + if (INTEL_GEN(i915) != 11 || !dig_port->tc_legacy_port) { + enum intel_display_power_domain aux_domain; + bool aux_powered; + + aux_domain = intel_aux_power_domain(dig_port); + aux_powered = intel_display_power_is_enabled(i915, aux_domain); + drm_WARN_ON(&i915->drm, aux_powered); + } icl_tc_phy_disconnect(dig_port); icl_tc_phy_connect(dig_port, required_lanes); @@ -445,9 +513,11 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); struct intel_encoder *encoder = &dig_port->base; + intel_wakeref_t tc_cold_wref; int active_links = 0; mutex_lock(&dig_port->tc_lock); + tc_cold_wref = tc_cold_block(dig_port); dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port); if (dig_port->dp.is_mst) @@ -473,6 +543,7 @@ out: dig_port->tc_port_name, tc_port_mode_name(dig_port->tc_mode)); + tc_cold_unblock(dig_port, tc_cold_wref); mutex_unlock(&dig_port->tc_lock); } @@ -494,10 +565,15 @@ static bool intel_tc_port_needs_reset(struct intel_digital_port *dig_port) bool intel_tc_port_connected(struct intel_digital_port *dig_port) { bool is_connected; + intel_wakeref_t tc_cold_wref; intel_tc_port_lock(dig_port); + tc_cold_wref = tc_cold_block(dig_port); + is_connected = tc_port_live_status_mask(dig_port) & BIT(dig_port->tc_mode); + + tc_cold_unblock(dig_port, tc_cold_wref); intel_tc_port_unlock(dig_port); return is_connected; @@ -513,9 +589,16 @@ static void __intel_tc_port_lock(struct intel_digital_port *dig_port, mutex_lock(&dig_port->tc_lock); - if (!dig_port->tc_link_refcount && - intel_tc_port_needs_reset(dig_port)) - intel_tc_port_reset_mode(dig_port, required_lanes); + if (!dig_port->tc_link_refcount) { + intel_wakeref_t tc_cold_wref; + + tc_cold_wref = tc_cold_block(dig_port); + + if (intel_tc_port_needs_reset(dig_port)) + intel_tc_port_reset_mode(dig_port, required_lanes); + + tc_cold_unblock(dig_port, tc_cold_wref); + } drm_WARN_ON(&i915->drm, dig_port->tc_lock_wakeref); dig_port->tc_lock_wakeref = wakeref; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 4e18d4627065..f582ab52f0b0 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -267,7 +267,6 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, base); struct intel_connector *intel_connector = intel_dsi->attached_connector; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int ret; @@ -279,11 +278,11 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, intel_fixed_panel_mode(fixed_mode, adjusted_mode); if (HAS_GMCH(dev_priv)) - intel_gmch_panel_fitting(crtc, pipe_config, - conn_state->scaling_mode); + ret = intel_gmch_panel_fitting(pipe_config, conn_state); else - intel_pch_panel_fitting(crtc, pipe_config, - conn_state->scaling_mode); + ret = intel_pch_panel_fitting(pipe_config, conn_state); + if (ret) + return ret; } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -864,7 +863,7 @@ static void bxt_dsi_enable(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - WARN_ON(crtc_state->has_pch_encoder); + drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder); intel_crtc_vblank_on(crtc_state); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 11d9135cf21a..900ea8b7fc8f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -130,9 +130,7 @@ static void lut_close(struct i915_gem_context *ctx) if (&lut->obj_link != &obj->lut_list) { i915_lut_handle_free(lut); radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - if (atomic_dec_and_test(&vma->open_count) && - !i915_vma_is_ggtt(vma)) - i915_vma_close(vma); + i915_vma_close(vma); i915_gem_object_put(obj); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index af43e82f45c7..7f76fc68f498 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (list_empty(&obj->vma.list)) return; @@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { - struct drm_i915_gem_object *obj = vma->obj; - - assert_object_held(obj); - /* Bump the LRU to try and avoid premature eviction whilst flipping */ - i915_gem_object_bump_inactive_ggtt(obj); + i915_gem_object_bump_inactive_ggtt(vma->obj); i915_vma_unpin(vma); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 517898aa634c..964f73f062c1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -830,7 +830,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb, return 0; err: - atomic_dec(&vma->open_count); + i915_vma_close(vma); i915_vma_put(vma); i915_lut_handle_free(lut); return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 3f01cdd1a39b..9d1d0131f7c2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -135,9 +135,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) if (vma) { GEM_BUG_ON(vma->obj != obj); GEM_BUG_ON(!atomic_read(&vma->open_count)); - if (atomic_dec_and_test(&vma->open_count) && - !i915_vma_is_ggtt(vma)) - i915_vma_close(vma); + i915_vma_close(vma); } mutex_unlock(&ctx->mutex); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index 37f77aee1212..0158e49bf9bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -182,21 +182,35 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; - struct i915_vma *vma; + struct i915_vma *vma, *vn; + LIST_HEAD(unbind); int ret = 0; if (tiling_mode == I915_TILING_NONE) return 0; mutex_lock(&ggtt->vm.mutex); + + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { + GEM_BUG_ON(vma->vm != &ggtt->vm); + if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; + list_move(&vma->vm_link, &unbind); + } + spin_unlock(&obj->vma.lock); + + list_for_each_entry_safe(vma, vn, &unbind, vm_link) { ret = __i915_vma_unbind(vma); - if (ret) + if (ret) { + /* Restore the remaining vma on an error */ + list_splice(&unbind, &ggtt->vm.bound_list); break; + } } + mutex_unlock(&ggtt->vm.mutex); return ret; @@ -268,6 +282,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } mutex_unlock(&obj->mm.lock); + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { vma->fence_size = i915_gem_fence_size(i915, vma->size, tiling, stride); @@ -278,6 +293,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if (vma->fence) vma->fence->dirty = true; } + spin_unlock(&obj->vma.lock); obj->tiling_and_stride = tiling | stride; i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 2d0fd50c5312..c9988b6d5c88 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -421,7 +421,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) err = i915_vma_pin(vma, 0, 0, PIN_USER); if (err) - goto out_close; + goto out_put; err = igt_check_page_sizes(vma); @@ -432,8 +432,6 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) } i915_vma_unpin(vma); - i915_vma_close(vma); - i915_gem_object_put(obj); if (err) @@ -443,8 +441,6 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) goto out_device; -out_close: - i915_vma_close(vma); out_put: i915_gem_object_put(obj); out_device: @@ -492,7 +488,7 @@ static int igt_mock_memory_region_huge_pages(void *arg) err = i915_vma_pin(vma, 0, 0, PIN_USER); if (err) - goto out_close; + goto out_put; err = igt_check_page_sizes(vma); if (err) @@ -515,8 +511,6 @@ static int igt_mock_memory_region_huge_pages(void *arg) } i915_vma_unpin(vma); - i915_vma_close(vma); - __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } @@ -526,8 +520,6 @@ static int igt_mock_memory_region_huge_pages(void *arg) out_unpin: i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); out_put: i915_gem_object_put(obj); out_region: @@ -587,10 +579,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) } err = i915_vma_pin(vma, 0, 0, flags); - if (err) { - i915_vma_close(vma); + if (err) goto out_unpin; - } err = igt_check_page_sizes(vma); @@ -603,10 +593,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) i915_vma_unpin(vma); - if (err) { - i915_vma_close(vma); + if (err) goto out_unpin; - } /* * Try all the other valid offsets until the next @@ -615,16 +603,12 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) */ for (offset = 4096; offset < page_size; offset += 4096) { err = i915_vma_unbind(vma); - if (err) { - i915_vma_close(vma); + if (err) goto out_unpin; - } err = i915_vma_pin(vma, 0, 0, flags | offset); - if (err) { - i915_vma_close(vma); + if (err) goto out_unpin; - } err = igt_check_page_sizes(vma); @@ -636,10 +620,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) i915_vma_unpin(vma); - if (err) { - i915_vma_close(vma); + if (err) goto out_unpin; - } if (igt_timeout(end_time, "%s timed out at offset %x with page-size %x\n", @@ -647,8 +629,6 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) break; } - i915_vma_close(vma); - i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); @@ -670,12 +650,6 @@ static void close_object_list(struct list_head *objects, struct drm_i915_gem_object *obj, *on; list_for_each_entry_safe(obj, on, objects, st_link) { - struct i915_vma *vma; - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (!IS_ERR(vma)) - i915_vma_close(vma); - list_del(&obj->st_link); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); @@ -912,7 +886,7 @@ static int igt_mock_ppgtt_64K(void *arg) err = i915_vma_pin(vma, 0, 0, flags); if (err) - goto out_vma_close; + goto out_object_unpin; err = igt_check_page_sizes(vma); if (err) @@ -945,8 +919,6 @@ static int igt_mock_ppgtt_64K(void *arg) } i915_vma_unpin(vma); - i915_vma_close(vma); - i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); @@ -957,8 +929,6 @@ static int igt_mock_ppgtt_64K(void *arg) out_vma_unpin: i915_vma_unpin(vma); -out_vma_close: - i915_vma_close(vma); out_object_unpin: i915_gem_object_unpin_pages(obj); out_object_put: @@ -1070,7 +1040,7 @@ static int __igt_write_huge(struct intel_context *ce, err = i915_vma_unbind(vma); if (err) - goto out_vma_close; + return err; err = i915_vma_pin(vma, size, 0, flags | offset); if (err) { @@ -1081,7 +1051,7 @@ static int __igt_write_huge(struct intel_context *ce, if (err == -ENOSPC && i915_is_ggtt(ce->vm)) err = 0; - goto out_vma_close; + return err; } err = igt_check_page_sizes(vma); @@ -1102,8 +1072,6 @@ static int __igt_write_huge(struct intel_context *ce, out_vma_unpin: i915_vma_unpin(vma); -out_vma_close: - __i915_vma_put(vma); return err; } @@ -1477,8 +1445,10 @@ static int igt_ppgtt_pin_update(void *arg) unsigned int page_size = BIT(first); obj = i915_gem_object_create_internal(dev_priv, page_size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1488,7 +1458,7 @@ static int igt_ppgtt_pin_update(void *arg) err = i915_vma_pin(vma, SZ_2M, 0, flags); if (err) - goto out_close; + goto out_put; if (vma->page_sizes.sg < page_size) { pr_info("Unable to allocate page-size %x, finishing test early\n", @@ -1525,14 +1495,14 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; i915_vma_unpin(vma); - i915_vma_close(vma); - i915_gem_object_put(obj); } obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1542,7 +1512,7 @@ static int igt_ppgtt_pin_update(void *arg) err = i915_vma_pin(vma, 0, 0, flags); if (err) - goto out_close; + goto out_put; /* * Make sure we don't end up with something like where the pde is still @@ -1572,8 +1542,6 @@ static int igt_ppgtt_pin_update(void *arg) out_unpin: i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); out_put: i915_gem_object_put(obj); out_vm: @@ -1625,13 +1593,11 @@ static int igt_tmpfs_fallback(void *arg) err = i915_vma_pin(vma, 0, 0, PIN_USER); if (err) - goto out_close; + goto out_put; err = igt_check_page_sizes(vma); i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); out_put: i915_gem_object_put(obj); out_restore: @@ -1678,7 +1644,7 @@ static int igt_shrink_thp(void *arg) err = i915_vma_pin(vma, 0, 0, flags); if (err) - goto out_close; + goto out_put; if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { pr_info("failed to allocate THP, finishing test early\n"); @@ -1702,7 +1668,7 @@ static int igt_shrink_thp(void *arg) i915_gem_context_unlock_engines(ctx); i915_vma_unpin(vma); if (err) - goto out_close; + goto out_put; /* * Now that the pages are *unpinned* shrink-all should invoke @@ -1712,18 +1678,18 @@ static int igt_shrink_thp(void *arg) if (i915_gem_object_has_pages(obj)) { pr_err("shrink-all didn't truncate the pages\n"); err = -EINVAL; - goto out_close; + goto out_put; } if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { pr_err("residual page-size bits left\n"); err = -EINVAL; - goto out_close; + goto out_put; } err = i915_vma_pin(vma, 0, 0, flags); if (err) - goto out_close; + goto out_put; while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); @@ -1733,8 +1699,6 @@ static int igt_shrink_thp(void *arg) out_unpin: i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); out_put: i915_gem_object_put(obj); out_vm: @@ -1773,21 +1737,20 @@ int i915_gem_huge_page_mock_selftests(void) if (!i915_vm_is_4lvl(&ppgtt->vm)) { pr_err("failed to create 48b PPGTT\n"); err = -EINVAL; - goto out_close; + goto out_put; } /* If we were ever hit this then it's time to mock the 64K scratch */ if (!i915_vm_has_scratch_64K(&ppgtt->vm)) { pr_err("PPGTT missing 64K scratch page\n"); err = -EINVAL; - goto out_close; + goto out_put; } err = i915_subtests(tests, ppgtt); -out_close: +out_put: i915_vm_put(&ppgtt->vm); - out_unlock: drm_dev_put(&dev_priv->drm); return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index b972be165e85..be268511cb6d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -7,9 +7,12 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" +#include "gt/intel_gpu_commands.h" +#include "gem/i915_gem_lmem.h" #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" +#include "selftests/i915_random.h" #include "huge_gem_object.h" #include "mock_context.h" @@ -127,10 +130,602 @@ static int igt_client_fill(void *arg) } while (1); } +#define WIDTH 512 +#define HEIGHT 32 + +struct blit_buffer { + struct i915_vma *vma; + u32 start_val; + u32 tiling; +}; + +struct tiled_blits { + struct intel_context *ce; + struct blit_buffer buffers[3]; + struct blit_buffer scratch; + struct i915_vma *batch; + u64 hole; + u32 width; + u32 height; +}; + +static int prepare_blit(const struct tiled_blits *t, + struct blit_buffer *dst, + struct blit_buffer *src, + struct drm_i915_gem_object *batch) +{ + const int gen = INTEL_GEN(to_i915(batch->base.dev)); + bool use_64b_reloc = gen >= 8; + u32 src_pitch, dst_pitch; + u32 cmd, *cs; + + cs = i915_gem_object_pin_map(batch, I915_MAP_WC); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(BCS_SWCTRL); + cmd = (BCS_SRC_Y | BCS_DST_Y) << 16; + if (src->tiling == I915_TILING_Y) + cmd |= BCS_SRC_Y; + if (dst->tiling == I915_TILING_Y) + cmd |= BCS_DST_Y; + *cs++ = cmd; + + cmd = MI_FLUSH_DW; + if (gen >= 8) + cmd++; + *cs++ = cmd; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2); + if (gen >= 8) + cmd += 2; + + src_pitch = t->width * 4; + if (src->tiling) { + cmd |= XY_SRC_COPY_BLT_SRC_TILED; + src_pitch /= 4; + } + + dst_pitch = t->width * 4; + if (dst->tiling) { + cmd |= XY_SRC_COPY_BLT_DST_TILED; + dst_pitch /= 4; + } + + *cs++ = cmd; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch; + *cs++ = 0; + *cs++ = t->height << 16 | t->width; + *cs++ = lower_32_bits(dst->vma->node.start); + if (use_64b_reloc) + *cs++ = upper_32_bits(dst->vma->node.start); + *cs++ = 0; + *cs++ = src_pitch; + *cs++ = lower_32_bits(src->vma->node.start); + if (use_64b_reloc) + *cs++ = upper_32_bits(src->vma->node.start); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch); + i915_gem_object_unpin_map(batch); + + return 0; +} + +static void tiled_blits_destroy_buffers(struct tiled_blits *t) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(t->buffers); i++) + i915_vma_put(t->buffers[i].vma); + + i915_vma_put(t->scratch.vma); + i915_vma_put(t->batch); +} + +static struct i915_vma * +__create_vma(struct tiled_blits *t, size_t size, bool lmem) +{ + struct drm_i915_private *i915 = t->ce->vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + if (lmem) + obj = i915_gem_object_create_lmem(i915, size, 0); + else + obj = i915_gem_object_create_shmem(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, t->ce->vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem) +{ + return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem); +} + +static int tiled_blits_create_buffers(struct tiled_blits *t, + int width, int height, + struct rnd_state *prng) +{ + struct drm_i915_private *i915 = t->ce->engine->i915; + int i; + + t->width = width; + t->height = height; + + t->batch = __create_vma(t, PAGE_SIZE, false); + if (IS_ERR(t->batch)) + return PTR_ERR(t->batch); + + t->scratch.vma = create_vma(t, false); + if (IS_ERR(t->scratch.vma)) { + i915_vma_put(t->batch); + return PTR_ERR(t->scratch.vma); + } + + for (i = 0; i < ARRAY_SIZE(t->buffers); i++) { + struct i915_vma *vma; + + vma = create_vma(t, HAS_LMEM(i915) && i % 2); + if (IS_ERR(vma)) { + tiled_blits_destroy_buffers(t); + return PTR_ERR(vma); + } + + t->buffers[i].vma = vma; + t->buffers[i].tiling = + i915_prandom_u32_max_state(I915_TILING_Y + 1, prng); + } + + return 0; +} + +static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val) +{ + int i; + + t->scratch.start_val = val; + for (i = 0; i < t->width * t->height; i++) + vaddr[i] = val++; + + i915_gem_object_flush_map(t->scratch.vma->obj); +} + +static void hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +static u64 swizzle_bit(unsigned int bit, u64 offset) +{ + return (offset & BIT_ULL(bit)) >> (bit - 6); +} + +static u64 tiled_offset(const struct intel_gt *gt, + u64 v, + unsigned int stride, + unsigned int tiling) +{ + unsigned int swizzle; + u64 x, y; + + if (tiling == I915_TILING_NONE) + return v; + + y = div64_u64_rem(v, stride, &x); + + if (tiling == I915_TILING_X) { + v = div64_u64_rem(y, 8, &y) * stride * 8; + v += y * 512; + v += div64_u64_rem(x, 512, &x) << 12; + v += x; + + swizzle = gt->ggtt->bit_6_swizzle_x; + } else { + const unsigned int ytile_span = 16; + const unsigned int ytile_height = 512; + + v = div64_u64_rem(y, 32, &y) * stride * 32; + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + + swizzle = gt->ggtt->bit_6_swizzle_y; + } + + switch (swizzle) { + case I915_BIT_6_SWIZZLE_9: + v ^= swizzle_bit(9, v); + break; + case I915_BIT_6_SWIZZLE_9_10: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); + break; + case I915_BIT_6_SWIZZLE_9_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); + break; + } + + return v; +} + +static const char *repr_tiling(int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: return "linear"; + case I915_TILING_X: return "X"; + case I915_TILING_Y: return "Y"; + default: return "unknown"; + } +} + +static int verify_buffer(const struct tiled_blits *t, + struct blit_buffer *buf, + struct rnd_state *prng) +{ + const u32 *vaddr; + int ret = 0; + int x, y, p; + + x = i915_prandom_u32_max_state(t->width, prng); + y = i915_prandom_u32_max_state(t->height, prng); + p = y * t->width + x; + + vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + if (vaddr[0] != buf->start_val) { + ret = -EINVAL; + } else { + u64 v = tiled_offset(buf->vma->vm->gt, + p * 4, t->width * 4, + buf->tiling); + + if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p) + ret = -EINVAL; + } + if (ret) { + pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n", + repr_tiling(buf->tiling), + x, y, buf->start_val); + hexdump(vaddr, 4096); + } + + i915_gem_object_unpin_map(buf->vma->obj); + return ret; +} + +static int move_to_active(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags) +{ + int err; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, flags); + i915_vma_unlock(vma); + + return err; +} + +static int pin_buffer(struct i915_vma *vma, u64 addr) +{ + int err; + + if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) { + err = i915_vma_unbind(vma); + if (err) + return err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr); + if (err) + return err; + + return 0; +} + +static int +tiled_blit(struct tiled_blits *t, + struct blit_buffer *dst, u64 dst_addr, + struct blit_buffer *src, u64 src_addr) +{ + struct i915_request *rq; + int err; + + err = pin_buffer(src->vma, src_addr); + if (err) { + pr_err("Cannot pin src @ %llx\n", src_addr); + return err; + } + + err = pin_buffer(dst->vma, dst_addr); + if (err) { + pr_err("Cannot pin dst @ %llx\n", dst_addr); + goto err_src; + } + + err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH); + if (err) { + pr_err("cannot pin batch\n"); + goto err_dst; + } + + err = prepare_blit(t, dst, src, t->batch->obj); + if (err) + goto err_bb; + + rq = intel_context_create_request(t->ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_bb; + } + + err = move_to_active(t->batch, rq, 0); + if (!err) + err = move_to_active(src->vma, rq, 0); + if (!err) + err = move_to_active(dst->vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + t->batch->node.start, + t->batch->node.size, + 0); + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 2) < 0) + err = -ETIME; + i915_request_put(rq); + + dst->start_val = src->start_val; +err_bb: + i915_vma_unpin(t->batch); +err_dst: + i915_vma_unpin(dst->vma); +err_src: + i915_vma_unpin(src->vma); + return err; +} + +static struct tiled_blits * +tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) +{ + struct drm_mm_node hole; + struct tiled_blits *t; + u64 hole_size; + int err; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return ERR_PTR(-ENOMEM); + + t->ce = intel_context_create(engine); + if (IS_ERR(t->ce)) { + err = PTR_ERR(t->ce); + goto err_free; + } + + hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + hole_size *= 2; /* room to maneuver */ + hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + + mutex_lock(&t->ce->vm->mutex); + memset(&hole, 0, sizeof(hole)); + err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, + hole_size, 0, I915_COLOR_UNEVICTABLE, + 0, U64_MAX, + DRM_MM_INSERT_BEST); + if (!err) + drm_mm_remove_node(&hole); + mutex_unlock(&t->ce->vm->mutex); + if (err) { + err = -ENODEV; + goto err_put; + } + + t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + pr_info("Using hole at %llx\n", t->hole); + + err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); + if (err) + goto err_put; + + return t; + +err_put: + intel_context_put(t->ce); +err_free: + kfree(t); + return ERR_PTR(err); +} + +static void tiled_blits_destroy(struct tiled_blits *t) +{ + tiled_blits_destroy_buffers(t); + + intel_context_put(t->ce); + kfree(t); +} + +static int tiled_blits_prepare(struct tiled_blits *t, + struct rnd_state *prng) +{ + u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u32 *map; + int err; + int i; + + map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + /* Use scratch to fill objects */ + for (i = 0; i < ARRAY_SIZE(t->buffers); i++) { + fill_scratch(t, map, prandom_u32_state(prng)); + GEM_BUG_ON(verify_buffer(t, &t->scratch, prng)); + + err = tiled_blit(t, + &t->buffers[i], t->hole + offset, + &t->scratch, t->hole); + if (err == 0) + err = verify_buffer(t, &t->buffers[i], prng); + if (err) { + pr_err("Failed to create buffer %d\n", i); + break; + } + } + + i915_gem_object_unpin_map(t->scratch.vma->obj); + return err; +} + +static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) +{ + u64 offset = + round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + int err; + + /* We want to check position invariant tiling across GTT eviction */ + + err = tiled_blit(t, + &t->buffers[1], t->hole + offset / 2, + &t->buffers[0], t->hole + 2 * offset); + if (err) + return err; + + /* Reposition so that we overlap the old addresses, and slightly off */ + err = tiled_blit(t, + &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, + &t->buffers[1], t->hole + 3 * offset / 2); + if (err) + return err; + + err = verify_buffer(t, &t->buffers[2], prng); + if (err) + return err; + + return 0; +} + +static int __igt_client_tiled_blits(struct intel_engine_cs *engine, + struct rnd_state *prng) +{ + struct tiled_blits *t; + int err; + + t = tiled_blits_create(engine, prng); + if (IS_ERR(t)) + return PTR_ERR(t); + + err = tiled_blits_prepare(t, prng); + if (err) + goto out; + + err = tiled_blits_bounce(t, prng); + if (err) + goto out; + +out: + tiled_blits_destroy(t); + return err; +} + +static bool has_bit17_swizzle(int sw) +{ + return (sw == I915_BIT_6_SWIZZLE_9_10_17 || + sw == I915_BIT_6_SWIZZLE_9_17); +} + +static bool bad_swizzling(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + return true; + + if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) || + has_bit17_swizzle(ggtt->bit_6_swizzle_y)) + return true; + + return false; +} + +static int igt_client_tiled_blits(void *arg) +{ + struct drm_i915_private *i915 = arg; + I915_RND_STATE(prng); + int inst = 0; + + /* Test requires explicit BLT tiling controls */ + if (INTEL_GEN(i915) < 4) + return 0; + + if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */ + return 0; + + do { + struct intel_engine_cs *engine; + int err; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + err = __igt_client_tiled_blits(engine, &prng); + if (err == -ENODEV) + err = 0; + if (err) + return err; + } while (1); +} + int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_client_fill), + SUBTEST(igt_client_tiled_blits), }; if (intel_gt_is_wedged(&i915->gt)) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index f4f933240b39..87d264fe54b2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1687,7 +1687,6 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto skip_request; i915_vma_unpin(vma); - i915_vma_close(vma); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index aab30d908072..174a24553322 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -10,6 +10,7 @@ #include "debugfs_gt_pm.h" #include "i915_drv.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_llc.h" #include "intel_rc6.h" #include "intel_rps.h" @@ -268,7 +269,7 @@ static int frequency_show(struct seq_file *m, void *unused) yesno(rpmodectl & GEN6_RP_ENABLE)); seq_printf(m, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); + GEN6_RP_MEDIA_SW_MODE)); vlv_punit_get(i915); freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); @@ -300,8 +301,9 @@ static int frequency_show(struct seq_file *m, void *unused) u32 rp_state_cap; u32 rpmodectl, rpinclimit, rpdeclimit; u32 rpstat, cagf, reqf; - u32 rpupei, rpcurup, rpprevup; - u32 rpdownei, rpcurdown, rpprevdown; + u32 rpcurupei, rpcurup, rpprevup; + u32 rpcurdownei, rpcurdown, rpprevdown; + u32 rpupei, rpupt, rpdownei, rpdownt; u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; int max_freq; @@ -334,12 +336,19 @@ static int frequency_show(struct seq_file *m, void *unused) rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); - rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; - rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + + rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); + rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + + rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); + rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + cagf = intel_rps_read_actual_frequency(rps); intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); @@ -372,7 +381,7 @@ static int frequency_show(struct seq_file *m, void *unused) yesno(rpmodectl & GEN6_RP_ENABLE)); seq_printf(m, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); + GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_mask); @@ -394,23 +403,35 @@ static int frequency_show(struct seq_file *m, void *unused) seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%dus)\n", - rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei)); - seq_printf(m, "RP CUR UP: %d (%dus)\n", - rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%dus)\n", - rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup)); + seq_printf(m, "RP CUR UP EI: %d (%dns)\n", + rpcurupei, + intel_gt_pm_interval_to_ns(gt, rpcurupei)); + seq_printf(m, "RP CUR UP: %d (%dns)\n", + rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); + seq_printf(m, "RP PREV UP: %d (%dns)\n", + rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); seq_printf(m, "Up threshold: %d%%\n", rps->power.up_threshold); - - seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", - rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei)); - seq_printf(m, "RP CUR DOWN: %d (%dus)\n", - rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%dus)\n", - rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown)); + seq_printf(m, "RP UP EI: %d (%dns)\n", + rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); + seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n", + rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); + + seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n", + rpcurdownei, + intel_gt_pm_interval_to_ns(gt, rpcurdownei)); + seq_printf(m, "RP CUR DOWN: %d (%dns)\n", + rpcurdown, + intel_gt_pm_interval_to_ns(gt, rpcurdown)); + seq_printf(m, "RP PREV DOWN: %d (%dns)\n", + rpprevdown, + intel_gt_pm_interval_to_ns(gt, rpprevdown)); seq_printf(m, "Down threshold: %d%%\n", rps->power.down_threshold); + seq_printf(m, "RP DOWN EI: %d (%dns)\n", + rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); + seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n", + rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; @@ -535,7 +556,8 @@ static int rps_boost_show(struct seq_file *m, void *data) struct drm_i915_private *i915 = gt->i915; struct intel_rps *rps = >->rps; - seq_printf(m, "RPS enabled? %d\n", rps->enabled); + seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); + seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); seq_printf(m, "GPU busy? %s\n", yesno(gt->awake)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); @@ -555,7 +577,7 @@ static int rps_boost_show(struct seq_file *m, void *data) seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); - if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) { + if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) { struct intel_uncore *uncore = gt->uncore; u32 rpup, rpupei; u32 rpdown, rpdownei; diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index 57a30956c922..487299cb91f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -25,8 +25,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, return PTR_ERR(cs); offset = i915_ggtt_offset(ce->state) + - LRC_STATE_PN * PAGE_SIZE + - CTX_R_PWR_CLK_STATE * 4; + LRC_STATE_OFFSET + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 07cb83a0d017..4954b0df4864 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -69,7 +69,13 @@ struct intel_context { #define CONTEXT_NOPREEMPT 7 u32 *lrc_reg_state; - u64 lrc_desc; + union { + struct { + u32 lrca; + u32 ccid; + }; + u64 desc; + } lrc; u32 tag; /* cookie passed to HW to track this context on submission */ /* Time on GPU as tracked by the hw. */ @@ -96,6 +102,8 @@ struct intel_context { /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; + + u8 wa_bb_page; /* if set, page num reserved for context workarounds */ }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d9ee64e2ef79..d10e52ff059f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -310,9 +310,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -int intel_enable_engine_stats(struct intel_engine_cs *engine); -void intel_disable_engine_stats(struct intel_engine_cs *engine); - ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); struct i915_request * diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index b1f8527f02c8..c9e46c5ced43 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -834,7 +834,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); if (engine->default_state) - i915_gem_object_put(engine->default_state); + fput(engine->default_state); if (engine->kernel_context) { intel_context_unpin(engine->kernel_context); @@ -1425,7 +1425,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, len = scnprintf(hdr, sizeof(hdr), "\t\tActive[%d]: ccid:%08x, ", (int)(port - execlists->active), - upper_32_bits(rq->context->lrc_desc)); + rq->context->lrc.ccid); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); @@ -1437,7 +1437,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, len = scnprintf(hdr, sizeof(hdr), "\t\tPending[%d]: ccid:%08x, ", (int)(port - execlists->pending), - upper_32_bits(rq->context->lrc_desc)); + rq->context->lrc.ccid); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); @@ -1589,58 +1589,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -/** - * intel_enable_engine_stats() - Enable engine busy tracking on engine - * @engine: engine to enable stats collection - * - * Start collecting the engine busyness data for @engine. - * - * Returns 0 on success or a negative error code. - */ -int intel_enable_engine_stats(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - unsigned long flags; - int err = 0; - - if (!intel_engine_supports_stats(engine)) - return -ENODEV; - - execlists_active_lock_bh(execlists); - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (unlikely(engine->stats.enabled == ~0)) { - err = -EBUSY; - goto unlock; - } - - if (engine->stats.enabled++ == 0) { - struct i915_request * const *port; - struct i915_request *rq; - - engine->stats.enabled_at = ktime_get(); - - /* XXX submission method oblivious? */ - for (port = execlists->active; (rq = *port); port++) - engine->stats.active++; - - for (port = execlists->pending; (rq = *port); port++) { - /* Exclude any contexts already counted in active */ - if (!intel_context_inflight_count(rq->context)) - engine->stats.active++; - } - - if (engine->stats.active) - engine->stats.start = engine->stats.enabled_at; - } - -unlock: - write_sequnlock_irqrestore(&engine->stats.lock, flags); - execlists_active_unlock_bh(execlists); - - return err; -} - static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) { ktime_t total = engine->stats.total; @@ -1649,7 +1597,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) * If the engine is executing something at the moment * add it to the total. */ - if (engine->stats.active) + if (atomic_read(&engine->stats.active)) total = ktime_add(total, ktime_sub(ktime_get(), engine->stats.start)); @@ -1675,28 +1623,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) return total; } -/** - * intel_disable_engine_stats() - Disable engine busy tracking on engine - * @engine: engine to disable stats collection - * - * Stops collecting the engine busyness data for @engine. - */ -void intel_disable_engine_stats(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (!intel_engine_supports_stats(engine)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - WARN_ON_ONCE(engine->stats.enabled == 0); - if (--engine->stats.enabled == 0) { - engine->stats.total = __intel_engine_get_busy_time(engine); - engine->stats.active = 0; - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - static bool match_ring(struct i915_request *rq) { u32 ring = ENGINE_READ(rq->engine, RING_START); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 3be679741d22..446e35ac0224 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -15,6 +15,7 @@ #include "intel_gt_pm.h" #include "intel_rc6.h" #include "intel_ring.h" +#include "shmem_utils.h" static int __engine_unpark(struct intel_wakeref *wf) { @@ -30,10 +31,8 @@ static int __engine_unpark(struct intel_wakeref *wf) /* Pin the default state for fast resets from atomic context. */ map = NULL; if (engine->default_state) - map = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (!IS_ERR_OR_NULL(map)) - engine->pinned_default_state = map; + map = shmem_pin_map(engine->default_state); + engine->pinned_default_state = map; /* Discard stale context state from across idling */ ce = engine->kernel_context; @@ -264,7 +263,8 @@ static int __engine_park(struct intel_wakeref *wf) engine->park(engine); if (engine->pinned_default_state) { - i915_gem_object_unpin_map(engine->default_state); + shmem_unpin_map(engine->default_state, + engine->pinned_default_state); engine->pinned_default_state = NULL; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 01d4bd781a2f..f760e2ef285b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -157,6 +157,11 @@ struct intel_engine_execlists { struct i915_priolist default_priolist; /** + * @ccid: identifier for contexts submitted to this engine + */ + u32 ccid; + + /** * @yield: CCID at the time of the last semaphore-wait interrupt. * * Instead of leaving a semaphore busy-spinning on an engine, we would @@ -304,8 +309,7 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; - unsigned int context_tag; -#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) + unsigned long context_tag; struct rb_node uabi_node; @@ -335,7 +339,7 @@ struct intel_engine_cs { unsigned long wakeref_serial; struct intel_wakeref wakeref; - struct drm_i915_gem_object *default_state; + struct file *default_state; void *pinned_default_state; struct { @@ -419,6 +423,7 @@ struct intel_engine_cs { void (*irq_enable)(struct intel_engine_cs *engine); void (*irq_disable)(struct intel_engine_cs *engine); + void (*sanitize)(struct intel_engine_cs *engine); int (*resume)(struct intel_engine_cs *engine); struct { @@ -527,34 +532,34 @@ struct intel_engine_cs { struct { /** - * @lock: Lock protecting the below fields. - */ - seqlock_t lock; - /** - * @enabled: Reference count indicating number of listeners. + * @active: Number of contexts currently scheduled in. */ - unsigned int enabled; + atomic_t active; + /** - * @active: Number of contexts currently scheduled in. + * @lock: Lock protecting the below fields. */ - unsigned int active; + seqlock_t lock; + /** - * @enabled_at: Timestamp when busy stats were enabled. + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). */ - ktime_t enabled_at; + ktime_t total; + /** * @start: Timestamp of the last idle to active transition. * * Idle is defined as active == 0, active is active > 0. */ ktime_t start; + /** - * @total: Total time this engine was busy. - * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). + * @rps: Utilisation at last RPS sampling. */ - ktime_t total; + ktime_t rps; } stats; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index eebd1190506f..66165b10256e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -840,7 +840,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = i915->drm.pdev; unsigned int size; u16 snb_gmch_ctl; - int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ if (!IS_DGFX(i915)) { @@ -848,13 +847,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->mappable_end = resource_size(&ggtt->gmadr); } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - if (err) - drm_err(&i915->drm, - "Can't set DMA mask/consistent mask (%d)\n", err); - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); if (IS_CHERRYVIEW(i915)) size = chv_get_total_gtt_size(snb_gmch_ctl); @@ -990,7 +982,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = i915->drm.pdev; unsigned int size; u16 snb_gmch_ctl; - int err; ggtt->gmadr = pci_resource(pdev, 2); ggtt->mappable_end = resource_size(&ggtt->gmadr); @@ -1005,12 +996,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) return -ENXIO; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); - if (err) - drm_err(&i915->drm, - "Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); size = gen6_get_total_gtt_size(snb_gmch_ctl); diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index f04214a54f75..ee10122a511e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -138,7 +138,7 @@ */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ -#define MI_LRI_CS_MMIO (1<<19) +#define MI_LRI_LRM_CS_MMIO REG_BIT(19) #define MI_LRI_FORCE_POSTED (1<<12) #define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) @@ -156,6 +156,7 @@ #define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) #define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) #define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) +#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 1c99cc72305a..52593edf8aa0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_mocs.h" @@ -15,6 +16,7 @@ #include "intel_rps.h" #include "intel_uncore.h" #include "intel_pm.h" +#include "shmem_utils.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -370,18 +372,6 @@ static struct i915_address_space *kernel_vm(struct intel_gt *gt) return i915_vm_get(>->ggtt->vm); } -static int __intel_context_flush_retire(struct intel_context *ce) -{ - struct intel_timeline *tl; - - tl = intel_context_timeline_lock(ce); - if (IS_ERR(tl)) - return PTR_ERR(tl); - - intel_context_timeline_unlock(tl); - return 0; -} - static int __engines_record_defaults(struct intel_gt *gt) { struct i915_request *requests[I915_NUM_ENGINES] = {}; @@ -447,8 +437,7 @@ err_rq: for (id = 0; id < ARRAY_SIZE(requests); id++) { struct i915_request *rq; - struct i915_vma *state; - void *vaddr; + struct file *state; rq = requests[id]; if (!rq) @@ -460,48 +449,16 @@ err_rq: } GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); - state = rq->context->state; - if (!state) + if (!rq->context->state) continue; - /* Serialise with retirement on another CPU */ - GEM_BUG_ON(!i915_request_completed(rq)); - err = __intel_context_flush_retire(rq->context); - if (err) - goto out; - - /* We want to be able to unbind the state from the GGTT */ - GEM_BUG_ON(intel_context_is_pinned(rq->context)); - - /* - * As we will hold a reference to the logical state, it will - * not be torn down with the context, and importantly the - * object will hold onto its vma (making it possible for a - * stray GTT write to corrupt our defaults). Unmap the vma - * from the GTT to prevent such accidents and reclaim the - * space. - */ - err = i915_vma_unbind(state); - if (err) - goto out; - - i915_gem_object_lock(state->obj); - err = i915_gem_object_set_to_cpu_domain(state->obj, false); - i915_gem_object_unlock(state->obj); - if (err) - goto out; - - i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); - - /* Check we can acquire the image of the context state */ - vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); + /* Keep a copy of the state's backing pages; free the obj */ + state = shmem_create_from_object(rq->context->state->obj); + if (IS_ERR(state)) { + err = PTR_ERR(state); goto out; } - - rq->engine->default_state = i915_gem_object_get(state->obj); - i915_gem_object_unpin_map(state->obj); + rq->engine->default_state = state; } out: @@ -576,6 +533,8 @@ int intel_gt_init(struct intel_gt *gt) */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_gt_init_clock_frequency(gt); + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); if (err) goto out_fw; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c new file mode 100644 index 000000000000..999079686846 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_clock_utils.h" + +#define MHZ_12 12000000 /* 12MHz (24MHz/2), 83.333ns */ +#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */ +#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */ + +static u32 read_clock_frequency(const struct intel_gt *gt) +{ + if (INTEL_GEN(gt->i915) >= 11) { + u32 config; + + config = intel_uncore_read(gt->uncore, RPM_CONFIG0); + config &= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK; + config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (config) { + case 0: return MHZ_12; + case 1: + case 2: return MHZ_19_2; + default: + case 3: return MHZ_12_5; + } + } else if (INTEL_GEN(gt->i915) >= 9) { + if (IS_GEN9_LP(gt->i915)) + return MHZ_19_2; + else + return MHZ_12; + } else { + return MHZ_12_5; + } +} + +void intel_gt_init_clock_frequency(struct intel_gt *gt) +{ + /* + * Note that on gen11+, the clock frequency may be reconfigured. + * We do not, and we assume nobody else does. + */ + gt->clock_frequency = read_clock_frequency(gt); + GT_TRACE(gt, + "Using clock frequency: %dkHz\n", + gt->clock_frequency / 1000); +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void intel_gt_check_clock_frequency(const struct intel_gt *gt) +{ + if (gt->clock_frequency != read_clock_frequency(gt)) { + dev_err(gt->i915->drm.dev, + "GT clock frequency changed, was %uHz, now %uHz!\n", + gt->clock_frequency, + read_clock_frequency(gt)); + } +} +#endif + +static u64 div_u64_roundup(u64 nom, u32 den) +{ + return div_u64(nom + den - 1, den); +} + +u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count) +{ + return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000), + gt->clock_frequency); +} + +u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count) +{ + return intel_gt_clock_interval_to_ns(gt, 16 * count); +} + +u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns) +{ + return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns), + 1000 * 1000 * 1000); +} + +u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns) +{ + u32 val; + + /* + * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS + * 8300) freezing up around GPU hangs. Looks as if even + * scheduling/timer interrupts start misbehaving if the RPS + * EI/thresholds are "bad", leading to a very sluggish or even + * frozen machine. + */ + val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16); + if (IS_GEN(gt->i915, 6)) + val = roundup(val, 25); + + return val; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h new file mode 100644 index 000000000000..f793c89f2cbd --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_GT_CLOCK_UTILS_H__ +#define __INTEL_GT_CLOCK_UTILS_H__ + +#include <linux/types.h> + +struct intel_gt; + +void intel_gt_init_clock_frequency(struct intel_gt *gt); + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void intel_gt_check_clock_frequency(const struct intel_gt *gt); +#else +static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {} +#endif + +u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count); +u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count); + +u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns); +u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns); + +#endif /* __INTEL_GT_CLOCK_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 3e8a56c7d818..5097786f4375 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -12,6 +12,7 @@ #include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_llc.h" @@ -138,6 +139,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) wakeref = intel_runtime_pm_get(gt->uncore->rpm); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_gt_check_clock_frequency(gt); + /* * As we have just resumed the machine and woken the device up from * deep PCI sleep (presumably D3_cold), assume the HW has been reset @@ -147,6 +150,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); + for_each_engine(engine, gt, id) + if (engine->sanitize) + engine->sanitize(engine); + intel_uc_sanitize(>->uc); for_each_engine(engine, gt, id) @@ -191,11 +198,12 @@ int intel_gt_resume(struct intel_gt *gt) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ + gt_sanitize(gt, true); + intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); - gt_sanitize(gt, true); if (intel_gt_is_wedged(gt)) { err = -EIO; goto out_fw; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 835ec184763e..16ff47c83bd5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -26,6 +26,11 @@ static bool retire_requests(struct intel_timeline *tl) return !i915_active_fence_isset(&tl->last_request); } +static bool engine_active(const struct intel_engine_cs *engine) +{ + return !list_empty(&engine->kernel_context->timeline->requests); +} + static bool flush_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; @@ -37,8 +42,13 @@ static bool flush_submission(struct intel_gt *gt) for_each_engine(engine, gt, id) { intel_engine_flush_submission(engine); - active |= flush_work(&engine->retire_work); - active |= flush_delayed_work(&engine->wakeref.work); + + /* Flush the background retirement and idle barriers */ + flush_work(&engine->retire_work); + flush_delayed_work(&engine->wakeref.work); + + /* Is the idle barrier still outstanding? */ + active |= engine_active(engine); } return active; @@ -162,7 +172,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } } - if (!retire_requests(tl) || flush_submission(gt)) + if (!retire_requests(tl)) active_count++; mutex_unlock(&tl->mutex); @@ -173,7 +183,6 @@ out_active: spin_lock(&timelines->lock); if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); - /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { GEM_BUG_ON(atomic_read(&tl->active_count)); @@ -185,6 +194,9 @@ out_active: spin_lock(&timelines->lock); list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); + if (flush_submission(gt)) /* Wait, there's more! */ + active_count++; + return active_count ? timeout : 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 96890dd12b5f..d02ccb735e24 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -61,6 +61,7 @@ struct intel_gt { struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ + ktime_t last_init_time; struct intel_reset reset; /** @@ -72,14 +73,12 @@ struct intel_gt { */ intel_wakeref_t awake; + u32 clock_frequency; + struct intel_llc llc; struct intel_rc6 rc6; struct intel_rps rps; - ktime_t last_init_time; - - struct i915_vma *scratch; - spinlock_t irq_lock; u32 gt_imr; u32 pm_ier; @@ -97,6 +96,8 @@ struct intel_gt { * Reserved for exclusive use by the kernel. */ struct i915_address_space *vm; + + struct i915_vma *scratch; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6fbad5e2343f..4311b12542fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -147,6 +147,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "intel_workarounds.h" +#include "shmem_utils.h" #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) @@ -238,6 +239,112 @@ __execlists_update_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine, u32 head); +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} + +static int lrc_ring_gpr0(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x74; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x68; + else if (engine->class == RENDER_CLASS) + return 0xd8; + else + return -1; +} + +static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x12; + else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) + return 0x18; + else + return -1; +} + +static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) +{ + int x; + + x = lrc_ring_wa_bb_per_ctx(engine); + if (x < 0) + return x; + + return x + 2; +} + +static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) +{ + int x; + + x = lrc_ring_indirect_ptr(engine); + if (x < 0) + return x; + + return x + 2; +} + +static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) +{ + if (engine->class != RENDER_CLASS) + return -1; + + if (INTEL_GEN(engine->i915) >= 12) + return 0xb6; + else if (INTEL_GEN(engine->i915) >= 11) + return 0xaa; + else + return -1; +} + +static u32 +lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) +{ + switch (INTEL_GEN(engine->i915)) { + default: + MISSING_CASE(INTEL_GEN(engine->i915)); + fallthrough; + case 12: + return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 11: + return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 10: + return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 9: + return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 8: + return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + } +} + +static void +lrc_ring_setup_indirect_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr, + u32 size) +{ + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); + GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); + regs[lrc_ring_indirect_ptr(engine) + 1] = + ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); + + GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); + regs[lrc_ring_indirect_offset(engine) + 1] = + lrc_ring_indirect_offset_default(engine) << 6; +} + static u32 intel_context_get_runtime(const struct intel_context *ce) { /* @@ -467,10 +574,10 @@ assert_priority_queue(const struct i915_request *prev, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ -static u64 +static u32 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - u64 desc; + u32 desc; desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) @@ -481,21 +588,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ - /* - * The following 32bits are copied into the OA reports (dword 2). - * Consider updating oa_get_render_ctx_id in i915_perf.c when changing - * anything below. - */ - if (INTEL_GEN(engine->i915) >= 11) { - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; - /* bits 48-53 */ - - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; - /* bits 61-63 */ - } - - return desc; + return i915_ggtt_offset(ce->state) | desc; } static inline unsigned int dword_in_page(void *addr) @@ -514,7 +607,7 @@ static void set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END(x) 0, (x) +#define END(total_state_size) 0, (total_state_size) { const u32 base = engine->mmio_base; @@ -537,7 +630,7 @@ static void set_offsets(u32 *regs, if (flags & POSTED) *regs |= MI_LRI_FORCE_POSTED; if (INTEL_GEN(engine->i915) >= 11) - *regs |= MI_LRI_CS_MMIO; + *regs |= MI_LRI_LRM_CS_MMIO; regs++; GEM_BUG_ON(!count); @@ -922,8 +1015,63 @@ static const u8 gen12_rcs_offsets[] = { NOP(6), LRI(1, 0), REG(0x0c8), + NOP(3 + 9 + 1), + + LRI(51, POSTED), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + REG(0x084), + NOP(1), - END(80) + END(192) }; #undef END @@ -1051,17 +1199,14 @@ static void intel_engine_context_in(struct intel_engine_cs *engine) { unsigned long flags; - if (READ_ONCE(engine->stats.enabled) == 0) + if (atomic_add_unless(&engine->stats.active, 1, 0)) return; write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); + if (!atomic_add_unless(&engine->stats.active, 1, 0)) { + engine->stats.start = ktime_get(); + atomic_inc(&engine->stats.active); } - write_sequnlock_irqrestore(&engine->stats.lock, flags); } @@ -1069,51 +1214,20 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) { unsigned long flags; - if (READ_ONCE(engine->stats.enabled) == 0) + GEM_BUG_ON(!atomic_read(&engine->stats.active)); + + if (atomic_add_unless(&engine->stats.active, -1, 1)) return; write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. - */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } + if (atomic_dec_and_test(&engine->stats.active)) { + engine->stats.total = + ktime_add(engine->stats.total, + ktime_sub(ktime_get(), engine->stats.start)); } - write_sequnlock_irqrestore(&engine->stats.lock, flags); } -static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x60; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x54; - else if (engine->class == RENDER_CLASS) - return 0x58; - else - return -1; -} - static void execlists_check_context(const struct intel_context *ce, const struct intel_engine_cs *engine) @@ -1161,7 +1275,7 @@ static void restore_default_state(struct intel_context *ce, if (engine->pinned_default_state) memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->pinned_default_state + LRC_STATE_OFFSET, engine->context_size - PAGE_SIZE); execlists_init_reg_state(regs, ce, engine, ce->ring, false); @@ -1204,7 +1318,7 @@ static void reset_active(struct i915_request *rq, __execlists_update_reg_state(ce, engine, head); /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) @@ -1252,18 +1366,23 @@ __execlists_schedule_in(struct i915_request *rq) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) execlists_check_context(ce, engine); - ce->lrc_desc &= ~GENMASK_ULL(47, 37); if (ce->tag) { /* Use a fixed tag for OA and friends */ - ce->lrc_desc |= (u64)ce->tag << 32; + GEM_BUG_ON(ce->tag <= BITS_PER_LONG); + ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc_desc |= - (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << - GEN11_SW_CTX_ID_SHIFT; - BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + unsigned int tag = ffs(engine->context_tag); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); } + ce->lrc.ccid |= engine->execlists.ccid; + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1303,7 +1422,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) static inline void __execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) + struct intel_engine_cs * const engine, + unsigned int ccid) { struct intel_context * const ce = rq->context; @@ -1321,6 +1441,14 @@ __execlists_schedule_out(struct i915_request *rq, i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (ccid < BITS_PER_LONG) { + GEM_BUG_ON(ccid == 0); + GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); + set_bit(ccid - 1, &engine->context_tag); + } + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -1346,15 +1474,17 @@ execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; + u32 ccid; trace_i915_request_out(rq); + ccid = rq->context->lrc.ccid; old = READ_ONCE(ce->inflight); do cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; while (!try_cmpxchg(&ce->inflight, &old, cur)); if (!cur) - __execlists_schedule_out(rq, old); + __execlists_schedule_out(rq, old, ccid); i915_request_put(rq); } @@ -1362,7 +1492,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; u32 tail, prev; /* @@ -1401,7 +1531,7 @@ static u64 execlists_update_context(struct i915_request *rq) */ wmb(); - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1422,8 +1552,9 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) if (!rq) return ""; - snprintf(buf, buflen, "%s%llx:%lld%s prio %d", + snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", prefix, + rq->context->lrc.ccid, rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : @@ -1460,9 +1591,12 @@ static __maybe_unused bool assert_pending_valid(const struct intel_engine_execlists *execlists, const char *msg) { + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); struct i915_request * const *port, *rq; struct intel_context *ce = NULL; bool sentinel = false; + u32 ccid = -1; trace_ports(execlists, msg, execlists->pending); @@ -1471,13 +1605,14 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, return true; if (!execlists->pending[0]) { - GEM_TRACE_ERR("Nothing pending for promotion!\n"); + GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", + engine->name); return false; } if (execlists->pending[execlists_num_ports(execlists)]) { - GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", - execlists_num_ports(execlists)); + GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", + engine->name, execlists_num_ports(execlists)); return false; } @@ -1489,20 +1624,31 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, GEM_BUG_ON(!i915_request_is_active(rq)); if (ce == rq->context) { - GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); return false; } ce = rq->context; + if (ccid == ce->lrc.ccid) { + GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", + engine->name, + ccid, ce->timeline->fence_context, + port - execlists->pending); + return false; + } + ccid = ce->lrc.ccid; + /* * Sentinels are supposed to be lonely so they flush the * current exection off the HW. Check that they are the * only request in the pending submission. */ if (sentinel) { - GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n", + GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); return false; @@ -1510,7 +1656,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, sentinel = i915_request_has_sentinel(rq); if (sentinel && port != execlists->pending) { - GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n", + GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); return false; @@ -1525,7 +1672,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, if (i915_active_is_idle(&ce->active) && !intel_context_is_barrier(ce)) { - GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); ok = false; @@ -1533,7 +1681,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } if (!i915_vma_is_pinned(ce->state)) { - GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); ok = false; @@ -1541,7 +1690,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } if (!i915_vma_is_pinned(ce->ring->vma)) { - GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); ok = false; @@ -1799,7 +1949,7 @@ timeslice_yield(const struct intel_engine_execlists *el, * safe, yield if it might be stuck -- it will be given a fresh * timeslice in the near future. */ - return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield); + return rq->context->lrc.ccid == READ_ONCE(el->yield); } static bool @@ -2289,8 +2439,8 @@ done: clear_ports(port + 1, last_port - port); WRITE_ONCE(execlists->yield, -1); - execlists_submit_ports(engine); set_preempt_timeout(engine, *active); + execlists_submit_ports(engine); } else { skip_submit: ring_set_paused(engine, 0); @@ -2384,13 +2534,6 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } -static inline void flush_hwsp(const struct i915_request *rq) -{ - mb(); - clflush((void *)READ_ONCE(rq->hwsp_seqno)); - mb(); -} - static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -2498,7 +2641,11 @@ static void process_csb(struct intel_engine_cs *engine) * We rely on the hardware being strongly * ordered, that the breadcrumb write is * coherent (visible from the CPU) before the - * user interrupt and CSB is processed. + * user interrupt is processed. One might assume + * that the breadcrumb write being before the + * user interrupt and the CS event for the context + * switch would therefore be before the CS event + * itself... */ if (GEM_SHOW_DEBUG() && !i915_request_completed(*execlists->active)) { @@ -2506,19 +2653,8 @@ static void process_csb(struct intel_engine_cs *engine) const u32 *regs __maybe_unused = rq->context->lrc_reg_state; - /* - * Flush the breadcrumb before crying foul. - * - * Since we have hit this on icl and seen the - * breadcrumb advance as we print out the debug - * info (so the problem corrected itself without - * lasting damage), and we know that icl suffers - * from missing global observation points in - * execlists, presume that affects even more - * coherency. - */ - flush_hwsp(rq); - + ENGINE_TRACE(engine, + "context completed before request!\n"); ENGINE_TRACE(engine, "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", ENGINE_READ(engine, RING_START), @@ -2538,11 +2674,6 @@ static void process_csb(struct intel_engine_cs *engine) regs[CTX_RING_START], regs[CTX_RING_HEAD], regs[CTX_RING_TAIL]); - - /* Still? Declare it caput! */ - if (!i915_request_completed(rq) && - !reset_in_progress(execlists)) - GEM_BUG_ON("context completed before request"); } execlists_schedule_out(*execlists->active++); @@ -2845,7 +2976,7 @@ active_context(struct intel_engine_cs *engine, u32 ccid) */ for (port = el->active; (rq = *port); port++) { - if (upper_32_bits(rq->context->lrc_desc) == ccid) { + if (rq->context->lrc.ccid == ccid) { ENGINE_TRACE(engine, "ccid found at active:%zd\n", port - el->active); @@ -2854,7 +2985,7 @@ active_context(struct intel_engine_cs *engine, u32 ccid) } for (port = el->pending; (rq = *port); port++) { - if (upper_32_bits(rq->context->lrc_desc) == ccid) { + if (rq->context->lrc.ccid == ccid) { ENGINE_TRACE(engine, "ccid found at pending:%zd\n", port - el->pending); @@ -3136,12 +3267,132 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) static void execlists_context_unpin(struct intel_context *ce) { - check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, + check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); i915_gem_object_unpin_map(ce->state->obj); } +static u32 * +gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + CTX_TIMESTAMP * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); + + return cs; +} + +static u32 * +gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs) +{ + GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1); + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32); + *cs++ = 0; + + return cs; +} + +static u32 * +gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) +{ + GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1); + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0)); + + return cs; +} + +static u32 * +gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) +{ + cs = gen12_emit_timestamp_wa(ce, cs); + cs = gen12_emit_cmd_buf_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + + return cs; +} + +static u32 * +gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) +{ + cs = gen12_emit_timestamp_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + + return cs; +} + +static inline u32 context_wa_bb_offset(const struct intel_context *ce) +{ + return PAGE_SIZE * ce->wa_bb_page; +} + +static u32 *context_indirect_bb(const struct intel_context *ce) +{ + void *ptr; + + GEM_BUG_ON(!ce->wa_bb_page); + + ptr = ce->lrc_reg_state; + ptr -= LRC_STATE_OFFSET; /* back to start of context image */ + ptr += context_wa_bb_offset(ce); + + return ptr; +} + +static void +setup_indirect_ctx_bb(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 *(*emit)(const struct intel_context *, u32 *)) +{ + u32 * const start = context_indirect_bb(ce); + u32 *cs; + + cs = emit(ce, start); + GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); + while ((unsigned long)cs % CACHELINE_BYTES) + *cs++ = MI_NOOP; + + lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine, + i915_ggtt_offset(ce->state) + + context_wa_bb_offset(ce), + (cs - start) * sizeof(*cs)); +} + static void __execlists_update_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -3165,6 +3416,18 @@ __execlists_update_reg_state(const struct intel_context *ce, i915_oa_init_reg_state(ce, engine); } + + if (ce->wa_bb_page) { + u32 *(*fn)(const struct intel_context *ce, u32 *cs); + + fn = gen12_emit_indirect_ctx_xcs; + if (ce->engine->class == RENDER_CLASS) + fn = gen12_emit_indirect_ctx_rcs; + + /* Mutually exclusive wrt to global indirect bb */ + GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); + setup_indirect_ctx_bb(ce, engine, fn); + } } static int @@ -3182,8 +3445,8 @@ __execlists_context_pin(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; - ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; + ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; __execlists_update_reg_state(ce, engine, ce->ring->tail); return 0; @@ -3211,7 +3474,7 @@ static void execlists_context_reset(struct intel_context *ce) ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -3615,6 +3878,65 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } +static void reset_csb_pointers(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + const unsigned int reset_value = execlists->csb_size - 1; + + ring_set_paused(engine, 0); + + /* + * After a reset, the HW starts writing into CSB entry [0]. We + * therefore have to set our HEAD pointer back one entry so that + * the *first* entry we check is entry 0. To complicate this further, + * as we don't wait for the first interrupt after reset, we have to + * fake the HW write to point back to the last entry so that our + * inline comparison of our cached head position against the last HW + * write works even before the first interrupt. + */ + execlists->csb_head = reset_value; + WRITE_ONCE(*execlists->csb_write, reset_value); + wmb(); /* Make sure this is visible to HW (paranoia?) */ + + /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. + */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + invalidate_csb_entries(&execlists->csb_status[0], + &execlists->csb_status[reset_value]); +} + +static void execlists_sanitize(struct intel_engine_cs *engine) +{ + /* + * Poison residual state on resume, in case the suspend didn't! + * + * We have to assume that across suspend/resume (or other loss + * of control) that the contents of our pinned buffers has been + * lost, replaced by garbage. Since this doesn't always happen, + * let's poison such state so that we more quickly spot when + * we falsely assume it has been preserved. + */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); + + reset_csb_pointers(engine); + + /* + * The kernel_context HWSP is stored in the status_page. As above, + * that may be lost on resume/initialisation, and so we need to + * reset the value in the HWSP. + */ + intel_timeline_reset_seqno(engine->kernel_context->timeline); + + /* And scrub the dirty cachelines for the HWSP */ + clflush_cache_range(engine->status_page.addr, PAGE_SIZE); +} + static void enable_error_interrupt(struct intel_engine_cs *engine) { u32 status; @@ -3681,7 +4003,7 @@ static void enable_execlists(struct intel_engine_cs *engine) enable_error_interrupt(engine); - engine->context_tag = 0; + engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); } static bool unexpected_starting_state(struct intel_engine_cs *engine) @@ -3754,38 +4076,6 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) intel_engine_stop_cs(engine); } -static void reset_csb_pointers(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - const unsigned int reset_value = execlists->csb_size - 1; - - ring_set_paused(engine, 0); - - /* - * After a reset, the HW starts writing into CSB entry [0]. We - * therefore have to set our HEAD pointer back one entry so that - * the *first* entry we check is entry 0. To complicate this further, - * as we don't wait for the first interrupt after reset, we have to - * fake the HW write to point back to the last entry so that our - * inline comparison of our cached head position against the last HW - * write works even before the first interrupt. - */ - execlists->csb_head = reset_value; - WRITE_ONCE(*execlists->csb_write, reset_value); - wmb(); /* Make sure this is visible to HW (paranoia?) */ - - /* - * Sometimes Icelake forgets to reset its pointers on a GPU reset. - * Bludgeon them with a mmio update to be sure. - */ - ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, - reset_value << 8 | reset_value); - ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); -} - static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) { int x; @@ -3895,7 +4185,7 @@ out_replay: head, ce->ring->tail); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine, head); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -4534,6 +4824,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine) static void execlists_release(struct intel_engine_cs *engine) { + engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ + execlists_shutdown(engine); intel_engine_cleanup_common(engine); @@ -4659,48 +4951,18 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; - reset_csb_pointers(engine); + if (INTEL_GEN(engine->i915) >= 11) { + execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); + execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); + } /* Finally, take ownership and responsibility for cleanup! */ + engine->sanitize = execlists_sanitize; engine->release = execlists_release; return 0; } -static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) -{ - u32 indirect_ctx_offset; - - switch (INTEL_GEN(engine->i915)) { - default: - MISSING_CASE(INTEL_GEN(engine->i915)); - /* fall through */ - case 12: - indirect_ctx_offset = - GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 11: - indirect_ctx_offset = - GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 10: - indirect_ctx_offset = - GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 9: - indirect_ctx_offset = - GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 8: - indirect_ctx_offset = - GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - } - - return indirect_ctx_offset; -} - - static void init_common_reg_state(u32 * const regs, const struct intel_engine_cs *engine, const struct intel_ring *ring, @@ -4722,27 +4984,23 @@ static void init_common_reg_state(u32 * const regs, } static void init_wa_bb_reg_state(u32 * const regs, - const struct intel_engine_cs *engine, - u32 pos_bb_per_ctx) + const struct intel_engine_cs *engine) { const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; if (wa_ctx->per_ctx.size) { const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - regs[pos_bb_per_ctx] = + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } if (wa_ctx->indirect_ctx.size) { - const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[pos_bb_per_ctx + 2] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - - regs[pos_bb_per_ctx + 4] = - intel_lr_indirect_ctx_offset(engine) << 6; + lrc_ring_setup_indirect_ctx(regs, engine, + i915_ggtt_offset(wa_ctx->vma) + + wa_ctx->indirect_ctx.offset, + wa_ctx->indirect_ctx.size); } } @@ -4791,10 +5049,7 @@ static void execlists_init_reg_state(u32 *regs, init_common_reg_state(regs, engine, ring, inhibit); init_ppgtt_reg_state(regs, vm_alias(ce->vm)); - init_wa_bb_reg_state(regs, engine, - INTEL_GEN(engine->i915) >= 12 ? - GEN12_CTX_BB_PER_CTX_PTR : - CTX_BB_PER_CTX_PTR); + init_wa_bb_reg_state(regs, engine); __reset_stop_ring(regs, engine); } @@ -4807,30 +5062,18 @@ populate_lr_context(struct intel_context *ce, { bool inhibit = true; void *vaddr; - int ret; vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - drm_dbg(&engine->i915->drm, - "Could not map object pages! (%d)\n", ret); - return ret; + drm_dbg(&engine->i915->drm, "Could not map object pages!\n"); + return PTR_ERR(vaddr); } set_redzone(vaddr, engine); if (engine->default_state) { - void *defaults; - - defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (IS_ERR(defaults)) { - ret = PTR_ERR(defaults); - goto err_unpin_ctx; - } - - memcpy(vaddr, defaults, engine->context_size); - i915_gem_object_unpin_map(engine->default_state); + shmem_read(engine->default_state, 0, + vaddr, engine->context_size); __set_bit(CONTEXT_VALID_BIT, &ce->flags); inhibit = false; } @@ -4842,14 +5085,12 @@ populate_lr_context(struct intel_context *ce, * The second page of the context object contains some registers which * must be set up prior to the first execution. */ - execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + execlists_init_reg_state(vaddr + LRC_STATE_OFFSET, ce, engine, ring, inhibit); - ret = 0; -err_unpin_ctx: __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); i915_gem_object_unpin_map(ctx_obj); - return ret; + return 0; } static int __execlists_context_alloc(struct intel_context *ce, @@ -4867,6 +5108,11 @@ static int __execlists_context_alloc(struct intel_context *ce, if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ + if (INTEL_GEN(engine->i915) == 12) { + ce->wa_bb_page = context_size / PAGE_SIZE; + context_size += PAGE_SIZE; + } + ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(ctx_obj)) return PTR_ERR(ctx_obj); @@ -5086,12 +5332,15 @@ static void virtual_submission_tasklet(unsigned long data) return; local_irq_disable(); - for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) { - struct intel_engine_cs *sibling = ve->siblings[n]; + for (n = 0; n < ve->num_siblings; n++) { + struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]); struct ve_node * const node = &ve->nodes[sibling->id]; struct rb_node **parent, *rb; bool first; + if (!READ_ONCE(ve->request)) + break; /* already handled by a sibling's tasklet */ + if (unlikely(!(mask & sibling->mask))) { if (!RB_EMPTY_NODE(&node->rb)) { spin_lock(&sibling->active.lock); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index dfbc214e14f5..91fd8e452d9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -90,6 +90,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine); #define LRC_PPHWSP_SZ (1) /* After the PPHWSP we have the logical state for the context */ #define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) +#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE) /* Space within PPHWSP reserved to be used as scratch */ #define LRC_PPHWSP_SCRATCH 0x34 diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index d39b72590e40..93cb6c460508 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,14 +9,13 @@ #include <linux/types.h> -/* GEN8 to GEN11 Reg State Context */ +/* GEN8 to GEN12 Reg State Context */ #define CTX_CONTEXT_CONTROL (0x02 + 1) #define CTX_RING_HEAD (0x04 + 1) #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_STATE (0x10 + 1) -#define CTX_BB_PER_CTX_PTR (0x18 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_PDP3_UDW (0x24 + 1) #define CTX_PDP3_LDW (0x26 + 1) @@ -30,9 +29,6 @@ #define GEN9_CTX_RING_MI_MODE 0x54 -/* GEN12+ Reg State Context */ -#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1) - #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 26e78db33675..708cb7808865 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -194,7 +194,7 @@ int intel_renderstate_init(struct intel_renderstate *so, err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto err_vma; + goto err_obj; err = render_state_setup(so, engine->i915); if (err) @@ -204,8 +204,6 @@ int intel_renderstate_init(struct intel_renderstate *so, err_unpin: i915_vma_unpin(so->vma); -err_vma: - i915_vma_close(so->vma); err_obj: i915_gem_object_put(obj); so->vma = NULL; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index d015f7b8b28e..ca7286e58409 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -42,6 +42,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "intel_workarounds.h" +#include "shmem_utils.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -1241,7 +1242,7 @@ alloc_context_vma(struct intel_engine_cs *engine) i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); if (engine->default_state) { - void *defaults, *vaddr; + void *vaddr; vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { @@ -1249,15 +1250,8 @@ alloc_context_vma(struct intel_engine_cs *engine) goto err_obj; } - defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (IS_ERR(defaults)) { - err = PTR_ERR(defaults); - goto err_map; - } - - memcpy(vaddr, defaults, engine->context_size); - i915_gem_object_unpin_map(engine->default_state); + shmem_read(engine->default_state, 0, + vaddr, engine->context_size); i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); @@ -1271,8 +1265,6 @@ alloc_context_vma(struct intel_engine_cs *engine) return vma; -err_map: - i915_gem_object_unpin_map(obj); err_obj: i915_gem_object_put(obj); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4dcfae16a7ce..c682355ec79e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -8,12 +8,15 @@ #include "i915_drv.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" #include "intel_gt_pm_irq.h" #include "intel_rps.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" +#define BUSY_MAX_EI 20u /* ms */ + /* * Lock protecting IPS related data structures */ @@ -44,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) intel_uncore_write_fw(uncore, reg, val); } +static void rps_timer(struct timer_list *t) +{ + struct intel_rps *rps = from_timer(rps, t, timer); + struct intel_engine_cs *engine; + enum intel_engine_id id; + s64 max_busy[3] = {}; + ktime_t dt, last; + + for_each_engine(engine, rps_to_gt(rps), id) { + s64 busy; + int i; + + dt = intel_engine_get_busy_time(engine); + last = engine->stats.rps; + engine->stats.rps = dt; + + busy = ktime_to_ns(ktime_sub(dt, last)); + for (i = 0; i < ARRAY_SIZE(max_busy); i++) { + if (busy > max_busy[i]) + swap(busy, max_busy[i]); + } + } + + dt = ktime_get(); + last = rps->pm_timestamp; + rps->pm_timestamp = dt; + + if (intel_rps_is_active(rps)) { + s64 busy; + int i; + + dt = ktime_sub(dt, last); + + /* + * Our goal is to evaluate each engine independently, so we run + * at the lowest clocks required to sustain the heaviest + * workload. However, a task may be split into sequential + * dependent operations across a set of engines, such that + * the independent contributions do not account for high load, + * but overall the task is GPU bound. For example, consider + * video decode on vcs followed by colour post-processing + * on vecs, followed by general post-processing on rcs. + * Since multi-engines being active does imply a single + * continuous workload across all engines, we hedge our + * bets by only contributing a factor of the distributed + * load into our busyness calculation. + */ + busy = max_busy[0]; + for (i = 1; i < ARRAY_SIZE(max_busy); i++) { + if (!max_busy[i]) + break; + + busy += div_u64(max_busy[i], 1 << i); + } + GT_TRACE(rps_to_gt(rps), + "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", + busy, (int)div64_u64(100 * busy, dt), + max_busy[0], max_busy[1], max_busy[2], + rps->pm_interval); + + if (100 * busy > rps->power.up_threshold * dt && + rps->cur_freq < rps->max_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else if (100 * busy < rps->power.down_threshold * dt && + rps->cur_freq > rps->min_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else { + rps->last_adj = 0; + } + + mod_timer(&rps->timer, + jiffies + msecs_to_jiffies(rps->pm_interval)); + rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); + } +} + +static void rps_start_timer(struct intel_rps *rps) +{ + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + rps->pm_interval = 1; + mod_timer(&rps->timer, jiffies + 1); +} + +static void rps_stop_timer(struct intel_rps *rps) +{ + del_timer_sync(&rps->timer); + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + cancel_work_sync(&rps->work); +} + static u32 rps_pm_mask(struct intel_rps *rps, u8 val) { u32 mask = 0; @@ -57,7 +154,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val) if (val < rps->max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; - mask &= READ_ONCE(rps->pm_events); + mask &= rps->pm_events; return rps_pm_sanitize_mask(rps, ~mask); } @@ -70,18 +167,11 @@ static void rps_reset_ei(struct intel_rps *rps) static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - u32 events; - rps_reset_ei(rps); + GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", + rps->pm_events, rps_pm_mask(rps, rps->last_freq)); - if (IS_VALLEYVIEW(gt->i915)) - /* WaGsvRC0ResidencyMethod:vlv */ - events = GEN6_PM_RP_UP_EI_EXPIRED; - else - events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); - WRITE_ONCE(rps->pm_events, events); + rps_reset_ei(rps); spin_lock_irq(>->irq_lock); gen6_gt_pm_enable_irq(gt, rps->pm_events); @@ -120,8 +210,6 @@ static void rps_disable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - WRITE_ONCE(rps->pm_events, 0); - intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); @@ -140,6 +228,7 @@ static void rps_disable_interrupts(struct intel_rps *rps) cancel_work_sync(&rps->work); rps_reset_interrupts(rps); + GT_TRACE(gt, "interrupts:off\n"); } static const struct cparams { @@ -532,8 +621,8 @@ static u32 rps_limits(struct intel_rps *rps, u8 val) static void rps_set_power(struct intel_rps *rps, int new_power) { - struct intel_uncore *uncore = rps_to_uncore(rps); - struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; @@ -542,55 +631,49 @@ static void rps_set_power(struct intel_rps *rps, int new_power) if (new_power == rps->power.mode) return; + threshold_up = 95; + threshold_down = 85; + /* Note the units here are not exactly 1us, but 1280ns. */ switch (new_power) { case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ ei_up = 16000; - threshold_up = 95; - - /* Downclock if less than 85% busy over 32ms */ ei_down = 32000; - threshold_down = 85; break; case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ ei_up = 13000; - threshold_up = 90; - - /* Downclock if less than 75% busy over 32ms */ ei_down = 32000; - threshold_down = 75; break; case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ ei_up = 10000; - threshold_up = 85; - - /* Downclock if less than 60% busy over 32ms */ ei_down = 32000; - threshold_down = 60; break; } /* When byt can survive without system hang with dynamic * sw freq adjustments, this restriction can be lifted. */ - if (IS_VALLEYVIEW(i915)) + if (IS_VALLEYVIEW(gt->i915)) goto skip_hw_write; - set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up)); + GT_TRACE(gt, + "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", + new_power, threshold_up, ei_up, threshold_down, ei_down); + + set(uncore, GEN6_RP_UP_EI, + intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); set(uncore, GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100)); + intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); - set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down)); + set(uncore, GEN6_RP_DOWN_EI, + intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); set(uncore, GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100)); + intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); set(uncore, GEN6_RP_CONTROL, - (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | @@ -645,9 +728,11 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) { + GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive)); + mutex_lock(&rps->power.mutex); if (interactive) { - if (!rps->power.interactive++ && READ_ONCE(rps->active)) + if (!rps->power.interactive++ && intel_rps_is_active(rps)) rps_set_power(rps, HIGH_POWER); } else { GEM_BUG_ON(!rps->power.interactive); @@ -672,6 +757,9 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) GEN6_AGGRESSIVE_TURBO); set(uncore, GEN6_RPNSWREQ, swreq); + GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", + val, intel_gpu_freq(rps, val), swreq); + return 0; } @@ -684,6 +772,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val) err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); vlv_punit_put(i915); + GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", + val, intel_gpu_freq(rps, val)); + return err; } @@ -714,28 +805,30 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update) void intel_rps_unpark(struct intel_rps *rps) { - if (!rps->enabled) + if (!intel_rps_is_enabled(rps)) return; + GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); + /* * Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ mutex_lock(&rps->lock); - WRITE_ONCE(rps->active, true); - + intel_rps_set_active(rps); intel_rps_set(rps, clamp(rps->cur_freq, rps->min_freq_softlimit, rps->max_freq_softlimit)); - rps->last_adj = 0; - mutex_unlock(&rps->lock); - if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps->pm_iir = 0; + if (intel_rps_has_interrupts(rps)) rps_enable_interrupts(rps); + if (intel_rps_uses_timer(rps)) + rps_start_timer(rps); if (IS_GEN(rps_to_i915(rps), 5)) gen5_rps_update(rps); @@ -743,15 +836,16 @@ void intel_rps_unpark(struct intel_rps *rps) void intel_rps_park(struct intel_rps *rps) { - struct drm_i915_private *i915 = rps_to_i915(rps); + int adj; - if (!rps->enabled) + if (!intel_rps_clear_active(rps)) return; - if (INTEL_GEN(i915) >= 6) + if (intel_rps_uses_timer(rps)) + rps_stop_timer(rps); + if (intel_rps_has_interrupts(rps)) rps_disable_interrupts(rps); - WRITE_ONCE(rps->active, false); if (rps->last_freq <= rps->idle_freq) return; @@ -782,8 +876,15 @@ void intel_rps_park(struct intel_rps *rps) * (Note we accommodate Cherryview's limitation of only using an * even bin by applying it to all.) */ - rps->cur_freq = - max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq); + adj = rps->last_adj; + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = -2; + rps->last_adj = adj; + rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); + + GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); } void intel_rps_boost(struct i915_request *rq) @@ -791,7 +892,7 @@ void intel_rps_boost(struct i915_request *rq) struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; unsigned long flags; - if (i915_request_signaled(rq) || !READ_ONCE(rps->active)) + if (i915_request_signaled(rq) || !intel_rps_is_active(rps)) return; /* Serializes with i915_request_retire() */ @@ -800,6 +901,9 @@ void intel_rps_boost(struct i915_request *rq) !dma_fence_is_signaled_locked(&rq->fence)) { set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", + rq->fence.context, rq->fence.seqno); + if (!atomic_fetch_inc(&rps->num_waiters) && READ_ONCE(rps->cur_freq) < rps->boost_freq) schedule_work(&rps->work); @@ -817,7 +921,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val) GEM_BUG_ON(val > rps->max_freq); GEM_BUG_ON(val < rps->min_freq); - if (rps->active) { + if (intel_rps_is_active(rps)) { err = rps_set(rps, val, true); if (err) return err; @@ -826,7 +930,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val) * Make sure we continue to get interrupts * until we hit the minimum or maximum frequencies. */ - if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + if (intel_rps_has_interrupts(rps)) { struct intel_uncore *uncore = rps_to_uncore(rps); set(uncore, @@ -895,6 +999,7 @@ static void gen6_rps_init(struct intel_rps *rps) static bool rps_reset(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + /* force a reset */ rps->power.mode = -1; rps->last_freq = -1; @@ -911,20 +1016,18 @@ static bool rps_reset(struct intel_rps *rps) /* See the Gen9_GT_PM_Programming_Guide doc for the below */ static bool gen9_rps_enable(struct intel_rps *rps) { - struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; /* Program defaults and thresholds for RPS */ - if (IS_GEN(i915, 9)) + if (IS_GEN(gt->i915, 9)) intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(rps->rp1_freq)); - /* 1 second timeout */ - intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, - GT_INTERVAL_FROM_US(i915, 1000000)); - intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; + return rps_reset(rps); } @@ -935,12 +1038,10 @@ static bool gen8_rps_enable(struct intel_rps *rps) intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(rps->rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, - 100000000 / 128); /* 1 second timeout */ - intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; + return rps_reset(rps); } @@ -952,6 +1053,10 @@ static bool gen6_rps_enable(struct intel_rps *rps) intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + return rps_reset(rps); } @@ -1037,6 +1142,10 @@ static bool chv_rps_enable(struct intel_rps *rps) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + /* Setting Fixed Bias */ vlv_punit_get(i915); @@ -1135,6 +1244,9 @@ static bool vlv_rps_enable(struct intel_rps *rps) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_CONT); + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + vlv_punit_get(i915); /* Setting Fixed Bias */ @@ -1193,33 +1305,71 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips) return ips->gfx_power + state2; } +static bool has_busy_stats(struct intel_rps *rps) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, rps_to_gt(rps), id) { + if (!intel_engine_supports_stats(engine)) + return false; + } + + return true; +} + void intel_rps_enable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + bool enabled = false; + + if (!HAS_RPS(i915)) + return; + + intel_gt_check_clock_frequency(rps_to_gt(rps)); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - if (IS_CHERRYVIEW(i915)) - rps->enabled = chv_rps_enable(rps); + if (rps->max_freq <= rps->min_freq) + /* leave disabled, no room for dynamic reclocking */; + else if (IS_CHERRYVIEW(i915)) + enabled = chv_rps_enable(rps); else if (IS_VALLEYVIEW(i915)) - rps->enabled = vlv_rps_enable(rps); + enabled = vlv_rps_enable(rps); else if (INTEL_GEN(i915) >= 9) - rps->enabled = gen9_rps_enable(rps); + enabled = gen9_rps_enable(rps); else if (INTEL_GEN(i915) >= 8) - rps->enabled = gen8_rps_enable(rps); + enabled = gen8_rps_enable(rps); else if (INTEL_GEN(i915) >= 6) - rps->enabled = gen6_rps_enable(rps); + enabled = gen6_rps_enable(rps); else if (IS_IRONLAKE_M(i915)) - rps->enabled = gen5_rps_enable(rps); + enabled = gen5_rps_enable(rps); + else + MISSING_CASE(INTEL_GEN(i915)); intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - if (!rps->enabled) + if (!enabled) return; - drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq); - drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq); + GT_TRACE(rps_to_gt(rps), + "min:%x, max:%x, freq:[%d, %d]\n", + rps->min_freq, rps->max_freq, + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->max_freq)); + + GEM_BUG_ON(rps->max_freq < rps->min_freq); + GEM_BUG_ON(rps->idle_freq > rps->max_freq); - drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq); - drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq); + GEM_BUG_ON(rps->efficient_freq < rps->min_freq); + GEM_BUG_ON(rps->efficient_freq > rps->max_freq); + + if (has_busy_stats(rps)) + intel_rps_set_timer(rps); + else if (INTEL_GEN(i915) >= 6) + intel_rps_set_interrupts(rps); + else + /* Ironlake currently uses intel_ips.ko */ {} + + intel_rps_set_enabled(rps); } static void gen6_rps_disable(struct intel_rps *rps) @@ -1231,7 +1381,9 @@ void intel_rps_disable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - rps->enabled = false; + intel_rps_clear_enabled(rps); + intel_rps_clear_interrupts(rps); + intel_rps_clear_timer(rps); if (INTEL_GEN(i915) >= 6) gen6_rps_disable(rps); @@ -1469,7 +1621,7 @@ static void rps_work(struct work_struct *work) u32 pm_iir = 0; spin_lock_irq(>->irq_lock); - pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events); + pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; client_boost = atomic_read(&rps->num_waiters); spin_unlock_irq(>->irq_lock); @@ -1478,6 +1630,10 @@ static void rps_work(struct work_struct *work) goto out; mutex_lock(&rps->lock); + if (!intel_rps_is_active(rps)) { + mutex_unlock(&rps->lock); + return; + } pm_iir |= vlv_wa_c0_ei(rps, pm_iir); @@ -1487,6 +1643,12 @@ static void rps_work(struct work_struct *work) max = rps->max_freq_softlimit; if (client_boost) max = rps->max_freq; + + GT_TRACE(gt, + "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n", + pm_iir, yesno(client_boost), + adj, new_freq, min, max); + if (client_boost && new_freq < rps->boost_freq) { new_freq = rps->boost_freq; adj = 0; @@ -1518,30 +1680,18 @@ static void rps_work(struct work_struct *work) adj = 0; } - rps->last_adj = adj; - /* - * Limit deboosting and boosting to keep ourselves at the extremes - * when in the respective power modes (i.e. slowly decrease frequencies - * while in the HIGH_POWER zone and slowly increase frequencies while - * in the LOW_POWER zone). On idle, we will hit the timeout and drop - * to the next level quickly, and conversely if busy we expect to - * hit a waitboost and rapidly switch into max power. - */ - if ((adj < 0 && rps->power.mode == HIGH_POWER) || - (adj > 0 && rps->power.mode == LOW_POWER)) - rps->last_adj = 0; - - /* sysfs frequency interfaces may have snuck in while servicing the - * interrupt + * sysfs frequency limits may have snuck in while + * servicing the interrupt */ new_freq += adj; new_freq = clamp_t(int, new_freq, min, max); if (intel_rps_set(rps, new_freq)) { drm_dbg(&i915->drm, "Failed to set new GPU frequency\n"); - rps->last_adj = 0; + adj = 0; } + rps->last_adj = adj; mutex_unlock(&rps->lock); @@ -1561,6 +1711,8 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) if (unlikely(!events)) return; + GT_TRACE(gt, "irq events:%x\n", events); + gen6_gt_pm_mask_irq(gt, events); rps->pm_iir |= events; @@ -1572,10 +1724,12 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) struct intel_gt *gt = rps_to_gt(rps); u32 events; - events = pm_iir & READ_ONCE(rps->pm_events); + events = pm_iir & rps->pm_events; if (events) { spin_lock(>->irq_lock); + GT_TRACE(gt, "irq events:%x\n", events); + gen6_gt_pm_mask_irq(gt, events); rps->pm_iir |= events; @@ -1633,6 +1787,7 @@ void intel_rps_init_early(struct intel_rps *rps) mutex_init(&rps->power.mutex); INIT_WORK(&rps->work, rps_work); + timer_setup(&rps->timer, rps_timer, 0); atomic_set(&rps->num_waiters, 0); } @@ -1689,6 +1844,9 @@ void intel_rps_init(struct intel_rps *rps) if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + + if (INTEL_GEN(i915) >= 6) + rps_disable_interrupts(rps); } u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) @@ -1718,7 +1876,7 @@ static u32 read_cagf(struct intel_rps *rps) freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); } else { - freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1); + freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1); } return intel_rps_get_cagf(rps, freq); @@ -1726,7 +1884,7 @@ static u32 read_cagf(struct intel_rps *rps) u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { - struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm; + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; intel_wakeref_t wakeref; u32 freq = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index dfa98194f3b2..af07fa5b7584 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -36,4 +36,64 @@ void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); +static inline bool intel_rps_is_enabled(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_ENABLED, &rps->flags); +} + +static inline void intel_rps_set_enabled(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_ENABLED, &rps->flags); +} + +static inline void intel_rps_clear_enabled(struct intel_rps *rps) +{ + clear_bit(INTEL_RPS_ENABLED, &rps->flags); +} + +static inline bool intel_rps_is_active(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_ACTIVE, &rps->flags); +} + +static inline void intel_rps_set_active(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_ACTIVE, &rps->flags); +} + +static inline bool intel_rps_clear_active(struct intel_rps *rps) +{ + return test_and_clear_bit(INTEL_RPS_ACTIVE, &rps->flags); +} + +static inline bool intel_rps_has_interrupts(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_INTERRUPTS, &rps->flags); +} + +static inline void intel_rps_set_interrupts(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_INTERRUPTS, &rps->flags); +} + +static inline void intel_rps_clear_interrupts(struct intel_rps *rps) +{ + clear_bit(INTEL_RPS_INTERRUPTS, &rps->flags); +} + +static inline bool intel_rps_uses_timer(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_TIMER, &rps->flags); +} + +static inline void intel_rps_set_timer(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_TIMER, &rps->flags); +} + +static inline void intel_rps_clear_timer(struct intel_rps *rps) +{ + clear_bit(INTEL_RPS_TIMER, &rps->flags); +} + #endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index c2e279154bd5..38083f0402d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -31,6 +31,13 @@ struct intel_rps_ei { u32 media_c0; }; +enum { + INTEL_RPS_ENABLED = 0, + INTEL_RPS_ACTIVE, + INTEL_RPS_INTERRUPTS, + INTEL_RPS_TIMER, +}; + struct intel_rps { struct mutex lock; /* protects enabling and the worker */ @@ -38,9 +45,12 @@ struct intel_rps { * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock */ + struct timer_list timer; struct work_struct work; - bool enabled; - bool active; + unsigned long flags; + + ktime_t pm_timestamp; + u32 pm_interval; u32 pm_iir; /* PM interrupt bits that should never be masked */ diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 3779c2ae0d65..e1fac1b38f27 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -337,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl) return 0; } +void intel_timeline_reset_seqno(const struct intel_timeline *tl) +{ + /* Must be pinned to be writable, and no requests in flight. */ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); +} + void intel_timeline_enter(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; @@ -365,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl) return; spin_lock(&timelines->lock); - if (!atomic_fetch_inc(&tl->active_count)) + if (!atomic_fetch_inc(&tl->active_count)) { + /* + * The HWSP is volatile, and may have been lost while inactive, + * e.g. across suspend/resume. Be paranoid, and ensure that + * the HWSP value matches our seqno so we don't proclaim + * the next request as already complete. + */ + intel_timeline_reset_seqno(tl); list_add_tail(&tl->link, &timelines->active_list); + } spin_unlock(&timelines->lock); } @@ -529,6 +544,8 @@ int intel_timeline_read_hwsp(struct i915_request *from, rcu_read_lock(); cl = rcu_dereference(from->hwsp_cacheline); + if (i915_request_completed(from)) /* confirm cacheline is valid */ + goto unlock; if (unlikely(!i915_active_acquire_if_busy(&cl->active))) goto unlock; /* seqno wrapped and completed! */ if (unlikely(i915_request_completed(from))) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index f5b7eade3809..c8e59a333182 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -84,6 +84,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, void intel_timeline_exit(struct intel_timeline *tl); void intel_timeline_unpin(struct intel_timeline *tl); +void intel_timeline_reset_seqno(const struct intel_timeline *tl); + int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *until, u32 *hwsp_offset); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index e874dfaa5316..b8ed3cbe1277 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -155,7 +155,7 @@ static int live_context_size(void *arg) for_each_engine(engine, gt, id) { struct { - struct drm_i915_gem_object *state; + struct file *state; void *pinned; } saved; diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index c50bb502fe03..242181a5214c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -53,7 +53,13 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_rc6_manual), + SUBTEST(live_rps_clock_interval), + SUBTEST(live_rps_control), + SUBTEST(live_rps_frequency_cs), + SUBTEST(live_rps_frequency_srm), + SUBTEST(live_rps_power), SUBTEST(live_rps_interrupt), + SUBTEST(live_rps_dynamic), SUBTEST(live_gt_resume), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 6f5e35afe1b2..7529df92f6a2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -21,7 +21,8 @@ #include "gem/selftests/mock_context.h" #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) -#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ +#define NUM_GPR 16 +#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ static struct i915_vma *create_scratch(struct intel_gt *gt) { @@ -2791,6 +2792,331 @@ static int live_preempt_gang(void *arg) return 0; } +static struct i915_vma * +create_gpr_user(struct intel_engine_cs *engine, + struct i915_vma *result, + unsigned int offset) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + int i; + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, result->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + i915_vma_put(vma); + return ERR_PTR(err); + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(vma); + return ERR_CAST(cs); + } + + /* All GPR are clear for new contexts. We use GPR(0) as a constant */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, 0); + *cs++ = 1; + + for (i = 1; i < NUM_GPR; i++) { + u64 addr; + + /* + * Perform: GPR[i]++ + * + * As we read and write into the context saved GPR[i], if + * we restart this batch buffer from an earlier point, we + * will repeat the increment and store a value > 1. + */ + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); + + addr = result->node.start + offset + i * sizeof(*cs); + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = CS_GPR(engine, 2 * i); + *cs++ = lower_32_bits(addr); + *cs++ = upper_32_bits(addr); + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = i; + *cs++ = lower_32_bits(result->node.start); + *cs++ = upper_32_bits(result->node.start); + } + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + return vma; +} + +static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, sz); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_ggtt_pin(vma, 0, 0); + if (err) { + i915_vma_put(vma); + return ERR_PTR(err); + } + + return vma; +} + +static struct i915_request * +create_gpr_client(struct intel_engine_cs *engine, + struct i915_vma *global, + unsigned int offset) +{ + struct i915_vma *batch, *vma; + struct intel_context *ce; + struct i915_request *rq; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + vma = i915_vma_instance(global->obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_ce; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_ce; + + batch = create_gpr_user(engine, vma, offset); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_vma; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_batch; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + + i915_vma_lock(batch); + if (!err) + err = i915_request_await_object(rq, batch->obj, false); + if (!err) + err = i915_vma_move_to_active(batch, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + batch->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(batch); + i915_vma_unpin(batch); + + if (!err) + i915_request_get(rq); + i915_request_add(rq); + +out_batch: + i915_vma_put(batch); +out_vma: + i915_vma_unpin(vma); +out_ce: + intel_context_put(ce); + return err ? ERR_PTR(err) : rq; +} + +static int preempt_user(struct intel_engine_cs *engine, + struct i915_vma *global, + int id) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_MAX + }; + struct i915_request *rq; + int err = 0; + u32 *cs; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(global); + *cs++ = 0; + *cs++ = id; + + intel_ring_advance(rq, cs); + + i915_request_get(rq); + i915_request_add(rq); + + engine->schedule(rq, &attr); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int live_preempt_user(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_vma *global; + enum intel_engine_id id; + u32 *result; + int err = 0; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + /* + * In our other tests, we look at preemption in carefully + * controlled conditions in the ringbuffer. Since most of the + * time is spent in user batches, most of our preemptions naturally + * occur there. We want to verify that when we preempt inside a batch + * we continue on from the current instruction and do not roll back + * to the start, or another earlier arbitration point. + * + * To verify this, we create a batch which is a mixture of + * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with + * a few preempting contexts thrown into the mix, we look for any + * repeated instructions (which show up as incorrect values). + */ + + global = create_global(gt, 4096); + if (IS_ERR(global)) + return PTR_ERR(global); + + result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); + if (IS_ERR(result)) { + i915_vma_unpin_and_release(&global, 0); + return PTR_ERR(result); + } + + for_each_engine(engine, gt, id) { + struct i915_request *client[3] = {}; + struct igt_live_test t; + int i; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) + continue; /* we need per-context GPR */ + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + + memset(result, 0, 4096); + + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_request *rq; + + rq = create_gpr_client(engine, global, + NUM_GPR * i * sizeof(u32)); + if (IS_ERR(rq)) + goto end_test; + + client[i] = rq; + } + + /* Continuously preempt the set of 3 running contexts */ + for (i = 1; i <= NUM_GPR; i++) { + err = preempt_user(engine, global, i); + if (err) + goto end_test; + } + + if (READ_ONCE(result[0]) != NUM_GPR) { + pr_err("%s: Failed to release semaphore\n", + engine->name); + err = -EIO; + goto end_test; + } + + for (i = 0; i < ARRAY_SIZE(client); i++) { + int gpr; + + if (i915_request_wait(client[i], 0, HZ / 2) < 0) { + err = -ETIME; + goto end_test; + } + + for (gpr = 1; gpr < NUM_GPR; gpr++) { + if (result[NUM_GPR * i + gpr] != 1) { + pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", + engine->name, + i, gpr, result[NUM_GPR * i + gpr]); + err = -EINVAL; + goto end_test; + } + } + } + +end_test: + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (!client[i]) + break; + + i915_request_put(client[i]); + } + + /* Flush the semaphores on error */ + smp_store_mb(result[0], -1); + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); + return err; +} + static int live_preempt_timeout(void *arg) { struct intel_gt *gt = arg; @@ -3998,6 +4324,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_chain_preempt), SUBTEST(live_preempt_gang), SUBTEST(live_preempt_timeout), + SUBTEST(live_preempt_user), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), @@ -4125,13 +4452,12 @@ static int live_lrc_layout(void *arg) if (!engine->default_state) continue; - hw = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); + hw = shmem_pin_map(engine->default_state); if (IS_ERR(hw)) { err = PTR_ERR(hw); break; } - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + hw += LRC_STATE_OFFSET / sizeof(*hw); execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), engine->kernel_context, @@ -4198,7 +4524,7 @@ static int live_lrc_layout(void *arg) hexdump(lrc, PAGE_SIZE); } - i915_gem_object_unpin_map(engine->default_state); + shmem_unpin_map(engine->default_state, hw); if (err) break; } @@ -4267,10 +4593,35 @@ static int live_lrc_fixed(void *arg) "BB_STATE" }, { + i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), + lrc_ring_wa_bb_per_ctx(engine), + "RING_BB_PER_CTX_PTR" + }, + { + i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), + lrc_ring_indirect_ptr(engine), + "RING_INDIRECT_CTX_PTR" + }, + { + i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), + lrc_ring_indirect_offset(engine), + "RING_INDIRECT_CTX_OFFSET" + }, + { i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), CTX_TIMESTAMP - 1, "RING_CTX_TIMESTAMP" }, + { + i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), + lrc_ring_gpr0(engine), + "RING_CS_GPR0" + }, + { + i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), + lrc_ring_cmd_buf_cctl(engine), + "RING_CMD_BUF_CCTL" + }, { }, }, *t; u32 *hw; @@ -4278,13 +4629,12 @@ static int live_lrc_fixed(void *arg) if (!engine->default_state) continue; - hw = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); + hw = shmem_pin_map(engine->default_state); if (IS_ERR(hw)) { err = PTR_ERR(hw); break; } - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + hw += LRC_STATE_OFFSET / sizeof(*hw); for (t = tbl; t->name; t++) { int dw = find_offset(hw, t->reg); @@ -4300,7 +4650,7 @@ static int live_lrc_fixed(void *arg) } } - i915_gem_object_unpin_map(engine->default_state); + shmem_unpin_map(engine->default_state, hw); } return err; @@ -4870,7 +5220,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) x = 0; dw = 0; hw = ce->engine->pinned_default_state; - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5023,7 +5373,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) dw = 0; hw = ce->engine->pinned_default_state; - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5147,12 +5497,12 @@ static int compare_isolation(struct intel_engine_cs *engine, err = PTR_ERR(lrc); goto err_B1; } - lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + lrc += LRC_STATE_OFFSET / sizeof(*hw); x = 0; dw = 0; hw = engine->pinned_default_state; - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5363,6 +5713,161 @@ static int live_lrc_isolation(void *arg) return err; } +static int indirect_ctx_submit_req(struct intel_context *ce) +{ + struct i915_request *rq; + int err = 0; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + + i915_request_put(rq); + + return err; +} + +#define CTX_BB_CANARY_OFFSET (3 * 1024) +#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) + +static u32 * +emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(RING_START(0)); + *cs++ = i915_ggtt_offset(ce->state) + + context_wa_bb_offset(ce) + + CTX_BB_CANARY_OFFSET; + *cs++ = 0; + + return cs; +} + +static void +indirect_ctx_bb_setup(struct intel_context *ce) +{ + u32 *cs = context_indirect_bb(ce); + + cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; + + setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); +} + +static bool check_ring_start(struct intel_context *ce) +{ + const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - + LRC_STATE_OFFSET + context_wa_bb_offset(ce); + + if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) + return true; + + pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n", + ctx_bb[CTX_BB_CANARY_INDEX], + ce->lrc_reg_state[CTX_RING_START]); + + return false; +} + +static int indirect_ctx_bb_check(struct intel_context *ce) +{ + int err; + + err = indirect_ctx_submit_req(ce); + if (err) + return err; + + if (!check_ring_start(ce)) + return -EINVAL; + + return 0; +} + +static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) +{ + struct intel_context *a, *b; + int err; + + a = intel_context_create(engine); + if (IS_ERR(a)) + return PTR_ERR(a); + err = intel_context_pin(a); + if (err) + goto put_a; + + b = intel_context_create(engine); + if (IS_ERR(b)) { + err = PTR_ERR(b); + goto unpin_a; + } + err = intel_context_pin(b); + if (err) + goto put_b; + + /* We use the already reserved extra page in context state */ + if (!a->wa_bb_page) { + GEM_BUG_ON(b->wa_bb_page); + GEM_BUG_ON(INTEL_GEN(engine->i915) == 12); + goto unpin_b; + } + + /* + * In order to test that our per context bb is truly per context, + * and executes at the intended spot on context restoring process, + * make the batch store the ring start value to memory. + * As ring start is restored apriori of starting the indirect ctx bb and + * as it will be different for each context, it fits to this purpose. + */ + indirect_ctx_bb_setup(a); + indirect_ctx_bb_setup(b); + + err = indirect_ctx_bb_check(a); + if (err) + goto unpin_b; + + err = indirect_ctx_bb_check(b); + +unpin_b: + intel_context_unpin(b); +put_b: + intel_context_put(b); +unpin_a: + intel_context_unpin(a); +put_a: + intel_context_put(a); + + return err; +} + +static int live_lrc_indirect_ctx_bb(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_lrc_indirect_ctx_bb(engine); + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + + if (err) + break; + } + + return err; +} + static void garbage_reset(struct intel_engine_cs *engine, struct i915_request *rq) { @@ -5394,7 +5899,7 @@ static struct i915_request *garbage(struct intel_context *ce, prandom_bytes_state(prng, ce->lrc_reg_state, ce->engine->context_size - - LRC_STATE_PN * PAGE_SIZE); + LRC_STATE_OFFSET); rq = intel_context_create_request(ce); if (IS_ERR(rq)) { @@ -5598,6 +6103,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_timestamp), SUBTEST(live_lrc_garbage), SUBTEST(live_pphwsp_runtime), + SUBTEST(live_lrc_indirect_ctx_bb), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 08c3dbd41b12..2dc460624bbc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -11,22 +11,7 @@ #include "selftest_rc6.h" #include "selftests/i915_random.h" - -static u64 energy_uJ(struct intel_rc6 *rc6) -{ - unsigned long long power; - u32 units; - - if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) - return 0; - - units = (power & 0x1f00) >> 8; - - if (rdmsrl_safe(MSR_PP1_ENERGY_STATUS, &power)) - return 0; - - return (1000000 * power) >> units; /* convert to uJ */ -} +#include "selftests/librapl.h" static u64 rc6_residency(struct intel_rc6 *rc6) { @@ -74,9 +59,9 @@ int live_rc6_manual(void *arg) res[0] = rc6_residency(rc6); dt = ktime_get(); - rc0_power = energy_uJ(rc6); + rc0_power = librapl_energy_uJ(); msleep(250); - rc0_power = energy_uJ(rc6) - rc0_power; + rc0_power = librapl_energy_uJ() - rc0_power; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); if ((res[1] - res[0]) >> 10) { @@ -99,9 +84,9 @@ int live_rc6_manual(void *arg) res[0] = rc6_residency(rc6); intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); dt = ktime_get(); - rc6_power = energy_uJ(rc6); + rc6_power = librapl_energy_uJ(); msleep(100); - rc6_power = energy_uJ(rc6) - rc6_power; + rc6_power = librapl_energy_uJ() - rc6_power; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); if (res[1] == res[0]) { diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 26aadc2ae3be..b89a7d7611f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -3,17 +3,879 @@ * Copyright © 2020 Intel Corporation */ +#include <linux/pm_qos.h> +#include <linux/sort.h> + +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "intel_gpu_commands.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_rc6.h" #include "selftest_rps.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_spinner.h" +#include "selftests/librapl.h" + +/* Try to isolate the impact of cstates from determing frequency response */ +#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ + +static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine) +{ + unsigned long old; + + old = fetch_and_zero(&engine->props.heartbeat_interval_ms); + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); + + return old; +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} static void dummy_rps_work(struct work_struct *wrk) { } +static int cmp_u64(const void *A, const void *B) +{ + const u64 *a = A, *b = B; + + if (a < b) + return -1; + else if (a > b) + return 1; + else + return 0; +} + +static struct i915_vma * +create_spin_counter(struct intel_engine_cs *engine, + struct i915_address_space *vm, + bool srm, + u32 **cancel, + u32 **counter) +{ + enum { + COUNT, + INC, + __NGPR__, + }; +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x) + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned long end; + u32 *base, *cs; + int loop, i; + int err; + + obj = i915_gem_object_create_internal(vm->i915, 64 << 10); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + end = obj->base.size / sizeof(u32) - 1; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + i915_vma_put(vma); + return ERR_PTR(err); + } + + base = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(base)) { + i915_gem_object_put(obj); + return ERR_CAST(base); + } + cs = base; + + *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2); + for (i = 0; i < __NGPR__; i++) { + *cs++ = i915_mmio_reg_offset(CS_GPR(i)); + *cs++ = 0; + *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4; + *cs++ = 0; + } + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(INC)); + *cs++ = 1; + + loop = cs - base; + + /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */ + for (i = 0; i < 1024; i++) { + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU); + + if (srm) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT)); + *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs)); + *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs)); + } + } + + *cs++ = MI_BATCH_BUFFER_START_GEN8; + *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs)); + *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs)); + GEM_BUG_ON(cs - base > end); + + i915_gem_object_flush_map(obj); + + *cancel = base + loop; + *counter = srm ? memset32(base + end, 0, 1) : NULL; + return vma; +} + +static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms) +{ + u8 history[64], i; + unsigned long end; + int sleep; + + i = 0; + memset(history, freq, sizeof(history)); + sleep = 20; + + /* The PCU does not change instantly, but drifts towards the goal? */ + end = jiffies + msecs_to_jiffies(timeout_ms); + do { + u8 act; + + act = read_cagf(rps); + if (time_after(jiffies, end)) + return act; + + /* Target acquired */ + if (act == freq) + return act; + + /* Any change within the last N samples? */ + if (!memchr_inv(history, act, sizeof(history))) + return act; + + history[i] = act; + i = (i + 1) % ARRAY_SIZE(history); + + usleep_range(sleep, 2 * sleep); + sleep *= 2; + if (sleep > timeout_ms * 20) + sleep = timeout_ms * 20; + } while (1); +} + +static u8 rps_set_check(struct intel_rps *rps, u8 freq) +{ + mutex_lock(&rps->lock); + GEM_BUG_ON(!intel_rps_is_active(rps)); + intel_rps_set(rps, freq); + GEM_BUG_ON(rps->last_freq != freq); + mutex_unlock(&rps->lock); + + return wait_for_freq(rps, freq, 50); +} + +static void show_pstate_limits(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (IS_BROXTON(i915)) { + pr_info("P_STATE_CAP[%x]: 0x%08x\n", + i915_mmio_reg_offset(BXT_RP_STATE_CAP), + intel_uncore_read(rps_to_uncore(rps), + BXT_RP_STATE_CAP)); + } else if (IS_GEN(i915, 9)) { + pr_info("P_STATE_LIMITS[%x]: 0x%08x\n", + i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS), + intel_uncore_read(rps_to_uncore(rps), + GEN9_RP_STATE_LIMITS)); + } +} + +int live_rps_clock_interval(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + void (*saved_work)(struct work_struct *wrk); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + intel_gt_pm_get(gt); + intel_rps_disable(>->rps); + + intel_gt_check_clock_frequency(gt); + + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + ktime_t dt; + u32 cycles; + + if (!intel_engine_can_store_dword(engine)) + continue; + + saved_heartbeat = engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + engine_heartbeat_enable(engine, saved_heartbeat); + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: RPS spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0); + + /* Set the evaluation interval to infinity! */ + intel_uncore_write_fw(gt->uncore, + GEN6_RP_UP_EI, 0xffffffff); + intel_uncore_write_fw(gt->uncore, + GEN6_RP_UP_THRESHOLD, 0xffffffff); + + intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, + GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG); + + if (wait_for(intel_uncore_read_fw(gt->uncore, + GEN6_RP_CUR_UP_EI), + 10)) { + /* Just skip the test; assume lack of HW support */ + pr_notice("%s: rps evaluation interval not ticking\n", + engine->name); + err = -ENODEV; + } else { + preempt_disable(); + dt = ktime_get(); + cycles = -intel_uncore_read_fw(gt->uncore, + GEN6_RP_CUR_UP_EI); + udelay(1000); + dt = ktime_sub(ktime_get(), dt); + cycles += intel_uncore_read_fw(gt->uncore, + GEN6_RP_CUR_UP_EI); + preempt_enable(); + } + + intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + + if (err == 0) { + u64 time = intel_gt_pm_interval_to_ns(gt, cycles); + u32 expected = + intel_gt_ns_to_pm_interval(gt, ktime_to_ns(dt)); + + pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n", + engine->name, cycles, time, ktime_to_ns(dt), expected, + gt->clock_frequency / 1000); + + if (10 * time < 8 * ktime_to_ns(dt) || + 8 * time > 10 * ktime_to_ns(dt)) { + pr_err("%s: rps clock time does not match walltime!\n", + engine->name); + err = -EINVAL; + } + + if (10 * expected < 8 * cycles || + 8 * expected > 10 * cycles) { + pr_err("%s: walltime does not match rps clock ticks!\n", + engine->name); + err = -EINVAL; + } + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + break; /* once is enough */ + } + + intel_rps_enable(>->rps); + intel_gt_pm_put(gt); + + igt_spinner_fini(&spin); + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + if (err == -ENODEV) /* skipped, don't report a fail */ + err = 0; + + return err; +} + +int live_rps_control(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + void (*saved_work)(struct work_struct *wrk); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * Check that the actual frequency matches our requested frequency, + * to verify our control mechanism. We have to be careful that the + * PCU may throttle the GPU in which case the actual frequency used + * will be lowered than requested. + */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */ + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + ktime_t min_dt, max_dt; + int f, limit; + int min, max; + + if (!intel_engine_can_store_dword(engine)) + continue; + + saved_heartbeat = engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: RPS spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { + pr_err("%s: could not set minimum frequency [%x], only %x!\n", + engine->name, rps->min_freq, read_cagf(rps)); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + show_pstate_limits(rps); + err = -EINVAL; + break; + } + + for (f = rps->min_freq + 1; f < rps->max_freq; f++) { + if (rps_set_check(rps, f) < f) + break; + } + + limit = rps_set_check(rps, f); + + if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { + pr_err("%s: could not restore minimum frequency [%x], only %x!\n", + engine->name, rps->min_freq, read_cagf(rps)); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + show_pstate_limits(rps); + err = -EINVAL; + break; + } + + max_dt = ktime_get(); + max = rps_set_check(rps, limit); + max_dt = ktime_sub(ktime_get(), max_dt); + + min_dt = ktime_get(); + min = rps_set_check(rps, rps->min_freq); + min_dt = ktime_sub(ktime_get(), min_dt); + + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + + pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", + engine->name, + rps->min_freq, intel_gpu_freq(rps, rps->min_freq), + rps->max_freq, intel_gpu_freq(rps, rps->max_freq), + limit, intel_gpu_freq(rps, limit), + min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); + + if (limit == rps->min_freq) { + pr_err("%s: GPU throttled to minimum!\n", + engine->name); + show_pstate_limits(rps); + err = -ENODEV; + break; + } + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + intel_gt_pm_put(gt); + + igt_spinner_fini(&spin); + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + return err; +} + +static void show_pcu_config(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + unsigned int max_gpu_freq, min_gpu_freq; + intel_wakeref_t wakeref; + int gpu_freq; + + if (!HAS_LLC(i915)) + return; + + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (INTEL_GEN(i915) >= 9) { + /* Convert GT frequency to 50 HZ units */ + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; + } + + wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm); + + pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing"); + for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { + int ia_freq = gpu_freq; + + sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &ia_freq, NULL); + + pr_info("%5d %5d %5d\n", + gpu_freq * 50, + ((ia_freq >> 0) & 0xff) * 100, + ((ia_freq >> 8) & 0xff) * 100); + } + + intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref); +} + +static u64 __measure_frequency(u32 *cntr, int duration_ms) +{ + u64 dc, dt; + + dt = ktime_get(); + dc = READ_ONCE(*cntr); + usleep_range(1000 * duration_ms, 2000 * duration_ms); + dc = READ_ONCE(*cntr) - dc; + dt = ktime_get() - dt; + + return div64_u64(1000 * 1000 * dc, dt); +} + +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq) +{ + u64 x[5]; + int i; + + *freq = rps_set_check(rps, *freq); + for (i = 0; i < 5; i++) + x[i] = __measure_frequency(cntr, 2); + *freq = (*freq + read_cagf(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + +static u64 __measure_cs_frequency(struct intel_engine_cs *engine, + int duration_ms) +{ + u64 dc, dt; + + dt = ktime_get(); + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)); + usleep_range(1000 * duration_ms, 2000 * duration_ms); + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc; + dt = ktime_get() - dt; + + return div64_u64(1000 * 1000 * dc, dt); +} + +static u64 measure_cs_frequency_at(struct intel_rps *rps, + struct intel_engine_cs *engine, + int *freq) +{ + u64 x[5]; + int i; + + *freq = rps_set_check(rps, *freq); + for (i = 0; i < 5; i++) + x[i] = __measure_cs_frequency(engine, 2); + *freq = (*freq + read_cagf(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d) +{ + return f_d * x > f_n * y && f_n * x < f_d * y; +} + +int live_rps_frequency_cs(void *arg) +{ + void (*saved_work)(struct work_struct *wrk); + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + struct pm_qos_request qos; + enum intel_engine_id id; + int err = 0; + + /* + * The premise is that the GPU does change freqency at our behest. + * Let's check there is a correspondence between the requested + * frequency, the actual frequency, and the observed clock rate. + */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ + return 0; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_add_request(&qos, CPU_LATENCY); + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cancel, *cntr; + struct { + u64 count; + int freq; + } min, max; + + saved_heartbeat = engine_heartbeat_disable(engine); + + vma = create_spin_counter(engine, + engine->kernel_context->vm, false, + &cancel, &cntr); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + engine_heartbeat_enable(engine, saved_heartbeat); + break; + } + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_vma; + + if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)), + 10)) { + pr_err("%s: timed loop did not start\n", + engine->name); + goto err_vma; + } + + min.freq = rps->min_freq; + min.count = measure_cs_frequency_at(rps, engine, &min.freq); + + max.freq = rps->max_freq; + max.count = measure_cs_frequency_at(rps, engine, &max.freq); + + pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", + engine->name, + min.count, intel_gpu_freq(rps, min.freq), + max.count, intel_gpu_freq(rps, max.freq), + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, + max.freq * min.count)); + + if (!scaled_within(max.freq * min.count, + min.freq * max.count, + 2, 3)) { + int f; + + pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n", + engine->name, + max.freq * min.count, + min.freq * max.count); + show_pcu_config(rps); + + for (f = min.freq + 1; f <= rps->max_freq; f++) { + int act = f; + u64 count; + + count = measure_cs_frequency_at(rps, engine, &act); + if (act < f) + break; + + pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", + engine->name, + act, intel_gpu_freq(rps, act), count, + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, + act * min.count)); + + f = act; /* may skip ahead [pcu granularity] */ + } + + err = -EINVAL; + } + +err_vma: + *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(vma->obj); + i915_vma_unpin(vma); + i915_vma_put(vma); + + engine_heartbeat_enable(engine, saved_heartbeat); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_remove_request(&qos); + + return err; +} + +int live_rps_frequency_srm(void *arg) +{ + void (*saved_work)(struct work_struct *wrk); + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + struct pm_qos_request qos; + enum intel_engine_id id; + int err = 0; + + /* + * The premise is that the GPU does change freqency at our behest. + * Let's check there is a correspondence between the requested + * frequency, the actual frequency, and the observed clock rate. + */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ + return 0; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_add_request(&qos, CPU_LATENCY); + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cancel, *cntr; + struct { + u64 count; + int freq; + } min, max; + + saved_heartbeat = engine_heartbeat_disable(engine); + + vma = create_spin_counter(engine, + engine->kernel_context->vm, true, + &cancel, &cntr); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + engine_heartbeat_enable(engine, saved_heartbeat); + break; + } + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_vma; + + if (wait_for(READ_ONCE(*cntr), 10)) { + pr_err("%s: timed loop did not start\n", + engine->name); + goto err_vma; + } + + min.freq = rps->min_freq; + min.count = measure_frequency_at(rps, cntr, &min.freq); + + max.freq = rps->max_freq; + max.count = measure_frequency_at(rps, cntr, &max.freq); + + pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", + engine->name, + min.count, intel_gpu_freq(rps, min.freq), + max.count, intel_gpu_freq(rps, max.freq), + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, + max.freq * min.count)); + + if (!scaled_within(max.freq * min.count, + min.freq * max.count, + 1, 2)) { + int f; + + pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n", + engine->name, + max.freq * min.count, + min.freq * max.count); + show_pcu_config(rps); + + for (f = min.freq + 1; f <= rps->max_freq; f++) { + int act = f; + u64 count; + + count = measure_frequency_at(rps, cntr, &act); + if (act < f) + break; + + pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", + engine->name, + act, intel_gpu_freq(rps, act), count, + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, + act * min.count)); + + f = act; /* may skip ahead [pcu granularity] */ + } + + err = -EINVAL; + } + +err_vma: + *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(vma->obj); + i915_vma_unpin(vma); + i915_vma_put(vma); + + engine_heartbeat_enable(engine, saved_heartbeat); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_remove_request(&qos); + + return err; +} + +static void sleep_for_ei(struct intel_rps *rps, int timeout_us) +{ + /* Flush any previous EI */ + usleep_range(timeout_us, 2 * timeout_us); + + /* Reset the interrupt status */ + rps_disable_interrupts(rps); + GEM_BUG_ON(rps->pm_iir); + rps_enable_interrupts(rps); + + /* And then wait for the timeout, for real this time */ + usleep_range(2 * timeout_us, 3 * timeout_us); +} + static int __rps_up_interrupt(struct intel_rps *rps, struct intel_engine_cs *engine, struct igt_spinner *spin) @@ -25,11 +887,7 @@ static int __rps_up_interrupt(struct intel_rps *rps, if (!intel_engine_can_store_dword(engine)) return 0; - intel_gt_pm_wait_for_idle(engine->gt); - GEM_BUG_ON(rps->active); - - rps->pm_iir = 0; - rps->cur_freq = rps->min_freq; + rps_set_check(rps, rps->min_freq); rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) @@ -46,7 +904,7 @@ static int __rps_up_interrupt(struct intel_rps *rps, return -EIO; } - if (!rps->active) { + if (!intel_rps_is_active(rps)) { pr_err("%s: RPS not enabled on starting spinner\n", engine->name); igt_spinner_end(spin); @@ -69,9 +927,10 @@ static int __rps_up_interrupt(struct intel_rps *rps, } timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI); - timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout); + timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout); + timeout = DIV_ROUND_UP(timeout, 1000); - usleep_range(2 * timeout, 3 * timeout); + sleep_for_ei(rps, timeout); GEM_BUG_ON(i915_request_completed(rq)); igt_spinner_end(spin); @@ -92,7 +951,6 @@ static int __rps_up_interrupt(struct intel_rps *rps, return -EINVAL; } - intel_gt_pm_wait_for_idle(engine->gt); return 0; } @@ -102,10 +960,7 @@ static int __rps_down_interrupt(struct intel_rps *rps, struct intel_uncore *uncore = engine->uncore; u32 timeout; - mutex_lock(&rps->lock); - GEM_BUG_ON(!rps->active); - intel_rps_set(rps, rps->max_freq); - mutex_unlock(&rps->lock); + rps_set_check(rps, rps->max_freq); if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) { pr_err("%s: RPS did not register DOWN interrupt\n", @@ -120,18 +975,10 @@ static int __rps_down_interrupt(struct intel_rps *rps, } timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); - timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout); + timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout); + timeout = DIV_ROUND_UP(timeout, 1000); - /* Flush any previous EI */ - usleep_range(timeout, 2 * timeout); - - /* Reset the interrupt status */ - rps_disable_interrupts(rps); - GEM_BUG_ON(rps->pm_iir); - rps_enable_interrupts(rps); - - /* And then wait for the timeout, for real this time */ - usleep_range(2 * timeout, 3 * timeout); + sleep_for_ei(rps, timeout); if (rps->cur_freq != rps->max_freq) { pr_err("%s: Frequency unexpectedly changed [down], now %d!\n", @@ -170,7 +1017,7 @@ int live_rps_interrupt(void *arg) * First, let's check whether or not we are receiving interrupts. */ - if (!rps->enabled || rps->max_freq <= rps->min_freq) + if (!intel_rps_has_interrupts(rps)) return 0; intel_gt_pm_get(gt); @@ -191,20 +1038,33 @@ int live_rps_interrupt(void *arg) for_each_engine(engine, gt, id) { /* Keep the engine busy with a spinner; expect an UP! */ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) { + unsigned long saved_heartbeat; + + intel_gt_pm_wait_for_idle(engine->gt); + GEM_BUG_ON(intel_rps_is_active(rps)); + + saved_heartbeat = engine_heartbeat_disable(engine); + err = __rps_up_interrupt(rps, engine, &spin); + + engine_heartbeat_enable(engine, saved_heartbeat); if (err) goto out; + + intel_gt_pm_wait_for_idle(engine->gt); } /* Keep the engine awake but idle and check for DOWN */ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { - intel_engine_pm_get(engine); + unsigned long saved_heartbeat; + + saved_heartbeat = engine_heartbeat_disable(engine); intel_rc6_disable(>->rc6); err = __rps_down_interrupt(rps, engine); intel_rc6_enable(>->rc6); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, saved_heartbeat); if (err) goto out; } @@ -221,3 +1081,223 @@ out: return err; } + +static u64 __measure_power(int duration_ms) +{ + u64 dE, dt; + + dt = ktime_get(); + dE = librapl_energy_uJ(); + usleep_range(1000 * duration_ms, 2000 * duration_ms); + dE = librapl_energy_uJ() - dE; + dt = ktime_get() - dt; + + return div64_u64(1000 * 1000 * dE, dt); +} + +static u64 measure_power_at(struct intel_rps *rps, int *freq) +{ + u64 x[5]; + int i; + + *freq = rps_set_check(rps, *freq); + for (i = 0; i < 5; i++) + x[i] = __measure_power(5); + *freq = (*freq + read_cagf(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + +int live_rps_power(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + void (*saved_work)(struct work_struct *wrk); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * Our fundamental assumption is that running at lower frequency + * actually saves power. Let's see if our RAPL measurement support + * that theory. + */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (!librapl_energy_uJ()) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + struct { + u64 power; + int freq; + } min, max; + + if (!intel_engine_can_store_dword(engine)) + continue; + + saved_heartbeat = engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + engine_heartbeat_enable(engine, saved_heartbeat); + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: RPS spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + max.freq = rps->max_freq; + max.power = measure_power_at(rps, &max.freq); + + min.freq = rps->min_freq; + min.power = measure_power_at(rps, &min.freq); + + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + + pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", + engine->name, + min.power, intel_gpu_freq(rps, min.freq), + max.power, intel_gpu_freq(rps, max.freq)); + + if (10 * min.freq >= 9 * max.freq) { + pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n", + min.freq, intel_gpu_freq(rps, min.freq), + max.freq, intel_gpu_freq(rps, max.freq)); + continue; + } + + if (11 * min.power > 10 * max.power) { + pr_err("%s: did not conserve power when setting lower frequency!\n", + engine->name); + err = -EINVAL; + break; + } + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + igt_spinner_fini(&spin); + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + return err; +} + +int live_rps_dynamic(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * We've looked at the bascs, and have established that we + * can change the clock frequency and that the HW will generate + * interrupts based on load. Now we check how we integrate those + * moving parts into dynamic reclocking based on load. + */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct i915_request *rq; + struct { + ktime_t dt; + u8 freq; + } min, max; + + if (!intel_engine_can_store_dword(engine)) + continue; + + intel_gt_pm_wait_for_idle(gt); + GEM_BUG_ON(intel_rps_is_active(rps)); + rps->cur_freq = rps->min_freq; + + intel_engine_pm_get(engine); + intel_rc6_disable(>->rc6); + GEM_BUG_ON(rps->last_freq != rps->min_freq); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + i915_request_add(rq); + + max.dt = ktime_get(); + max.freq = wait_for_freq(rps, rps->max_freq, 500); + max.dt = ktime_sub(ktime_get(), max.dt); + + igt_spinner_end(&spin); + + min.dt = ktime_get(); + min.freq = wait_for_freq(rps, rps->min_freq, 2000); + min.dt = ktime_sub(ktime_get(), min.dt); + + pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n", + engine->name, + max.freq, intel_gpu_freq(rps, max.freq), + ktime_to_ns(max.dt), + min.freq, intel_gpu_freq(rps, min.freq), + ktime_to_ns(min.dt)); + if (min.freq >= max.freq) { + pr_err("%s: dynamic reclocking of spinner failed\n!", + engine->name); + err = -EINVAL; + } + +err: + intel_rc6_enable(>->rc6); + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h index abba66420996..6e82a631cfa1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.h +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h @@ -6,6 +6,12 @@ #ifndef SELFTEST_RPS_H #define SELFTEST_RPS_H +int live_rps_control(void *arg); +int live_rps_clock_interval(void *arg); +int live_rps_frequency_cs(void *arg); +int live_rps_frequency_srm(void *arg); +int live_rps_power(void *arg); int live_rps_interrupt(void *arg); +int live_rps_dynamic(void *arg); #endif /* SELFTEST_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c new file mode 100644 index 000000000000..43c7acbdc79d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/shmem_fs.h> + +#include "gem/i915_gem_object.h" +#include "shmem_utils.h" + +struct file *shmem_create_from_data(const char *name, void *data, size_t len) +{ + struct file *file; + int err; + + file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE); + if (IS_ERR(file)) + return file; + + err = shmem_write(file, 0, data, len); + if (err) { + fput(file); + return ERR_PTR(err); + } + + return file; +} + +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) +{ + struct file *file; + void *ptr; + + if (obj->ops == &i915_gem_shmem_ops) { + file = obj->base.filp; + atomic_long_inc(&file->f_count); + return file; + } + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(ptr)) + return ERR_CAST(ptr); + + file = shmem_create_from_data("", ptr, obj->base.size); + i915_gem_object_unpin_map(obj); + + return file; +} + +static size_t shmem_npte(struct file *file) +{ + return file->f_mapping->host->i_size >> PAGE_SHIFT; +} + +static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte) +{ + unsigned long pfn; + + vunmap(ptr); + + for (pfn = 0; pfn < n_pte; pfn++) { + struct page *page; + + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, + GFP_KERNEL); + if (!WARN_ON(IS_ERR(page))) { + put_page(page); + put_page(page); + } + } +} + +void *shmem_pin_map(struct file *file) +{ + const size_t n_pte = shmem_npte(file); + pte_t *stack[32], **ptes, **mem; + struct vm_struct *area; + unsigned long pfn; + + mem = stack; + if (n_pte > ARRAY_SIZE(stack)) { + mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); + if (!mem) + return NULL; + } + + area = alloc_vm_area(n_pte << PAGE_SHIFT, mem); + if (!area) { + if (mem != stack) + kvfree(mem); + return NULL; + } + + ptes = mem; + for (pfn = 0; pfn < n_pte; pfn++) { + struct page *page; + + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, + GFP_KERNEL); + if (IS_ERR(page)) + goto err_page; + + **ptes++ = mk_pte(page, PAGE_KERNEL); + } + + if (mem != stack) + kvfree(mem); + + mapping_set_unevictable(file->f_mapping); + return area->addr; + +err_page: + if (mem != stack) + kvfree(mem); + + __shmem_unpin_map(file, area->addr, pfn); + return NULL; +} + +void shmem_unpin_map(struct file *file, void *ptr) +{ + mapping_clear_unevictable(file->f_mapping); + __shmem_unpin_map(file, ptr, shmem_npte(file)); +} + +static int __shmem_rw(struct file *file, loff_t off, + void *ptr, size_t len, + bool write) +{ + unsigned long pfn; + + for (pfn = off >> PAGE_SHIFT; len; pfn++) { + unsigned int this = + min_t(size_t, PAGE_SIZE - offset_in_page(off), len); + struct page *page; + void *vaddr; + + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, + GFP_KERNEL); + if (IS_ERR(page)) + return PTR_ERR(page); + + vaddr = kmap(page); + if (write) + memcpy(vaddr + offset_in_page(off), ptr, this); + else + memcpy(ptr, vaddr + offset_in_page(off), this); + kunmap(page); + put_page(page); + + len -= this; + ptr += this; + off = 0; + } + + return 0; +} + +int shmem_read(struct file *file, loff_t off, void *dst, size_t len) +{ + return __shmem_rw(file, off, dst, len, false); +} + +int shmem_write(struct file *file, loff_t off, void *src, size_t len) +{ + return __shmem_rw(file, off, src, len, true); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "st_shmem_utils.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h new file mode 100644 index 000000000000..c1669170c351 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shmem_utils.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef SHMEM_UTILS_H +#define SHMEM_UTILS_H + +#include <linux/types.h> + +struct drm_i915_gem_object; +struct file; + +struct file *shmem_create_from_data(const char *name, void *data, size_t len); +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj); + +void *shmem_pin_map(struct file *file); +void shmem_unpin_map(struct file *file, void *ptr); + +int shmem_read(struct file *file, loff_t off, void *dst, size_t len); +int shmem_write(struct file *file, loff_t off, void *src, size_t len); + +#endif /* SHMEM_UTILS_H */ diff --git a/drivers/gpu/drm/i915/gt/st_shmem_utils.c b/drivers/gpu/drm/i915/gt/st_shmem_utils.c new file mode 100644 index 000000000000..b279fe88b70e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/st_shmem_utils.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* Just a quick and causal check of the shmem_utils API */ + +static int igt_shmem_basic(void *ignored) +{ + u32 datum = 0xdeadbeef, result; + struct file *file; + u32 *map; + int err; + + file = shmem_create_from_data("mock", &datum, sizeof(datum)); + if (IS_ERR(file)) + return PTR_ERR(file); + + result = 0; + err = shmem_read(file, 0, &result, sizeof(result)); + if (err) + goto out_file; + + if (result != datum) { + pr_err("Incorrect read back from shmemfs: %x != %x\n", + result, datum); + err = -EINVAL; + goto out_file; + } + + result = 0xc0ffee; + err = shmem_write(file, 0, &result, sizeof(result)); + if (err) + goto out_file; + + map = shmem_pin_map(file); + if (!map) { + err = -ENOMEM; + goto out_file; + } + + if (*map != result) { + pr_err("Incorrect read back via mmap of last write: %x != %x\n", + *map, result); + err = -EINVAL; + goto out_map; + } + +out_map: + shmem_unpin_map(file, map); +out_file: + fput(file); + return err; +} + +int shmem_utils_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_shmem_basic), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fe7778c28d2d..aa6d56e25a10 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); + u32 ctx_desc = rq->context->lrc.ccid; u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); guc_wq_item_append(guc, engine->guc_id, ctx_desc, diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2a4b23f8aa74..d2b0d85b39bc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2341,12 +2341,27 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, { const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; int ret; + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_engine_cs *engine; + int i; if (bytes != 4 && bytes != 8) return -EINVAL; off -= info->gtt_start_offset; ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); + + /* if ggtt of last submitted context is written, + * that context is probably got unpinned. + * Set last shadowed ctx to invalid. + */ + for_each_engine(engine, vgpu->gvt->gt, i) { + if (!s->last_ctx[i].valid) + continue; + + if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift)) + s->last_ctx[i].valid = false; + } return ret; } diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 9e1787867894..c7c561237883 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -31,7 +31,6 @@ */ #include <linux/types.h> -#include <xen/xen.h> #include <linux/kthread.h> #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 58c2c7932e3f..a4a6db6b7f90 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -163,6 +163,11 @@ struct intel_vgpu_submission { const struct intel_vgpu_submission_ops *ops; int virtual_submission_interface; bool active; + struct { + u32 lrca; + bool valid; + u64 ring_context_gpa; + } last_ctx[I915_NUM_ENGINES]; }; struct intel_vgpu { diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index b17c4a1599cd..b79da5124f83 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -79,6 +79,4 @@ struct intel_gvt_mpt { bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); }; -extern struct intel_gvt_mpt xengt_mpt; - #endif /* _GVT_HYPERCALL_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index cb11c3184085..35ad540622ac 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -128,16 +128,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; - struct drm_i915_gem_object *ctx_obj = - workload->req->context->state->obj; + struct intel_context *ctx = workload->req->context; struct execlist_ring_context *shadow_ring_context; - struct page *page; void *dst; + void *context_base; unsigned long context_gpa, context_page_num; + unsigned long gpa_base; /* first gpa of consecutive GPAs */ + unsigned long gpa_size; /* size of consecutive GPAs */ + struct intel_vgpu_submission *s = &vgpu->submission; int i; + bool skip = false; + int ring_id = workload->engine->id; - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap(page); + GEM_BUG_ON(!intel_context_is_pinned(ctx)); + + context_base = (void *) ctx->lrc_reg_state - + (LRC_STATE_PN << I915_GTT_PAGE_SHIFT); + + shadow_ring_context = (void *) ctx->lrc_reg_state; sr_oa_regs(workload, (u32 *)shadow_ring_context, true); #define COPY_REG(name) \ @@ -169,23 +177,43 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); sr_oa_regs(workload, (u32 *)shadow_ring_context, false); - kunmap(page); - if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val)) - return 0; + gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx", + workload->engine->name, workload->ctx_desc.lrca, + workload->ctx_desc.context_id, + workload->ring_context_gpa); - gvt_dbg_sched("ring %s workload lrca %x", - workload->engine->name, - workload->ctx_desc.lrca); + /* only need to ensure this context is not pinned/unpinned during the + * period from last submission to this this submission. + * Upon reaching this function, the currently submitted context is not + * supposed to get unpinned. If a misbehaving guest driver ever does + * this, it would corrupt itself. + */ + if (s->last_ctx[ring_id].valid && + (s->last_ctx[ring_id].lrca == + workload->ctx_desc.lrca) && + (s->last_ctx[ring_id].ring_context_gpa == + workload->ring_context_gpa)) + skip = true; + + s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca; + s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa; + if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip) + return 0; + + s->last_ctx[ring_id].valid = false; context_page_num = workload->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0) context_page_num = 19; - i = 2; - while (i < context_page_num) { + /* find consecutive GPAs from gma until the first inconsecutive GPA. + * read from the continuous GPAs into dst virtual address + */ + gpa_size = 0; + for (i = 2; i < context_page_num; i++) { context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((workload->ctx_desc.lrca + i) << I915_GTT_PAGE_SHIFT)); @@ -194,13 +222,26 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return -EFAULT; } - page = i915_gem_object_get_page(ctx_obj, i); - dst = kmap(page); - intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, - I915_GTT_PAGE_SIZE); - kunmap(page); - i++; + if (gpa_size == 0) { + gpa_base = context_gpa; + dst = context_base + (i << I915_GTT_PAGE_SHIFT); + } else if (context_gpa != gpa_base + gpa_size) + goto read; + + gpa_size += I915_GTT_PAGE_SIZE; + + if (i == context_page_num - 1) + goto read; + + continue; + +read: + intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size); + gpa_base = context_gpa; + gpa_size = I915_GTT_PAGE_SIZE; + dst = context_base + (i << I915_GTT_PAGE_SHIFT); } + s->last_ctx[ring_id].valid = true; return 0; } @@ -290,7 +331,7 @@ static void shadow_context_descriptor_update(struct intel_context *ce, struct intel_vgpu_workload *workload) { - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; /* * Update bits 0-11 of the context descriptor which includes flags @@ -300,7 +341,7 @@ shadow_context_descriptor_update(struct intel_context *ce, desc |= (u64)workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - ce->lrc_desc = desc; + ce->lrc.desc = desc; } static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) @@ -595,10 +636,9 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) if (bb->va && !IS_ERR(bb->va)) i915_gem_object_unpin_map(bb->obj); - if (bb->vma && !IS_ERR(bb->vma)) { + if (bb->vma && !IS_ERR(bb->vma)) i915_vma_unpin(bb->vma); - i915_vma_close(bb->vma); - } + i915_gem_object_put(bb->obj); } list_del(&bb->list); @@ -784,11 +824,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload) { struct i915_request *rq = workload->req; struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_gem_object *ctx_obj = rq->context->state->obj; struct execlist_ring_context *shadow_ring_context; - struct page *page; + struct intel_context *ctx = workload->req->context; + void *context_base; void *src; unsigned long context_gpa, context_page_num; + unsigned long gpa_base; /* first gpa of consecutive GPAs */ + unsigned long gpa_size; /* size of consecutive GPAs*/ int i; u32 ring_base; u32 head, tail; @@ -797,6 +839,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, workload->ctx_desc.lrca); + GEM_BUG_ON(!intel_context_is_pinned(ctx)); + head = workload->rb_head; tail = workload->rb_tail; wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF; @@ -820,9 +864,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload) if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0) context_page_num = 19; - i = 2; + context_base = (void *) ctx->lrc_reg_state - + (LRC_STATE_PN << I915_GTT_PAGE_SHIFT); - while (i < context_page_num) { + /* find consecutive GPAs from gma until the first inconsecutive GPA. + * write to the consecutive GPAs from src virtual address + */ + gpa_size = 0; + for (i = 2; i < context_page_num; i++) { context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((workload->ctx_desc.lrca + i) << I915_GTT_PAGE_SHIFT)); @@ -831,19 +880,30 @@ static void update_guest_context(struct intel_vgpu_workload *workload) return; } - page = i915_gem_object_get_page(ctx_obj, i); - src = kmap(page); - intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, - I915_GTT_PAGE_SIZE); - kunmap(page); - i++; + if (gpa_size == 0) { + gpa_base = context_gpa; + src = context_base + (i << I915_GTT_PAGE_SHIFT); + } else if (context_gpa != gpa_base + gpa_size) + goto write; + + gpa_size += I915_GTT_PAGE_SIZE; + + if (i == context_page_num - 1) + goto write; + + continue; + +write: + intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size); + gpa_base = context_gpa; + gpa_size = I915_GTT_PAGE_SIZE; + src = context_base + (i << I915_GTT_PAGE_SHIFT); } intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4); - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap(page); + shadow_ring_context = (void *) ctx->lrc_reg_state; #define COPY_REG(name) \ intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \ @@ -860,8 +920,6 @@ static void update_guest_context(struct intel_vgpu_workload *workload) (void *)shadow_ring_context + sizeof(*shadow_ring_context), I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); - - kunmap(page); } void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, @@ -1260,6 +1318,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); + memset(s->last_ctx, 0, sizeof(s->last_ctx)); + i915_vm_put(&ppgtt->vm); return 0; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aa35a59f1c7d..c09e1afb5f79 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,6 +32,7 @@ #include <drm/drm_debugfs.h> #include "gem/i915_gem_context.h" +#include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" @@ -926,21 +927,30 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%dus)\n", - rpupei, GT_PM_INTERVAL_TO_US(dev_priv, rpupei)); - seq_printf(m, "RP CUR UP: %d (%dus)\n", - rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%dus)\n", - rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup)); + seq_printf(m, "RP CUR UP EI: %d (%dns)\n", + rpupei, + intel_gt_pm_interval_to_ns(&dev_priv->gt, rpupei)); + seq_printf(m, "RP CUR UP: %d (%dun)\n", + rpcurup, + intel_gt_pm_interval_to_ns(&dev_priv->gt, rpcurup)); + seq_printf(m, "RP PREV UP: %d (%dns)\n", + rpprevup, + intel_gt_pm_interval_to_ns(&dev_priv->gt, rpprevup)); seq_printf(m, "Up threshold: %d%%\n", rps->power.up_threshold); - seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", - rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei)); - seq_printf(m, "RP CUR DOWN: %d (%dus)\n", - rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%dus)\n", - rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown)); + seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n", + rpdownei, + intel_gt_pm_interval_to_ns(&dev_priv->gt, + rpdownei)); + seq_printf(m, "RP CUR DOWN: %d (%dns)\n", + rpcurdown, + intel_gt_pm_interval_to_ns(&dev_priv->gt, + rpcurdown)); + seq_printf(m, "RP PREV DOWN: %d (%dns)\n", + rpprevdown, + intel_gt_pm_interval_to_ns(&dev_priv->gt, + rpprevdown)); seq_printf(m, "Down threshold: %d%%\n", rps->power.down_threshold); @@ -1189,7 +1199,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_rps *rps = &dev_priv->gt.rps; - seq_printf(m, "RPS enabled? %d\n", rps->enabled); + seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); + seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); @@ -1209,7 +1220,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); - if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) { + if (INTEL_GEN(dev_priv) >= 6 && intel_rps_is_active(rps)) { u32 rpup, rpupei; u32 rpdown, rpdownei; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ff9a5b1b4c6d..34ee12f3f02d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -228,14 +228,14 @@ static int i915_driver_modeset_probe_noirq(struct drm_i915_private *i915) ret = drm_vblank_init(&i915->drm, INTEL_NUM_PIPES(i915)); if (ret) - goto out; + return ret; } intel_bios_init(i915); ret = intel_vga_register(i915); if (ret) - goto out; + goto cleanup_bios; intel_power_domains_init_hw(i915, false); @@ -243,13 +243,16 @@ static int i915_driver_modeset_probe_noirq(struct drm_i915_private *i915) ret = intel_modeset_init_noirq(i915); if (ret) - goto cleanup_vga_client; + goto cleanup_vga_client_pw_domain_csr; return 0; -cleanup_vga_client: +cleanup_vga_client_pw_domain_csr: + intel_csr_ucode_fini(i915); + intel_power_domains_driver_remove(i915); intel_vga_unregister(i915); -out: +cleanup_bios: + intel_bios_driver_remove(i915); return ret; } @@ -308,13 +311,13 @@ static void i915_driver_modeset_remove(struct drm_i915_private *i915) /* part #2: call after irq uninstall */ static void i915_driver_modeset_remove_noirq(struct drm_i915_private *i915) { - intel_modeset_driver_remove_noirq(i915); + intel_csr_ucode_fini(i915); - intel_bios_driver_remove(i915); + intel_power_domains_driver_remove(i915); intel_vga_unregister(i915); - intel_csr_ucode_fini(i915); + intel_bios_driver_remove(i915); } static void intel_init_dpio(struct drm_i915_private *dev_priv) @@ -567,6 +570,62 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) } /** + * i915_set_dma_info - set all relevant PCI dma info as configured for the + * platform + * @i915: valid i915 instance + * + * Set the dma max segment size, device and coherent masks. The dma mask set + * needs to occur before i915_ggtt_probe_hw. + * + * A couple of platforms have special needs. Address them as well. + * + */ +static int i915_set_dma_info(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + unsigned int mask_size = INTEL_INFO(i915)->dma_mask_size; + int ret; + + GEM_BUG_ON(!mask_size); + + /* + * We don't have a max segment size, so set it to the max so sg's + * debugging layer doesn't complain + */ + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(mask_size)); + if (ret) + goto mask_err; + + /* overlay on gen2 is broken and can't address above 1G */ + if (IS_GEN(i915, 2)) + mask_size = 30; + + /* + * 965GM sometimes incorrectly writes to hardware status page (HWS) + * using 32bit addressing, overwriting memory if HWS is located + * above 4GB. + * + * The documentation also mentions an issue with undefined + * behaviour if any general state is accessed within a page above 4GB, + * which also needs to be handled carefully. + */ + if (IS_I965G(i915) || IS_I965GM(i915)) + mask_size = 32; + + ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(mask_size)); + if (ret) + goto mask_err; + + return 0; + +mask_err: + drm_err(&i915->drm, "Can't set DMA mask/consistent mask (%d)\n", ret); + return ret; +} + +/** * i915_driver_hw_probe - setup state requiring device access * @dev_priv: device private * @@ -611,6 +670,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) /* needs to be done before ggtt probe */ intel_dram_edram_detect(dev_priv); + ret = i915_set_dma_info(dev_priv); + if (ret) + return ret; + i915_perf_init(dev_priv); ret = i915_ggtt_probe_hw(dev_priv); @@ -639,40 +702,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pci_set_master(pdev); - /* - * We don't have a max segment size, so set it to the max so sg's - * debugging layer doesn't complain - */ - dma_set_max_seg_size(&pdev->dev, UINT_MAX); - - /* overlay on gen2 is broken and can't address above 1G */ - if (IS_GEN(dev_priv, 2)) { - ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(30)); - if (ret) { - drm_err(&dev_priv->drm, "failed to set DMA mask\n"); - - goto err_mem_regions; - } - } - - /* 965GM sometimes incorrectly writes to hardware status page (HWS) - * using 32bit addressing, overwriting memory if HWS is located - * above 4GB. - * - * The documentation also mentions an issue with undefined - * behaviour if any general state is accessed within a page above 4GB, - * which also needs to be handled carefully. - */ - if (IS_I965G(dev_priv) || IS_I965GM(dev_priv)) { - ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - - if (ret) { - drm_err(&dev_priv->drm, "failed to set DMA mask\n"); - - goto err_mem_regions; - } - } - cpu_latency_qos_add_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE); intel_gt_init_workarounds(dev_priv); @@ -984,7 +1013,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_cleanup_irq: intel_irq_uninstall(i915); out_cleanup_modeset: - /* FIXME */ + i915_driver_modeset_remove_noirq(i915); out_cleanup_hw: i915_driver_hw_remove(i915); intel_memory_regions_driver_release(i915); @@ -1020,12 +1049,12 @@ void i915_driver_remove(struct drm_i915_private *i915) intel_irq_uninstall(i915); - i915_driver_modeset_remove_noirq(i915); + intel_modeset_driver_remove_noirq(i915); i915_reset_error_state(i915); i915_gem_driver_remove(i915); - intel_power_domains_driver_remove(i915); + i915_driver_modeset_remove_noirq(i915); i915_driver_hw_remove(i915); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b00f0845cbc3..6af69555733e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -108,8 +108,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20200417" -#define DRIVER_TIMESTAMP 1587105300 +#define DRIVER_DATE "20200430" +#define DRIVER_TIMESTAMP 1588234401 struct drm_i915_gem_object; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 424ad975a360..eec292d06f11 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -467,14 +467,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, head %08x, tail %08x\n", prefix, erq->pid, erq->context, erq->seqno, test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &erq->flags) ? "!" : "", test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &erq->flags) ? "+" : "", erq->sched_attr.priority, - erq->start, erq->head, erq->tail); + erq->head, erq->tail); } static void error_print_context(struct drm_i915_error_state_buf *m, @@ -1207,21 +1207,22 @@ static void engine_record_registers(struct intel_engine_coredump *ee) static void record_request(const struct i915_request *request, struct i915_request_coredump *erq) { - const struct i915_gem_context *ctx; - erq->flags = request->fence.flags; erq->context = request->fence.context; erq->seqno = request->fence.seqno; erq->sched_attr = request->sched.attr; - erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; erq->pid = 0; rcu_read_lock(); - ctx = rcu_dereference(request->context->gem_context); - if (ctx) - erq->pid = pid_nr(ctx->pid); + if (!intel_context_is_closed(request->context)) { + const struct i915_gem_context *ctx; + + ctx = rcu_dereference(request->context->gem_context); + if (ctx) + erq->pid = pid_nr(ctx->pid); + } rcu_read_unlock(); } @@ -1319,26 +1320,6 @@ capture_user(struct intel_engine_capture_vma *capture, return capture; } -static struct i915_vma_coredump * -capture_object(const struct intel_gt *gt, - struct drm_i915_gem_object *obj, - const char *name, - struct i915_vma_compress *compress) -{ - if (obj && i915_gem_object_has_pages(obj)) { - struct i915_vma fake = { - .node = { .start = U64_MAX, .size = obj->base.size }, - .size = obj->base.size, - .pages = obj->mm.pages, - .obj = obj, - }; - - return i915_vma_coredump_create(gt, &fake, name, compress); - } else { - return NULL; - } -} - static void add_vma(struct intel_engine_coredump *ee, struct i915_vma_coredump *vma) { @@ -1427,12 +1408,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, engine->wa_ctx.vma, "WA context", compress)); - - add_vma(ee, - capture_object(engine->gt, - engine->default_state, - "NULL context", - compress)); } static struct intel_engine_coredump * diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 0d1f6c8ff355..fa2d82a6de04 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -50,7 +50,6 @@ struct i915_request_coredump { pid_t pid; u32 context; u32 seqno; - u32 start; u32 head; u32 tail; struct i915_sched_attr sched_attr; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1502ab44f1a5..bd722d0650c8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3358,7 +3358,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - u32 de_pipe_masked = GEN8_PIPE_CDCLK_CRC_DONE; + u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) | + GEN8_PIPE_CDCLK_CRC_DONE; u32 de_pipe_enables; u32 de_port_masked = GEN8_AUX_CHANNEL_A; u32 de_port_enables; @@ -3369,13 +3370,10 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) de_misc_masked |= GEN8_DE_MISC_GSE; if (INTEL_GEN(dev_priv) >= 9) { - de_pipe_masked |= GEN9_DE_PIPE_IRQ_FAULT_ERRORS; de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C | GEN9_AUX_CHANNEL_D; if (IS_GEN9_LP(dev_priv)) de_port_masked |= BXT_DE_PORT_GMBUS; - } else { - de_pipe_masked |= GEN8_DE_PIPE_IRQ_FAULT_ERRORS; } if (INTEL_GEN(dev_priv) >= 11) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2741fb3e30cb..1faf9d6ec0a4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -171,6 +171,7 @@ .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = false, \ + .dma_mask_size = 32, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ @@ -190,6 +191,7 @@ .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = false, \ + .dma_mask_size = 32, \ I845_PIPE_OFFSETS, \ I845_CURSOR_OFFSETS, \ I9XX_COLORS, \ @@ -226,6 +228,7 @@ static const struct intel_device_info i865g_info = { .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ + .dma_mask_size = 32, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ @@ -286,6 +289,7 @@ static const struct intel_device_info g33_info = { PLATFORM(INTEL_G33), .display.has_hotplug = 1, .display.has_overlay = 1, + .dma_mask_size = 36, }; static const struct intel_device_info pnv_g_info = { @@ -293,6 +297,7 @@ static const struct intel_device_info pnv_g_info = { PLATFORM(INTEL_PINEVIEW), .display.has_hotplug = 1, .display.has_overlay = 1, + .dma_mask_size = 36, }; static const struct intel_device_info pnv_m_info = { @@ -301,6 +306,7 @@ static const struct intel_device_info pnv_m_info = { .is_mobile = 1, .display.has_hotplug = 1, .display.has_overlay = 1, + .dma_mask_size = 36, }; #define GEN4_FEATURES \ @@ -313,6 +319,7 @@ static const struct intel_device_info pnv_m_info = { .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ + .dma_mask_size = 36, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I965_COLORS, \ @@ -365,6 +372,7 @@ static const struct intel_device_info gm45_info = { .has_coherent_ggtt = true, \ /* ilk does support rc6, but we do not implement [power] contexts */ \ .has_rc6 = 0, \ + .dma_mask_size = 36, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ @@ -395,6 +403,7 @@ static const struct intel_device_info ilk_m_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ + .dma_mask_size = 40, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ I9XX_PIPE_OFFSETS, \ @@ -445,6 +454,7 @@ static const struct intel_device_info snb_m_gt2_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ + .dma_mask_size = 40, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ @@ -504,6 +514,7 @@ static const struct intel_device_info vlv_info = { .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, + .dma_mask_size = 40, .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 31, .has_snoop = true, @@ -554,6 +565,7 @@ static const struct intel_device_info hsw_gt3_info = { G75_FEATURES, \ GEN(8), \ .has_logical_ring_contexts = 1, \ + .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ .has_64bit_reloc = 1, \ @@ -602,6 +614,7 @@ static const struct intel_device_info chv_info = { .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, + .dma_mask_size = 39, .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 32, .has_reset_engine = 1, @@ -685,6 +698,7 @@ static const struct intel_device_info skl_gt4_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_preemption = 1, \ .has_gt_uc = 1, \ + .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ .has_reset_engine = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5cde3e4e7be6..c533f569dd42 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1263,8 +1263,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * dropped by GuC. They won't be part of the context * ID in the OA reports, so squash those lower bits. */ - stream->specific_ctx_id = - lower_32_bits(ce->lrc_desc) >> 12; + stream->specific_ctx_id = ce->lrc.lrca >> 12; /* * GuC uses the top bit to signal proxy submission, so @@ -1281,11 +1280,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); /* * Pick an unused context id - * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts + * 0 - BITS_PER_LONG are used by other contexts * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context */ stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG); break; } @@ -2098,7 +2096,7 @@ gen8_store_flex(struct i915_request *rq, if (IS_ERR(cs)) return PTR_ERR(cs); - offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; + offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET; do { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = offset + flex->offset * sizeof(u32); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 230e9256ab30..83c6a8ccd2cb 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -439,29 +439,9 @@ static u64 count_interrupts(struct drm_i915_private *i915) return sum; } -static void engine_event_destroy(struct perf_event *event) -{ - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct intel_engine_cs *engine; - - engine = intel_engine_lookup_user(i915, - engine_event_class(event), - engine_event_instance(event)); - if (drm_WARN_ON_ONCE(&i915->drm, !engine)) - return; - - if (engine_event_sample(event) == I915_SAMPLE_BUSY && - intel_engine_supports_stats(engine)) - intel_disable_engine_stats(engine); -} - static void i915_pmu_event_destroy(struct perf_event *event) { WARN_ON(event->parent); - - if (is_engine_event(event)) - engine_event_destroy(event); } static int @@ -514,23 +494,13 @@ static int engine_event_init(struct perf_event *event) struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); struct intel_engine_cs *engine; - u8 sample; - int ret; engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); if (!engine) return -ENODEV; - sample = engine_event_sample(event); - ret = engine_event_status(engine, sample); - if (ret) - return ret; - - if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine)) - ret = intel_enable_engine_stats(engine); - - return ret; + return engine_event_status(engine, engine_event_sample(event)); } static int i915_pmu_event_init(struct perf_event *event) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index edda3f29c8aa..fd9f2904d93c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -34,8 +34,8 @@ * Follow the style described here for new macros, and while changing existing * macros. Do **not** mass change existing definitions just to update the style. * - * Layout - * ~~~~~~ + * File Layout + * ~~~~~~~~~~~ * * Keep helper macros near the top. For example, _PIPE() and friends. * @@ -561,6 +561,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) * Registers used only by the command parser */ #define BCS_SWCTRL _MMIO(0x22200) +#define BCS_SRC_Y REG_BIT(0) +#define BCS_DST_Y REG_BIT(1) /* There are 16 GPR registers */ #define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) @@ -2657,6 +2659,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_DMA_FADD_UDW(base) _MMIO((base) + 0x60) /* gen8+ */ #define RING_INSTPM(base) _MMIO((base) + 0xc0) #define RING_MI_MODE(base) _MMIO((base) + 0x9c) +#define RING_CMD_BUF_CCTL(base) _MMIO((base) + 0x84) #define INSTPS _MMIO(0x2070) /* 965+ only */ #define GEN4_INSTDONE1 _MMIO(0x207c) /* 965+ only, aka INSTDONE_2 on SNB */ #define ACTHD_I965 _MMIO(0x2074) @@ -4013,31 +4016,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_RP_STATE_LIMITS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994) #define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) #define BXT_RP_STATE_CAP _MMIO(0x138170) - -/* - * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS - * 8300) freezing up around GPU hangs. Looks as if even - * scheduling/timer interrupts start misbehaving if the RPS - * EI/thresholds are "bad", leading to a very sluggish or even - * frozen machine. - */ -#define INTERVAL_1_28_US(us) roundup(((us) * 100) >> 7, 25) -#define INTERVAL_1_33_US(us) (((us) * 3) >> 2) -#define INTERVAL_0_833_US(us) (((us) * 6) / 5) -#define GT_INTERVAL_FROM_US(dev_priv, us) (INTEL_GEN(dev_priv) >= 9 ? \ - (IS_GEN9_LP(dev_priv) ? \ - INTERVAL_0_833_US(us) : \ - INTERVAL_1_33_US(us)) : \ - INTERVAL_1_28_US(us)) - -#define INTERVAL_1_28_TO_US(interval) (((interval) << 7) / 100) -#define INTERVAL_1_33_TO_US(interval) (((interval) << 2) / 3) -#define INTERVAL_0_833_TO_US(interval) (((interval) * 5) / 6) -#define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 ? \ - (IS_GEN9_LP(dev_priv) ? \ - INTERVAL_0_833_TO_US(interval) : \ - INTERVAL_1_33_TO_US(interval)) : \ - INTERVAL_1_28_TO_US(interval)) +#define GEN9_RP_STATE_LIMITS _MMIO(0x138148) /* * Logical Context regs @@ -9108,8 +9087,13 @@ enum { #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) #define GEN6_PCODE_READ_D_COMP 0x10 #define GEN6_PCODE_WRITE_D_COMP 0x11 +#define ICL_PCODE_EXIT_TCCOLD 0x12 #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 #define DISPLAY_IPS_CONTROL 0x19 +#define TGL_PCODE_TCCOLD 0x26 +#define TGL_PCODE_EXIT_TCCOLD_DATA_L_EXIT_FAILED REG_BIT(0) +#define TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ 0 +#define TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ REG_BIT(0) /* See also IPS_CTL */ #define IPS_PCODE_CONTROL (1 << 30) #define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A @@ -9396,6 +9380,22 @@ enum { #define AUD_PIN_BUF_CTL _MMIO(0x48414) #define AUD_PIN_BUF_ENABLE REG_BIT(31) +/* Display Audio Config Reg */ +#define AUD_CONFIG_BE _MMIO(0x65ef0) +#define HBLANK_EARLY_ENABLE_ICL(pipe) (0x1 << (20 - (pipe))) +#define HBLANK_EARLY_ENABLE_TGL(pipe) (0x1 << (24 + (pipe))) +#define HBLANK_START_COUNT_MASK(pipe) (0x7 << (3 + ((pipe) * 6))) +#define HBLANK_START_COUNT(pipe, val) (((val) & 0x7) << (3 + ((pipe)) * 6)) +#define NUMBER_SAMPLES_PER_LINE_MASK(pipe) (0x3 << ((pipe) * 6)) +#define NUMBER_SAMPLES_PER_LINE(pipe, val) (((val) & 0x3) << ((pipe) * 6)) + +#define HBLANK_START_COUNT_8 0 +#define HBLANK_START_COUNT_16 1 +#define HBLANK_START_COUNT_32 2 +#define HBLANK_START_COUNT_64 3 +#define HBLANK_START_COUNT_96 4 +#define HBLANK_START_COUNT_128 5 + /* * HSW - ICL power wells * diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index f0383a68c981..fc14ebf9a0b7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -158,16 +158,18 @@ vma_create(struct drm_i915_gem_object *obj, GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE)); + spin_lock(&obj->vma.lock); + if (i915_is_ggtt(vm)) { if (unlikely(overflows_type(vma->size, u32))) - goto err_vma; + goto err_unlock; vma->fence_size = i915_gem_fence_size(vm->i915, vma->size, i915_gem_object_get_tiling(obj), i915_gem_object_get_stride(obj)); if (unlikely(vma->fence_size < vma->size || /* overflow */ vma->fence_size > vm->total)) - goto err_vma; + goto err_unlock; GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT)); @@ -179,8 +181,6 @@ vma_create(struct drm_i915_gem_object *obj, __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); } - spin_lock(&obj->vma.lock); - rb = NULL; p = &obj->vma.tree.rb_node; while (*p) { @@ -225,6 +225,8 @@ vma_create(struct drm_i915_gem_object *obj, return vma; +err_unlock: + spin_unlock(&obj->vma.lock); err_vma: i915_vma_free(vma); return ERR_PTR(-E2BIG); @@ -520,7 +522,6 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags) GEM_BUG_ON(!obj); i915_vma_unpin(vma); - i915_vma_close(vma); if (flags & I915_VMA_RELEASE_MAP) i915_gem_object_unpin_map(obj); @@ -1021,13 +1022,8 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) } while (1); } -void i915_vma_close(struct i915_vma *vma) +static void __vma_close(struct i915_vma *vma, struct intel_gt *gt) { - struct intel_gt *gt = vma->vm->gt; - unsigned long flags; - - GEM_BUG_ON(i915_vma_is_closed(vma)); - /* * We defer actually closing, unbinding and destroying the VMA until * the next idle point, or if the object is freed in the meantime. By @@ -1040,9 +1036,25 @@ void i915_vma_close(struct i915_vma *vma) * causing us to rebind the VMA once more. This ends up being a lot * of wasted work for the steady state. */ - spin_lock_irqsave(>->closed_lock, flags); + GEM_BUG_ON(i915_vma_is_closed(vma)); list_add(&vma->closed_link, >->closed_vma); - spin_unlock_irqrestore(>->closed_lock, flags); +} + +void i915_vma_close(struct i915_vma *vma) +{ + struct intel_gt *gt = vma->vm->gt; + unsigned long flags; + + if (i915_vma_is_ggtt(vma)) + return; + + GEM_BUG_ON(!atomic_read(&vma->open_count)); + if (atomic_dec_and_lock_irqsave(&vma->open_count, + >->closed_lock, + flags)) { + __vma_close(vma, gt); + spin_unlock_irqrestore(>->closed_lock, flags); + } } static void __i915_vma_remove_closed(struct i915_vma *vma) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index db8496b4c38d..91bb7891c70c 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -98,6 +98,7 @@ void intel_device_info_print_static(const struct intel_device_info *info, drm_printf(p, "platform: %s\n", intel_platform_name(info->platform)); drm_printf(p, "ppgtt-size: %d\n", info->ppgtt_size); drm_printf(p, "ppgtt-type: %d\n", info->ppgtt_type); + drm_printf(p, "dma_mask_size: %u\n", info->dma_mask_size); #define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, yesno(info->name)); DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index cce6a72c5ebc..69c9257c6c6a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -158,6 +158,8 @@ struct intel_device_info { enum intel_platform platform; + unsigned int dma_mask_size; /* available DMA address bits */ + enum intel_ppgtt_type ppgtt_type; unsigned int ppgtt_size; /* log2, e.g. 31/32/48 bits */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b632b6bb9c3e..bfb180fe8047 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3757,42 +3757,38 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; } -bool intel_can_enable_sagv(struct intel_atomic_state *state) +void intel_sagv_pre_plane_update(struct intel_atomic_state *state) { - struct drm_device *dev = state->base.dev; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + if (!intel_can_enable_sagv(state)) + intel_disable_sagv(dev_priv); +} + +void intel_sagv_post_plane_update(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + if (intel_can_enable_sagv(state)) + intel_enable_sagv(dev_priv); +} + +static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + struct drm_device *dev = crtc_state->uapi.crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_plane *plane; - struct intel_crtc_state *crtc_state; - enum pipe pipe; int level, latency; - if (!intel_has_sagv(dev_priv)) - return false; - - /* - * If there are no active CRTCs, no additional checks need be performed - */ - if (hweight8(state->active_pipes) == 0) + if (!crtc_state->hw.active) return true; - /* - * SKL+ workaround: bspec recommends we disable SAGV when we have - * more then one pipe enabled - */ - if (hweight8(state->active_pipes) > 1) - return false; - - /* Since we're now guaranteed to only have one active CRTC... */ - pipe = ffs(state->active_pipes) - 1; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - crtc_state = to_intel_crtc_state(crtc->base.state); - if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) return false; for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct skl_plane_wm *wm = + const struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane->id]; /* Skip this plane if it's not enabled */ @@ -3823,6 +3819,37 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) return true; } +bool intel_can_enable_sagv(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + const struct intel_crtc_state *crtc_state; + enum pipe pipe; + + if (!intel_has_sagv(dev_priv)) + return false; + + /* + * If there are no active CRTCs, no additional checks need be performed + */ + if (hweight8(state->active_pipes) == 0) + return true; + + /* + * SKL+ workaround: bspec recommends we disable SAGV when we have + * more then one pipe enabled + */ + if (hweight8(state->active_pipes) > 1) + return false; + + /* Since we're now guaranteed to only have one active CRTC... */ + pipe = ffs(state->active_pipes) - 1; + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc_state = to_intel_crtc_state(crtc->base.state); + + return intel_crtc_can_enable_sagv(crtc_state); +} + /* * Calculate initial DBuf slice offset, based on slice size * and mask(i.e if slice size is 1024 and second slice is enabled @@ -5428,8 +5455,8 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); } -static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, - const struct skl_ddb_entry *b) +static bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, + const struct skl_ddb_entry *b) { return a->start < b->end && b->start < a->end; } @@ -5880,8 +5907,7 @@ static void ilk_optimize_watermarks(struct intel_atomic_state *state, mutex_unlock(&dev_priv->wm.wm_mutex); } -static inline void skl_wm_level_from_reg_val(u32 val, - struct skl_wm_level *level) +static void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level) { level->plane_en = val & PLANE_WM_EN; level->ignore_lines = val & PLANE_WM_IGNORE_LINES; @@ -6854,6 +6880,10 @@ static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0)) I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); + + /* Wa_14011059788:tgl */ + intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, + 0, DFR_DISABLE); } static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index d60a85421c5a..9a6036ab0f90 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -44,6 +44,8 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct intel_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); +void intel_sagv_pre_plane_update(struct intel_atomic_state *state); +void intel_sagv_post_plane_update(struct intel_atomic_state *state); bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2); bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb, diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 3f13baaef058..14daf6af6854 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -336,7 +336,7 @@ void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value, intel_sbi_rw(i915, reg, destination, &value, false); } -static inline int gen6_check_mailbox_status(u32 mbox) +static int gen6_check_mailbox_status(u32 mbox) { switch (mbox & GEN6_PCODE_ERROR_MASK) { case GEN6_PCODE_SUCCESS: @@ -356,7 +356,7 @@ static inline int gen6_check_mailbox_status(u32 mbox) } } -static inline int gen7_check_mailbox_status(u32 mbox) +static int gen7_check_mailbox_status(u32 mbox) { switch (mbox & GEN6_PCODE_ERROR_MASK) { case GEN6_PCODE_SUCCESS: diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index fa86b7ab2d99..078f5b2eb8a4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1092,8 +1092,7 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = { /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen11_fw_ranges[] = { - GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), - GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ + GEN_FW_RANGE(0x0, 0x1fff, 0), /* uncore range */ GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), @@ -1103,27 +1102,31 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = { GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8500, 0x87ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8800, 0x8bff, 0), GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER), - GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), - GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8d00, 0x94cf, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x9560, 0x95ff, 0), + GEN_FW_RANGE(0x9600, 0xafff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), GEN_FW_RANGE(0xb480, 0xdeff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xdf00, 0xe8ff, FORCEWAKE_RENDER), GEN_FW_RANGE(0xe900, 0x16dff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x16e00, 0x19fff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x1a000, 0x243ff, FORCEWAKE_BLITTER), - GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1a000, 0x23fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24000, 0x2407f, 0), + GEN_FW_RANGE(0x24080, 0x2417f, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24180, 0x242ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x24300, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24400, 0x24fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x25000, 0x3ffff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x40000, 0x1bffff, 0), GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), - GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), - GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), - GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1c4000, 0x1c7fff, 0), + GEN_FW_RANGE(0x1c8000, 0x1cffff, FORCEWAKE_MEDIA_VEBOX0), GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), - GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), - GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) + GEN_FW_RANGE(0x1d4000, 0x1dbfff, 0) }; /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index 2186386a45c8..6942487c14a9 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -89,7 +89,7 @@ void intel_wopcm_init_early(struct intel_wopcm *wopcm) drm_dbg(&i915->drm, "WOPCM: %uK\n", wopcm->size / 1024); } -static inline u32 context_reserved_size(struct drm_i915_private *i915) +static u32 context_reserved_size(struct drm_i915_private *i915) { if (IS_GEN9_LP(i915)) return BXT_WOPCM_RC6_CTX_RESERVED; @@ -99,8 +99,8 @@ static inline u32 context_reserved_size(struct drm_i915_private *i915) return 0; } -static inline bool gen9_check_dword_gap(struct drm_i915_private *i915, - u32 guc_wopcm_base, u32 guc_wopcm_size) +static bool gen9_check_dword_gap(struct drm_i915_private *i915, + u32 guc_wopcm_base, u32 guc_wopcm_size) { u32 offset; @@ -122,8 +122,8 @@ static inline bool gen9_check_dword_gap(struct drm_i915_private *i915, return true; } -static inline bool gen9_check_huc_fw_fits(struct drm_i915_private *i915, - u32 guc_wopcm_size, u32 huc_fw_size) +static bool gen9_check_huc_fw_fits(struct drm_i915_private *i915, + u32 guc_wopcm_size, u32 huc_fw_size) { /* * On Gen9 & CNL A0, hardware requires the total available GuC WOPCM @@ -141,9 +141,9 @@ static inline bool gen9_check_huc_fw_fits(struct drm_i915_private *i915, return true; } -static inline bool check_hw_restrictions(struct drm_i915_private *i915, - u32 guc_wopcm_base, u32 guc_wopcm_size, - u32 huc_fw_size) +static bool check_hw_restrictions(struct drm_i915_private *i915, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 huc_fw_size) { if (IS_GEN(i915, 9) && !gen9_check_dword_gap(i915, guc_wopcm_base, guc_wopcm_size)) @@ -157,9 +157,9 @@ static inline bool check_hw_restrictions(struct drm_i915_private *i915, return true; } -static inline bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size, - u32 guc_wopcm_base, u32 guc_wopcm_size, - u32 guc_fw_size, u32 huc_fw_size) +static bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 guc_fw_size, u32 huc_fw_size) { const u32 ctx_rsvd = context_reserved_size(i915); u32 size; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5d2a02fcf595..2e471500a646 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -331,9 +331,6 @@ static void close_object_list(struct list_head *objects, vma = i915_vma_instance(obj, vm, NULL); if (!IS_ERR(vma)) ignored = i915_vma_unbind(vma); - /* Only ppgtt vma may be closed before the object is freed */ - if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) - i915_vma_close(vma); list_del(&obj->st_link); i915_gem_object_put(obj); @@ -591,7 +588,7 @@ static int walk_hole(struct i915_address_space *vm, pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", __func__, addr, vma->size, hole_start, hole_end, err); - goto err_close; + goto err_put; } i915_vma_unpin(vma); @@ -600,14 +597,14 @@ static int walk_hole(struct i915_address_space *vm, pr_err("%s incorrect at %llx + %llx\n", __func__, addr, vma->size); err = -EINVAL; - goto err_close; + goto err_put; } err = i915_vma_unbind(vma); if (err) { pr_err("%s unbind failed at %llx + %llx with err=%d\n", __func__, addr, vma->size, err); - goto err_close; + goto err_put; } GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); @@ -616,13 +613,10 @@ static int walk_hole(struct i915_address_space *vm, "%s timed out at %llx\n", __func__, addr)) { err = -EINTR; - goto err_close; + goto err_put; } } -err_close: - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); err_put: i915_gem_object_put(obj); if (err) @@ -675,7 +669,7 @@ static int pot_hole(struct i915_address_space *vm, addr, hole_start, hole_end, err); - goto err; + goto err_obj; } if (!drm_mm_node_allocated(&vma->node) || @@ -685,7 +679,7 @@ static int pot_hole(struct i915_address_space *vm, i915_vma_unpin(vma); err = i915_vma_unbind(vma); err = -EINVAL; - goto err; + goto err_obj; } i915_vma_unpin(vma); @@ -697,13 +691,10 @@ static int pot_hole(struct i915_address_space *vm, "%s timed out after %d/%d\n", __func__, pot, fls64(hole_end - 1) - 1)) { err = -EINTR; - goto err; + goto err_obj; } } -err: - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); err_obj: i915_gem_object_put(obj); return err; @@ -778,7 +769,7 @@ static int drunk_hole(struct i915_address_space *vm, addr, BIT_ULL(size), hole_start, hole_end, err); - goto err; + goto err_obj; } if (!drm_mm_node_allocated(&vma->node) || @@ -788,7 +779,7 @@ static int drunk_hole(struct i915_address_space *vm, i915_vma_unpin(vma); err = i915_vma_unbind(vma); err = -EINVAL; - goto err; + goto err_obj; } i915_vma_unpin(vma); @@ -799,13 +790,10 @@ static int drunk_hole(struct i915_address_space *vm, "%s timed out after %d/%d\n", __func__, n, count)) { err = -EINTR; - goto err; + goto err_obj; } } -err: - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); err_obj: i915_gem_object_put(obj); kfree(order); diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 5b39bab4da1d..6a2be7d0dd95 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,6 +16,7 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(shmem, shmem_utils_mock_selftests) selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index 3bf7f53e9924..d8da142985eb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -16,5 +16,6 @@ * Tests are executed in order by igt/i915_selftest */ selftest(engine_cs, intel_engine_cs_perf_selftests) +selftest(request, i915_request_perf_selftests) selftest(blt, i915_gem_object_blt_perf_selftests) selftest(region, intel_memory_region_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 1dab0360f76a..15b1ca9f7a01 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -23,6 +23,7 @@ */ #include <linux/prime_numbers.h> +#include <linux/pm_qos.h> #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" @@ -1239,7 +1240,7 @@ static int live_parallel_engines(void *arg) struct igt_live_test t; unsigned int idx; - snprintf(name, sizeof(name), "%ps", fn); + snprintf(name, sizeof(name), "%ps", *fn); err = igt_live_test_begin(&t, i915, __func__, name); if (err) break; @@ -1476,3 +1477,572 @@ int i915_request_live_selftests(struct drm_i915_private *i915) return i915_subtests(tests, i915); } + +static int switch_to_kernel_sync(struct intel_context *ce, int err) +{ + struct i915_request *rq; + struct dma_fence *fence; + + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (fence) { + i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + rq = i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err) + err = -ETIME; + i915_request_put(rq); + + while (!err && !intel_engine_is_idle(ce->engine)) + intel_engine_flush_submission(ce->engine); + + return err; +} + +struct perf_stats { + struct intel_engine_cs *engine; + unsigned long count; + ktime_t time; + ktime_t busy; + u64 runtime; +}; + +struct perf_series { + struct drm_i915_private *i915; + unsigned int nengines; + struct intel_context *ce[]; +}; + +static int s_sync0(void *arg) +{ + struct perf_series *ps = arg; + IGT_TIMEOUT(end_time); + unsigned int idx = 0; + int err = 0; + + GEM_BUG_ON(!ps->nengines); + do { + struct i915_request *rq; + + rq = i915_request_create(ps->ce[idx]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + break; + + if (++idx == ps->nengines) + idx = 0; + } while (!__igt_timeout(end_time, NULL)); + + return err; +} + +static int s_sync1(void *arg) +{ + struct perf_series *ps = arg; + struct i915_request *prev = NULL; + IGT_TIMEOUT(end_time); + unsigned int idx = 0; + int err = 0; + + GEM_BUG_ON(!ps->nengines); + do { + struct i915_request *rq; + + rq = i915_request_create(ps->ce[idx]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(prev); + prev = rq; + if (err) + break; + + if (++idx == ps->nengines) + idx = 0; + } while (!__igt_timeout(end_time, NULL)); + i915_request_put(prev); + + return err; +} + +static int s_many(void *arg) +{ + struct perf_series *ps = arg; + IGT_TIMEOUT(end_time); + unsigned int idx = 0; + + GEM_BUG_ON(!ps->nengines); + do { + struct i915_request *rq; + + rq = i915_request_create(ps->ce[idx]); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + if (++idx == ps->nengines) + idx = 0; + } while (!__igt_timeout(end_time, NULL)); + + return 0; +} + +static int perf_series_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + s_sync0, + s_sync1, + s_many, + NULL, + }; + const unsigned int nengines = num_uabi_engines(i915); + struct intel_engine_cs *engine; + int (* const *fn)(void *arg); + struct pm_qos_request qos; + struct perf_stats *stats; + struct perf_series *ps; + unsigned int idx; + int err = 0; + + stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + + ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL); + if (!ps) { + kfree(stats); + return -ENOMEM; + } + + cpu_latency_qos_add_request(&qos, 0); /* disable cstates */ + + ps->i915 = i915; + ps->nengines = nengines; + + idx = 0; + for_each_uabi_engine(engine, i915) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + ps->ce[idx++] = ce; + } + GEM_BUG_ON(idx != ps->nengines); + + for (fn = func; *fn && !err; fn++) { + char name[KSYM_NAME_LEN]; + struct igt_live_test t; + + snprintf(name, sizeof(name), "%ps", *fn); + err = igt_live_test_begin(&t, i915, __func__, name); + if (err) + break; + + for (idx = 0; idx < nengines; idx++) { + struct perf_stats *p = + memset(&stats[idx], 0, sizeof(stats[idx])); + struct intel_context *ce = ps->ce[idx]; + + p->engine = ps->ce[idx]->engine; + intel_engine_pm_get(p->engine); + + if (intel_engine_supports_stats(p->engine)) + p->busy = intel_engine_get_busy_time(p->engine) + 1; + p->runtime = -intel_context_get_total_runtime_ns(ce); + p->time = ktime_get(); + } + + err = (*fn)(ps); + if (igt_live_test_end(&t)) + err = -EIO; + + for (idx = 0; idx < nengines; idx++) { + struct perf_stats *p = &stats[idx]; + struct intel_context *ce = ps->ce[idx]; + int integer, decimal; + u64 busy, dt; + + p->time = ktime_sub(ktime_get(), p->time); + if (p->busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(p->engine), + p->busy - 1); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime += intel_context_get_total_runtime_ns(ce); + intel_engine_pm_put(p->engine); + + busy = 100 * ktime_to_ns(p->busy); + dt = ktime_to_ns(p->time); + if (dt) { + integer = div64_u64(busy, dt); + busy -= integer * dt; + decimal = div64_u64(100 * busy, dt); + } else { + integer = 0; + decimal = 0; + } + + pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", + name, p->engine->name, ce->timeline->seqno, + integer, decimal, + div_u64(p->runtime, 1000 * 1000), + div_u64(ktime_to_ns(p->time), 1000 * 1000)); + } + } + +out: + for (idx = 0; idx < nengines; idx++) { + if (IS_ERR_OR_NULL(ps->ce[idx])) + break; + + intel_context_unpin(ps->ce[idx]); + intel_context_put(ps->ce[idx]); + } + kfree(ps); + + cpu_latency_qos_remove_request(&qos); + kfree(stats); + return err; +} + +static int p_sync0(void *arg) +{ + struct perf_stats *p = arg; + struct intel_engine_cs *engine = p->engine; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + bool busy; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + p->time = ktime_get(); + count = 0; + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + break; + + count++; + } while (!__igt_timeout(end_time, NULL)); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + p->busy); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int p_sync1(void *arg) +{ + struct perf_stats *p = arg; + struct intel_engine_cs *engine = p->engine; + struct i915_request *prev = NULL; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + bool busy; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + p->time = ktime_get(); + count = 0; + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + err = 0; + if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(prev); + prev = rq; + if (err) + break; + + count++; + } while (!__igt_timeout(end_time, NULL)); + i915_request_put(prev); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + p->busy); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int p_many(void *arg) +{ + struct perf_stats *p = arg; + struct intel_engine_cs *engine = p->engine; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + int err = 0; + bool busy; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + count = 0; + p->time = ktime_get(); + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + count++; + } while (!__igt_timeout(end_time, NULL)); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + p->busy); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int perf_parallel_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + p_sync0, + p_sync1, + p_many, + NULL, + }; + const unsigned int nengines = num_uabi_engines(i915); + struct intel_engine_cs *engine; + int (* const *fn)(void *arg); + struct pm_qos_request qos; + struct { + struct perf_stats p; + struct task_struct *tsk; + } *engines; + int err = 0; + + engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL); + if (!engines) + return -ENOMEM; + + cpu_latency_qos_add_request(&qos, 0); + + for (fn = func; *fn; fn++) { + char name[KSYM_NAME_LEN]; + struct igt_live_test t; + unsigned int idx; + + snprintf(name, sizeof(name), "%ps", *fn); + err = igt_live_test_begin(&t, i915, __func__, name); + if (err) + break; + + atomic_set(&i915->selftest.counter, nengines); + + idx = 0; + for_each_uabi_engine(engine, i915) { + intel_engine_pm_get(engine); + + memset(&engines[idx].p, 0, sizeof(engines[idx].p)); + engines[idx].p.engine = engine; + + engines[idx].tsk = kthread_run(*fn, &engines[idx].p, + "igt:%s", engine->name); + if (IS_ERR(engines[idx].tsk)) { + err = PTR_ERR(engines[idx].tsk); + intel_engine_pm_put(engine); + break; + } + get_task_struct(engines[idx++].tsk); + } + + yield(); /* start all threads before we kthread_stop() */ + + idx = 0; + for_each_uabi_engine(engine, i915) { + int status; + + if (IS_ERR(engines[idx].tsk)) + break; + + status = kthread_stop(engines[idx].tsk); + if (status && !err) + err = status; + + intel_engine_pm_put(engine); + put_task_struct(engines[idx++].tsk); + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + + idx = 0; + for_each_uabi_engine(engine, i915) { + struct perf_stats *p = &engines[idx].p; + u64 busy = 100 * ktime_to_ns(p->busy); + u64 dt = ktime_to_ns(p->time); + int integer, decimal; + + if (dt) { + integer = div64_u64(busy, dt); + busy -= integer * dt; + decimal = div64_u64(100 * busy, dt); + } else { + integer = 0; + decimal = 0; + } + + GEM_BUG_ON(engine != p->engine); + pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", + name, engine->name, p->count, integer, decimal, + div_u64(p->runtime, 1000 * 1000), + div_u64(ktime_to_ns(p->time), 1000 * 1000)); + idx++; + } + } + + cpu_latency_qos_remove_request(&qos); + kfree(engines); + return err; +} + +int i915_request_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_series_engines), + SUBTEST(perf_parallel_engines), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 58b5f40a07dd..af89c7fc8f59 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -173,7 +173,7 @@ static int igt_vma_create(void *arg) } nc = 0; - for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { + for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx) diff --git a/drivers/gpu/drm/i915/selftests/librapl.c b/drivers/gpu/drm/i915/selftests/librapl.c new file mode 100644 index 000000000000..58710ac3f979 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/librapl.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <asm/msr.h> + +#include "librapl.h" + +u64 librapl_energy_uJ(void) +{ + unsigned long long power; + u32 units; + + if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) + return 0; + + units = (power & 0x1f00) >> 8; + + if (rdmsrl_safe(MSR_PP1_ENERGY_STATUS, &power)) + return 0; + + return (1000000 * power) >> units; /* convert to uJ */ +} diff --git a/drivers/gpu/drm/i915/selftests/librapl.h b/drivers/gpu/drm/i915/selftests/librapl.h new file mode 100644 index 000000000000..887f3e91dd05 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/librapl.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef SELFTEST_LIBRAPL_H +#define SELFTEST_LIBRAPL_H + +#include <linux/types.h> + +u64 librapl_energy_uJ(void); + +#endif /* SELFTEST_LIBRAPL_H */ diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index 2c27627b6659..f15b20da5315 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -1211,8 +1211,7 @@ static int atom_execute_table_locked(struct atom_context *ctx, int index, uint32 SDEBUG("<<\n"); free: - if (ws) - kfree(ectx.ws); + kfree(ectx.ws); return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 58176db85952..95006cbf42c3 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -828,7 +828,7 @@ int radeon_enable_vblank_kms(struct drm_crtc *crtc) unsigned long irqflags; int r; - if (pipe < 0 || pipe >= rdev->num_crtc) { + if (pipe >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", pipe); return -EINVAL; } @@ -854,7 +854,7 @@ void radeon_disable_vblank_kms(struct drm_crtc *crtc) struct radeon_device *rdev = dev->dev_private; unsigned long irqflags; - if (pipe < 0 || pipe >= rdev->num_crtc) { + if (pipe >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", pipe); return; } diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 4f6676428c5c..b6be62356d34 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -251,7 +251,7 @@ struct kfd_memory_exception_failure { __u32 imprecise; /* Can't determine the exact fault address */ }; -/* memory exception data*/ +/* memory exception data */ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; |