diff options
Diffstat (limited to 'drivers/gpu/drm')
277 files changed, 7912 insertions, 14344 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index d6e5b7273853..be5e5acc3e39 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -63,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o # add GMC block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 31126df06c8c..1ac81be374dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -68,6 +68,7 @@ #include "amdgpu_vce.h" #include "amdgpu_vcn.h" #include "amdgpu_mn.h" +#include "amdgpu_gmc.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_gart.h" @@ -127,6 +128,7 @@ extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; +extern int amdgpu_emu_mode; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -318,13 +320,6 @@ struct amdgpu_vm_pte_funcs { void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe, uint64_t value, unsigned count, uint32_t incr); - - /* maximum nums of PTEs/PDEs in a single operation */ - uint32_t set_max_nums_pte_pde; - - /* number of dw to reserve per operation */ - unsigned set_pte_pde_num_dw; - /* for linear pte/pde updates without addr mapping */ void (*set_pte_pde)(struct amdgpu_ib *ib, uint64_t pe, @@ -332,28 +327,6 @@ struct amdgpu_vm_pte_funcs { uint32_t incr, uint64_t flags); }; -/* provided by the gmc block */ -struct amdgpu_gart_funcs { - /* flush the vm tlb via mmio */ - void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid); - /* write pte/pde updates using the cpu */ - int (*set_pte_pde)(struct amdgpu_device *adev, - void *cpu_pt_addr, /* cpu addr of page table */ - uint32_t gpu_page_idx, /* pte/pde to update */ - uint64_t addr, /* addr to write into pte/pde */ - uint64_t flags); /* access flags */ - /* enable/disable PRT support */ - void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* set pte flags based per asic */ - uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, - uint32_t flags); - /* get the pde for a given mc addr */ - void (*get_vm_pde)(struct amdgpu_device *adev, int level, - u64 *dst, u64 *flags); - uint32_t (*get_invalidate_req)(unsigned int vmid); -}; - /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ @@ -418,8 +391,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags); -int amdgpu_gem_prime_pin(struct drm_gem_object *obj); -void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); @@ -494,56 +467,6 @@ int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); /* - * VMHUB structures, functions & helpers - */ -struct amdgpu_vmhub { - uint32_t ctx0_ptb_addr_lo32; - uint32_t ctx0_ptb_addr_hi32; - uint32_t vm_inv_eng0_req; - uint32_t vm_inv_eng0_ack; - uint32_t vm_context0_cntl; - uint32_t vm_l2_pro_fault_status; - uint32_t vm_l2_pro_fault_cntl; -}; - -/* - * GPU MC structures, functions & helpers - */ -struct amdgpu_mc { - resource_size_t aper_size; - resource_size_t aper_base; - resource_size_t agp_base; - /* for some chips with <= 32MB we need to lie - * about vram size near mc fb location */ - u64 mc_vram_size; - u64 visible_vram_size; - u64 gart_size; - u64 gart_start; - u64 gart_end; - u64 vram_start; - u64 vram_end; - unsigned vram_width; - u64 real_vram_size; - int vram_mtrr; - u64 mc_mask; - const struct firmware *fw; /* MC firmware */ - uint32_t fw_version; - struct amdgpu_irq_src vm_fault; - uint32_t vram_type; - uint32_t srbm_soft_reset; - bool prt_warning; - uint64_t stolen_size; - /* apertures */ - u64 shared_aperture_start; - u64 shared_aperture_end; - u64 private_aperture_start; - u64 private_aperture_end; - /* protects concurrent invalidation */ - spinlock_t invalidate_lock; - bool translate_further; -}; - -/* * GPU doorbell structures, functions & helpers */ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT @@ -1125,8 +1048,9 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; - unsigned vmid; uint64_t vm_pd_addr; + unsigned vmid; + unsigned pasid; uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; @@ -1288,6 +1212,11 @@ struct amdgpu_asic_funcs { void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes); /* get config memsize register */ u32 (*get_config_memsize)(struct amdgpu_device *adev); + /* flush hdp write queue */ + void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + /* invalidate hdp read cache */ + void (*invalidate_hdp)(struct amdgpu_device *adev, + struct amdgpu_ring *ring); }; /* @@ -1431,7 +1360,7 @@ struct amdgpu_nbio_funcs { u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); u32 (*get_rev_id)(struct amdgpu_device *adev); void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); - void (*hdp_flush)(struct amdgpu_device *adev); + void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); u32 (*get_memsize)(struct amdgpu_device *adev); void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index); @@ -1574,7 +1503,7 @@ struct amdgpu_device { struct amdgpu_clock clock; /* MC */ - struct amdgpu_mc mc; + struct amdgpu_gmc gmc; struct amdgpu_gart gart; struct amdgpu_dummy_page dummy_page; struct amdgpu_vm_manager vm_manager; @@ -1726,6 +1655,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); +int emu_soc_asic_init(struct amdgpu_device *adev); + /* * Registers read & write functions. */ @@ -1838,13 +1769,17 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) -#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) -#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) -#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags)) +#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) +#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) +#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) +#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) +#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) +#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) +#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) -#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags)) #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) @@ -1857,11 +1792,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) -#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) +#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) @@ -1871,7 +1806,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) -#define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc)) #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) #define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h)) @@ -1894,16 +1828,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job, bool force); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); -void amdgpu_update_display_priority(struct amdgpu_device *adev); +void amdgpu_display_update_priority(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_device_vram_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc, u64 base); + struct amdgpu_gmc *mc, u64 base); void amdgpu_device_gart_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc); + struct amdgpu_gmc *mc); int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size); int amdgpu_ttm_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1d605e1c1d66..450426dbed92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -216,8 +216,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, - &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -281,24 +280,29 @@ void get_local_mem_info(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *)kgd; uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size; + resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; memset(mem_info, 0, sizeof(*mem_info)); - if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) { - mem_info->local_mem_size_public = adev->mc.visible_vram_size; - mem_info->local_mem_size_private = adev->mc.real_vram_size - - adev->mc.visible_vram_size; + if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { + mem_info->local_mem_size_public = adev->gmc.visible_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size - + adev->gmc.visible_vram_size; } else { mem_info->local_mem_size_public = 0; - mem_info->local_mem_size_private = adev->mc.real_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size; } - mem_info->vram_width = adev->mc.vram_width; + mem_info->vram_width = adev->gmc.vram_width; pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n", - &adev->mc.aper_base, &aper_limit, + &adev->gmc.aper_base, &aper_limit, mem_info->local_mem_size_public, mem_info->local_mem_size_private); + if (amdgpu_emu_mode == 1) { + mem_info->mem_clk_max = 100; + return; + } + if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; else @@ -319,6 +323,9 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* the sclk is in quantas of 10kHz */ + if (amdgpu_emu_mode == 1) + return 100; + if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index e2c3c5ec42d1..c53095b3b0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -568,6 +568,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { /* HG _PR3 doesn't seem to work on this A+A weston board */ { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 63ec1e1bb6aa..2fb299afc12b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, n = AMDGPU_BENCHMARK_ITERATIONS; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, - NULL, 0, &sobj); + NULL, &sobj); if (r) { goto out_cleanup; } @@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, goto out_cleanup; } r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, - NULL, 0, &dobj); + NULL, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 59089e027f4d..92be7f6de197 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -233,8 +233,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, for (i = 0; i < list->num_entries; i++) { unsigned priority = list->array[i].priority; - list_add_tail(&list->array[i].tv.head, - &bucket[priority]); + if (!list->array[i].robj->parent) + list_add_tail(&list->array[i].tv.head, + &bucket[priority]); + list->array[i].user_pages = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 4466f3535e2d..dc3360b16bda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -109,7 +109,7 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, *handle = 0; ret = amdgpu_bo_create(adev, size, align, true, domain, flags, - NULL, NULL, 0, &obj); + NULL, NULL, &obj); if (ret) { DRM_ERROR("(%d) bo create failed\n", ret); return ret; @@ -953,6 +953,11 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, (amdgpu_crtc->v_border * 2); mode_info->vblank_time_us = vblank_lines * line_time_us; mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); + /* we have issues with mclk switching with refresh rates + * over 120 hz on the non-DC code. + */ + if (mode_info->refresh_rate > 120) + mode_info->vblank_time_us = 0; mode_info = NULL; } } @@ -1187,6 +1192,18 @@ static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device, return amdgpu_cgs_acpi_eval_object(cgs_device, &info); } +static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device, + int min_temperature, + int max_temperature) +{ + CGS_FUNC_ADEV; + + adev->pm.dpm.thermal.min_temp = min_temperature; + adev->pm.dpm.thermal.max_temp = max_temperature; + + return 0; +} + static const struct cgs_ops amdgpu_cgs_ops = { .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, .free_gpu_mem = amdgpu_cgs_free_gpu_mem, @@ -1214,6 +1231,7 @@ static const struct cgs_ops amdgpu_cgs_ops = { .enter_safe_mode = amdgpu_cgs_enter_safe_mode, .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, .register_pp_handle = amdgpu_cgs_register_pp_handle, + .set_temperature_range = amdgpu_cgs_set_temperature_range, }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 8ca3783f2deb..ffc1f6f46913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -877,7 +877,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) ret = connector_status_disconnected; if (amdgpu_connector->ddc_bus) - dret = amdgpu_ddc_probe(amdgpu_connector, false); + dret = amdgpu_display_ddc_probe(amdgpu_connector, false); if (dret) { amdgpu_connector->detected_by_load = false; amdgpu_connector_free_edid(connector); @@ -998,7 +998,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) } if (amdgpu_connector->ddc_bus) - dret = amdgpu_ddc_probe(amdgpu_connector, false); + dret = amdgpu_display_ddc_probe(amdgpu_connector, false); if (dret) { amdgpu_connector->detected_by_load = false; amdgpu_connector_free_edid(connector); @@ -1401,7 +1401,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) /* setup ddc on the bridge */ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder); /* bridge chips are always aux */ - if (amdgpu_ddc_probe(amdgpu_connector, true)) /* try DDC */ + /* try DDC */ + if (amdgpu_display_ddc_probe(amdgpu_connector, true)) ret = connector_status_connected; else if (amdgpu_connector->dac_load_detect) { /* try load detection */ const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; @@ -1421,7 +1422,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) ret = connector_status_connected; } else { /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */ - if (amdgpu_ddc_probe(amdgpu_connector, false)) + if (amdgpu_display_ddc_probe(amdgpu_connector, + false)) ret = connector_status_connected; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e80fc38141b5..eaa3cb0c3ad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -257,7 +257,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, return; } - total_vram = adev->mc.real_vram_size - adev->vram_pin_size; + total_vram = adev->gmc.real_vram_size - adev->vram_pin_size; used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; @@ -302,8 +302,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); /* Do the same for visible VRAM if half of it is free */ - if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { - u64 total_vis_vram = adev->mc.visible_vram_size; + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) { + u64 total_vis_vram = adev->gmc.visible_vram_size; u64 used_vis_vram = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -359,7 +359,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, * to move it. Don't move anything if the threshold is zero. */ if (p->bytes_moved < p->bytes_moved_threshold) { - if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { /* And don't move a CPU_ACCESS_REQUIRED BO to limited * visible VRAM if we've depleted our allowance to do @@ -381,9 +381,9 @@ retry: r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); p->bytes_moved += ctx.bytes_moved; - if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -437,9 +437,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); update_bytes_moved_vis = - adev->mc.visible_vram_size < adev->mc.real_vram_size && + adev->gmc.visible_vram_size < adev->gmc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; + bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); bytes_moved = atomic64_read(&adev->num_bytes_moved) - @@ -542,7 +542,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->uf_entry.robj) + if (p->uf_entry.robj && !p->uf_entry.robj->parent) list_add(&p->uf_entry.tv.head, &p->validated); while (1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 00a50cc5ec9a..d2a5f48c5767 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -544,7 +544,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) * as parameter. */ void amdgpu_device_vram_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc, u64 base) + struct amdgpu_gmc *mc, u64 base) { uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; @@ -570,11 +570,11 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev, * FIXME: when reducing GTT size align new size on power of 2. */ void amdgpu_device_gart_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc) + struct amdgpu_gmc *mc) { u64 size_af, size_bf; - size_af = adev->mc.mc_mask - mc->vram_end; + size_af = adev->gmc.mc_mask - mc->vram_end; size_bf = mc->vram_start; if (size_bf > size_af) { if (mc->gart_size > size_bf) { @@ -608,7 +608,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, */ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) { - u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size); + u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size); u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1; struct pci_bus *root; struct resource *res; @@ -1036,7 +1036,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, if (!ip_block_version) return -EINVAL; - DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks, + DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, ip_block_version->funcs->name); adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; @@ -1310,6 +1310,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) return r; } adev->ip_blocks[i].status.sw = true; + /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { r = amdgpu_device_vram_scratch_init(adev); @@ -1343,8 +1344,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.sw) continue; - /* gmc hw init is done early */ - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) + if (adev->ip_blocks[i].status.hw) continue; r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); if (r) { @@ -1378,6 +1378,9 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) { int i = 0, r; + if (amdgpu_emu_mode == 1) + return 0; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -1483,6 +1486,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } + /* disable all interrupts */ + amdgpu_irq_disable_all(adev); + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) continue; @@ -1701,6 +1707,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_BONAIRE: case CHIP_HAWAII: case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_POLARIS11: @@ -1711,9 +1719,6 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #if defined(CONFIG_DRM_AMD_DC_PRE_VEGA) return amdgpu_dc != 0; #endif - case CHIP_KABINI: - case CHIP_MULLINS: - return amdgpu_dc > 0; case CHIP_VEGA10: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case CHIP_RAVEN: @@ -1768,14 +1773,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->flags = flags; adev->asic_type = flags & AMD_ASIC_MASK; adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; - adev->mc.gart_size = 512 * 1024 * 1024; + if (amdgpu_emu_mode == 1) + adev->usec_timeout *= 2; + adev->gmc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; adev->vm_manager.vm_pte_num_rings = 0; - adev->gart.gart_funcs = NULL; + adev->gmc.gmc_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -1882,6 +1889,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (runtime) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); + if (amdgpu_emu_mode == 1) { + /* post the asic on emulation mode */ + emu_soc_asic_init(adev); + goto fence_driver_init; + } + /* Read BIOS */ if (!amdgpu_get_bios(adev)) { r = -EINVAL; @@ -1934,6 +1947,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_atombios_i2c_init(adev); } +fence_driver_init: /* Fence driver */ r = amdgpu_fence_driver_init(adev); if (r) { @@ -2076,7 +2090,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); - amdgpu_atombios_fini(adev); + + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); + kfree(adev->bios); adev->bios = NULL; if (!pci_is_thunderbolt_attached(adev->pdev)) @@ -2284,14 +2301,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } drm_modeset_unlock_all(dev); - } else { - /* - * There is no equivalent atomic helper to turn on - * display, so we defined our own function for this, - * once suspend resume is supported by the atomic - * framework this will be reworked - */ - amdgpu_dm_display_resume(adev); } } @@ -2726,7 +2735,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (amdgpu_device_has_dc_support(adev)) { if (drm_atomic_helper_resume(adev->ddev, state)) dev_info(adev->dev, "drm resume failed:%d\n", r); - amdgpu_dm_display_resume(adev); } else { drm_helper_resume_force_mode(adev->ddev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 38d47559f098..1eb0861c9147 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -29,6 +29,7 @@ #include "amdgpu_i2c.h" #include "atom.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include <asm/div64.h> #include <linux/pm_runtime.h> @@ -36,7 +37,8 @@ #include <drm/drm_edid.h> #include <drm/drm_fb_helper.h> -static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) +static void amdgpu_display_flip_callback(struct dma_fence *f, + struct dma_fence_cb *cb) { struct amdgpu_flip_work *work = container_of(cb, struct amdgpu_flip_work, cb); @@ -45,8 +47,8 @@ static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) schedule_work(&work->flip_work.work); } -static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, - struct dma_fence **f) +static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work, + struct dma_fence **f) { struct dma_fence *fence= *f; @@ -55,14 +57,15 @@ static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, *f = NULL; - if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback)) + if (!dma_fence_add_callback(fence, &work->cb, + amdgpu_display_flip_callback)) return true; dma_fence_put(fence); return false; } -static void amdgpu_flip_work_func(struct work_struct *__work) +static void amdgpu_display_flip_work_func(struct work_struct *__work) { struct delayed_work *delayed_work = container_of(__work, struct delayed_work, work); @@ -76,20 +79,20 @@ static void amdgpu_flip_work_func(struct work_struct *__work) unsigned i; int vpos, hpos; - if (amdgpu_flip_handle_fence(work, &work->excl)) + if (amdgpu_display_flip_handle_fence(work, &work->excl)) return; for (i = 0; i < work->shared_count; ++i) - if (amdgpu_flip_handle_fence(work, &work->shared[i])) + if (amdgpu_display_flip_handle_fence(work, &work->shared[i])) return; /* Wait until we're out of the vertical blank period before the one * targeted by the flip */ if (amdgpu_crtc->enabled && - (amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0, - &vpos, &hpos, NULL, NULL, - &crtc->hwmode) + (amdgpu_display_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0, + &vpos, &hpos, NULL, NULL, + &crtc->hwmode) & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && (int)(work->target_vblank - @@ -117,7 +120,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work) /* * Handle unpin events outside the interrupt handler proper. */ -static void amdgpu_unpin_work_func(struct work_struct *__work) +static void amdgpu_display_unpin_work_func(struct work_struct *__work) { struct amdgpu_flip_work *work = container_of(__work, struct amdgpu_flip_work, unpin_work); @@ -139,11 +142,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) kfree(work); } -int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, uint32_t target, - struct drm_modeset_acquire_ctx *ctx) +int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags, uint32_t target, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; @@ -162,8 +165,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, if (work == NULL) return -ENOMEM; - INIT_DELAYED_WORK(&work->flip_work, amdgpu_flip_work_func); - INIT_WORK(&work->unpin_work, amdgpu_unpin_work_func); + INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func); + INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func); work->event = event; work->adev = adev; @@ -189,7 +192,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base); + r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -228,7 +231,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, /* update crtc fb */ crtc->primary->fb = fb; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - amdgpu_flip_work_func(&work->flip_work.work); + amdgpu_display_flip_work_func(&work->flip_work.work); return 0; pflip_cleanup: @@ -254,8 +257,8 @@ cleanup: return r; } -int amdgpu_crtc_set_config(struct drm_mode_set *set, - struct drm_modeset_acquire_ctx *ctx) +int amdgpu_display_crtc_set_config(struct drm_mode_set *set, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev; struct amdgpu_device *adev; @@ -352,7 +355,7 @@ static const char *hpd_names[6] = { "HPD6", }; -void amdgpu_print_display_setup(struct drm_device *dev) +void amdgpu_display_print_display_setup(struct drm_device *dev) { struct drm_connector *connector; struct amdgpu_connector *amdgpu_connector; @@ -429,11 +432,11 @@ void amdgpu_print_display_setup(struct drm_device *dev) } /** - * amdgpu_ddc_probe + * amdgpu_display_ddc_probe * */ -bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, - bool use_aux) +bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, + bool use_aux) { u8 out = 0x0; u8 buf[8]; @@ -479,7 +482,7 @@ bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, return true; } -static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb) +static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); @@ -488,9 +491,10 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb) kfree(amdgpu_fb); } -static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) +static int amdgpu_display_user_framebuffer_create_handle( + struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) { struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); @@ -498,15 +502,28 @@ static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb, } static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { - .destroy = amdgpu_user_framebuffer_destroy, - .create_handle = amdgpu_user_framebuffer_create_handle, + .destroy = amdgpu_display_user_framebuffer_destroy, + .create_handle = amdgpu_display_user_framebuffer_create_handle, }; -int -amdgpu_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev) +{ + uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; + +#if defined(CONFIG_DRM_AMD_DC) + if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN && + adev->flags & AMD_IS_APU && + amdgpu_device_asic_has_dc_support(adev->asic_type)) + domain |= AMDGPU_GEM_DOMAIN_GTT; +#endif + + return domain; +} + +int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { int ret; rfb->obj = obj; @@ -520,9 +537,9 @@ amdgpu_framebuffer_init(struct drm_device *dev, } struct drm_framebuffer * -amdgpu_user_framebuffer_create(struct drm_device *dev, - struct drm_file *file_priv, - const struct drm_mode_fb_cmd2 *mode_cmd) +amdgpu_display_user_framebuffer_create(struct drm_device *dev, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj; struct amdgpu_framebuffer *amdgpu_fb; @@ -547,7 +564,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); } - ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); + ret = amdgpu_display_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); drm_gem_object_put_unlocked(obj); @@ -558,7 +575,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, } const struct drm_mode_config_funcs amdgpu_mode_funcs = { - .fb_create = amdgpu_user_framebuffer_create, + .fb_create = amdgpu_display_user_framebuffer_create, .output_poll_changed = drm_fb_helper_output_poll_changed, }; @@ -580,7 +597,7 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = { AMDGPU_FMT_DITHER_ENABLE, "on" }, }; -int amdgpu_modeset_create_props(struct amdgpu_device *adev) +int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) { int sz; @@ -629,7 +646,7 @@ int amdgpu_modeset_create_props(struct amdgpu_device *adev) return 0; } -void amdgpu_update_display_priority(struct amdgpu_device *adev) +void amdgpu_display_update_priority(struct amdgpu_device *adev) { /* adjustment options for the display watermarks */ if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2)) @@ -639,7 +656,7 @@ void amdgpu_update_display_priority(struct amdgpu_device *adev) } -static bool is_hdtv_mode(const struct drm_display_mode *mode) +static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode) { /* try and guess if this is a tv or a monitor */ if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */ @@ -651,9 +668,9 @@ static bool is_hdtv_mode(const struct drm_display_mode *mode) return false; } -bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) +bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; struct drm_encoder *encoder; @@ -696,7 +713,7 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && - is_hdtv_mode(mode)))) { + amdgpu_display_is_hdtv_mode(mode)))) { if (amdgpu_encoder->underscan_hborder != 0) amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; else @@ -764,10 +781,10 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, * unknown small number of scanlines wrt. real scanout position. * */ -int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, + unsigned int pipe, unsigned int flags, int *vpos, + int *hpos, ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { u32 vbl = 0, position = 0; int vbl_start, vbl_end, vtotal, ret = 0; @@ -859,7 +876,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, return ret; } -int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc) +int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc) { if (crtc < 0 || crtc >= adev->mode_info.num_crtc) return AMDGPU_CRTC_IRQ_NONE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 0bcb6c6e0ca9..2b11d808f297 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,9 +23,10 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ +uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev); struct drm_framebuffer * -amdgpu_user_framebuffer_create(struct drm_device *dev, - struct drm_file *file_priv, - const struct drm_mode_fb_cmd2 *mode_cmd); +amdgpu_display_user_framebuffer_create(struct drm_device *dev, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index a8437a3296a6..bd745a4fae0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -265,9 +265,6 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_read_sensor(adev, idx, value, size) \ ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size))) -#define amdgpu_dpm_get_temperature(adev) \ - ((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle)) - #define amdgpu_dpm_set_fan_control_mode(adev, m) \ ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m))) @@ -328,8 +325,8 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_set_mclk_od(adev, value) \ ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value)) -#define amdgpu_dpm_dispatch_task(adev, task_id, input, output) \ - ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output)) +#define amdgpu_dpm_dispatch_task(adev, task_id, user_state) \ + ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (user_state)) #define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \ ((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal))) @@ -366,6 +363,22 @@ enum amdgpu_pcie_gen { (adev)->powerplay.pp_handle, virtual_addr_low, \ virtual_addr_hi, mc_addr_low, mc_addr_hi, size) +#define amdgpu_dpm_get_power_profile_mode(adev, buf) \ + ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ + (adev)->powerplay.pp_handle, buf)) + +#define amdgpu_dpm_set_power_profile_mode(adev, parameter, size) \ + ((adev)->powerplay.pp_funcs->set_power_profile_mode(\ + (adev)->powerplay.pp_handle, parameter, size)) + +#define amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, size) \ + ((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\ + (adev)->powerplay.pp_handle, type, parameter, size)) + +#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \ + ((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \ + (adev)->powerplay.pp_handle)) + struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 50afcf65181a..88ec9280a67a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -73,9 +73,11 @@ * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl * - 3.23.0 - Add query for VRAM lost counter + * - 3.24.0 - Add high priority compute support for gfx9 + * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 23 +#define KMS_DRIVER_MINOR 25 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -119,7 +121,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -uint amdgpu_pp_feature_mask = 0xffffffff; +uint amdgpu_pp_feature_mask = 0x3fff; int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; @@ -129,6 +131,7 @@ int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ +int amdgpu_emu_mode = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -284,6 +287,9 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); +MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable"); +module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); + #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -576,6 +582,11 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, struct drm_device *dev; unsigned long flags = ent->driver_data; int ret, retry = 0; + bool supports_atomic = false; + + if (!amdgpu_virtual_display && + amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) + supports_atomic = true; if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { DRM_INFO("This hardware requires experimental hardware support.\n" @@ -596,6 +607,13 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) return ret; + /* warn the user if they mix atomic and non-atomic capable GPUs */ + if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic) + DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n"); + /* support atomic early so the atomic debugfs stuff gets created */ + if (supports_atomic) + kms_driver.driver_features |= DRIVER_ATOMIC; + dev = drm_dev_alloc(&kms_driver, &pdev->dev); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -835,8 +853,8 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode) { - return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, - stime, etime, mode); + return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, + stime, etime, mode); } static struct drm_driver kms_driver = { @@ -854,9 +872,6 @@ static struct drm_driver kms_driver = { .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, .get_scanout_position = amdgpu_get_crtc_scanout_position, - .irq_preinstall = amdgpu_irq_preinstall, - .irq_postinstall = amdgpu_irq_postinstall, - .irq_uninstall = amdgpu_irq_uninstall, .irq_handler = amdgpu_irq_handler, .ioctls = amdgpu_ioctls_kms, .gem_free_object_unlocked = amdgpu_gem_object_free, @@ -869,9 +884,7 @@ static struct drm_driver kms_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = amdgpu_gem_prime_export, - .gem_prime_import = drm_gem_prime_import, - .gem_prime_pin = amdgpu_gem_prime_pin, - .gem_prime_unpin = amdgpu_gem_prime_unpin, + .gem_prime_import = amdgpu_gem_prime_import, .gem_prime_res_obj = amdgpu_gem_prime_res_obj, .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index ff3e9beb7d19..12063019751b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -38,6 +38,8 @@ #include <linux/vga_switcheroo.h> +#include "amdgpu_display.h" + /* object hierarchy - this contains a helper + a amdgpu fb the helper contains a pointer to amdgpu framebuffer baseclass. @@ -124,7 +126,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, struct drm_gem_object *gobj = NULL; struct amdgpu_bo *abo = NULL; bool fb_tiled = false; /* useful for testing */ - u32 tiling_flags = 0; + u32 tiling_flags = 0, domain; int ret; int aligned_size, size; int height = mode_cmd->height; @@ -135,12 +137,12 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); + domain = amdgpu_display_framebuffer_domains(adev); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, - AMDGPU_GEM_DOMAIN_VRAM, + ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED, @@ -166,7 +168,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, } - ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL); + ret = amdgpu_bo_pin(abo, domain, NULL); if (ret) { amdgpu_bo_unreserve(abo); goto out_unref; @@ -225,7 +227,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info->par = rfbdev; info->skip_vt_switch = true; - ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj); + ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb, + &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto out; @@ -242,8 +245,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info->fbops = &amdgpufb_ops; - tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start; - info->fix.smem_start = adev->mc.aper_base + tmp; + tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start; + info->fix.smem_start = adev->gmc.aper_base + tmp; info->fix.smem_len = amdgpu_bo_size(abo); info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); @@ -252,7 +255,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup aperture base/size for vesafb takeover */ info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base; - info->apertures->ranges[0].size = adev->mc.aper_size; + info->apertures->ranges[0].size = adev->gmc.aper_size; /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ @@ -262,7 +265,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, } DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start); - DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->mc.aper_base); + DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base); DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo)); DRM_INFO("fb depth is %d\n", fb->format->depth); DRM_INFO(" pitch is %d\n", fb->pitches[0]); @@ -319,7 +322,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev) return 0; /* select 8 bpp console on low vram cards */ - if (adev->mc.real_vram_size <= (32*1024*1024)) + if (adev->gmc.real_vram_size <= (32*1024*1024)) bpp_sel = 8; rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 0a4f34afaaaa..008eaee57114 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -120,7 +120,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &adev->gart.robj); + NULL, NULL, &adev->gart.robj); if (r) { return r; } @@ -241,13 +241,14 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, continue; for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, - t, page_base, flags); + amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr, + t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; } } mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); + amdgpu_gmc_flush_gpu_tlb(adev, 0); return 0; } @@ -279,7 +280,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, for (i = 0; i < pages; i++) { page_base = dma_addr[i]; for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags); + amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; } } @@ -329,7 +330,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, return r; mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); + amdgpu_gmc_flush_gpu_tlb(adev, 0); return 0; } @@ -357,8 +359,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev) if (r) return r; /* Compute table size */ - adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE; - adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE; + adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE; + adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE; DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index d4a43302c2be..456295c00291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -31,7 +31,6 @@ */ struct amdgpu_device; struct amdgpu_bo; -struct amdgpu_gart_funcs; #define AMDGPU_GPU_PAGE_SIZE 4096 #define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1) @@ -52,8 +51,6 @@ struct amdgpu_gart { /* Asic default pte flags */ uint64_t gart_pte_flags; - - const struct amdgpu_gart_funcs *gart_funcs; }; int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index e48b4ec88c8c..55a840ae6d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -60,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, retry: r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, - flags, NULL, resv, 0, &bo); + flags, NULL, resv, &bo); if (r) { if (r != -ERESTARTSYS) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { @@ -523,12 +523,13 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; if (operation == AMDGPU_VA_OP_MAP || - operation == AMDGPU_VA_OP_REPLACE) + operation == AMDGPU_VA_OP_REPLACE) { r = amdgpu_vm_bo_update(adev, bo_va, false); + if (r) + goto error; + } r = amdgpu_vm_update_directories(adev, vm); - if (r) - goto error; error: if (r && r != -ERESTARTSYS) @@ -634,7 +635,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, if (r) goto error_backoff; - va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -654,7 +655,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, if (r) goto error_backoff; - va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h new file mode 100644 index 000000000000..893c2490b783 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -0,0 +1,112 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +#ifndef __AMDGPU_GMC_H__ +#define __AMDGPU_GMC_H__ + +#include <linux/types.h> + +#include "amdgpu_irq.h" + +struct firmware; + +/* + * VMHUB structures, functions & helpers + */ +struct amdgpu_vmhub { + uint32_t ctx0_ptb_addr_lo32; + uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_req; + uint32_t vm_inv_eng0_ack; + uint32_t vm_context0_cntl; + uint32_t vm_l2_pro_fault_status; + uint32_t vm_l2_pro_fault_cntl; +}; + +/* + * GPU MC structures, functions & helpers + */ +struct amdgpu_gmc_funcs { + /* flush the vm tlb via mmio */ + void (*flush_gpu_tlb)(struct amdgpu_device *adev, + uint32_t vmid); + /* flush the vm tlb via ring */ + uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, + uint64_t pd_addr); + /* Change the VMID -> PASID mapping */ + void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid); + /* write pte/pde updates using the cpu */ + int (*set_pte_pde)(struct amdgpu_device *adev, + void *cpu_pt_addr, /* cpu addr of page table */ + uint32_t gpu_page_idx, /* pte/pde to update */ + uint64_t addr, /* addr to write into pte/pde */ + uint64_t flags); /* access flags */ + /* enable/disable PRT support */ + void (*set_prt)(struct amdgpu_device *adev, bool enable); + /* set pte flags based per asic */ + uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, + uint32_t flags); + /* get the pde for a given mc addr */ + void (*get_vm_pde)(struct amdgpu_device *adev, int level, + u64 *dst, u64 *flags); +}; + +struct amdgpu_gmc { + resource_size_t aper_size; + resource_size_t aper_base; + /* for some chips with <= 32MB we need to lie + * about vram size near mc fb location */ + u64 mc_vram_size; + u64 visible_vram_size; + u64 gart_size; + u64 gart_start; + u64 gart_end; + u64 vram_start; + u64 vram_end; + unsigned vram_width; + u64 real_vram_size; + int vram_mtrr; + u64 mc_mask; + const struct firmware *fw; /* MC firmware */ + uint32_t fw_version; + struct amdgpu_irq_src vm_fault; + uint32_t vram_type; + uint32_t srbm_soft_reset; + bool prt_warning; + uint64_t stolen_size; + /* apertures */ + u64 shared_aperture_start; + u64 shared_aperture_end; + u64 private_aperture_start; + u64 private_aperture_end; + /* protects concurrent invalidation */ + spinlock_t invalidate_lock; + bool translate_further; + + const struct amdgpu_gmc_funcs *gmc_funcs; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index e14ab34d8262..da7b1b92d9cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -56,7 +56,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, return -ENOMEM; start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; - size = (adev->mc.gart_size >> PAGE_SHIFT) - start; + size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); atomic64_set(&mgr->available, p_size); @@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) { struct amdgpu_gtt_mgr *mgr = man->priv; - + spin_lock(&mgr->lock); drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); kfree(mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a162d87ca0c8..8ea342dc6376 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -184,12 +184,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_hdp_flush #ifdef CONFIG_X86_64 - && !(adev->flags & AMD_IS_APU) + if (!(adev->flags & AMD_IS_APU)) #endif - ) - amdgpu_ring_emit_hdp_flush(ring); + { + if (ring->funcs->emit_hdp_flush) + amdgpu_ring_emit_hdp_flush(ring); + else + amdgpu_asic_flush_hdp(adev, ring); + } skip_preamble = ring->current_ctx == fence_ctx; need_ctx_switch = ring->current_ctx != fence_ctx; @@ -219,12 +222,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ring->funcs->emit_tmz) amdgpu_ring_emit_tmz(ring, false); - if (ring->funcs->emit_hdp_invalidate #ifdef CONFIG_X86_64 - && !(adev->flags & AMD_IS_APU) + if (!(adev->flags & AMD_IS_APU)) #endif - ) - amdgpu_ring_emit_hdp_invalidate(ring); + amdgpu_asic_invalidate_hdp(adev, ring); r = amdgpu_fence_emit(ring, f); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 16884a0b677b..a1c78f90eadf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -40,6 +40,12 @@ */ static DEFINE_IDA(amdgpu_pasid_ida); +/* Helper to free pasid from a fence callback */ +struct amdgpu_pasid_cb { + struct dma_fence_cb cb; + unsigned int pasid; +}; + /** * amdgpu_pasid_alloc - Allocate a PASID * @bits: Maximum width of the PASID in bits, must be at least 1 @@ -63,6 +69,9 @@ int amdgpu_pasid_alloc(unsigned int bits) break; } + if (pasid >= 0) + trace_amdgpu_pasid_allocated(pasid); + return pasid; } @@ -72,9 +81,86 @@ int amdgpu_pasid_alloc(unsigned int bits) */ void amdgpu_pasid_free(unsigned int pasid) { + trace_amdgpu_pasid_freed(pasid); ida_simple_remove(&amdgpu_pasid_ida, pasid); } +static void amdgpu_pasid_free_cb(struct dma_fence *fence, + struct dma_fence_cb *_cb) +{ + struct amdgpu_pasid_cb *cb = + container_of(_cb, struct amdgpu_pasid_cb, cb); + + amdgpu_pasid_free(cb->pasid); + dma_fence_put(fence); + kfree(cb); +} + +/** + * amdgpu_pasid_free_delayed - free pasid when fences signal + * + * @resv: reservation object with the fences to wait for + * @pasid: pasid to free + * + * Free the pasid only after all the fences in resv are signaled. + */ +void amdgpu_pasid_free_delayed(struct reservation_object *resv, + unsigned int pasid) +{ + struct dma_fence *fence, **fences; + struct amdgpu_pasid_cb *cb; + unsigned count; + int r; + + r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences); + if (r) + goto fallback; + + if (count == 0) { + amdgpu_pasid_free(pasid); + return; + } + + if (count == 1) { + fence = fences[0]; + kfree(fences); + } else { + uint64_t context = dma_fence_context_alloc(1); + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, context, + 1, false); + if (!array) { + kfree(fences); + goto fallback; + } + fence = &array->base; + } + + cb = kmalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) { + /* Last resort when we are OOM */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + amdgpu_pasid_free(pasid); + } else { + cb->pasid = pasid; + if (dma_fence_add_callback(fence, &cb->cb, + amdgpu_pasid_free_cb)) + amdgpu_pasid_free_cb(fence, &cb->cb); + } + + return; + +fallback: + /* Not enough memory for the delayed delete, as last resort + * block for all the fences to complete. + */ + reservation_object_wait_timeout_rcu(resv, true, false, + MAX_SCHEDULE_TIMEOUT); + amdgpu_pasid_free(pasid); +} + /* * VMID manager * @@ -96,164 +182,185 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, atomic_read(&adev->gpu_reset_counter); } -/* idr_mgr->lock must be held */ -static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm, - struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; - uint64_t fence_context = adev->fence_context + ring->idx; - struct amdgpu_vmid *id = vm->reserved_vmid[vmhub]; - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct dma_fence *updates = sync->last_vm_update; - int r = 0; - struct dma_fence *flushed, *tmp; - bool needs_flush = vm->use_cpu_for_update; - - flushed = id->flushed_updates; - if ((amdgpu_vmid_had_gpu_reset(adev, id)) || - (atomic64_read(&id->owner) != vm->entity.fence_context) || - (job->vm_pd_addr != id->pd_gpu_addr) || - (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) || - (!id->last_flush || (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush)))) { - needs_flush = true; - /* to prevent one context starved by another context */ - id->pd_gpu_addr = 0; - tmp = amdgpu_sync_peek_fence(&id->active, ring); - if (tmp) { - r = amdgpu_sync_fence(adev, sync, tmp, false); - return r; - } - } - - /* Good we can use this VMID. Remember this submission as - * user of the VMID. - */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); - if (r) - goto out; - - if (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - } - id->pd_gpu_addr = job->vm_pd_addr; - atomic64_set(&id->owner, vm->entity.fence_context); - job->vm_needs_flush = needs_flush; - if (needs_flush) { - dma_fence_put(id->last_flush); - id->last_flush = NULL; - } - job->vmid = id - id_mgr->ids; - trace_amdgpu_vm_grab_id(vm, ring, job); -out: - return r; -} - /** - * amdgpu_vm_grab_id - allocate the next free VMID + * amdgpu_vm_grab_idle - grab idle VMID * * @vm: vm to allocate id for * @ring: ring we want to submit job to * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse + * @idle: resulting idle VMID * - * Allocate an id for the vm, adding fences to the sync obj as necessary. + * Try to find an idle VMID, if none is idle add a fence to wait to the sync + * object. Returns -ENOMEM when we are out of memory. */ -int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job) +static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct amdgpu_vmid **idle) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; - struct amdgpu_vmid *id, *idle; struct dma_fence **fences; unsigned i; - int r = 0; + int r; + + if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) + return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false); - mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) { - r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job); - mutex_unlock(&id_mgr->lock); - return r; - } fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) { - mutex_unlock(&id_mgr->lock); + if (!fences) return -ENOMEM; - } + /* Check if we have an idle VMID */ i = 0; - list_for_each_entry(idle, &id_mgr->ids_lru, list) { - fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); + list_for_each_entry((*idle), &id_mgr->ids_lru, list) { + fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring); if (!fences[i]) break; ++i; } /* If we can't find a idle VMID to use, wait till one becomes available */ - if (&idle->list == &id_mgr->ids_lru) { + if (&(*idle)->list == &id_mgr->ids_lru) { u64 fence_context = adev->vm_manager.fence_context + ring->idx; unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; struct dma_fence_array *array; unsigned j; + *idle = NULL; for (j = 0; j < i; ++j) dma_fence_get(fences[j]); array = dma_fence_array_create(i, fences, fence_context, - seqno, true); + seqno, true); if (!array) { for (j = 0; j < i; ++j) dma_fence_put(fences[j]); kfree(fences); - r = -ENOMEM; - goto error; + return -ENOMEM; } + r = amdgpu_sync_fence(adev, sync, &array->base, false); + dma_fence_put(ring->vmid_wait); + ring->vmid_wait = &array->base; + return r; + } + kfree(fences); - r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); - dma_fence_put(&array->base); - if (r) - goto error; + return 0; +} - mutex_unlock(&id_mgr->lock); - return 0; +/** + * amdgpu_vm_grab_reserved - try to assign reserved VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * @job: job who wants to use the VMID + * + * Try to assign a reserved VMID. + */ +static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job, + struct amdgpu_vmid **id) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct dma_fence *updates = sync->last_vm_update; + bool needs_flush = vm->use_cpu_for_update; + int r = 0; + + *id = vm->reserved_vmid[vmhub]; + if (updates && (*id)->flushed_updates && + updates->context == (*id)->flushed_updates->context && + !dma_fence_is_later(updates, (*id)->flushed_updates)) + updates = NULL; + + if ((*id)->owner != vm->entity.fence_context || + job->vm_pd_addr != (*id)->pd_gpu_addr || + updates || !(*id)->last_flush || + ((*id)->last_flush->context != fence_context && + !dma_fence_is_signaled((*id)->last_flush))) { + struct dma_fence *tmp; + /* to prevent one context starved by another context */ + (*id)->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); + if (tmp) { + *id = NULL; + r = amdgpu_sync_fence(adev, sync, tmp, false); + return r; + } + needs_flush = true; } - kfree(fences); + + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); + if (r) + return r; + + if (updates) { + dma_fence_put((*id)->flushed_updates); + (*id)->flushed_updates = dma_fence_get(updates); + } + job->vm_needs_flush = needs_flush; + return 0; +} + +/** + * amdgpu_vm_grab_used - try to reuse a VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * @job: job who wants to use the VMID + * @id: resulting VMID + * + * Try to reuse a VMID for this submission. + */ +static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job, + struct amdgpu_vmid **id) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + uint64_t fence_context = adev->fence_context + ring->idx; + struct dma_fence *updates = sync->last_vm_update; + int r; job->vm_needs_flush = vm->use_cpu_for_update; + /* Check if we can use a VMID already assigned to this VM */ - list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { - struct dma_fence *flushed; + list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) { bool needs_flush = vm->use_cpu_for_update; + struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if (amdgpu_vmid_had_gpu_reset(adev, id)) - continue; - - if (atomic64_read(&id->owner) != vm->entity.fence_context) + if ((*id)->owner != vm->entity.fence_context) continue; - if (job->vm_pd_addr != id->pd_gpu_addr) + if ((*id)->pd_gpu_addr != job->vm_pd_addr) continue; - if (!id->last_flush || - (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush))) + if (!(*id)->last_flush || + ((*id)->last_flush->context != fence_context && + !dma_fence_is_signaled((*id)->last_flush))) needs_flush = true; - flushed = id->flushed_updates; + flushed = (*id)->flushed_updates; if (updates && (!flushed || dma_fence_is_later(updates, flushed))) needs_flush = true; @@ -261,47 +368,91 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (adev->asic_type < CHIP_VEGA10 && needs_flush) continue; - /* Good we can use this VMID. Remember this submission as + /* Good, we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); if (r) - goto error; + return r; if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); + dma_fence_put((*id)->flushed_updates); + (*id)->flushed_updates = dma_fence_get(updates); } - if (needs_flush) - goto needs_flush; - else - goto no_flush_needed; + job->vm_needs_flush |= needs_flush; + return 0; + } - }; + *id = NULL; + return 0; +} - /* Still no ID to use? Then use the idle one found earlier */ - id = idle; +/** + * amdgpu_vm_grab_id - allocate the next free VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * @job: job who wants to use the VMID + * + * Allocate an id for the vm, adding fences to the sync obj as necessary. + */ +int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync, struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *idle = NULL; + struct amdgpu_vmid *id = NULL; + int r = 0; - /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); - if (r) + mutex_lock(&id_mgr->lock); + r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle); + if (r || !idle) goto error; - id->pd_gpu_addr = job->vm_pd_addr; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - atomic64_set(&id->owner, vm->entity.fence_context); + if (vm->reserved_vmid[vmhub]) { + r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id); + if (r || !id) + goto error; + } else { + r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id); + if (r) + goto error; -needs_flush: - job->vm_needs_flush = true; - dma_fence_put(id->last_flush); - id->last_flush = NULL; + if (!id) { + struct dma_fence *updates = sync->last_vm_update; -no_flush_needed: - list_move_tail(&id->list, &id_mgr->ids_lru); + /* Still no ID to use? Then use the idle one found earlier */ + id = idle; + /* Remember this submission as user of the VMID */ + r = amdgpu_sync_fence(ring->adev, &id->active, + fence, false); + if (r) + goto error; + + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + job->vm_needs_flush = true; + } + + list_move_tail(&id->list, &id_mgr->ids_lru); + } + + id->pd_gpu_addr = job->vm_pd_addr; + id->owner = vm->entity.fence_context; + + if (job->vm_needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } job->vmid = id - id_mgr->ids; + job->pasid = vm->pasid; trace_amdgpu_vm_grab_id(vm, ring, job); error: @@ -370,13 +521,15 @@ void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[vmid]; - atomic64_set(&id->owner, 0); + mutex_lock(&id_mgr->lock); + id->owner = 0; id->gds_base = 0; id->gds_size = 0; id->gws_base = 0; id->gws_size = 0; id->oa_base = 0; id->oa_size = 0; + mutex_unlock(&id_mgr->lock); } /** @@ -454,6 +607,7 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) amdgpu_sync_free(&id->active); dma_fence_put(id->flushed_updates); dma_fence_put(id->last_flush); + dma_fence_put(id->pasid_mapping); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index ad931fa570b3..7625419f0fc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -43,7 +43,7 @@ struct amdgpu_vmid { struct list_head list; struct amdgpu_sync active; struct dma_fence *last_flush; - atomic64_t owner; + uint64_t owner; uint64_t pd_gpu_addr; /* last flushed PD/PT update */ @@ -57,6 +57,9 @@ struct amdgpu_vmid { uint32_t gws_size; uint32_t oa_base; uint32_t oa_size; + + unsigned pasid; + struct dma_fence *pasid_mapping; }; struct amdgpu_vmid_mgr { @@ -69,6 +72,8 @@ struct amdgpu_vmid_mgr { int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(unsigned int pasid); +void amdgpu_pasid_free_delayed(struct reservation_object *resv, + unsigned int pasid); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 29cf10927a92..b8a7dba69595 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -109,7 +109,7 @@ struct amdgpu_iv_entry { unsigned vmid_src; uint64_t timestamp; unsigned timestamp_src; - unsigned pas_id; + unsigned pasid; unsigned pasid_src; unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW]; const uint32_t *iv_entry; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 56bcd59c3399..f6f2a662bb8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -92,7 +92,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) } /* Disable *all* interrupts */ -static void amdgpu_irq_disable_all(struct amdgpu_device *adev) +void amdgpu_irq_disable_all(struct amdgpu_device *adev) { unsigned long irqflags; unsigned i, j, k; @@ -123,55 +123,6 @@ static void amdgpu_irq_disable_all(struct amdgpu_device *adev) } /** - * amdgpu_irq_preinstall - drm irq preinstall callback - * - * @dev: drm dev pointer - * - * Gets the hw ready to enable irqs (all asics). - * This function disables all interrupt sources on the GPU. - */ -void amdgpu_irq_preinstall(struct drm_device *dev) -{ - struct amdgpu_device *adev = dev->dev_private; - - /* Disable *all* interrupts */ - amdgpu_irq_disable_all(adev); - /* Clear bits */ - amdgpu_ih_process(adev); -} - -/** - * amdgpu_irq_postinstall - drm irq preinstall callback - * - * @dev: drm dev pointer - * - * Handles stuff to be done after enabling irqs (all asics). - * Returns 0 on success. - */ -int amdgpu_irq_postinstall(struct drm_device *dev) -{ - dev->max_vblank_count = 0x00ffffff; - return 0; -} - -/** - * amdgpu_irq_uninstall - drm irq uninstall callback - * - * @dev: drm dev pointer - * - * This function disables all interrupt sources on the GPU (all asics). - */ -void amdgpu_irq_uninstall(struct drm_device *dev) -{ - struct amdgpu_device *adev = dev->dev_private; - - if (adev == NULL) { - return; - } - amdgpu_irq_disable_all(adev); -} - -/** * amdgpu_irq_handler - irq handler * * @int irq, void *arg: args @@ -261,6 +212,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) cancel_work_sync(&adev->reset_work); return r; } + adev->ddev->max_vblank_count = 0x00ffffff; DRM_DEBUG("amdgpu: irq initialized.\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 0610cc4a9788..3375ad778edc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -78,9 +78,7 @@ struct amdgpu_irq { uint32_t srbm_soft_reset; }; -void amdgpu_irq_preinstall(struct drm_device *dev); -int amdgpu_irq_postinstall(struct drm_device *dev); -void amdgpu_irq_uninstall(struct drm_device *dev); +void amdgpu_irq_disable_all(struct amdgpu_device *adev); irqreturn_t amdgpu_irq_handler(int irq, void *arg); int amdgpu_irq_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bd6e9a40f421..e851c66cbb5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -191,7 +191,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = 0; break; case AMDGPU_INFO_FW_GMC: - fw_info->ver = adev->mc.fw_version; + fw_info->ver = adev->gmc.fw_version; fw_info->feature = 0; break; case AMDGPU_INFO_FW_GFX_ME: @@ -470,9 +470,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_VRAM_GTT: { struct drm_amdgpu_info_vram_gtt vram_gtt; - vram_gtt.vram_size = adev->mc.real_vram_size; + vram_gtt.vram_size = adev->gmc.real_vram_size; vram_gtt.vram_size -= adev->vram_pin_size; - vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size; + vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size; vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; vram_gtt.gtt_size *= PAGE_SIZE; @@ -484,17 +484,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_memory_info mem; memset(&mem, 0, sizeof(mem)); - mem.vram.total_heap_size = adev->mc.real_vram_size; + mem.vram.total_heap_size = adev->gmc.real_vram_size; mem.vram.usable_heap_size = - adev->mc.real_vram_size - adev->vram_pin_size; + adev->gmc.real_vram_size - adev->vram_pin_size; mem.vram.heap_usage = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = - adev->mc.visible_vram_size; + adev->gmc.visible_vram_size; mem.cpu_accessible_vram.usable_heap_size = - adev->mc.visible_vram_size - + adev->gmc.visible_vram_size - (adev->vram_pin_size - adev->invisible_pin_size); mem.cpu_accessible_vram.heap_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -580,11 +580,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; + vm_size -= AMDGPU_VA_RESERVED_SIZE; + + /* Older VCE FW versions are buggy and can handle only 40bits */ + if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45) + vm_size = min(vm_size, 1ULL << 40); + dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_max = min(vm_size, AMDGPU_VA_HOLE_START); - vm_size -= AMDGPU_VA_RESERVED_SIZE; if (vm_size > AMDGPU_VA_HOLE_START) { dev_info.high_va_offset = AMDGPU_VA_HOLE_END; dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size; @@ -599,8 +604,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], sizeof(adev->gfx.cu_info.bitmap)); - dev_info.vram_type = adev->mc.vram_type; - dev_info.vram_bit_width = adev->mc.vram_width; + dev_info.vram_type = adev->gmc.vram_type; + dev_info.vram_bit_width = adev->gmc.vram_width; dev_info.vce_harvest_config = adev->vce.harvest_config; dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; @@ -758,6 +763,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -EINVAL; } break; + case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK: + /* get stable pstate sclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; + case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK: + /* get stable pstate mclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->sensor_info.type); @@ -805,7 +828,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv; - int r; + int r, pasid; file_priv->driver_priv = NULL; @@ -819,28 +842,25 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } - r = amdgpu_vm_init(adev, &fpriv->vm, - AMDGPU_VM_CONTEXT_GFX, 0); - if (r) { - kfree(fpriv); - goto out_suspend; + pasid = amdgpu_pasid_alloc(16); + if (pasid < 0) { + dev_warn(adev->dev, "No more PASIDs available!"); + pasid = 0; } + r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid); + if (r) + goto error_pasid; fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); if (!fpriv->prt_va) { r = -ENOMEM; - amdgpu_vm_fini(adev, &fpriv->vm); - kfree(fpriv); - goto out_suspend; + goto error_vm; } if (amdgpu_sriov_vf(adev)) { r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); - if (r) { - amdgpu_vm_fini(adev, &fpriv->vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto error_vm; } mutex_init(&fpriv->bo_list_lock); @@ -849,6 +869,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); file_priv->driver_priv = fpriv; + goto out_suspend; + +error_vm: + amdgpu_vm_fini(adev, &fpriv->vm); + +error_pasid: + if (pasid) + amdgpu_pasid_free(pasid); + + kfree(fpriv); out_suspend: pm_runtime_mark_last_busy(dev->dev); @@ -871,6 +901,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv = file_priv->driver_priv; struct amdgpu_bo_list *list; + struct amdgpu_bo *pd; + unsigned int pasid; int handle; if (!fpriv) @@ -895,7 +927,13 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(adev->virt.csa_obj); } + pasid = fpriv->vm.pasid; + pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); + amdgpu_vm_fini(adev, &fpriv->vm); + if (pasid) + amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); + amdgpu_bo_unref(&pd); idr_for_each_entry(&fpriv->bo_list_handles, list, handle) amdgpu_bo_list_free(list); @@ -947,11 +985,11 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe) */ do { count = amdgpu_display_vblank_get_counter(adev, pipe); - /* Ask amdgpu_get_crtc_scanoutpos to return vpos as - * distance to start of vblank, instead of regular - * vertical scanout pos. + /* Ask amdgpu_display_get_crtc_scanoutpos to return + * vpos as distance to start of vblank, instead of + * regular vertical scanout pos. */ - stat = amdgpu_get_crtc_scanoutpos( + stat = amdgpu_display_get_crtc_scanoutpos( dev, pipe, GET_DISTANCE_TO_VBLANKSTART, &vpos, &hpos, NULL, NULL, &adev->mode_info.crtcs[pipe]->base.hwmode); @@ -992,7 +1030,7 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe) int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe) { struct amdgpu_device *adev = dev->dev_private; - int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe); + int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe); return amdgpu_irq_get(adev, &adev->crtc_irq, idx); } @@ -1008,7 +1046,7 @@ int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe) void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) { struct amdgpu_device *adev = dev->dev_private; - int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe); + int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe); amdgpu_irq_put(adev, &adev->crtc_irq, idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 54f06c959340..d9533bbc467c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -267,8 +267,6 @@ struct amdgpu_display_funcs { void (*bandwidth_update)(struct amdgpu_device *adev); /* get frame count */ u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc); - /* wait for vblank */ - void (*vblank_wait)(struct amdgpu_device *adev, int crtc); /* set backlight level */ void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder, u8 level); @@ -608,7 +606,7 @@ struct amdgpu_mst_connector { #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ ((em) == ATOM_ENCODER_MODE_DP_MST)) -/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ +/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */ #define DRM_SCANOUTPOS_VALID (1 << 0) #define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) #define DRM_SCANOUTPOS_ACCURATE (1 << 2) @@ -627,30 +625,31 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder); struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder); -bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux); +bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, + bool use_aux); void amdgpu_encoder_set_active_device(struct drm_encoder *encoder); -int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode); +int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, + unsigned int pipe, unsigned int flags, int *vpos, + int *hpos, ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); -int amdgpu_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); +int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); void amdgpu_enc_destroy(struct drm_encoder *encoder); void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj); -bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); +bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); void amdgpu_panel_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode); -int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc); +int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc); /* fbdev layer */ int amdgpu_fbdev_init(struct amdgpu_device *adev); @@ -662,15 +661,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled); /* amdgpu_display.c */ -void amdgpu_print_display_setup(struct drm_device *dev); -int amdgpu_modeset_create_props(struct amdgpu_device *adev); -int amdgpu_crtc_set_config(struct drm_mode_set *set, - struct drm_modeset_acquire_ctx *ctx); -int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, uint32_t target, - struct drm_modeset_acquire_ctx *ctx); +void amdgpu_display_print_display_setup(struct drm_device *dev); +int amdgpu_display_modeset_create_props(struct amdgpu_device *adev); +int amdgpu_display_crtc_set_config(struct drm_mode_set *set, + struct drm_modeset_acquire_ctx *ctx); +int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags, uint32_t target, + struct drm_modeset_acquire_ctx *ctx); extern const struct drm_mode_config_funcs amdgpu_mode_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5c4c3e0d527b..969de54b62da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -83,7 +83,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) u32 c = 0; if (domain & AMDGPU_GEM_DOMAIN_VRAM) { - unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT; + unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; places[c].fpfn = 0; places[c].lpfn = 0; @@ -103,7 +103,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; if (flags & AMDGPU_GEM_CREATE_SHADOW) - places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT; + places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT; else places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_TT; @@ -190,7 +190,7 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, r = amdgpu_bo_create(adev, size, align, true, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, bo_ptr); + NULL, NULL, bo_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); @@ -336,7 +336,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, - uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { @@ -372,11 +371,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - r = drm_gem_object_init(adev->ddev, &bo->gem_base, size); - if (unlikely(r)) { - kfree(bo); - return r; - } + drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | @@ -428,9 +423,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, if (unlikely(r != 0)) return r; - if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else @@ -443,7 +438,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -484,7 +479,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_SHADOW, - NULL, bo->tbo.resv, 0, + NULL, bo->tbo.resv, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); @@ -496,22 +491,18 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } -/* init_value will only take effect when flags contains - * AMDGPU_GEM_CREATE_VRAM_CLEARED. - */ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, - uint64_t init_value, struct amdgpu_bo **bo_ptr) { uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain, - parent_flags, sg, resv, init_value, bo_ptr); + parent_flags, sg, resv, bo_ptr); if (r) return r; @@ -832,25 +823,25 @@ static const char *amdgpu_vram_names[] = { int amdgpu_bo_init(struct amdgpu_device *adev) { /* reserve PAT memory space to WC for VRAM */ - arch_io_reserve_memtype_wc(adev->mc.aper_base, - adev->mc.aper_size); + arch_io_reserve_memtype_wc(adev->gmc.aper_base, + adev->gmc.aper_size); /* Add an MTRR for the VRAM */ - adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base, - adev->mc.aper_size); + adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base, + adev->gmc.aper_size); DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", - adev->mc.mc_vram_size >> 20, - (unsigned long long)adev->mc.aper_size >> 20); + adev->gmc.mc_vram_size >> 20, + (unsigned long long)adev->gmc.aper_size >> 20); DRM_INFO("RAM width %dbits %s\n", - adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]); + adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]); return amdgpu_ttm_init(adev); } void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); - arch_phys_wc_del(adev->mc.vram_mtrr); - arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size); + arch_phys_wc_del(adev->gmc.vram_mtrr); + arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); } int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, @@ -980,7 +971,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) size = bo->mem.num_pages << PAGE_SHIFT; offset = bo->mem.start << PAGE_SHIFT; - if ((offset + size) <= adev->mc.visible_vram_size) + if ((offset + size) <= adev->gmc.visible_vram_size) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1003,7 +994,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) offset = bo->mem.start << PAGE_SHIFT; /* this should never happen */ if (bo->mem.mem_type == TTM_PL_VRAM && - (offset + size) > adev->mc.visible_vram_size) + (offset + size) > adev->gmc.visible_vram_size) return -EINVAL; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 33615e2ea2e6..c2b02f5c88d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -206,7 +206,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, - uint64_t init_value, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 01a996c6b802..9e73cbcfce44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -116,7 +116,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, } if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state); } else { mutex_lock(&adev->pm.mutex); adev->pm.dpm.user_state = state; @@ -316,7 +316,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { amdgpu_dpm_dispatch_task(adev, - AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); + AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } } @@ -360,6 +360,90 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return count; } +static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret; + uint32_t parameter_size = 0; + long parameter[64]; + char buf_cpy[128]; + char *tmp_str; + char *sub_str; + const char delimiter[3] = {' ', '\n', '\0'}; + uint32_t type; + + if (count > 127) + return -EINVAL; + + if (*buf == 's') + type = PP_OD_EDIT_SCLK_VDDC_TABLE; + else if (*buf == 'm') + type = PP_OD_EDIT_MCLK_VDDC_TABLE; + else if(*buf == 'r') + type = PP_OD_RESTORE_DEFAULT_TABLE; + else if (*buf == 'c') + type = PP_OD_COMMIT_DPM_TABLE; + else + return -EINVAL; + + memcpy(buf_cpy, buf, count+1); + + tmp_str = buf_cpy; + + while (isspace(*++tmp_str)); + + while (tmp_str[0]) { + sub_str = strsep(&tmp_str, delimiter); + ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); + if (ret) + return -EINVAL; + parameter_size++; + + while (isspace(*tmp_str)) + tmp_str++; + } + + if (adev->powerplay.pp_funcs->odn_edit_dpm_table) + ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, + parameter, parameter_size); + + if (ret) + return -EINVAL; + + if (type == PP_OD_COMMIT_DPM_TABLE) { + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + return count; + } else { + return -EINVAL; + } + } + + return count; +} + +static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t size = 0; + + if (adev->powerplay.pp_funcs->print_clock_levels) { + size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); + size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); + return size; + } else { + return snprintf(buf, PAGE_SIZE, "\n"); + } + +} + static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, struct device_attribute *attr, char *buf) @@ -530,7 +614,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); } else { adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; amdgpu_pm_compute_clocks(adev); @@ -574,7 +658,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); } else { adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; amdgpu_pm_compute_clocks(adev); @@ -584,6 +668,72 @@ fail: return count; } +static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->powerplay.pp_funcs->get_power_profile_mode) + return amdgpu_dpm_get_power_profile_mode(adev, buf); + + return snprintf(buf, PAGE_SIZE, "\n"); +} + + +static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int ret = 0xff; + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t parameter_size = 0; + long parameter[64]; + char *sub_str, buf_cpy[128]; + char *tmp_str; + uint32_t i = 0; + char tmp[2]; + long int profile_mode = 0; + const char delimiter[3] = {' ', '\n', '\0'}; + + tmp[0] = *(buf); + tmp[1] = '\0'; + ret = kstrtol(tmp, 0, &profile_mode); + if (ret) + goto fail; + + if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (count < 2 || count > 127) + return -EINVAL; + while (isspace(*++buf)) + i++; + memcpy(buf_cpy, buf, count-i); + tmp_str = buf_cpy; + while (tmp_str[0]) { + sub_str = strsep(&tmp_str, delimiter); + ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); + if (ret) { + count = -EINVAL; + goto fail; + } + parameter_size++; + while (isspace(*tmp_str)) + tmp_str++; + } + } + parameter[parameter_size] = profile_mode; + if (adev->powerplay.pp_funcs->set_power_profile_mode) + ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + + if (!ret) + return count; +fail: + return -EINVAL; +} + static ssize_t amdgpu_get_pp_power_profile(struct device *dev, char *buf, struct amd_pp_profile *query) { @@ -772,6 +922,12 @@ static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR, static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR, amdgpu_get_pp_compute_power_profile, amdgpu_set_pp_compute_power_profile); +static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR, + amdgpu_get_pp_power_profile_mode, + amdgpu_set_pp_power_profile_mode); +static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, + amdgpu_get_pp_od_clk_voltage, + amdgpu_set_pp_od_clk_voltage); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -779,17 +935,23 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; - int temp; + int r, temp, size = sizeof(temp); /* Can't get temperature when the card is off */ if ((adev->flags & AMD_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!adev->powerplay.pp_funcs->get_temperature) - temp = 0; - else - temp = amdgpu_dpm_get_temperature(adev); + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, + (void *)&temp, &size); + if (r) + return r; return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -834,6 +996,11 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, int err; int value; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + if (!adev->powerplay.pp_funcs->set_fan_control_mode) return -EINVAL; @@ -868,6 +1035,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, int err; u32 value; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + err = kstrtou32(buf, 10, &value); if (err) return err; @@ -891,6 +1063,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + if (adev->powerplay.pp_funcs->get_fan_speed_percent) { err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); if (err) @@ -910,6 +1087,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); if (err) @@ -919,6 +1101,175 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, return sprintf(buf, "%i\n", speed); } +static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + u32 vddgfx; + int r, size = sizeof(vddgfx); + + /* Can't get voltage when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the voltage */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, + (void *)&vddgfx, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", vddgfx); +} + +static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "vddgfx\n"); +} + +static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + u32 vddnb; + int r, size = sizeof(vddnb); + + /* only APUs have vddnb */ + if (adev->flags & AMD_IS_APU) + return -EINVAL; + + /* Can't get voltage when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the voltage */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, + (void *)&vddnb, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", vddnb); +} + +static ssize_t amdgpu_hwmon_show_vddnb_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "vddnb\n"); +} + +static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + struct pp_gpu_power query = {0}; + int r, size = sizeof(query); + unsigned uw; + + /* Can't get power when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the voltage */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, + (void *)&query, &size); + if (r) + return r; + + /* convert to microwatts */ + uw = (query.average_gpu_power >> 8) * 1000000; + + return snprintf(buf, PAGE_SIZE, "%u\n", uw); +} + +static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", 0); +} + +static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + uint32_t limit = 0; + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else { + return snprintf(buf, PAGE_SIZE, "\n"); + } +} + +static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + uint32_t limit = 0; + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else { + return snprintf(buf, PAGE_SIZE, "\n"); + } +} + + +static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int err; + u32 value; + + err = kstrtou32(buf, 10, &value); + if (err) + return err; + + value = value / 1000000; /* convert to Watt */ + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); + if (err) + return err; + } else { + return -EINVAL; + } + + return count; +} + static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); @@ -927,6 +1278,14 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_ static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0); static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0); +static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0); +static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0); +static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0); +static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, @@ -937,6 +1296,14 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_pwm1_min.dev_attr.attr, &sensor_dev_attr_pwm1_max.dev_attr.attr, &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_in0_input.dev_attr.attr, + &sensor_dev_attr_in0_label.dev_attr.attr, + &sensor_dev_attr_in1_input.dev_attr.attr, + &sensor_dev_attr_in1_label.dev_attr.attr, + &sensor_dev_attr_power1_average.dev_attr.attr, + &sensor_dev_attr_power1_cap_max.dev_attr.attr, + &sensor_dev_attr_power1_cap_min.dev_attr.attr, + &sensor_dev_attr_power1_cap.dev_attr.attr, NULL }; @@ -947,9 +1314,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* no skipping for powerplay */ - if (adev->powerplay.cgs_device) - return effective_mode; + /* handle non-powerplay limitations */ + if (!adev->powerplay.cgs_device) { + /* Skip fan attributes if fan is not present */ + if (adev->pm.no_fan && + (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) + return 0; + /* requires powerplay */ + if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) + return 0; + } /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && @@ -961,14 +1338,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) return 0; - /* Skip fan attributes if fan is not present */ - if (adev->pm.no_fan && - (attr == &sensor_dev_attr_pwm1.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) - return 0; - /* mask fan attributes if we have no bindings for this asic to expose */ if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ @@ -982,6 +1351,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ effective_mode &= ~S_IWUSR; + if ((adev->flags & AMD_IS_APU) && + (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| + attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) + return 0; + /* hide max/min values if we can't both query and manage the fan */ if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && !adev->powerplay.pp_funcs->get_fan_speed_percent) && @@ -989,8 +1364,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) return 0; - /* requires powerplay */ - if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) + /* only APUs have vddnb */ + if (!(adev->flags & AMD_IS_APU) && + (attr == &sensor_dev_attr_in1_input.dev_attr.attr || + attr == &sensor_dev_attr_in1_label.dev_attr.attr)) return 0; return effective_mode; @@ -1013,13 +1390,15 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) pm.dpm.thermal.work); /* switch to the thermal state */ enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; + int temp, size = sizeof(temp); if (!adev->pm.dpm_enabled) return; - if (adev->powerplay.pp_funcs->get_temperature) { - int temp = amdgpu_dpm_get_temperature(adev); - + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor && + !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, + (void *)&temp, &size)) { if (temp < adev->pm.dpm.thermal.min_temp) /* switch back the user state */ dpm_state = adev->pm.dpm.user_state; @@ -1319,9 +1698,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if (adev->pm.dpm_enabled == 0) return 0; - if (adev->powerplay.pp_funcs->get_temperature == NULL) - return 0; - adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, DRIVER_NAME, adev, hwmon_groups); @@ -1405,6 +1781,20 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } + ret = device_create_file(adev->dev, + &dev_attr_pp_power_profile_mode); + if (ret) { + DRM_ERROR("failed to create device file " + "pp_power_profile_mode\n"); + return ret; + } + ret = device_create_file(adev->dev, + &dev_attr_pp_od_clk_voltage); + if (ret) { + DRM_ERROR("failed to create device file " + "pp_od_clk_voltage\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -1440,6 +1830,10 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) &dev_attr_pp_gfx_power_profile); device_remove_file(adev->dev, &dev_attr_pp_compute_power_profile); + device_remove_file(adev->dev, + &dev_attr_pp_power_profile_mode); + device_remove_file(adev->dev, + &dev_attr_pp_od_clk_voltage); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -1462,7 +1856,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) } if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); } else { mutex_lock(&adev->pm.mutex); adev->pm.dpm.new_active_crtcs = 0; @@ -1512,6 +1906,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "\t%u MHz (MCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (SCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (PSTATE_SCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (PSTATE_MCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDGFX)\n", value); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index ae9c106979d7..8ce74a1d9966 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -26,9 +26,12 @@ #include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_display.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> +static const struct dma_buf_ops amdgpu_dmabuf_ops; + struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -103,7 +106,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ww_mutex_lock(&resv->lock, NULL); ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo); + AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo); ww_mutex_unlock(&resv->lock); if (ret) return ERR_PTR(ret); @@ -112,49 +115,72 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, return &bo->gem_base; } -int amdgpu_gem_prime_pin(struct drm_gem_object *obj) +static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, + struct device *target_dev, + struct dma_buf_attachment *attach) { + struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - long ret = 0; - - ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return ret; - - /* - * Wait for all shared fences to complete before we switch to future - * use of exclusive fence on this prime shared bo. - */ - ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, - MAX_SCHEDULE_TIMEOUT); - if (unlikely(ret < 0)) { - DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); - amdgpu_bo_unreserve(bo); - return ret; + long r; + + r = drm_gem_map_attach(dma_buf, target_dev, attach); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + goto error_detach; + + + if (dma_buf->ops != &amdgpu_dmabuf_ops) { + /* + * Wait for all shared fences to complete before we switch to future + * use of exclusive fence on this prime shared bo. + */ + r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + true, false, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(r < 0)) { + DRM_DEBUG_PRIME("Fence wait failed: %li\n", r); + goto error_unreserve; + } } /* pin buffer into GTT */ - ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); - if (likely(ret == 0)) + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (r) + goto error_unreserve; + + if (dma_buf->ops != &amdgpu_dmabuf_ops) bo->prime_shared_count++; +error_unreserve: amdgpu_bo_unreserve(bo); - return ret; + +error_detach: + if (r) + drm_gem_map_detach(dma_buf, attach); + return r; } -void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) +static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) { + struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); int ret = 0; ret = amdgpu_bo_reserve(bo, true); if (unlikely(ret != 0)) - return; + goto error; amdgpu_bo_unpin(bo); - if (bo->prime_shared_count) + if (dma_buf->ops != &amdgpu_dmabuf_ops && bo->prime_shared_count) bo->prime_shared_count--; amdgpu_bo_unreserve(bo); + +error: + drm_gem_map_detach(dma_buf, attach); } struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) @@ -164,6 +190,50 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) return bo->tbo.resv; } +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { true, false }; + u32 domain = amdgpu_display_framebuffer_domains(adev); + int ret; + bool reads = (direction == DMA_BIDIRECTIONAL || + direction == DMA_FROM_DEVICE); + + if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT)) + return 0; + + /* move to gtt */ + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret != 0)) + return ret; + + if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + } + + amdgpu_bo_unreserve(bo); + return ret; +} + +static const struct dma_buf_ops amdgpu_dmabuf_ops = { + .attach = amdgpu_gem_map_attach, + .detach = amdgpu_gem_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .begin_cpu_access = amdgpu_gem_begin_cpu_access, + .map = drm_gem_dmabuf_kmap, + .map_atomic = drm_gem_dmabuf_kmap_atomic, + .unmap = drm_gem_dmabuf_kunmap, + .unmap_atomic = drm_gem_dmabuf_kunmap_atomic, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags) @@ -176,7 +246,30 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, return ERR_PTR(-EPERM); buf = drm_gem_prime_export(dev, gobj, flags); - if (!IS_ERR(buf)) + if (!IS_ERR(buf)) { buf->file->f_mapping = dev->anon_inode->i_mapping; + buf->ops = &amdgpu_dmabuf_ops; + } + return buf; } + +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj; + + if (dma_buf->ops == &amdgpu_dmabuf_ops) { + obj = dma_buf->priv; + if (obj->dev == dev) { + /* + * Importing dmabuf exported from out own gem increases + * refcount on gem itself instead of f_count of dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + } + + return drm_gem_prime_import(dev, dma_buf); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2157d4509e84..6e712f12eecd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -51,29 +51,10 @@ static int psp_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: - psp->init_microcode = psp_v3_1_init_microcode; - psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv; - psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos; - psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; - psp->ring_init = psp_v3_1_ring_init; - psp->ring_create = psp_v3_1_ring_create; - psp->ring_stop = psp_v3_1_ring_stop; - psp->ring_destroy = psp_v3_1_ring_destroy; - psp->cmd_submit = psp_v3_1_cmd_submit; - psp->compare_sram_data = psp_v3_1_compare_sram_data; - psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; - psp->mode1_reset = psp_v3_1_mode1_reset; + psp_v3_1_set_psp_funcs(psp); break; case CHIP_RAVEN: - psp->init_microcode = psp_v10_0_init_microcode; - psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; - psp->ring_init = psp_v10_0_ring_init; - psp->ring_create = psp_v10_0_ring_create; - psp->ring_stop = psp_v10_0_ring_stop; - psp->ring_destroy = psp_v10_0_ring_destroy; - psp->cmd_submit = psp_v10_0_cmd_submit; - psp->compare_sram_data = psp_v10_0_compare_sram_data; - psp->mode1_reset = psp_v10_0_mode1_reset; + psp_v10_0_set_psp_funcs(psp); break; default: return -EINVAL; @@ -512,19 +493,8 @@ failed: return ret; } -static bool psp_check_reset(void* handle) +int psp_gpu_reset(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->flags & AMD_IS_APU) - return true; - - return false; -} - -static int psp_reset(void* handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; return psp_mode1_reset(&adev->psp); } @@ -571,9 +541,9 @@ const struct amd_ip_funcs psp_ip_funcs = { .suspend = psp_suspend, .resume = psp_resume, .is_idle = NULL, - .check_soft_reset = psp_check_reset, + .check_soft_reset = NULL, .wait_for_idle = NULL, - .soft_reset = psp_reset, + .soft_reset = NULL, .set_clockgating_state = psp_set_clockgating_state, .set_powergating_state = psp_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index ce4654550416..129209686848 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -33,6 +33,8 @@ #define PSP_ASD_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 +struct psp_context; + enum psp_ring_type { PSP_RING_TYPE__INVALID = 0, @@ -53,12 +55,8 @@ struct psp_ring uint32_t ring_size; }; -struct psp_context +struct psp_funcs { - struct amdgpu_device *adev; - struct psp_ring km_ring; - struct psp_gfx_cmd_resp *cmd; - int (*init_microcode)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); @@ -77,6 +75,15 @@ struct psp_context enum AMDGPU_UCODE_ID ucode_type); bool (*smu_reload_quirk)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp); +}; + +struct psp_context +{ + struct amdgpu_device *adev; + struct psp_ring km_ring; + struct psp_gfx_cmd_resp *cmd; + + const struct psp_funcs *funcs; /* fence buffer */ struct amdgpu_bo *fw_pri_bo; @@ -123,25 +130,25 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; -#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) -#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) -#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type)) -#define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type)) -#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) +#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) +#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) +#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) +#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) +#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ - (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) + (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) #define psp_compare_sram_data(psp, ucode, type) \ - (psp)->compare_sram_data((psp), (ucode), (type)) + (psp)->funcs->compare_sram_data((psp), (ucode), (type)) #define psp_init_microcode(psp) \ - ((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0) + ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0) #define psp_bootloader_load_sysdrv(psp) \ - ((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0) + ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ - ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) + ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ - ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) + ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) #define psp_mode1_reset(psp) \ - ((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false) + ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) extern const struct amd_ip_funcs psp_ip_funcs; @@ -151,4 +158,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; +int psp_gpu_reset(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 13044e66dcaf..e223b0f6417b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -360,6 +360,9 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) amdgpu_debugfs_ring_fini(ring); + dma_fence_put(ring->vmid_wait); + ring->vmid_wait = NULL; + ring->adev->rings[ring->idx] = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 102dad3edf6a..1d0d250cbfdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -128,7 +128,6 @@ struct amdgpu_ring_funcs { void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); - void (*emit_hdp_invalidate)(struct amdgpu_ring *ring); void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, @@ -151,6 +150,8 @@ struct amdgpu_ring_funcs { void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, @@ -195,6 +196,7 @@ struct amdgpu_ring { u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; + struct dma_fence *vmid_wait; bool has_compute_vm_bug; atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 3144400435b7..5ca75a456ad2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -64,7 +64,7 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&sa_manager->flist[i]); r = amdgpu_bo_create(adev, size, align, true, domain, - 0, NULL, NULL, 0, &sa_manager->bo); + 0, NULL, NULL, &sa_manager->bo); if (r) { dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index ed8c3739015b..f3d81b6fb499 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; + n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) if (adev->rings[i]) n -= adev->rings[i]->ring_size; @@ -61,7 +61,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0, - NULL, NULL, 0, &vram_obj); + NULL, NULL, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -82,7 +82,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, 0, gtt_obj + i); + NULL, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; @@ -142,10 +142,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) "0x%16llx/0x%16llx)\n", i, *vram_start, gart_start, (unsigned long long) - (gart_addr - adev->mc.gart_start + + (gart_addr - adev->gmc.gart_start + (void*)gart_start - gtt_map), (unsigned long long) - (vram_addr - adev->mc.vram_start + + (vram_addr - adev->gmc.vram_start + (void*)gart_start - gtt_map)); amdgpu_bo_kunmap(vram_obj); goto out_lclean_unpin; @@ -187,10 +187,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) "0x%16llx/0x%16llx)\n", i, *gart_start, vram_start, (unsigned long long) - (vram_addr - adev->mc.vram_start + + (vram_addr - adev->gmc.vram_start + (void*)vram_start - vram_map), (unsigned long long) - (gart_addr - adev->mc.gart_start + + (gart_addr - adev->gmc.gart_start + (void*)vram_start - vram_map)); amdgpu_bo_kunmap(gtt_obj[i]); goto out_lclean_unpin; @@ -200,7 +200,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(gtt_obj[i]); DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", - gart_addr - adev->mc.gart_start); + gart_addr - adev->gmc.gart_start); continue; out_lclean_unpin: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index cace7a93fc94..532263ab6e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -86,7 +86,7 @@ TRACE_EVENT(amdgpu_iv, __field(unsigned, vmid_src) __field(uint64_t, timestamp) __field(unsigned, timestamp_src) - __field(unsigned, pas_id) + __field(unsigned, pasid) __array(unsigned, src_data, 4) ), TP_fast_assign( @@ -97,16 +97,16 @@ TRACE_EVENT(amdgpu_iv, __entry->vmid_src = iv->vmid_src; __entry->timestamp = iv->timestamp; __entry->timestamp_src = iv->timestamp_src; - __entry->pas_id = iv->pas_id; + __entry->pasid = iv->pasid; __entry->src_data[0] = iv->src_data[0]; __entry->src_data[1] = iv->src_data[1]; __entry->src_data[2] = iv->src_data[2]; __entry->src_data[3] = iv->src_data[3]; ), - TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", + TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n", __entry->client_id, __entry->src_id, __entry->ring_id, __entry->vmid, - __entry->timestamp, __entry->pas_id, + __entry->timestamp, __entry->pasid, __entry->src_data[0], __entry->src_data[1], __entry->src_data[2], __entry->src_data[3]) ); @@ -217,7 +217,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, struct amdgpu_job *job), TP_ARGS(vm, ring, job), TP_STRUCT__entry( - __field(struct amdgpu_vm *, vm) + __field(u32, pasid) __field(u32, ring) __field(u32, vmid) __field(u32, vm_hub) @@ -226,15 +226,15 @@ TRACE_EVENT(amdgpu_vm_grab_id, ), TP_fast_assign( - __entry->vm = vm; + __entry->pasid = vm->pasid; __entry->ring = ring->idx; __entry->vmid = job->vmid; __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), - TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", - __entry->vm, __entry->ring, __entry->vmid, + TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", + __entry->pasid, __entry->ring, __entry->vmid, __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) ); @@ -378,6 +378,28 @@ TRACE_EVENT(amdgpu_vm_flush, __entry->vm_hub,__entry->pd_addr) ); +DECLARE_EVENT_CLASS(amdgpu_pasid, + TP_PROTO(unsigned pasid), + TP_ARGS(pasid), + TP_STRUCT__entry( + __field(unsigned, pasid) + ), + TP_fast_assign( + __entry->pasid = pasid; + ), + TP_printk("pasid=%u", __entry->pasid) +); + +DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated, + TP_PROTO(unsigned pasid), + TP_ARGS(pasid) +); + +DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed, + TP_PROTO(unsigned pasid), + TP_ARGS(pasid) +); + TRACE_EVENT(amdgpu_bo_list_set, TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), TP_ARGS(list, bo), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d897c4c61a01..b372d8d650a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -161,7 +161,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_TT: man->func = &amdgpu_gtt_mgr_func; - man->gpu_offset = adev->mc.gart_start; + man->gpu_offset = adev->gmc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; @@ -169,7 +169,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, case TTM_PL_VRAM: /* "On-card" video ram */ man->func = &amdgpu_vram_mgr_func; - man->gpu_offset = adev->mc.vram_start; + man->gpu_offset = adev->gmc.vram_start; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; @@ -217,9 +217,9 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, adev->mman.buffer_funcs_ring && adev->mman.buffer_funcs_ring->ready == false) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); - } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { - unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; + unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; struct drm_mm_node *node = bo->mem.mm_node; unsigned long pages_left; @@ -638,9 +638,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; /* check if it's visible */ - if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size) + if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size) return -EINVAL; - mem->bus.base = adev->mc.aper_base; + mem->bus.base = adev->gmc.aper_base; mem->bus.is_iomem = true; break; default: @@ -891,7 +891,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) placement.num_busy_placement = 1; placement.busy_placement = &placements; placements.fpfn = 0; - placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | TTM_PL_FLAG_TT; @@ -997,9 +997,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, struct amdgpu_ttm_tt *gtt = (void *)ttm; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); - if (ttm->state != tt_unpopulated) - return 0; - if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) @@ -1212,7 +1209,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); pos = (nodes->start << PAGE_SHIFT) + offset; - while (len && pos < adev->mc.mc_vram_size) { + while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; uint32_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; @@ -1298,7 +1295,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) struct ttm_operation_ctx ctx = { false, false }; int r = 0; int i; - u64 vram_size = adev->mc.visible_vram_size; + u64 vram_size = adev->gmc.visible_vram_size; u64 offset = adev->fw_vram_usage.start_offset; u64 size = adev->fw_vram_usage.size; struct amdgpu_bo *bo; @@ -1312,7 +1309,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, &adev->fw_vram_usage.reserved_bo); if (r) goto error_create; @@ -1387,8 +1384,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } adev->mman.initialized = true; + + /* We opt to avoid OOM on system pages allocations */ + adev->mman.bdev.no_retry = true; + r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, - adev->mc.real_vram_size >> PAGE_SHIFT); + adev->gmc.real_vram_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing VRAM heap.\n"); return r; @@ -1397,11 +1398,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Reduce size of CPU-visible VRAM if requested */ vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024; if (amdgpu_vis_vram_limit > 0 && - vis_vram_limit <= adev->mc.visible_vram_size) - adev->mc.visible_vram_size = vis_vram_limit; + vis_vram_limit <= adev->gmc.visible_vram_size) + adev->gmc.visible_vram_size = vis_vram_limit; /* Change the size here instead of the init above so only lpfn is affected */ - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); /* *The reserved vram for firmware must be pinned to the specified @@ -1412,21 +1413,21 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, NULL, NULL); if (r) return r; DRM_INFO("amdgpu: %uM of VRAM memory ready\n", - (unsigned) (adev->mc.real_vram_size / (1024 * 1024))); + (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); if (amdgpu_gtt_size == -1) { struct sysinfo si; si_meminfo(&si); gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size), + adev->gmc.mc_vram_size), ((uint64_t)si.totalram * si.mem_unit * 3/4)); } else @@ -1559,7 +1560,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); - *addr = adev->mc.gart_start; + *addr = adev->gmc.gart_start; *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; @@ -1677,13 +1678,12 @@ error_free: } int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint64_t src_data, + uint32_t src_data, struct reservation_object *resv, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint32_t max_bytes = 8 * - adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde; + uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct drm_mm_node *mm_node; @@ -1714,9 +1714,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, num_pages -= mm_node->size; ++mm_node; } - - /* num of dwords for each SDMA_OP_PTEPDE cmd */ - num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; + num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; /* for IB padding */ num_dw += 64; @@ -1741,16 +1739,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; - WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8"); - dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], - dst_addr, 0, - cur_size_in_bytes >> 3, 0, - src_data); + amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, + dst_addr, cur_size_in_bytes); dst_addr += cur_size_in_bytes; byte_count -= cur_size_in_bytes; @@ -1811,14 +1805,14 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return -ENXIO; while (size) { unsigned long flags; uint32_t value; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return result; spin_lock_irqsave(&adev->mmio_idx_lock, flags); @@ -1850,14 +1844,14 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return -ENXIO; while (size) { unsigned long flags; uint32_t value; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return result; r = get_user(value, (uint32_t *)buf); @@ -2001,9 +1995,9 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) if (IS_ERR(ent)) return PTR_ERR(ent); if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM) - i_size_write(ent->d_inode, adev->mc.mc_vram_size); + i_size_write(ent->d_inode, adev->gmc.mc_vram_size); else if (ttm_debugfs_entries[count].domain == TTM_PL_TT) - i_size_write(ent->d_inode, adev->mc.gart_size); + i_size_write(ent->d_inode, adev->gmc.gart_size); adev->mman.debugfs_entries[count] = ent; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 167856f6080f..1e275c7b006b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -86,7 +86,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct reservation_object *resv, struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint64_t src_data, + uint32_t src_data, struct reservation_object *resv, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b2eae86bf906..9cd5517a4fa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -952,37 +952,28 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, bool direct, struct dma_fence **fence) { - struct ttm_operation_ctx ctx = { true, false }; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; + struct amdgpu_device *adev = ring->adev; + struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - struct amdgpu_device *adev = ring->adev; - uint64_t addr; uint32_t data[4]; - int i, r; - - memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); + uint64_t addr; + long r; + int i; - r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); - if (r) - return r; + amdgpu_bo_kunmap(bo); + amdgpu_bo_unpin(bo); if (!ring->adev->uvd.address_64_bit) { + struct ttm_operation_ctx ctx = { true, false }; + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_uvd_force_into_uvd_segment(bo); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + goto err; } - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - goto err; - r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) goto err; @@ -1014,6 +1005,14 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { + r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + true, false, + msecs_to_jiffies(10)); + if (r == 0) + r = -ETIMEDOUT; + if (r < 0) + goto err_free; + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); job->fence = dma_fence_get(f); if (r) @@ -1021,17 +1020,23 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, amdgpu_job_free(job); } else { + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (r) + goto err_free; + r = amdgpu_job_submit(job, ring, &adev->uvd.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); if (r) goto err_free; } - ttm_eu_fence_buffer_objects(&ticket, &head, f); + amdgpu_bo_fence(bo, f, false); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); if (fence) *fence = dma_fence_get(f); - amdgpu_bo_unref(&bo); dma_fence_put(f); return 0; @@ -1040,7 +1045,8 @@ err_free: amdgpu_job_free(job); err: - ttm_eu_backoff_reservation(&ticket, &head); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -1051,31 +1057,16 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - /* stitch together an UVD create msg */ msg[0] = cpu_to_le32(0x00000de4); msg[1] = cpu_to_le32(0x00000000); @@ -1091,9 +1082,6 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = 11; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_uvd_send_msg(ring, bo, true, fence); } @@ -1101,31 +1089,16 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - /* stitch together an UVD destroy msg */ msg[0] = cpu_to_le32(0x00000de4); msg[1] = cpu_to_le32(0x00000002); @@ -1134,9 +1107,6 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = 4; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_uvd_send_msg(ring, bo, direct, fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 0fd378ae92c3..71781267ee4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -30,6 +30,8 @@ #define AMDGPU_VCE_HARVEST_VCE0 (1 << 0) #define AMDGPU_VCE_HARVEST_VCE1 (1 << 1) +#define AMDGPU_VCE_FW_53_45 ((53 << 24) | (45 << 16)) + struct amdgpu_vce { struct amdgpu_bo *vcpu_bo; uint64_t gpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 837962118dbc..58e495330b38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -270,34 +270,17 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) return r; } -static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, - bool direct, struct dma_fence **fence) +static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, + struct amdgpu_bo *bo, bool direct, + struct dma_fence **fence) { - struct ttm_operation_ctx ctx = { true, false }; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; + struct amdgpu_device *adev = ring->adev; + struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - struct amdgpu_device *adev = ring->adev; uint64_t addr; int i, r; - memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); - if (r) - return r; - - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - goto err; - r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) goto err; @@ -330,11 +313,12 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *b goto err_free; } - ttm_eu_fence_buffer_objects(&ticket, &head, f); + amdgpu_bo_fence(bo, f, false); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); if (fence) *fence = dma_fence_get(f); - amdgpu_bo_unref(&bo); dma_fence_put(f); return 0; @@ -343,7 +327,8 @@ err_free: amdgpu_job_free(job); err: - ttm_eu_backoff_reservation(&ticket, &head); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -351,31 +336,16 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - msg[0] = cpu_to_le32(0x00000028); msg[1] = cpu_to_le32(0x00000038); msg[2] = cpu_to_le32(0x00000001); @@ -393,9 +363,6 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = 14; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); } @@ -403,31 +370,16 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han bool direct, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - msg[0] = cpu_to_le32(0x00000028); msg[1] = cpu_to_le32(0x00000018); msg[2] = cpu_to_le32(0x00000000); @@ -437,9 +389,6 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = 6; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index e7dfb7b44b4b..b832651d2137 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -24,6 +24,18 @@ #include "amdgpu.h" #define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) +{ + uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + + addr -= AMDGPU_VA_RESERVED_SIZE; + + if (addr >= AMDGPU_VA_HOLE_START) + addr |= AMDGPU_VA_HOLE_END; + + return addr; +} + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { /* By now all MMIO pages except mailbox are blocked */ @@ -55,14 +67,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) { /* * amdgpu_map_static_csa should be called during amdgpu_vm_init - * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE" - * to this VM, and each command submission of GFX should use this virtual - * address within META_DATA init package to support SRIOV gfx preemption. + * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command + * submission of GFX should use this virtual address within META_DATA init + * package to support SRIOV gfx preemption. */ - int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { + uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK; struct ww_acquire_ctx ticket; struct list_head list; struct amdgpu_bo_list_entry pd; @@ -90,7 +102,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR, + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); @@ -99,7 +111,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } - r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE, + r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 6a83425aa9ed..880ac113a3a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -251,8 +251,7 @@ struct amdgpu_virt { uint32_t gim_feature; }; -#define AMDGPU_CSA_SIZE (8 * 1024) -#define AMDGPU_CSA_VADDR (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE) +#define AMDGPU_CSA_SIZE (8 * 1024) #define amdgpu_sriov_enabled(adev) \ ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) @@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void) } struct amdgpu_vm; + +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); int amdgpu_allocate_static_csa(struct amdgpu_device *adev); int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e714d0..0b237e027cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -75,7 +75,8 @@ struct amdgpu_pte_update_params { /* indirect buffer to fill with commands */ struct amdgpu_ib *ib; /* Function which actually does the update */ - void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, + void (*func)(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); /* The next two are used during VM update by CPU @@ -257,6 +258,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) } /** + * amdgpu_vm_clear_bo - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @bo: BO to clear + * @level: level this BO is at + * + * Root PD needs to be reserved when calling this. + */ +static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo *bo, + unsigned level, bool pte_support_ats) +{ + struct ttm_operation_ctx ctx = { true, false }; + struct dma_fence *fence = NULL; + unsigned entries, ats_entries; + struct amdgpu_ring *ring; + struct amdgpu_job *job; + uint64_t addr; + int r; + + addr = amdgpu_bo_gpu_offset(bo); + entries = amdgpu_bo_size(bo) / 8; + + if (pte_support_ats) { + if (level == adev->vm_manager.root_level) { + ats_entries = amdgpu_vm_level_shift(adev, level); + ats_entries += AMDGPU_GPU_PAGE_SHIFT; + ats_entries = AMDGPU_VA_HOLE_START >> ats_entries; + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + } else { + ats_entries = entries; + entries = 0; + } + } else { + ats_entries = 0; + } + + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + + r = reservation_object_reserve_shared(bo->tbo.resv); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + goto error; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto error; + + if (ats_entries) { + uint64_t ats_value; + + ats_value = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) + ats_value |= AMDGPU_PDE_PTE; + + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + ats_entries, 0, ats_value); + addr += ats_entries * 8; + } + + if (entries) + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + entries, 0, 0); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + + WARN_ON(job->ibs[0].length_dw > 64); + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + amdgpu_bo_fence(bo, fence, true); + dma_fence_put(fence); + + if (bo->shadow) + return amdgpu_vm_clear_bo(adev, vm, bo->shadow, + level, pte_support_ats); + + return 0; + +error_free: + amdgpu_job_free(job); + +error: + return r; +} + +/** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * * @adev: amdgpu_device pointer @@ -270,13 +369,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_pt *parent, uint64_t saddr, uint64_t eaddr, - unsigned level) + unsigned level, bool ats) { unsigned shift = amdgpu_vm_level_shift(adev, level); unsigned pt_idx, from, to; - int r; u64 flags; - uint64_t init_value = 0; + int r; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -299,21 +397,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW); - if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) - init_value |= AMDGPU_PDE_PTE; - - } - /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.base.bo->tbo.resv; @@ -324,15 +414,22 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - flags, - NULL, resv, init_value, &pt); + AMDGPU_GEM_DOMAIN_VRAM, flags, + NULL, resv, &pt); if (r) return r; + r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats); + if (r) { + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + return r; + } + if (vm->use_cpu_for_update) { r = amdgpu_bo_kmap(pt, NULL); if (r) { + amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt); return r; } @@ -356,7 +453,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, - sub_eaddr, level); + sub_eaddr, level, ats); if (r) return r; } @@ -379,26 +476,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size) { - uint64_t last_pfn; uint64_t eaddr; + bool ats = false; /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) return -EINVAL; eaddr = saddr + size - 1; - last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; - if (last_pfn >= adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", - last_pfn, adev->vm_manager.max_pfn); - return -EINVAL; - } + + if (vm->pte_support_ats) + ats = saddr < AMDGPU_VA_HOLE_START; saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; + if (eaddr >= adev->vm_manager.max_pfn) { + dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", + eaddr, adev->vm_manager.max_pfn); + return -EINVAL; + } + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, - adev->vm_manager.root_level); + adev->vm_manager.root_level, ats); } /** @@ -465,7 +565,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) { - return (adev->mc.real_vram_size == adev->mc.visible_vram_size); + return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size); } /** @@ -491,14 +591,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; + bool pasid_mapping_needed = id->pasid != job->pasid || + !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping); + struct dma_fence *fence = NULL; unsigned patch_offset = 0; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; vm_flush_needed = true; + pasid_mapping_needed = true; } + gds_switch_needed &= !!ring->funcs->emit_gds_switch; + vm_flush_needed &= !!ring->funcs->emit_vm_flush; + pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && + ring->funcs->emit_wreg; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -508,23 +618,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && vm_flush_needed) { - struct dma_fence *fence; - + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); + } + if (pasid_mapping_needed) + amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); + + if (vm_flush_needed || pasid_mapping_needed) { r = amdgpu_fence_emit(ring, &fence); if (r) return r; + } + if (vm_flush_needed) { mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); - id->last_flush = fence; - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); + id->last_flush = dma_fence_get(fence); + id->current_gpu_reset_count = + atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } + if (pasid_mapping_needed) { + id->pasid = job->pasid; + dma_fence_put(id->pasid_mapping); + id->pasid_mapping = dma_fence_get(fence); + } + dma_fence_put(fence); + if (ring->funcs->emit_gds_switch && gds_switch_needed) { id->gds_base = job->gds_base; id->gds_size = job->gds_size; @@ -578,6 +701,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * amdgpu_vm_do_set_ptes - helper to call the right asic function * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -588,10 +712,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * to setup the page table using the DMA. */ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); if (count < 3) { @@ -608,6 +734,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -617,13 +744,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * Traces the parameters and calls the DMA function to copy the PTEs. */ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { uint64_t src = (params->src + (addr >> 12) * 8); - + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_copy_ptes(pe, src, count); amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); @@ -657,6 +785,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: kmap addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -666,6 +795,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * Write count number of PT/PD entries directly. */ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) @@ -673,14 +803,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, unsigned int i; uint64_t value; + pe += (unsigned long)amdgpu_bo_kptr(bo); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); for (i = 0; i < count; i++) { value = params->pages_addr ? amdgpu_vm_map_gart(params->pages_addr, addr) : addr; - amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, - i, value, flags); + amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); addr += incr; } } @@ -714,8 +846,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, struct amdgpu_vm_pt *parent, struct amdgpu_vm_pt *entry) { - struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; - uint64_t pd_addr, shadow_addr = 0; + struct amdgpu_bo *bo = parent->base.bo, *pbo; uint64_t pde, pt, flags; unsigned level; @@ -723,29 +854,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, if (entry->huge) return; - if (vm->use_cpu_for_update) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - shadow = parent->base.bo->shadow; - if (shadow) - shadow_addr = amdgpu_bo_gpu_offset(shadow); - } - - for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) + for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; level += params->adev->vm_manager.root_level; - pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_bo_gpu_offset(entry->base.bo); flags = AMDGPU_PTE_VALID; - amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); - if (shadow) { - pde = shadow_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); - } - - pde = pd_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); + amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags); + pde = (entry - parent->entries) * 8; + if (bo->shadow) + params->func(params, bo->shadow, pde, pt, 1, 0, flags); + params->func(params, bo, pde, pt, 1, 0, flags); } /* @@ -856,7 +975,7 @@ restart: if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); } else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { @@ -870,11 +989,6 @@ restart: amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, AMDGPU_FENCE_OWNER_VM, false); - if (root->shadow) - amdgpu_sync_resv(adev, &job->sync, - root->shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &fence); @@ -946,7 +1060,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, unsigned nptes, uint64_t dst, uint64_t flags) { - uint64_t pd_addr, pde; + uint64_t pde; /* In the case of a mixed PT the PDE must point to it*/ if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && @@ -967,21 +1081,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, } entry->huge = true; - amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0, - &dst, &flags); + amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); - if (p->func == amdgpu_vm_cpu_set_ptes) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - if (parent->base.bo->shadow) { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); - } - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - } - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); + pde = (entry - parent->entries) * 8; + if (parent->base.bo->shadow) + p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags); + p->func(p, parent->base.bo, pde, dst, 1, 0, flags); } /** @@ -1007,7 +1112,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t addr, pe_start; struct amdgpu_bo *pt; unsigned nptes; - bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); /* walk over the address space and update the page tables */ for (addr = start; addr < end; addr += nptes, @@ -1030,20 +1134,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; pt = entry->base.bo; - if (use_cpu_update) { - pe_start = (unsigned long)amdgpu_bo_kptr(pt); - } else { - if (pt->shadow) { - pe_start = amdgpu_bo_gpu_offset(pt->shadow); - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - } - pe_start = amdgpu_bo_gpu_offset(pt); - } - - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, + pe_start = (addr & mask) * 8; + if (pt->shadow) + params->func(params, pt->shadow, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + params->func(params, pt, pe_start, dst, nptes, AMDGPU_GPU_PAGE_SIZE, flags); } @@ -1204,11 +1299,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } else { /* set page commands needed */ - ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; + ndw += ncmds * 10; /* extra commands for begin/end fragments */ - ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw - * adev->vm_manager.fragment_size; + ndw += 2 * 10 * adev->vm_manager.fragment_size; params.func = amdgpu_vm_do_set_ptes; } @@ -1457,7 +1551,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); } spin_lock(&vm->status_lock); @@ -1485,7 +1579,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); enable = !!atomic_read(&adev->vm_manager.num_prt_users); - adev->gart.gart_funcs->set_prt(adev, enable); + adev->gmc.gmc_funcs->set_prt(adev, enable); spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); } @@ -1494,7 +1588,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) */ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) { - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) @@ -1529,7 +1623,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, { struct amdgpu_prt_cb *cb; - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); @@ -1623,16 +1717,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence) { struct amdgpu_bo_va_mapping *mapping; + uint64_t init_pte_value = 0; struct dma_fence *f = NULL; int r; - uint64_t init_pte_value = 0; while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats) + if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, @@ -2262,11 +2356,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); - uint64_t init_pde_value = 0, flags; unsigned ring_instance; struct amdgpu_ring *ring; struct drm_sched_rq *rq; unsigned long size; + uint64_t flags; int r, i; vm->va = RB_ROOT_CACHED; @@ -2295,23 +2389,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN) vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_DEFAULT_ATC - | AMDGPU_PDE_PTE; - - } - } else + } else { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else @@ -2320,8 +2410,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - flags, NULL, NULL, init_pde_value, - &vm->root.base.bo); + flags, NULL, NULL, &vm->root.base.bo); if (r) goto error_free_sched_entity; @@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + adev->vm_manager.root_level, + vm->pte_support_ats); + if (r) + goto error_unreserve; + vm->root.base.vm = vm; list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); list_add_tail(&vm->root.base.vm_status, &vm->evicted); @@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, return 0; +error_unreserve: + amdgpu_bo_unreserve(vm->root.base.bo); + error_free_root: amdgpu_bo_unref(&vm->root.base.bo->shadow); amdgpu_bo_unref(&vm->root.base.bo); @@ -2405,7 +2503,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev, void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; - bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; u64 fault; int i, r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 21a80f1bb2b9..fabf44b262be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -99,7 +99,7 @@ struct amdgpu_bo_list_entry; #define AMDGPU_MMHUB 1 /* hardcode that limit for now */ -#define AMDGPU_VA_RESERVED_SIZE (8ULL << 20) +#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) /* VA hole for 48bit addresses on Vega10 */ #define AMDGPU_VA_HOLE_START 0x0000800000000000ULL diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 4acca92f6a52..9aca653bec07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -89,11 +89,11 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, uint64_t start = node->start << PAGE_SHIFT; uint64_t end = (node->size + node->start) << PAGE_SHIFT; - if (start >= adev->mc.visible_vram_size) + if (start >= adev->gmc.visible_vram_size) return 0; - return (end > adev->mc.visible_vram_size ? - adev->mc.visible_vram_size : end) - start; + return (end > adev->gmc.visible_vram_size ? + adev->gmc.visible_vram_size : end) - start; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index a0943aa8d1d3..f82f40fb3bea 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -905,7 +905,7 @@ static bool ci_dpm_vblank_too_short(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); - u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; + u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; /* disable mclk switching if the refresh is >120Hz, even if the * blanking period would allow it @@ -2954,7 +2954,7 @@ static int ci_calculate_mclk_params(struct amdgpu_device *adev, mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK; mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT); - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK | MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK); mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) | @@ -3077,7 +3077,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev, (memory_clock <= pi->mclk_strobe_mode_threshold)) memory_level->StrobeEnable = 1; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { memory_level->StrobeRatio = ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable); if (pi->mclk_edc_enable_threshold && @@ -3752,7 +3752,7 @@ static int ci_init_smc_table(struct amdgpu_device *adev) if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; if (ulv->supported) { @@ -4549,12 +4549,12 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev, for (k = 0; k < table->num_entries; k++) { table->mc_reg_table_entry[k].mc_data[j] = (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL; table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; @@ -6639,9 +6639,10 @@ static int ci_dpm_force_clock_level(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct ci_power_info *pi = ci_get_pi(adev); - if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO | - AMD_DPM_FORCED_LEVEL_LOW | - AMD_DPM_FORCED_LEVEL_HIGH)) + if (adev->pm.dpm.forced_level != AMD_DPM_FORCED_LEVEL_MANUAL) + return -EINVAL; + + if (mask == 0) return -EINVAL; switch (type) { @@ -6662,15 +6663,15 @@ static int ci_dpm_force_clock_level(void *handle, case PP_PCIE: { uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask; - uint32_t level = 0; - while (tmp >>= 1) - level++; - - if (!pi->pcie_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter(adev, + if (!pi->pcie_dpm_key_disabled) { + if (fls(tmp) != ffs(tmp)) + amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_UnForceLevel); + else + amdgpu_ci_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_PCIeDPM_ForceLevel, - level); + fls(tmp) - 1); + } break; } default: @@ -7029,7 +7030,6 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = { }; const struct amd_pm_funcs ci_dpm_funcs = { - .get_temperature = &ci_dpm_get_temp, .pre_set_power_state = &ci_dpm_pre_set_power_state, .set_power_state = &ci_dpm_set_power_state, .post_set_power_state = &ci_dpm_post_set_power_state, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 8e59e65efd44..4324184996a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1715,6 +1715,27 @@ static void cik_detect_hw_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } +static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + } +} + +static void cik_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_DEBUG0, 1); + RREG32(mmHDP_DEBUG0); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1); + } +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1726,6 +1747,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, .get_config_memsize = &cik_get_config_memsize, + .flush_hdp = &cik_flush_hdp, + .invalidate_hdp = &cik_invalidate_hdp, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index c4989f51ecef..e49c6f15a0a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -24,6 +24,8 @@ #ifndef __CIK_H__ #define __CIK_H__ +#define CIK_FLUSH_GPU_TLB_NUM_WREG 3 + void cik_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int cik_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index d5a05c19708f..07c7852180d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -281,7 +281,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vmid = (dw[2] >> 8) & 0xff; - entry->pas_id = (dw[2] >> 16) & 0xffff; + entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ adev->irq.ih.rptr += 16; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 6e8278e689b1..69568cd1bb99 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -261,13 +261,6 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ } -static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - amdgpu_ring_write(ring, mmHDP_DEBUG0); - amdgpu_ring_write(ring, 1); -} - /** * cik_sdma_ring_emit_fence - emit a fence on the DMA ring * @@ -317,7 +310,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); @@ -517,7 +510,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); } return 0; @@ -885,18 +878,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring, u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vmid < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); - } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); - } - amdgpu_ring_write(ring, pd_addr >> 12); - - /* flush TLB */ - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); @@ -906,6 +888,14 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ } +static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} + static void cik_enable_sdma_mgcg(struct amdgpu_device *adev, bool enable) { @@ -1279,9 +1269,9 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .set_wptr = cik_sdma_ring_set_wptr, .emit_frame_size = 6 + /* cik_sdma_ring_emit_hdp_flush */ - 3 + /* cik_sdma_ring_emit_hdp_invalidate */ + 3 + /* hdp invalidate */ 6 + /* cik_sdma_ring_emit_pipeline_sync */ - 12 + /* cik_sdma_ring_emit_vm_flush */ + CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */ 9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */ .emit_ib = cik_sdma_ring_emit_ib, @@ -1289,11 +1279,11 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync, .emit_vm_flush = cik_sdma_ring_emit_vm_flush, .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, - .emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate, .test_ring = cik_sdma_ring_test_ring, .test_ib = cik_sdma_ring_test_ib, .insert_nop = cik_sdma_ring_insert_nop, .pad_ib = cik_sdma_ring_pad_ib, + .emit_wreg = cik_sdma_ring_emit_wreg, }; static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) @@ -1391,9 +1381,6 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { .copy_pte = cik_sdma_vm_copy_pte, .write_pte = cik_sdma_vm_write_pte, - - .set_max_nums_pte_pde = 0x1fffff >> 3, - .set_pte_pde_num_dw = 10, .set_pte_pde = cik_sdma_vm_set_pte_pde, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index f576e9cbbc61..cfd0ad03c938 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -260,7 +260,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vmid = (dw[2] >> 8) & 0xff; - entry->pas_id = (dw[2] >> 16) & 0xffff; + entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ adev->irq.ih.rptr += 16; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index f34bc68aadfb..7ea900010702 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -190,66 +190,6 @@ static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v10_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & - CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK) - return true; - else - return false; -} - -static bool dce_v10_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v10_0_vblank_wait - vblank wait asic callback. - * - * @adev: amdgpu_device pointer - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v10_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v10_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v10_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v10_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v10_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc >= adev->mode_info.num_crtc) @@ -1205,7 +1145,7 @@ static void dce_v10_0_bandwidth_update(struct amdgpu_device *adev) u32 num_heads = 0, lb_size; int i; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2517,9 +2457,9 @@ static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = { .cursor_set2 = dce_v10_0_crtc_cursor_set2, .cursor_move = dce_v10_0_crtc_cursor_move, .gamma_set = dce_v10_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v10_0_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2537,7 +2477,8 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode) amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); dce_v10_0_vga_enable(crtc, false); /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); amdgpu_irq_update(adev, &adev->pageflip_irq, type); drm_crtc_vblank_on(crtc); @@ -2676,7 +2617,7 @@ static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; return false; } - if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) + if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) return false; if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) return false; @@ -2824,9 +2765,9 @@ static int dce_v10_0_sw_init(void *handle) adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2841,7 +2782,7 @@ static int dce_v10_0_sw_init(void *handle) } if (amdgpu_atombios_get_connector_info_from_object_table(adev)) - amdgpu_print_display_setup(adev->ddev); + amdgpu_display_print_display_setup(adev->ddev); else return -EINVAL; @@ -3249,7 +3190,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, { unsigned crtc = entry->src_id - 1; uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); + unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, crtc); switch (entry->src_data[0]) { case 0: /* vblank */ @@ -3601,7 +3542,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { .bandwidth_update = &dce_v10_0_bandwidth_update, .vblank_get_counter = &dce_v10_0_vblank_get_counter, - .vblank_wait = &dce_v10_0_vblank_wait, .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, .hpd_sense = &dce_v10_0_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 26378bd6aba4..158b92ea435f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -207,66 +207,6 @@ static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v11_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & - CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK) - return true; - else - return false; -} - -static bool dce_v11_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v11_0_vblank_wait - vblank wait asic callback. - * - * @adev: amdgpu_device pointer - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v11_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v11_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v11_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v11_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v11_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc < 0 || crtc >= adev->mode_info.num_crtc) @@ -1229,7 +1169,7 @@ static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev) u32 num_heads = 0, lb_size; int i; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2592,9 +2532,9 @@ static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = { .cursor_set2 = dce_v11_0_crtc_cursor_set2, .cursor_move = dce_v11_0_crtc_cursor_move, .gamma_set = dce_v11_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v11_0_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2612,7 +2552,8 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); dce_v11_0_vga_enable(crtc, false); /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); amdgpu_irq_update(adev, &adev->pageflip_irq, type); drm_crtc_vblank_on(crtc); @@ -2779,7 +2720,7 @@ static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; return false; } - if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) + if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) return false; if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) return false; @@ -2939,9 +2880,9 @@ static int dce_v11_0_sw_init(void *handle) adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2957,7 +2898,7 @@ static int dce_v11_0_sw_init(void *handle) } if (amdgpu_atombios_get_connector_info_from_object_table(adev)) - amdgpu_print_display_setup(adev->ddev); + amdgpu_display_print_display_setup(adev->ddev); else return -EINVAL; @@ -3368,7 +3309,8 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, { unsigned crtc = entry->src_id - 1; uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); + unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, + crtc); switch (entry->src_data[0]) { case 0: /* vblank */ @@ -3725,7 +3667,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { .bandwidth_update = &dce_v11_0_bandwidth_update, .vblank_get_counter = &dce_v11_0_vblank_get_counter, - .vblank_wait = &dce_v11_0_vblank_wait, .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, .hpd_sense = &dce_v11_0_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index bd2c4f727df6..03f19363f8f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -142,64 +142,6 @@ static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & CRTC_STATUS__CRTC_V_BLANK_MASK) - return true; - else - return false; -} - -static bool dce_v6_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v6_0_wait_for_vblank - vblank wait asic callback. - * - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v6_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v6_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v6_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v6_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v6_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc >= adev->mode_info.num_crtc) @@ -1108,7 +1050,7 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev) if (!adev->mode_info.mode_config_initialized) return; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2407,9 +2349,9 @@ static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = { .cursor_set2 = dce_v6_0_crtc_cursor_set2, .cursor_move = dce_v6_0_crtc_cursor_move, .gamma_set = dce_v6_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v6_0_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2425,7 +2367,8 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode) amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE); amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); amdgpu_irq_update(adev, &adev->pageflip_irq, type); drm_crtc_vblank_on(crtc); @@ -2562,7 +2505,7 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; return false; } - if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) + if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) return false; if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) return false; @@ -2693,9 +2636,9 @@ static int dce_v6_0_sw_init(void *handle) adev->ddev->mode_config.max_height = 16384; adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2711,7 +2654,7 @@ static int dce_v6_0_sw_init(void *handle) ret = amdgpu_atombios_get_connector_info_from_object_table(adev); if (ret) - amdgpu_print_display_setup(adev->ddev); + amdgpu_display_print_display_setup(adev->ddev); else return -EINVAL; @@ -2966,7 +2909,8 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, { unsigned crtc = entry->src_id - 1; uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); + unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, + crtc); switch (entry->src_data[0]) { case 0: /* vblank */ @@ -3407,7 +3351,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { .bandwidth_update = &dce_v6_0_bandwidth_update, .vblank_get_counter = &dce_v6_0_vblank_get_counter, - .vblank_wait = &dce_v6_0_vblank_wait, .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, .hpd_sense = &dce_v6_0_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index c008dc030687..8dbe97dff58c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -140,66 +140,6 @@ static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v8_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & - CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK) - return true; - else - return false; -} - -static bool dce_v8_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v8_0_vblank_wait - vblank wait asic callback. - * - * @adev: amdgpu_device pointer - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v8_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v8_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v8_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v8_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v8_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc >= adev->mode_info.num_crtc) @@ -1144,7 +1084,7 @@ static void dce_v8_0_bandwidth_update(struct amdgpu_device *adev) u32 num_heads = 0, lb_size; int i; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2421,9 +2361,9 @@ static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = { .cursor_set2 = dce_v8_0_crtc_cursor_set2, .cursor_move = dce_v8_0_crtc_cursor_move, .gamma_set = dce_v8_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v8_0_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2441,7 +2381,8 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); dce_v8_0_vga_enable(crtc, false); /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); amdgpu_irq_update(adev, &adev->pageflip_irq, type); drm_crtc_vblank_on(crtc); @@ -2587,7 +2528,7 @@ static bool dce_v8_0_crtc_mode_fixup(struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; return false; } - if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) + if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) return false; if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) return false; @@ -2724,9 +2665,9 @@ static int dce_v8_0_sw_init(void *handle) adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2741,7 +2682,7 @@ static int dce_v8_0_sw_init(void *handle) } if (amdgpu_atombios_get_connector_info_from_object_table(adev)) - amdgpu_print_display_setup(adev->ddev); + amdgpu_display_print_display_setup(adev->ddev); else return -EINVAL; @@ -3063,7 +3004,8 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, { unsigned crtc = entry->src_id - 1; uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); + unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, + crtc); switch (entry->src_data[0]) { case 0: /* vblank */ @@ -3491,7 +3433,6 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { .bandwidth_update = &dce_v8_0_bandwidth_update, .vblank_get_counter = &dce_v8_0_vblank_get_counter, - .vblank_wait = &dce_v8_0_vblank_wait, .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, .hpd_sense = &dce_v8_0_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 120dd3b26fc2..8201a0929ca2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -48,19 +48,6 @@ static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *ad int crtc, enum amdgpu_interrupt_state state); -/** - * dce_virtual_vblank_wait - vblank wait asic callback. - * - * @adev: amdgpu_device pointer - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_virtual_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - return; -} - static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc) { return 0; @@ -130,9 +117,9 @@ static const struct drm_crtc_funcs dce_virtual_crtc_funcs = { .cursor_set2 = NULL, .cursor_move = NULL, .gamma_set = dce_virtual_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_virtual_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -149,7 +136,8 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) case DRM_MODE_DPMS_ON: amdgpu_crtc->enabled = true; /* Make sure VBLANK interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); drm_crtc_vblank_on(crtc); break; @@ -406,9 +394,9 @@ static int dce_virtual_sw_init(void *handle) adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -653,7 +641,6 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_virtual_display_funcs = { .bandwidth_update = &dce_virtual_bandwidth_update, .vblank_get_counter = &dce_virtual_vblank_get_counter, - .vblank_wait = &dce_virtual_vblank_wait, .backlight_set_level = NULL, .backlight_get_level = NULL, .hpd_sense = &dce_virtual_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/emu_soc.c b/drivers/gpu/drm/amd/amdgpu/emu_soc.c new file mode 100644 index 000000000000..d72c25c1b987 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/emu_soc.c @@ -0,0 +1,33 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" + +int emu_soc_asic_init(struct amdgpu_device *adev) +{ + return 0; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 9870d83b68c1..0fff5b8cd318 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -38,6 +38,7 @@ #include "dce/dce_6_0_sh_mask.h" #include "gca/gfx_7_2_enum.h" #include "si_enums.h" +#include "si.h" static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1808,17 +1809,6 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) return r; } -static void gfx_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) -{ - /* flush hdp cache */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 0x1); -} - static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); @@ -1826,24 +1816,6 @@ static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) EVENT_INDEX(0)); } -/** - * gfx_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp - * - * @adev: amdgpu_device pointer - * @ridx: amdgpu ring index - * - * Emits an hdp invalidate on the cp. - */ -static void gfx_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, mmHDP_DEBUG0); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 0x1); -} - static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { @@ -2358,25 +2330,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - /* write new base address */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | - WRITE_DATA_DST_SEL(0))); - if (vmid < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid )); - } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8))); - } - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, pd_addr >> 12); - - /* bits 0-15 are the VM contexts0-15 */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -2401,6 +2355,18 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, } } +static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | + WRITE_DATA_DST_SEL(0))); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) { @@ -3511,23 +3477,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { .get_wptr = gfx_v6_0_ring_get_wptr, .set_wptr = gfx_v6_0_ring_set_wptr_gfx, .emit_frame_size = - 5 + /* gfx_v6_0_ring_emit_hdp_flush */ - 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */ + 5 + 5 + /* hdp flush / invalidate */ 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */ - 17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */ + SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */ 3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ .emit_ib = gfx_v6_0_ring_emit_ib, .emit_fence = gfx_v6_0_ring_emit_fence, .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush, - .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate, .test_ring = gfx_v6_0_ring_test_ring, .test_ib = gfx_v6_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl, + .emit_wreg = gfx_v6_0_ring_emit_wreg, }; static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { @@ -3538,21 +3502,19 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { .get_wptr = gfx_v6_0_ring_get_wptr, .set_wptr = gfx_v6_0_ring_set_wptr_compute, .emit_frame_size = - 5 + /* gfx_v6_0_ring_emit_hdp_flush */ - 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */ + 5 + 5 + /* hdp flush / invalidate */ 7 + /* gfx_v6_0_ring_emit_pipeline_sync */ - 17 + /* gfx_v6_0_ring_emit_vm_flush */ + SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */ 14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ .emit_ib = gfx_v6_0_ring_emit_ib, .emit_fence = gfx_v6_0_ring_emit_fence, .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush, - .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate, .test_ring = gfx_v6_0_ring_test_ring, .test_ib = gfx_v6_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .emit_wreg = gfx_v6_0_ring_emit_wreg, }; static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a066c5eda135..972d421caada 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1946,7 +1946,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) if (i == 0) sh_mem_base = 0; else - sh_mem_base = adev->mc.shared_aperture_start >> 48; + sh_mem_base = adev->gmc.shared_aperture_start >> 48; cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_cfg); @@ -2147,26 +2147,6 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) EVENT_INDEX(0)); } - -/** - * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp - * - * @adev: amdgpu_device pointer - * @ridx: amdgpu ring index - * - * Emits an hdp invalidate on the cp. - */ -static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0) | - WR_CONFIRM)); - amdgpu_ring_write(ring, mmHDP_DEBUG0); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1); -} - /** * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring * @@ -3243,26 +3223,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | - WRITE_DATA_DST_SEL(0))); - if (vmid < 8) { - amdgpu_ring_write(ring, - (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); - } else { - amdgpu_ring_write(ring, - (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); - } - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, pd_addr >> 12); - - /* bits 0-15 are the VM contexts0-15 */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -3289,6 +3250,19 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, } } +static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | + WRITE_DATA_DST_SEL(0))); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + /* * RLC * The RLC is a multi-purpose microengine that handles a @@ -5115,10 +5089,10 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_frame_size = 20 + /* gfx_v7_0_ring_emit_gds_switch */ 7 + /* gfx_v7_0_ring_emit_hdp_flush */ - 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */ + 5 + /* hdp invalidate */ 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ - 17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ + CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ .emit_ib = gfx_v7_0_ring_emit_ib_gfx, @@ -5127,12 +5101,12 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate, .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, + .emit_wreg = gfx_v7_0_ring_emit_wreg, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5146,9 +5120,9 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .emit_frame_size = 20 + /* gfx_v7_0_ring_emit_gds_switch */ 7 + /* gfx_v7_0_ring_emit_hdp_flush */ - 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */ + 5 + /* hdp invalidate */ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ - 17 + /* gfx_v7_0_ring_emit_vm_flush */ + CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ .emit_ib = gfx_v7_0_ring_emit_ib_compute, @@ -5157,11 +5131,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate, .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v7_0_ring_emit_wreg, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4e694ae9f308..27943e57681c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3796,7 +3796,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32(mmSH_MEM_CONFIG, tmp); - tmp = adev->mc.shared_aperture_start >> 48; + tmp = adev->gmc.shared_aperture_start >> 48; WREG32(mmSH_MEM_BASES, tmp); } @@ -4847,6 +4847,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); } else { amdgpu_ring_clear_ring(ring); } @@ -4921,13 +4924,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) /* Test KCQs */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - if (adev->in_gpu_reset) { - /* move reset ring buffer to here to workaround - * compute ring test failed - */ - ring->wptr = 0; - amdgpu_ring_clear_ring(ring); - } ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) @@ -6230,19 +6226,6 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) EVENT_INDEX(0)); } - -static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0) | - WR_CONFIRM)); - amdgpu_ring_write(ring, mmHDP_DEBUG0); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1); - -} - static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vmid, bool ctx_switch) @@ -6332,28 +6315,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | - WRITE_DATA_DST_SEL(0)) | - WR_CONFIRM); - if (vmid < 8) { - amdgpu_ring_write(ring, - (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); - } else { - amdgpu_ring_write(ring, - (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); - } - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, pd_addr >> 12); - - /* bits 0-15 are the VM contexts0-15 */ - /* invalidate the cache */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -6617,8 +6579,22 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { + uint32_t cmd; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; + break; + case AMDGPU_RING_TYPE_KIQ: + cmd = 1 << 16; /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ + amdgpu_ring_write(ring, cmd); amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, val); @@ -6871,7 +6847,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_frame_size = /* maximum 215dw if count 16 IBs in */ 5 + /* COND_EXEC */ 7 + /* PIPELINE_SYNC */ - 19 + /* VM_FLUSH */ + VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -6893,7 +6869,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -6902,6 +6877,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, + .emit_wreg = gfx_v8_0_ring_emit_wreg, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -6915,9 +6891,9 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .emit_frame_size = 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7 + /* gfx_v8_0_ring_emit_hdp_flush */ - 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ + 5 + /* hdp_invalidate */ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ - 17 + /* gfx_v8_0_ring_emit_vm_flush */ + VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, @@ -6926,12 +6902,12 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .set_priority = gfx_v8_0_ring_set_priority_compute, + .emit_wreg = gfx_v8_0_ring_emit_wreg, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { @@ -6945,7 +6921,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { .emit_frame_size = 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7 + /* gfx_v8_0_ring_emit_hdp_flush */ - 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ + 5 + /* hdp_invalidate */ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ @@ -7151,12 +7127,12 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) } ce_payload = {}; if (ring->adev->virt.chained_ib_support) { - ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + - offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); + ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; } else { - ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + - offsetof(struct vi_gfx_meta_data, ce_payload); + ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + + offsetof(struct vi_gfx_meta_data, ce_payload); cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; } @@ -7179,7 +7155,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) struct vi_de_ib_state_chained_ib chained; } de_payload = {}; - csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; + csa_addr = amdgpu_csa_vaddr(ring->adev); gds_addr = csa_addr + 4096; if (ring->adev->virt.chained_ib_support) { de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c06479615e8a..848008ef46b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1539,7 +1539,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - tmp = adev->mc.shared_aperture_start >> 48; + tmp = adev->gmc.shared_aperture_start >> 48; WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); } } @@ -3585,14 +3585,6 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) ref_and_mask, ref_and_mask, 0x20); } -static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - gfx_v9_0_write_data_to_reg(ring, 0, true, - SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); -} - static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vmid, bool ctx_switch) @@ -3686,32 +3678,10 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; - - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; - - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_lo32 + (2 * vmid), - lower_32_bits(pd_addr)); - - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_hi32 + (2 * vmid), - upper_32_bits(pd_addr)); - - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->vm_inv_eng0_req + eng, req); - - /* wait for the invalidate to complete */ - gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + - eng, 0, 1 << vmid, 1 << vmid, 0x20); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ - if (usepfp) { + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { /* sync PFP to ME, otherwise we might get invalid PFP reads */ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); @@ -3735,6 +3705,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) return wptr; } +static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(&adev->gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { + /* Clear all reservations - everyone reacquires all resources */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) + gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], + true); + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) + gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], + true); + } else { + /* Lower all pipes without a current reservation */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + iring = &adev->gfx.gfx_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v9_0_ring_set_pipe_percent(iring, reserve); + } + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + iring = &adev->gfx.compute_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v9_0_ring_set_pipe_percent(iring, reserve); + } + } + + mutex_unlock(&adev->gfx.pipe_reserve_mutex); +} + +static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + uint32_t pipe_priority = acquire ? 0x2 : 0x0; + uint32_t queue_priority = acquire ? 0xf : 0x0; + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, + enum drm_sched_priority priority) +{ + struct amdgpu_device *adev = ring->adev; + bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; + + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return; + + gfx_v9_0_hqd_set_priority(adev, ring, acquire); + gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); +} + static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3788,7 +3857,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) int cnt; cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; + csa_addr = amdgpu_csa_vaddr(ring->adev); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -3806,7 +3875,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) uint64_t csa_addr, gds_addr; int cnt; - csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; + csa_addr = amdgpu_csa_vaddr(ring->adev); gds_addr = csa_addr + 4096; de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -3904,15 +3973,34 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) } static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val) + uint32_t val) { + uint32_t cmd = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; + break; + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ + amdgpu_ring_write(ring, cmd); amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, val); } +static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -4199,7 +4287,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5 + /* COND_EXEC */ 7 + /* PIPELINE_SYNC */ - 24 + /* VM_FLUSH */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -4221,7 +4311,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -4231,6 +4320,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_tmz = gfx_v9_0_ring_emit_tmz, + .emit_wreg = gfx_v9_0_ring_emit_wreg, + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -4245,9 +4336,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_frame_size = 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7 + /* gfx_v9_0_ring_emit_hdp_flush */ - 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 5 + /* hdp invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ - 24 + /* gfx_v9_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, @@ -4256,11 +4349,13 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .set_priority = gfx_v9_0_ring_set_priority_compute, + .emit_wreg = gfx_v9_0_ring_emit_wreg, + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -4275,9 +4370,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .emit_frame_size = 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7 + /* gfx_v9_0_ring_emit_hdp_flush */ - 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 5 + /* hdp invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ - 24 + /* gfx_v9_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, @@ -4288,6 +4385,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_rreg = gfx_v9_0_ring_emit_rreg, .emit_wreg = gfx_v9_0_ring_emit_wreg, + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, }; static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 56f5fe4e2fee..94a07bcbbdda 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -40,7 +40,7 @@ static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) uint64_t value; BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->mc.vram_start + value = adev->gart.table_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; value &= 0x0000FFFFFFFFF000ULL; value |= 0x1; /*valid bit*/ @@ -57,14 +57,14 @@ static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) gfxhub_v1_0_init_gart_pt_regs(adev); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->mc.gart_start >> 12)); + (u32)(adev->gmc.gart_start >> 12)); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->mc.gart_start >> 44)); + (u32)(adev->gmc.gart_start >> 44)); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->mc.gart_end >> 12)); + (u32)(adev->gmc.gart_end >> 12)); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->mc.gart_end >> 44)); + (u32)(adev->gmc.gart_end >> 44)); } static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) @@ -78,12 +78,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->mc.vram_start >> 18); + adev->gmc.vram_start >> 18); WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->mc.vram_end >> 18); + adev->gmc.vram_end >> 18); /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); @@ -143,7 +143,7 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - if (adev->mc.translate_further) { + if (adev->gmc.translate_further) { tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); @@ -195,7 +195,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) num_level = adev->vm_manager.num_level; block_size = adev->vm_manager.block_size; - if (adev->mc.translate_further) + if (adev->gmc.translate_further) num_level -= 1; else block_size -= 9; @@ -257,9 +257,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) * SRIOV driver need to program them */ WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, - adev->mc.vram_start >> 24); + adev->gmc.vram_start >> 24); WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, - adev->mc.vram_end >> 24); + adev->gmc.vram_end >> 24); } /* GART Enable. */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 5f5eb15ccf4a..2c0ed9dd0c91 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -37,7 +37,7 @@ #include "dce/dce_6_0_sh_mask.h" #include "si_enums.h" -static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev); +static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev); static int gmc_v6_0_wait_for_idle(void *handle); @@ -137,19 +137,19 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin"); else snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); - err = request_firmware(&adev->mc.fw, fw_name, adev->dev); + err = request_firmware(&adev->gmc.fw, fw_name, adev->dev); if (err) goto out; - err = amdgpu_ucode_validate(adev->mc.fw); + err = amdgpu_ucode_validate(adev->gmc.fw); out: if (err) { dev_err(adev->dev, "si_mc: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->mc.fw); - adev->mc.fw = NULL; + release_firmware(adev->gmc.fw); + adev->gmc.fw = NULL; } return err; } @@ -162,20 +162,20 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev) int i, regs_size, ucode_size; const struct mc_firmware_header_v1_0 *hdr; - if (!adev->mc.fw) + if (!adev->gmc.fw) return -EINVAL; - hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; + hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data; amdgpu_ucode_print_mc_hdr(&hdr->header); - adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version); regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); new_io_mc_regs = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; new_fw_data = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); running = RREG32(mmMC_SEQ_SUP_CNTL) & MC_SEQ_SUP_CNTL__RUN_MASK; @@ -218,12 +218,12 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev) } static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc) + struct amdgpu_gmc *mc) { u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_device_gart_location(adev, mc); } @@ -260,9 +260,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->mc.vram_start >> 12); + adev->gmc.vram_start >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->mc.vram_end >> 12); + adev->gmc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); WREG32(mmMC_VM_AGP_BASE, 0); @@ -320,56 +320,69 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) numchan = 16; break; } - adev->mc.vram_width = numchan * chansize; + adev->gmc.vram_width = numchan * chansize; /* size in MB on si */ - adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; - adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); if (r) return r; } - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); - adev->mc.visible_vram_size = adev->mc.aper_size; + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); + adev->gmc.visible_vram_size = adev->gmc.aper_size; /* set the gart size */ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { case CHIP_HAINAN: /* no MM engines */ default: - adev->mc.gart_size = 256ULL << 20; + adev->gmc.gart_size = 256ULL << 20; break; case CHIP_VERDE: /* UVD, VCE do not support GPUVM */ case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */ case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */ case CHIP_OLAND: /* UVD, VCE do not support GPUVM */ - adev->mc.gart_size = 1024ULL << 20; + adev->gmc.gart_size = 1024ULL << 20; break; } } else { - adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; } - gmc_v6_0_vram_gtt_location(adev, &adev->mc); + gmc_v6_0_vram_gtt_location(adev, &adev->gmc); return 0; } -static void gmc_v6_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) { - WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } -static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev, - void *cpu_pt_addr, - uint32_t gpu_page_idx, - uint64_t addr, - uint64_t flags) +static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + uint32_t reg; + + /* write new base address */ + if (vmid < 8) + reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; + else + reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8); + amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12); + + /* bits 0-15 are the VM contexts0-15 */ + amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid); + + return pd_addr; +} + +static int gmc_v6_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -433,9 +446,9 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) { u32 tmp; - if (enable && !adev->mc.prt_warning) { + if (enable && !adev->gmc.prt_warning) { dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); - adev->mc.prt_warning = true; + adev->gmc.prt_warning = true; } tmp = RREG32(mmVM_PRT_CNTL); @@ -455,7 +468,8 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) if (enable) { uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; - uint32_t high = adev->vm_manager.max_pfn; + uint32_t high = adev->vm_manager.max_pfn - + (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -515,8 +529,8 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) | (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -561,9 +575,9 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) else gmc_v6_0_set_fault_enable_default(adev, true); - gmc_v6_0_gart_flush_gpu_tlb(adev, 0); + gmc_v6_0_flush_gpu_tlb(adev, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gart_size >> 20), + (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -795,7 +809,7 @@ static int gmc_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gmc_v6_0_set_gart_funcs(adev); + gmc_v6_0_set_gmc_funcs(adev); gmc_v6_0_set_irq_funcs(adev); return 0; @@ -806,7 +820,7 @@ static int gmc_v6_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) - return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } @@ -818,26 +832,26 @@ static int gmc_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->flags & AMD_IS_APU) { - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { u32 tmp = RREG32(mmMC_SEQ_MISC0); tmp &= MC_SEQ_MISC0__MT__MASK; - adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp); + adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); if (r) return r; amdgpu_vm_adjust_size(adev, 64, 9, 1, 40); - adev->mc.mc_mask = 0xffffffffffULL; + adev->gmc.mc_mask = 0xffffffffffULL; - adev->mc.stolen_size = 256 * 1024; + adev->gmc.stolen_size = 256 * 1024; adev->need_dma32 = false; dma_bits = adev->need_dma32 ? 32 : 40; @@ -902,8 +916,8 @@ static int gmc_v6_0_sw_fini(void *handle) amdgpu_vm_manager_fini(adev); gmc_v6_0_gart_fini(adev); amdgpu_bo_fini(adev); - release_firmware(adev->mc.fw); - adev->mc.fw = NULL; + release_firmware(adev->gmc.fw); + adev->gmc.fw = NULL; return 0; } @@ -934,7 +948,7 @@ static int gmc_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v6_0_gart_disable(adev); return 0; @@ -1129,9 +1143,10 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { .set_powergating_state = gmc_v6_0_set_powergating_state, }; -static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = { - .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, - .set_pte_pde = gmc_v6_0_gart_set_pte_pde, +static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb, + .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, + .set_pte_pde = gmc_v6_0_set_pte_pde, .set_prt = gmc_v6_0_set_prt, .get_vm_pde = gmc_v6_0_get_vm_pde, .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags @@ -1142,16 +1157,16 @@ static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { .process = gmc_v6_0_process_interrupt, }; -static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev) +static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gart.gart_funcs == NULL) - adev->gart.gart_funcs = &gmc_v6_0_gart_funcs; + if (adev->gmc.gmc_funcs == NULL) + adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs; } static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->mc.vm_fault.num_types = 1; - adev->mc.vm_fault.funcs = &gmc_v6_0_irq_funcs; + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs; } const struct amdgpu_ip_block_version gmc_v6_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 12e49bd8fd2d..4edd17059868 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -43,7 +43,7 @@ #include "amdgpu_atombios.h" -static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); +static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); static int gmc_v7_0_wait_for_idle(void *handle); @@ -152,16 +152,16 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) else snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); - err = request_firmware(&adev->mc.fw, fw_name, adev->dev); + err = request_firmware(&adev->gmc.fw, fw_name, adev->dev); if (err) goto out; - err = amdgpu_ucode_validate(adev->mc.fw); + err = amdgpu_ucode_validate(adev->gmc.fw); out: if (err) { pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->mc.fw); - adev->mc.fw = NULL; + release_firmware(adev->gmc.fw); + adev->gmc.fw = NULL; } return err; } @@ -182,19 +182,19 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev) u32 running; int i, ucode_size, regs_size; - if (!adev->mc.fw) + if (!adev->gmc.fw) return -EINVAL; - hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; + hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data; amdgpu_ucode_print_mc_hdr(&hdr->header); - adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version); regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); io_mc_regs = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; fw_data = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN); @@ -236,12 +236,12 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev) } static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc) + struct amdgpu_gmc *mc) { u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_device_gart_location(adev, mc); } @@ -284,9 +284,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->mc.vram_start >> 12); + adev->gmc.vram_start >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->mc.vram_end >> 12); + adev->gmc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); WREG32(mmMC_VM_AGP_BASE, 0); @@ -319,8 +319,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) { int r; - adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); - if (!adev->mc.vram_width) { + adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev); + if (!adev->gmc.vram_width) { u32 tmp; int chansize, numchan; @@ -362,38 +362,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) numchan = 16; break; } - adev->mc.vram_width = numchan * chansize; + adev->gmc.vram_width = numchan * chansize; } /* size in MB on si */ - adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; - adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); if (r) return r; } - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 if (adev->flags & AMD_IS_APU) { - adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; - adev->mc.aper_size = adev->mc.real_vram_size; + adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; + adev->gmc.aper_size = adev->gmc.real_vram_size; } #endif /* In case the PCI BAR is larger than the actual amount of vram */ - adev->mc.visible_vram_size = adev->mc.aper_size; - if (adev->mc.visible_vram_size > adev->mc.real_vram_size) - adev->mc.visible_vram_size = adev->mc.real_vram_size; + adev->gmc.visible_vram_size = adev->gmc.aper_size; + if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) + adev->gmc.visible_vram_size = adev->gmc.real_vram_size; /* set the gart size */ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { case CHIP_TOPAZ: /* no MM engines */ default: - adev->mc.gart_size = 256ULL << 20; + adev->gmc.gart_size = 256ULL << 20; break; #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */ @@ -401,15 +401,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */ case CHIP_KABINI: /* UVD, VCE do not support GPUVM */ case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */ - adev->mc.gart_size = 1024ULL << 20; + adev->gmc.gart_size = 1024ULL << 20; break; #endif } } else { - adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; } - gmc_v7_0_vram_gtt_location(adev, &adev->mc); + gmc_v7_0_vram_gtt_location(adev, &adev->gmc); return 0; } @@ -422,25 +422,44 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) */ /** - * gmc_v7_0_gart_flush_gpu_tlb - gart tlb flush callback + * gmc_v7_0_flush_gpu_tlb - gart tlb flush callback * * @adev: amdgpu_device pointer * @vmid: vm instance to flush * * Flush the TLB for the requested page table (CIK). */ -static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) { - /* flush hdp cache */ - WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); - /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } +static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + uint32_t reg; + + if (vmid < 8) + reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; + else + reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8; + amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12); + + /* bits 0-15 are the VM contexts0-15 */ + amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid); + + return pd_addr; +} + +static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid) +{ + amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); +} + /** - * gmc_v7_0_gart_set_pte_pde - update the page tables using MMIO + * gmc_v7_0_set_pte_pde - update the page tables using MMIO * * @adev: amdgpu_device pointer * @cpu_pt_addr: cpu address of the page table @@ -450,11 +469,9 @@ static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, * * Update the page tables using the CPU. */ -static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev, - void *cpu_pt_addr, - uint32_t gpu_page_idx, - uint64_t addr, - uint64_t flags) +static int gmc_v7_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -524,9 +541,9 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) { uint32_t tmp; - if (enable && !adev->mc.prt_warning) { + if (enable && !adev->gmc.prt_warning) { dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); - adev->mc.prt_warning = true; + adev->gmc.prt_warning = true; } tmp = RREG32(mmVM_PRT_CNTL); @@ -548,7 +565,8 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) if (enable) { uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; - uint32_t high = adev->vm_manager.max_pfn; + uint32_t high = adev->vm_manager.max_pfn - + (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -622,8 +640,8 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); WREG32(mmVM_L2_CNTL3, tmp); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -675,9 +693,9 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmCHUB_CONTROL, tmp); } - gmc_v7_0_gart_flush_gpu_tlb(adev, 0); + gmc_v7_0_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gart_size >> 20), + (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -750,21 +768,21 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev) * * Print human readable fault information (CIK). */ -static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, - u32 status, u32 addr, u32 mc_client) +static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, + u32 addr, u32 mc_client, unsigned pasid) { - u32 mc_id; u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, PROTECTIONS); char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; + u32 mc_id; mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, MEMORY_CLIENT_ID); - dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", - protections, vmid, addr, + dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", + protections, vmid, pasid, addr, REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, MEMORY_CLIENT_RW) ? "write" : "read", block, mc_client, mc_id); @@ -922,16 +940,16 @@ static int gmc_v7_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gmc_v7_0_set_gart_funcs(adev); + gmc_v7_0_set_gmc_funcs(adev); gmc_v7_0_set_irq_funcs(adev); - adev->mc.shared_aperture_start = 0x2000000000000000ULL; - adev->mc.shared_aperture_end = - adev->mc.shared_aperture_start + (4ULL << 30) - 1; - adev->mc.private_aperture_start = - adev->mc.shared_aperture_end + 1; - adev->mc.private_aperture_end = - adev->mc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; + adev->gmc.shared_aperture_end = + adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = + adev->gmc.shared_aperture_end + 1; + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; return 0; } @@ -941,7 +959,7 @@ static int gmc_v7_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) - return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } @@ -953,18 +971,18 @@ static int gmc_v7_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->flags & AMD_IS_APU) { - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { u32 tmp = RREG32(mmMC_SEQ_MISC0); tmp &= MC_SEQ_MISC0__MT__MASK; - adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp); + adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); if (r) return r; @@ -978,9 +996,9 @@ static int gmc_v7_0_sw_init(void *handle) * This is the max address of the GPU's * internal address space. */ - adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ + adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - adev->mc.stolen_size = 256 * 1024; + adev->gmc.stolen_size = 256 * 1024; /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. @@ -1051,8 +1069,8 @@ static int gmc_v7_0_sw_fini(void *handle) amdgpu_vm_manager_fini(adev); gmc_v7_0_gart_fini(adev); amdgpu_bo_fini(adev); - release_firmware(adev->mc.fw); - adev->mc.fw = NULL; + release_firmware(adev->gmc.fw); + adev->gmc.fw = NULL; return 0; } @@ -1085,7 +1103,7 @@ static int gmc_v7_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v7_0_gart_disable(adev); return 0; @@ -1259,7 +1277,8 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); - gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client); + gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client, + entry->pasid); } return 0; @@ -1308,9 +1327,11 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { .set_powergating_state = gmc_v7_0_set_powergating_state, }; -static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = { - .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, - .set_pte_pde = gmc_v7_0_gart_set_pte_pde, +static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, + .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, + .set_pte_pde = gmc_v7_0_set_pte_pde, .set_prt = gmc_v7_0_set_prt, .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, .get_vm_pde = gmc_v7_0_get_vm_pde @@ -1321,16 +1342,16 @@ static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { .process = gmc_v7_0_process_interrupt, }; -static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev) +static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gart.gart_funcs == NULL) - adev->gart.gart_funcs = &gmc_v7_0_gart_funcs; + if (adev->gmc.gmc_funcs == NULL) + adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs; } static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->mc.vm_fault.num_types = 1; - adev->mc.vm_fault.funcs = &gmc_v7_0_irq_funcs; + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs; } const struct amdgpu_ip_block_version gmc_v7_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9a170e37fbe7..1e0ad0657e96 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -45,7 +45,7 @@ #include "amdgpu_atombios.h" -static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev); +static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); static int gmc_v8_0_wait_for_idle(void *handle); @@ -236,16 +236,16 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); - err = request_firmware(&adev->mc.fw, fw_name, adev->dev); + err = request_firmware(&adev->gmc.fw, fw_name, adev->dev); if (err) goto out; - err = amdgpu_ucode_validate(adev->mc.fw); + err = amdgpu_ucode_validate(adev->gmc.fw); out: if (err) { pr_err("mc: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->mc.fw); - adev->mc.fw = NULL; + release_firmware(adev->gmc.fw); + adev->gmc.fw = NULL; } return err; } @@ -274,19 +274,19 @@ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) if (amdgpu_sriov_bios(adev)) return 0; - if (!adev->mc.fw) + if (!adev->gmc.fw) return -EINVAL; - hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; + hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data; amdgpu_ucode_print_mc_hdr(&hdr->header); - adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version); regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); io_mc_regs = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; fw_data = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN); @@ -350,19 +350,19 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev) if (vbios_version == 0) return 0; - if (!adev->mc.fw) + if (!adev->gmc.fw) return -EINVAL; - hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; + hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data; amdgpu_ucode_print_mc_hdr(&hdr->header); - adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version); regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); io_mc_regs = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; fw_data = (const __le32 *) - (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); data = RREG32(mmMC_SEQ_MISC0); data &= ~(0x40); @@ -398,7 +398,7 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev) } static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc) + struct amdgpu_gmc *mc) { u64 base = 0; @@ -406,7 +406,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_device_gart_location(adev, mc); } @@ -449,18 +449,18 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->mc.vram_start >> 12); + adev->gmc.vram_start >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->mc.vram_end >> 12); + adev->gmc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); if (amdgpu_sriov_vf(adev)) { - tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); + tmp = ((adev->gmc.vram_end >> 24) & 0xFFFF) << 16; + tmp |= ((adev->gmc.vram_start >> 24) & 0xFFFF); WREG32(mmMC_VM_FB_LOCATION, tmp); /* XXX double check these! */ - WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); + WREG32(mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); } @@ -495,8 +495,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) { int r; - adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); - if (!adev->mc.vram_width) { + adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev); + if (!adev->gmc.vram_width) { u32 tmp; int chansize, numchan; @@ -538,31 +538,31 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) numchan = 16; break; } - adev->mc.vram_width = numchan * chansize; + adev->gmc.vram_width = numchan * chansize; } /* size in MB on si */ - adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; - adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); if (r) return r; } - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 if (adev->flags & AMD_IS_APU) { - adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; - adev->mc.aper_size = adev->mc.real_vram_size; + adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; + adev->gmc.aper_size = adev->gmc.real_vram_size; } #endif /* In case the PCI BAR is larger than the actual amount of vram */ - adev->mc.visible_vram_size = adev->mc.aper_size; - if (adev->mc.visible_vram_size > adev->mc.real_vram_size) - adev->mc.visible_vram_size = adev->mc.real_vram_size; + adev->gmc.visible_vram_size = adev->gmc.aper_size; + if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) + adev->gmc.visible_vram_size = adev->gmc.real_vram_size; /* set the gart size */ if (amdgpu_gart_size == -1) { @@ -571,20 +571,20 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) case CHIP_POLARIS10: /* all engines support GPUVM */ case CHIP_POLARIS12: /* all engines support GPUVM */ default: - adev->mc.gart_size = 256ULL << 20; + adev->gmc.gart_size = 256ULL << 20; break; case CHIP_TONGA: /* UVD, VCE do not support GPUVM */ case CHIP_FIJI: /* UVD, VCE do not support GPUVM */ case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */ case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */ - adev->mc.gart_size = 1024ULL << 20; + adev->gmc.gart_size = 1024ULL << 20; break; } } else { - adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; } - gmc_v8_0_vram_gtt_location(adev, &adev->mc); + gmc_v8_0_vram_gtt_location(adev, &adev->gmc); return 0; } @@ -597,25 +597,45 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) */ /** - * gmc_v8_0_gart_flush_gpu_tlb - gart tlb flush callback + * gmc_v8_0_flush_gpu_tlb - gart tlb flush callback * * @adev: amdgpu_device pointer * @vmid: vm instance to flush * * Flush the TLB for the requested page table (CIK). */ -static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) { - /* flush hdp cache */ - WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); - /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } +static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + uint32_t reg; + + if (vmid < 8) + reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; + else + reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8; + amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12); + + /* bits 0-15 are the VM contexts0-15 */ + amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid); + + return pd_addr; +} + +static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid) +{ + amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); +} + /** - * gmc_v8_0_gart_set_pte_pde - update the page tables using MMIO + * gmc_v8_0_set_pte_pde - update the page tables using MMIO * * @adev: amdgpu_device pointer * @cpu_pt_addr: cpu address of the page table @@ -625,11 +645,9 @@ static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, * * Update the page tables using the CPU. */ -static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev, - void *cpu_pt_addr, - uint32_t gpu_page_idx, - uint64_t addr, - uint64_t flags) +static int gmc_v8_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -723,9 +741,9 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) { u32 tmp; - if (enable && !adev->mc.prt_warning) { + if (enable && !adev->gmc.prt_warning) { dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); - adev->mc.prt_warning = true; + adev->gmc.prt_warning = true; } tmp = RREG32(mmVM_PRT_CNTL); @@ -747,7 +765,8 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) if (enable) { uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; - uint32_t high = adev->vm_manager.max_pfn; + uint32_t high = adev->vm_manager.max_pfn - + (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -837,8 +856,8 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0); WREG32(mmVM_L2_CNTL4, tmp); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -891,9 +910,9 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) else gmc_v8_0_set_fault_enable_default(adev, true); - gmc_v8_0_gart_flush_gpu_tlb(adev, 0); + gmc_v8_0_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gart_size >> 20), + (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -966,21 +985,21 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev) * * Print human readable fault information (CIK). */ -static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, - u32 status, u32 addr, u32 mc_client) +static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, + u32 addr, u32 mc_client, unsigned pasid) { - u32 mc_id; u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, PROTECTIONS); char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; + u32 mc_id; mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, MEMORY_CLIENT_ID); - dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", - protections, vmid, addr, + dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", + protections, vmid, pasid, addr, REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, MEMORY_CLIENT_RW) ? "write" : "read", block, mc_client, mc_id); @@ -1012,16 +1031,16 @@ static int gmc_v8_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gmc_v8_0_set_gart_funcs(adev); + gmc_v8_0_set_gmc_funcs(adev); gmc_v8_0_set_irq_funcs(adev); - adev->mc.shared_aperture_start = 0x2000000000000000ULL; - adev->mc.shared_aperture_end = - adev->mc.shared_aperture_start + (4ULL << 30) - 1; - adev->mc.private_aperture_start = - adev->mc.shared_aperture_end + 1; - adev->mc.private_aperture_end = - adev->mc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; + adev->gmc.shared_aperture_end = + adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = + adev->gmc.shared_aperture_end + 1; + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; return 0; } @@ -1031,7 +1050,7 @@ static int gmc_v8_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) - return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } @@ -1045,7 +1064,7 @@ static int gmc_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->flags & AMD_IS_APU) { - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { u32 tmp; @@ -1054,14 +1073,14 @@ static int gmc_v8_0_sw_init(void *handle) else tmp = RREG32(mmMC_SEQ_MISC0); tmp &= MC_SEQ_MISC0__MT__MASK; - adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); + adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); if (r) return r; @@ -1075,9 +1094,9 @@ static int gmc_v8_0_sw_init(void *handle) * This is the max address of the GPU's * internal address space. */ - adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ + adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - adev->mc.stolen_size = 256 * 1024; + adev->gmc.stolen_size = 256 * 1024; /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. @@ -1149,8 +1168,8 @@ static int gmc_v8_0_sw_fini(void *handle) amdgpu_vm_manager_fini(adev); gmc_v8_0_gart_fini(adev); amdgpu_bo_fini(adev); - release_firmware(adev->mc.fw); - adev->mc.fw = NULL; + release_firmware(adev->gmc.fw); + adev->gmc.fw = NULL; return 0; } @@ -1191,7 +1210,7 @@ static int gmc_v8_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v8_0_gart_disable(adev); return 0; @@ -1271,10 +1290,10 @@ static bool gmc_v8_0_check_soft_reset(void *handle) SRBM_SOFT_RESET, SOFT_RESET_MC, 1); } if (srbm_soft_reset) { - adev->mc.srbm_soft_reset = srbm_soft_reset; + adev->gmc.srbm_soft_reset = srbm_soft_reset; return true; } else { - adev->mc.srbm_soft_reset = 0; + adev->gmc.srbm_soft_reset = 0; return false; } } @@ -1283,7 +1302,7 @@ static int gmc_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->mc.srbm_soft_reset) + if (!adev->gmc.srbm_soft_reset) return 0; gmc_v8_0_mc_stop(adev); @@ -1299,9 +1318,9 @@ static int gmc_v8_0_soft_reset(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 srbm_soft_reset; - if (!adev->mc.srbm_soft_reset) + if (!adev->gmc.srbm_soft_reset) return 0; - srbm_soft_reset = adev->mc.srbm_soft_reset; + srbm_soft_reset = adev->gmc.srbm_soft_reset; if (srbm_soft_reset) { u32 tmp; @@ -1329,7 +1348,7 @@ static int gmc_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->mc.srbm_soft_reset) + if (!adev->gmc.srbm_soft_reset) return 0; gmc_v8_0_mc_resume(adev); @@ -1410,7 +1429,8 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); - gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client); + gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client, + entry->pasid); } return 0; @@ -1642,9 +1662,11 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { .get_clockgating_state = gmc_v8_0_get_clockgating_state, }; -static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = { - .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, - .set_pte_pde = gmc_v8_0_gart_set_pte_pde, +static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, + .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, + .set_pte_pde = gmc_v8_0_set_pte_pde, .set_prt = gmc_v8_0_set_prt, .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, .get_vm_pde = gmc_v8_0_get_vm_pde @@ -1655,16 +1677,16 @@ static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { .process = gmc_v8_0_process_interrupt, }; -static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev) +static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gart.gart_funcs == NULL) - adev->gart.gart_funcs = &gmc_v8_0_gart_funcs; + if (adev->gmc.gmc_funcs == NULL) + adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs; } static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->mc.vm_fault.num_types = 1; - adev->mc.vm_fault.funcs = &gmc_v8_0_irq_funcs; + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs; } const struct amdgpu_ip_block_version gmc_v8_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 100ec69f020a..bc4bd5e7ac94 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -34,6 +34,7 @@ #include "vega10_enum.h" #include "mmhub/mmhub_1_0_offset.h" #include "athub/athub_1_0_offset.h" +#include "oss/osssys_4_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -263,10 +264,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (printk_ratelimit()) { dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n", + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, - entry->pas_id); + entry->pasid); dev_err(adev->dev, " at page 0x%016llx from %d\n", addr, entry->client_id); if (!amdgpu_sriov_vf(adev)) @@ -285,8 +286,8 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->mc.vm_fault.num_types = 1; - adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; } static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) @@ -316,24 +317,21 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) */ /** - * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback + * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback * * @adev: amdgpu_device pointer * @vmid: vm instance to flush * * Flush the TLB for the requested page table. */ -static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) { /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; - /* flush hdp cache */ - adev->nbio_funcs->hdp_flush(adev); - - spin_lock(&adev->mc.invalidate_lock); + spin_lock(&adev->gmc.invalidate_lock); for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &adev->vmhub[i]; @@ -366,11 +364,52 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } - spin_unlock(&adev->mc.invalidate_lock); + spin_unlock(&adev->gmc.invalidate_lock); +} + +static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; + uint32_t req = gmc_v9_0_get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + + amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags); + pd_addr |= flags; + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + upper_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + + /* wait for the invalidate to complete */ + amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, + 1 << vmid, 1 << vmid); + + return pd_addr; +} + +static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; + else + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; + + amdgpu_ring_emit_wreg(ring, reg, pasid); } /** - * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO + * gmc_v9_0_set_pte_pde - update the page tables using MMIO * * @adev: amdgpu_device pointer * @cpu_pt_addr: cpu address of the page table @@ -380,11 +419,9 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, * * Update the page tables using the CPU. */ -static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev, - void *cpu_pt_addr, - uint32_t gpu_page_idx, - uint64_t addr, - uint64_t flags) +static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -475,10 +512,10 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, { if (!(*flags & AMDGPU_PDE_PTE)) *addr = adev->vm_manager.vram_base_offset + *addr - - adev->mc.vram_start; + adev->gmc.vram_start; BUG_ON(*addr & 0xFFFF00000000003FULL); - if (!adev->mc.translate_further) + if (!adev->gmc.translate_further) return; if (level == AMDGPU_VM_PDB1) { @@ -494,34 +531,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } } -static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { - .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, - .set_pte_pde = gmc_v9_0_gart_set_pte_pde, - .get_invalidate_req = gmc_v9_0_get_invalidate_req, +static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, + .set_pte_pde = gmc_v9_0_set_pte_pde, .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, .get_vm_pde = gmc_v9_0_get_vm_pde }; -static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) +static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gart.gart_funcs == NULL) - adev->gart.gart_funcs = &gmc_v9_0_gart_funcs; + if (adev->gmc.gmc_funcs == NULL) + adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gmc_v9_0_set_gart_funcs(adev); + gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); - adev->mc.shared_aperture_start = 0x2000000000000000ULL; - adev->mc.shared_aperture_end = - adev->mc.shared_aperture_start + (4ULL << 30) - 1; - adev->mc.private_aperture_start = - adev->mc.shared_aperture_end + 1; - adev->mc.private_aperture_end = - adev->mc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; + adev->gmc.shared_aperture_end = + adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = + adev->gmc.shared_aperture_end + 1; + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; return 0; } @@ -647,16 +685,16 @@ static int gmc_v9_0_late_init(void *handle) } } - return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); } static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc) + struct amdgpu_gmc *mc) { u64 base = 0; if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); - amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_device_gart_location(adev, mc); /* base offset of vram pages */ if (adev->flags & AMD_IS_APU) @@ -680,8 +718,9 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) int chansize, numchan; int r; - adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); - if (!adev->mc.vram_width) { + if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + if (!adev->gmc.vram_width) { /* hbm memory channel size */ chansize = 128; @@ -718,43 +757,49 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) numchan = 2; break; } - adev->mc.vram_width = numchan * chansize; + adev->gmc.vram_width = numchan * chansize; } /* size in MB on si */ - adev->mc.mc_vram_size = + adev->gmc.mc_vram_size = adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; - adev->mc.real_vram_size = adev->mc.mc_vram_size; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); if (r) return r; } - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); +#ifdef CONFIG_X86_64 + if (adev->flags & AMD_IS_APU) { + adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev); + adev->gmc.aper_size = adev->gmc.real_vram_size; + } +#endif /* In case the PCI BAR is larger than the actual amount of vram */ - adev->mc.visible_vram_size = adev->mc.aper_size; - if (adev->mc.visible_vram_size > adev->mc.real_vram_size) - adev->mc.visible_vram_size = adev->mc.real_vram_size; + adev->gmc.visible_vram_size = adev->gmc.aper_size; + if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) + adev->gmc.visible_vram_size = adev->gmc.real_vram_size; /* set the gart size */ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { case CHIP_VEGA10: /* all engines support GPUVM */ default: - adev->mc.gart_size = 256ULL << 20; + adev->gmc.gart_size = 256ULL << 20; break; case CHIP_RAVEN: /* DCE SG support */ - adev->mc.gart_size = 1024ULL << 20; + adev->gmc.gart_size = 1024ULL << 20; break; } } else { - adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; } - gmc_v9_0_vram_gtt_location(adev, &adev->mc); + gmc_v9_0_vram_gtt_location(adev, &adev->gmc); return 0; } @@ -786,23 +831,23 @@ static int gmc_v9_0_sw_init(void *handle) gfxhub_v1_0_init(adev); mmhub_v1_0_init(adev); - spin_lock_init(&adev->mc.invalidate_lock); + spin_lock_init(&adev->gmc.invalidate_lock); switch (adev->asic_type) { case CHIP_RAVEN: - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { /* vm_size is 128TB + 512GB for legacy 3-level page support */ amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48); - adev->mc.translate_further = + adev->gmc.translate_further = adev->vm_manager.num_level > 1; } break; case CHIP_VEGA10: /* XXX Don't know how to get VRAM type yet. */ - adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -816,9 +861,9 @@ static int gmc_v9_0_sw_init(void *handle) /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, - &adev->mc.vm_fault); + &adev->gmc.vm_fault); r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0, - &adev->mc.vm_fault); + &adev->gmc.vm_fault); if (r) return r; @@ -827,13 +872,13 @@ static int gmc_v9_0_sw_init(void *handle) * This is the max address of the GPU's * internal address space. */ - adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ /* * It needs to reserve 8M stolen memory for vega10 * TODO: Figure out how to avoid that... */ - adev->mc.stolen_size = 8 * 1024 * 1024; + adev->gmc.stolen_size = 8 * 1024 * 1024; /* set DMA mask + need_dma32 flags. * PCIE - can handle 44-bits. @@ -975,7 +1020,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); /* After HDP is initialized, flush HDP.*/ - adev->nbio_funcs->hdp_flush(adev); + adev->nbio_funcs->hdp_flush(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; @@ -984,10 +1029,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) gfxhub_v1_0_set_fault_enable_default(adev, value); mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_gart_flush_gpu_tlb(adev, 0); + gmc_v9_0_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gart_size >> 20), + (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -1038,7 +1083,7 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } - amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v9_0_gart_disable(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index c4e4be3dd31d..3237a576692d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -260,7 +260,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vmid = (dw[2] >> 8) & 0xff; - entry->pas_id = (dw[2] >> 16) & 0xffff; + entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ adev->irq.ih.rptr += 16; diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index d9e9e52a0def..8766681cfd3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -3319,7 +3319,6 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = { }; const struct amd_pm_funcs kv_dpm_funcs = { - .get_temperature = &kv_dpm_get_temp, .pre_set_power_state = &kv_dpm_pre_set_power_state, .set_power_state = &kv_dpm_set_power_state, .post_set_power_state = &kv_dpm_post_set_power_state, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index ffd5b7ee49c4..d0ade9fd9fa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -50,7 +50,7 @@ static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) uint64_t value; BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->mc.vram_start + + value = adev->gart.table_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; value &= 0x0000FFFFFFFFF000ULL; value |= 0x1; /* valid bit */ @@ -67,14 +67,14 @@ static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) mmhub_v1_0_init_gart_pt_regs(adev); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->mc.gart_start >> 12)); + (u32)(adev->gmc.gart_start >> 12)); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->mc.gart_start >> 44)); + (u32)(adev->gmc.gart_start >> 44)); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->mc.gart_end >> 12)); + (u32)(adev->gmc.gart_end >> 12)); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->mc.gart_end >> 44)); + (u32)(adev->gmc.gart_end >> 44)); } static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) @@ -89,12 +89,12 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->mc.vram_start >> 18); + adev->gmc.vram_start >> 18); WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->mc.vram_end >> 18); + adev->gmc.vram_end >> 18); /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); @@ -155,7 +155,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); - if (adev->mc.translate_further) { + if (adev->gmc.translate_further) { tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); @@ -207,7 +207,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) num_level = adev->vm_manager.num_level; block_size = adev->vm_manager.block_size; - if (adev->mc.translate_further) + if (adev->gmc.translate_further) num_level -= 1; else block_size -= 9; @@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = { {0x11, 0x6a684}, {0x19, 0xea68e}, {0x29, 0xa69e}, - {0x2b, 0x34a6c0}, - {0x61, 0x83a707}, - {0xe6, 0x8a7a4}, - {0xf0, 0x1a7b8}, - {0xf3, 0xfa7cc}, - {0x104, 0x17a7dd}, - {0x11d, 0xa7dc}, - {0x11f, 0x12a7f5}, - {0x133, 0xa808}, - {0x135, 0x12a810}, - {0x149, 0x7a82c} + {0x2b, 0x0010a6c0}, + {0x3d, 0x83a707}, + {0xc2, 0x8a7a4}, + {0xcc, 0x1a7b8}, + {0xcf, 0xfa7cc}, + {0xe0, 0x17a7dd}, + {0xf9, 0xa7dc}, + {0xfb, 0x12a7f5}, + {0x10f, 0xa808}, + {0x111, 0x12a810}, + {0x125, 0x7a82c} }; #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data)) -#define PCTL0_RENG_EXEC_END_PTR 0x151 +#define PCTL0_RENG_EXEC_END_PTR 0x12d #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 @@ -385,10 +385,9 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return; + /****************** pctl0 **********************/ pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC); pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); - pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); - pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); /* Light sleep must be disabled before writing to pctl0 registers */ pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; @@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) pctl0_data[i].data); } - /* Set the reng execute end ptr for pctl0 */ - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_END_PTR, - PCTL0_RENG_EXEC_END_PTR); - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + /* Re-enable light sleep */ + pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); + + /****************** pctl1 **********************/ + pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); + pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); /* Light sleep must be disabled before writing to pctl1 registers */ pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; @@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) pctl1_data[i].data); } + /* Re-enable light sleep */ + pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); + + mmhub_v1_0_power_gating_write_save_ranges(adev); + + /* Set the reng execute end ptr for pctl0 */ + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_END_PTR, + PCTL0_RENG_EXEC_END_PTR); + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + /* Set the reng execute end ptr for pctl1 */ pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, PCTL1_RENG_EXECUTE, RENG_EXECUTE_END_PTR, PCTL1_RENG_EXEC_END_PTR); WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); - - mmhub_v1_0_power_gating_write_save_ranges(adev); - - /* Re-enable light sleep */ - pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); - pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); } void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, @@ -466,6 +471,9 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, RENG_EXECUTE_ON_REG_UPDATE, 1); WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); + if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu) + amdgpu_dpm_set_mmhub_powergating_by_smu(adev); + } else { pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, PCTL0_RENG_EXECUTE, @@ -494,9 +502,9 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) * SRIOV driver need to program them */ WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE, - adev->mc.vram_start >> 24); + adev->gmc.vram_start >> 24); WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP, - adev->mc.vram_end >> 24); + adev->gmc.vram_end >> 24); } /* GART Enable. */ diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index d4da663d5eb0..2daeef6e9345 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -53,9 +53,16 @@ static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) +static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) { - WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); + if (!ring || !ring->funcs->emit_wreg) + WREG32_SOC15_NO_KIQ(NBIO, 0, + mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, + 0); + else + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); } static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 17a9131a4598..cd10c76a76e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -53,9 +53,14 @@ static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) +static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) { - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + if (!ring || !ring->funcs->emit_wreg) + WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + else + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); } static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 5a9fe24697f9..8873d833a7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -87,7 +87,7 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * return 0; } -int psp_v10_0_init_microcode(struct psp_context *psp) +static int psp_v10_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; const char *chip_name; @@ -133,7 +133,8 @@ out: return err; } -int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) +static int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd) { int ret; uint64_t fw_mem_mc_addr = ucode->mc_addr; @@ -152,7 +153,8 @@ int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cm return ret; } -int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v10_0_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; struct psp_ring *ring; @@ -177,7 +179,8 @@ int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) return 0; } -int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v10_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; unsigned int psp_ring_reg = 0; @@ -208,7 +211,8 @@ int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) return ret; } -int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v10_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; struct psp_ring *ring; @@ -231,7 +235,8 @@ int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) return ret; } -int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v10_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; struct psp_ring *ring = &psp->km_ring; @@ -248,10 +253,10 @@ int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type return ret; } -int psp_v10_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) +static int psp_v10_0_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) { unsigned int psp_write_ptr_reg = 0; struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; @@ -298,9 +303,9 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, static int psp_v10_0_sram_map(struct amdgpu_device *adev, - unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, - unsigned int *sram_data_reg_offset, - enum AMDGPU_UCODE_ID ucode_id) + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) { int ret = 0; @@ -383,9 +388,9 @@ psp_v10_0_sram_map(struct amdgpu_device *adev, return ret; } -bool psp_v10_0_compare_sram_data(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - enum AMDGPU_UCODE_ID ucode_type) +static bool psp_v10_0_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) { int err = 0; unsigned int fw_sram_reg_val = 0; @@ -419,8 +424,25 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp, } -int psp_v10_0_mode1_reset(struct psp_context *psp) +static int psp_v10_0_mode1_reset(struct psp_context *psp) { DRM_INFO("psp mode 1 reset not supported now! \n"); return -EINVAL; } + +static const struct psp_funcs psp_v10_0_funcs = { + .init_microcode = psp_v10_0_init_microcode, + .prep_cmd_buf = psp_v10_0_prep_cmd_buf, + .ring_init = psp_v10_0_ring_init, + .ring_create = psp_v10_0_ring_create, + .ring_stop = psp_v10_0_ring_stop, + .ring_destroy = psp_v10_0_ring_destroy, + .cmd_submit = psp_v10_0_cmd_submit, + .compare_sram_data = psp_v10_0_compare_sram_data, + .mode1_reset = psp_v10_0_mode1_reset, +}; + +void psp_v10_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v10_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h index 451e8308303f..20c2a94859d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h @@ -27,24 +27,6 @@ #include "amdgpu_psp.h" -extern int psp_v10_0_init_microcode(struct psp_context *psp); -extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd); -extern int psp_v10_0_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v10_0_ring_create(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v10_0_ring_stop(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v10_0_ring_destroy(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v10_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index); -extern bool psp_v10_0_compare_sram_data(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - enum AMDGPU_UCODE_ID ucode_type); +void psp_v10_0_set_psp_funcs(struct psp_context *psp); -extern int psp_v10_0_mode1_reset(struct psp_context *psp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 19bd1934e63d..690b9766d8ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -93,7 +93,7 @@ psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *t return 0; } -int psp_v3_1_init_microcode(struct psp_context *psp) +static int psp_v3_1_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; const char *chip_name; @@ -161,7 +161,7 @@ out: return err; } -int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) +static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) { int ret; uint32_t psp_gfxdrv_command_reg = 0; @@ -202,7 +202,7 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) return ret; } -int psp_v3_1_bootloader_load_sos(struct psp_context *psp) +static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) { int ret; unsigned int psp_gfxdrv_command_reg = 0; @@ -243,7 +243,8 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) return ret; } -int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) +static int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd) { int ret; uint64_t fw_mem_mc_addr = ucode->mc_addr; @@ -262,7 +263,8 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd return ret; } -int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v3_1_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; struct psp_ring *ring; @@ -287,7 +289,8 @@ int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) return 0; } -int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v3_1_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; unsigned int psp_ring_reg = 0; @@ -318,7 +321,8 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) return ret; } -int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v3_1_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; struct psp_ring *ring; @@ -341,7 +345,8 @@ int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) return ret; } -int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) +static int psp_v3_1_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) { int ret = 0; struct psp_ring *ring = &psp->km_ring; @@ -358,10 +363,10 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) return ret; } -int psp_v3_1_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) +static int psp_v3_1_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) { unsigned int psp_write_ptr_reg = 0; struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; @@ -410,9 +415,9 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, static int psp_v3_1_sram_map(struct amdgpu_device *adev, - unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, - unsigned int *sram_data_reg_offset, - enum AMDGPU_UCODE_ID ucode_id) + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) { int ret = 0; @@ -495,9 +500,9 @@ psp_v3_1_sram_map(struct amdgpu_device *adev, return ret; } -bool psp_v3_1_compare_sram_data(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - enum AMDGPU_UCODE_ID ucode_type) +static bool psp_v3_1_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) { int err = 0; unsigned int fw_sram_reg_val = 0; @@ -530,7 +535,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp, return true; } -bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) +static bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; uint32_t reg; @@ -541,7 +546,7 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; } -int psp_v3_1_mode1_reset(struct psp_context *psp) +static int psp_v3_1_mode1_reset(struct psp_context *psp) { int ret; uint32_t offset; @@ -574,3 +579,23 @@ int psp_v3_1_mode1_reset(struct psp_context *psp) return 0; } + +static const struct psp_funcs psp_v3_1_funcs = { + .init_microcode = psp_v3_1_init_microcode, + .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, + .bootloader_load_sos = psp_v3_1_bootloader_load_sos, + .prep_cmd_buf = psp_v3_1_prep_cmd_buf, + .ring_init = psp_v3_1_ring_init, + .ring_create = psp_v3_1_ring_create, + .ring_stop = psp_v3_1_ring_stop, + .ring_destroy = psp_v3_1_ring_destroy, + .cmd_submit = psp_v3_1_cmd_submit, + .compare_sram_data = psp_v3_1_compare_sram_data, + .smu_reload_quirk = psp_v3_1_smu_reload_quirk, + .mode1_reset = psp_v3_1_mode1_reset, +}; + +void psp_v3_1_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v3_1_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h index b05dbada7751..e411e31ba452 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h @@ -32,26 +32,6 @@ enum { PSP_BINARY_ALIGNMENT = 64 }; enum { PSP_BOOTLOADER_1_MEG_ALIGNMENT = 0x100000 }; enum { PSP_BOOTLOADER_8_MEM_ALIGNMENT = 0x800000 }; -extern int psp_v3_1_init_microcode(struct psp_context *psp); -extern int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp); -extern int psp_v3_1_bootloader_load_sos(struct psp_context *psp); -extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd); -extern int psp_v3_1_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v3_1_ring_create(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v3_1_ring_stop(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v3_1_ring_destroy(struct psp_context *psp, - enum psp_ring_type ring_type); -extern int psp_v3_1_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index); -extern bool psp_v3_1_compare_sram_data(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - enum AMDGPU_UCODE_ID ucode_type); -extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp); -extern int psp_v3_1_mode1_reset(struct psp_context *psp); +void psp_v3_1_set_psp_funcs(struct psp_context *psp); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index d4787ad4d346..6ccc9d43a7b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -289,13 +289,6 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring) SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ } -static void sdma_v2_4_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, mmHDP_DEBUG0); - amdgpu_ring_write(ring, 1); -} /** * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring * @@ -346,7 +339,7 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); @@ -491,7 +484,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); } return 0; @@ -861,20 +854,7 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - if (vmid < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); - } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); - } - amdgpu_ring_write(ring, pd_addr >> 12); - - /* flush TLB */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for flush */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | @@ -888,6 +868,15 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ } +static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} + static int sdma_v2_4_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1203,9 +1192,9 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .set_wptr = sdma_v2_4_ring_set_wptr, .emit_frame_size = 6 + /* sdma_v2_4_ring_emit_hdp_flush */ - 3 + /* sdma_v2_4_ring_emit_hdp_invalidate */ + 3 + /* hdp invalidate */ 6 + /* sdma_v2_4_ring_emit_pipeline_sync */ - 12 + /* sdma_v2_4_ring_emit_vm_flush */ + VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */ 10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */ .emit_ib = sdma_v2_4_ring_emit_ib, @@ -1213,11 +1202,11 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync, .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush, .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, - .emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate, .test_ring = sdma_v2_4_ring_test_ring, .test_ib = sdma_v2_4_ring_test_ib, .insert_nop = sdma_v2_4_ring_insert_nop, .pad_ib = sdma_v2_4_ring_pad_ib, + .emit_wreg = sdma_v2_4_ring_emit_wreg, }; static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) @@ -1316,9 +1305,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { .copy_pte = sdma_v2_4_vm_copy_pte, .write_pte = sdma_v2_4_vm_write_pte, - - .set_max_nums_pte_pde = 0x1fffff >> 3, - .set_pte_pde_num_dw = 10, .set_pte_pde = sdma_v2_4_vm_set_pte_pde, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 521978c40537..0c2b12ec0e9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -460,14 +460,6 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ } -static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, mmHDP_DEBUG0); - amdgpu_ring_write(ring, 1); -} - /** * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring * @@ -518,7 +510,7 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); @@ -758,7 +750,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); } return 0; @@ -1127,20 +1119,7 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - if (vmid < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); - } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); - } - amdgpu_ring_write(ring, pd_addr >> 12); - - /* flush TLB */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for flush */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | @@ -1154,6 +1133,15 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ } +static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} + static int sdma_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1637,9 +1625,9 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .set_wptr = sdma_v3_0_ring_set_wptr, .emit_frame_size = 6 + /* sdma_v3_0_ring_emit_hdp_flush */ - 3 + /* sdma_v3_0_ring_emit_hdp_invalidate */ + 3 + /* hdp invalidate */ 6 + /* sdma_v3_0_ring_emit_pipeline_sync */ - 12 + /* sdma_v3_0_ring_emit_vm_flush */ + VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v3_0_ring_emit_vm_flush */ 10 + 10 + 10, /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v3_0_ring_emit_ib */ .emit_ib = sdma_v3_0_ring_emit_ib, @@ -1647,11 +1635,11 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync, .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush, .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate, .test_ring = sdma_v3_0_ring_test_ring, .test_ib = sdma_v3_0_ring_test_ib, .insert_nop = sdma_v3_0_ring_insert_nop, .pad_ib = sdma_v3_0_ring_pad_ib, + .emit_wreg = sdma_v3_0_ring_emit_wreg, }; static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) @@ -1750,10 +1738,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { .copy_pte = sdma_v3_0_vm_copy_pte, .write_pte = sdma_v3_0_vm_write_pte, - - /* not 0x3fffff due to HW limitation */ - .set_max_nums_pte_pde = 0x3fffe0 >> 3, - .set_pte_pde_num_dw = 10, .set_pte_pde = sdma_v3_0_vm_set_pte_pde, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e92fb372bc99..3d5385dda34c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -375,16 +375,6 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ } -static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE)); - amdgpu_ring_write(ring, 1); -} - /** * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring * @@ -440,7 +430,7 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); @@ -682,7 +672,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); } @@ -1135,38 +1125,28 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; - - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} - /* flush TLB */ +static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); - amdgpu_ring_write(ring, req); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} - /* wait for flush */ +static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vmid); /* reference */ - amdgpu_ring_write(ring, 1 << vmid); /* mask */ + amdgpu_ring_write(ring, val); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } @@ -1592,9 +1572,11 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .set_wptr = sdma_v4_0_ring_set_wptr, .emit_frame_size = 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ + 3 + /* hdp invalidate */ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - 18 + /* sdma_v4_0_ring_emit_vm_flush */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ .emit_ib = sdma_v4_0_ring_emit_ib, @@ -1602,11 +1584,12 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = sdma_v4_0_ring_emit_hdp_invalidate, .test_ring = sdma_v4_0_ring_test_ring, .test_ib = sdma_v4_0_ring_test_ib, .insert_nop = sdma_v4_0_ring_insert_nop, .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, }; static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) @@ -1705,9 +1688,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { .copy_pte = sdma_v4_0_vm_copy_pte, .write_pte = sdma_v4_0_vm_write_pte, - - .set_max_nums_pte_pde = 0x400000 >> 3, - .set_pte_pde_num_dw = 10, .set_pte_pde = sdma_v4_0_vm_set_pte_pde, }; diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 543101d5a5ed..f20c4b7414e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1230,6 +1230,27 @@ static void si_detect_hw_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } +static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + } +} + +static void si_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_DEBUG0, 1); + RREG32(mmHDP_DEBUG0); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1); + } +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -1241,6 +1262,8 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .set_uvd_clocks = &si_set_uvd_clocks, .set_vce_clocks = NULL, .get_config_memsize = &si_get_config_memsize, + .flush_hdp = &si_flush_hdp, + .invalidate_hdp = &si_invalidate_hdp, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si.h b/drivers/gpu/drm/amd/amdgpu/si.h index 589225080c24..06ed7212a0d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.h +++ b/drivers/gpu/drm/amd/amdgpu/si.h @@ -24,6 +24,8 @@ #ifndef __SI_H__ #define __SI_H__ +#define SI_FLUSH_GPU_TLB_NUM_WREG 2 + void si_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int si_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 9a29c1399091..acbf5afa4f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -24,6 +24,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "si.h" #include "sid.h" const u32 sdma_offsets[SDMA_MAX_INSTANCE] = @@ -74,20 +75,6 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, } -static void si_dma_ring_emit_hdp_flush(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - amdgpu_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL)); - amdgpu_ring_write(ring, 1); -} - -static void si_dma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - amdgpu_ring_write(ring, (0xf << 16) | (HDP_DEBUG0)); - amdgpu_ring_write(ring, 1); -} - /** * si_dma_ring_emit_fence - emit a fence on the DMA ring * @@ -134,7 +121,7 @@ static void si_dma_stop(struct amdgpu_device *adev) WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); ring->ready = false; } } @@ -197,7 +184,7 @@ static int si_dma_start(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); } return 0; @@ -475,17 +462,7 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - if (vmid < 8) - amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); - else - amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8))); - amdgpu_ring_write(ring, pd_addr >> 12); - - /* bits 0-7 are the VM contexts0-7 */ - amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST)); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for invalidate to complete */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); @@ -496,6 +473,14 @@ static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ } +static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + amdgpu_ring_write(ring, (0xf << 16) | reg); + amdgpu_ring_write(ring, val); +} + static int si_dma_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -772,22 +757,20 @@ static const struct amdgpu_ring_funcs si_dma_ring_funcs = { .get_wptr = si_dma_ring_get_wptr, .set_wptr = si_dma_ring_set_wptr, .emit_frame_size = - 3 + /* si_dma_ring_emit_hdp_flush */ - 3 + /* si_dma_ring_emit_hdp_invalidate */ + 3 + 3 + /* hdp flush / invalidate */ 6 + /* si_dma_ring_emit_pipeline_sync */ - 12 + /* si_dma_ring_emit_vm_flush */ + SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */ 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */ .emit_ib = si_dma_ring_emit_ib, .emit_fence = si_dma_ring_emit_fence, .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync, .emit_vm_flush = si_dma_ring_emit_vm_flush, - .emit_hdp_flush = si_dma_ring_emit_hdp_flush, - .emit_hdp_invalidate = si_dma_ring_emit_hdp_invalidate, .test_ring = si_dma_ring_test_ring, .test_ib = si_dma_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = si_dma_ring_pad_ib, + .emit_wreg = si_dma_ring_emit_wreg, }; static void si_dma_set_ring_funcs(struct amdgpu_device *adev) @@ -891,9 +874,6 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { .copy_pte = si_dma_vm_copy_pte, .write_pte = si_dma_vm_write_pte, - - .set_max_nums_pte_pde = 0xffff8 >> 3, - .set_pte_pde_num_dw = 9, .set_pte_pde = si_dma_vm_set_pte_pde, }; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index ce675a7f179a..8138053fcef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3064,7 +3064,7 @@ static bool si_dpm_vblank_too_short(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); /* we never hit the non-gddr5 limit so disable it */ - u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; + u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; if (vblank_time < switch_limit) return true; @@ -4350,7 +4350,7 @@ static u8 si_get_strobe_mode_settings(struct amdgpu_device *adev, u32 mclk) if (mclk <= pi->mclk_strobe_mode_threshold) strobe_mode = true; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) result = si_get_mclk_frequency_ratio(mclk, strobe_mode); else result = si_get_ddr3_mclk_frequency_ratio(mclk); @@ -4937,7 +4937,7 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp); table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { table->initialState.levels[0].strobeMode = si_get_strobe_mode_settings(adev, initial_state->performance_levels[0].mclk); @@ -5208,7 +5208,7 @@ static int si_init_smc_table(struct amdgpu_device *adev) if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) table->systemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) table->systemFlags |= PPSMC_SYSTEMFLAG_GDDR5; if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY) @@ -5385,7 +5385,7 @@ static int si_populate_mclk_value(struct amdgpu_device *adev, mpll_ad_func_cntl &= ~YCLK_POST_DIV_MASK; mpll_ad_func_cntl |= YCLK_POST_DIV(mpll_param.post_div); - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { mpll_dq_func_cntl &= ~(YCLK_SEL_MASK | YCLK_POST_DIV_MASK); mpll_dq_func_cntl |= YCLK_SEL(mpll_param.yclk_sel) | YCLK_POST_DIV(mpll_param.post_div); @@ -5397,7 +5397,7 @@ static int si_populate_mclk_value(struct amdgpu_device *adev, u32 tmp; u32 reference_clock = adev->clock.mpll.reference_freq; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) freq_nom = memory_clock * 4; else freq_nom = memory_clock * 2; @@ -5489,7 +5489,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev, level->mcFlags |= SISLANDS_SMC_MC_PG_EN; } - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { if (pl->mclk > pi->mclk_edc_enable_threshold) level->mcFlags |= SISLANDS_SMC_MC_EDC_RD_FLAG; @@ -5860,12 +5860,12 @@ static int si_set_mc_special_registers(struct amdgpu_device *adev, table->mc_reg_table_entry[k].mc_data[j] = (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD; @@ -8056,7 +8056,6 @@ const struct amd_ip_funcs si_dpm_ip_funcs = { }; const struct amd_pm_funcs si_dpm_funcs = { - .get_temperature = &si_dpm_get_temp, .pre_set_power_state = &si_dpm_pre_set_power_state, .set_power_state = &si_dpm_set_power_state, .post_set_power_state = &si_dpm_post_set_power_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a04a033f57de..8dc8b72ed49b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -417,12 +417,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) pci_save_state(adev->pdev); - for (i = 0; i < AMDGPU_MAX_IP_NUM; i++) { - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP){ - adev->ip_blocks[i].version->funcs->soft_reset((void *)adev); - break; - } - } + psp_gpu_reset(adev); pci_restore_state(adev->pdev); @@ -583,6 +578,21 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) return adev->nbio_funcs->get_rev_id(adev); } +static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + adev->nbio_funcs->hdp_flush(adev, ring); +} + +static void soc15_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + else + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -594,6 +604,8 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .set_uvd_clocks = &soc15_set_uvd_clocks, .set_vce_clocks = &soc15_set_vce_clocks, .get_config_memsize = &soc15_get_config_memsize, + .flush_hdp = &soc15_flush_hdp, + .invalidate_hdp = &soc15_invalidate_hdp, }; static int soc15_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 26b3feac5d06..f70da8a29f86 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -27,6 +27,9 @@ #include "nbio_v6_1.h" #include "nbio_v7_0.h" +#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 + extern const struct amd_ip_funcs soc15_common_ip_funcs; struct soc15_reg_golden { diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 5995ffc183de..18435389bae4 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -271,7 +271,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vmid = (dw[2] >> 8) & 0xff; - entry->pas_id = (dw[2] >> 16) & 0xffff; + entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ adev->irq.ih.rptr += 16; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 8ab10c220910..948bb9437757 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -464,32 +464,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq } /** - * uvd_v4_2_ring_emit_hdp_flush - emit an hdp flush - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp flush. - */ -static void uvd_v4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0)); - amdgpu_ring_write(ring, 0); -} - -/** - * uvd_v4_2_ring_hdp_invalidate - emit an hdp invalidate - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp invalidate. - */ -static void uvd_v4_2_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0)); - amdgpu_ring_write(ring, 1); -} - -/** * uvd_v4_2_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -765,14 +739,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .set_wptr = uvd_v4_2_ring_set_wptr, .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_frame_size = - 2 + /* uvd_v4_2_ring_emit_hdp_flush */ - 2 + /* uvd_v4_2_ring_emit_hdp_invalidate */ 14, /* uvd_v4_2_ring_emit_fence x1 no user fence */ .emit_ib_size = 4, /* uvd_v4_2_ring_emit_ib */ .emit_ib = uvd_v4_2_ring_emit_ib, .emit_fence = uvd_v4_2_ring_emit_fence, - .emit_hdp_flush = uvd_v4_2_ring_emit_hdp_flush, - .emit_hdp_invalidate = uvd_v4_2_ring_emit_hdp_invalidate, .test_ring = uvd_v4_2_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index c1fe30cdba32..6445d55e7d5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -479,32 +479,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq } /** - * uvd_v5_0_ring_emit_hdp_flush - emit an hdp flush - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp flush. - */ -static void uvd_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0)); - amdgpu_ring_write(ring, 0); -} - -/** - * uvd_v5_0_ring_hdp_invalidate - emit an hdp invalidate - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp invalidate. - */ -static void uvd_v5_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0)); - amdgpu_ring_write(ring, 1); -} - -/** * uvd_v5_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -873,14 +847,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .set_wptr = uvd_v5_0_ring_set_wptr, .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_frame_size = - 2 + /* uvd_v5_0_ring_emit_hdp_flush */ - 2 + /* uvd_v5_0_ring_emit_hdp_invalidate */ 14, /* uvd_v5_0_ring_emit_fence x1 no user fence */ .emit_ib_size = 6, /* uvd_v5_0_ring_emit_ib */ .emit_ib = uvd_v5_0_ring_emit_ib, .emit_fence = uvd_v5_0_ring_emit_fence, - .emit_hdp_flush = uvd_v5_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = uvd_v5_0_ring_emit_hdp_invalidate, .test_ring = uvd_v5_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index b2bfedaf57f1..a3e64e22c93c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -964,32 +964,6 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** - * uvd_v6_0_ring_emit_hdp_flush - emit an hdp flush - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp flush. - */ -static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0)); - amdgpu_ring_write(ring, 0); -} - -/** - * uvd_v6_0_ring_hdp_invalidate - emit an hdp invalidate - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp invalidate. - */ -static void uvd_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0)); - amdgpu_ring_write(ring, 1); -} - -/** * uvd_v6_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1072,29 +1046,21 @@ static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } -static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) +static void uvd_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) { - uint32_t reg; - - if (vmid < 8) - reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; - else - reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8; - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); - amdgpu_ring_write(ring, pd_addr >> 12); + amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); amdgpu_ring_write(ring, 0x8); +} - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); - amdgpu_ring_write(ring, 1 << vmid); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); - amdgpu_ring_write(ring, 0x8); +static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); @@ -1140,7 +1106,7 @@ static void uvd_v6_0_enc_ring_insert_end(struct amdgpu_ring *ring) } static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB); amdgpu_ring_write(ring, vmid); @@ -1562,21 +1528,19 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .set_wptr = uvd_v6_0_ring_set_wptr, .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_frame_size = - 2 + /* uvd_v6_0_ring_emit_hdp_flush */ - 2 + /* uvd_v6_0_ring_emit_hdp_invalidate */ + 6 + 6 + /* hdp flush / invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ .emit_ib = uvd_v6_0_ring_emit_ib, .emit_fence = uvd_v6_0_ring_emit_fence, - .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, + .emit_wreg = uvd_v6_0_ring_emit_wreg, }; static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { @@ -1588,24 +1552,22 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, .emit_frame_size = - 2 + /* uvd_v6_0_ring_emit_hdp_flush */ - 2 + /* uvd_v6_0_ring_emit_hdp_invalidate */ + 6 + 6 + /* hdp flush / invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ - 20 + /* uvd_v6_0_ring_emit_vm_flush */ + VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */ 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ .emit_ib = uvd_v6_0_ring_emit_ib, .emit_fence = uvd_v6_0_ring_emit_fence, .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, - .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, + .emit_wreg = uvd_v6_0_ring_emit_wreg, }; static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 6b95f4f344b5..e54cc3ca2303 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -25,6 +25,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_uvd.h" +#include "soc15.h" #include "soc15d.h" #include "soc15_common.h" #include "mmsch_v1_0.h" @@ -1135,37 +1136,6 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** - * uvd_v7_0_ring_emit_hdp_flush - emit an hdp flush - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp flush. - */ -static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(NBIF, 0, - mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0)); - amdgpu_ring_write(ring, 0); -} - -/** - * uvd_v7_0_ring_hdp_invalidate - emit an hdp invalidate - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp invalidate. - */ -static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); - amdgpu_ring_write(ring, 1); -} - -/** * uvd_v7_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1255,33 +1225,33 @@ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } -static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring, - uint32_t data0, uint32_t data1) +static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) { struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); - amdgpu_ring_write(ring, data0); + amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); - amdgpu_ring_write(ring, data1); + amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 8); } -static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, - uint32_t data0, uint32_t data1, uint32_t mask) +static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); - amdgpu_ring_write(ring, data0); + amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); - amdgpu_ring_write(ring, data1); + amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); amdgpu_ring_write(ring, mask); @@ -1294,37 +1264,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; uint32_t data0, data1, mask; - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; - - data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; - data1 = upper_32_bits(pd_addr); - uvd_v7_0_vm_reg_write(ring, data0, data1); - - data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; - data1 = lower_32_bits(pd_addr); - uvd_v7_0_vm_reg_write(ring, data0, data1); + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; + /* wait for reg writes */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); - - /* flush TLB */ - data0 = (hub->vm_inv_eng0_req + eng) << 2; - data1 = req; - uvd_v7_0_vm_reg_write(ring, data0, data1); - - /* wait for flush */ - data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vmid; - mask = 1 << vmid; - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); + uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask); } static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -1342,40 +1290,34 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) amdgpu_ring_write(ring, HEVC_ENC_CMD_END); } +static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + /* wait for reg writes */ + uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + lower_32_bits(pd_addr), 0xffffffff); +} - /* flush TLB */ +static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vmid); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); } #if 0 @@ -1712,22 +1654,23 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, .emit_frame_size = - 2 + /* uvd_v7_0_ring_emit_hdp_flush */ - 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ - 34 + /* uvd_v7_0_ring_emit_vm_flush */ + 6 + 6 + /* hdp flush / invalidate */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* uvd_v7_0_ring_emit_vm_flush */ 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ .emit_ib = uvd_v7_0_ring_emit_ib, .emit_fence = uvd_v7_0_ring_emit_fence, .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, - .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = uvd_v7_0_ring_emit_hdp_invalidate, .test_ring = uvd_v7_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = uvd_v7_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, + .emit_wreg = uvd_v7_0_ring_emit_wreg, + .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait, }; static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { @@ -1740,7 +1683,10 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, .emit_frame_size = - 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */ + 3 + 3 + /* hdp flush / invalidate */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* uvd_v7_0_enc_ring_emit_vm_flush */ 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ 1, /* uvd_v7_0_enc_ring_insert_end */ .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ @@ -1754,6 +1700,8 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, + .emit_wreg = uvd_v7_0_enc_ring_emit_wreg, + .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait, }; static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index a5355eb689f1..428d1928e44e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -844,7 +844,7 @@ static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, } static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB); amdgpu_ring_write(ring, vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 7cf2eef68cf2..2329b310ccf2 100755 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -28,6 +28,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_vce.h" +#include "soc15.h" #include "soc15d.h" #include "soc15_common.h" #include "mmsch_v1_0.h" @@ -964,40 +965,33 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) amdgpu_ring_write(ring, VCE_CMD_END); } +static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + /* wait for reg writes */ + vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + lower_32_bits(pd_addr), 0xffffffff); +} - /* flush TLB */ +static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vmid); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); } static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1069,7 +1063,9 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .set_wptr = vce_v4_0_ring_set_wptr, .parse_cs = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = - 17 + /* vce_v4_0_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vce_v4_0_emit_vm_flush */ 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1, /* vce_v4_0_ring_insert_end */ .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ @@ -1083,6 +1079,8 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vce_ring_begin_use, .end_use = amdgpu_vce_ring_end_use, + .emit_wreg = vce_v4_0_emit_wreg, + .emit_reg_wait = vce_v4_0_emit_reg_wait, }; static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index b99e15c43e45..fdf4ac9313cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -25,6 +25,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_vcn.h" +#include "soc15.h" #include "soc15d.h" #include "soc15_common.h" @@ -809,21 +810,6 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 } /** - * vcn_v1_0_dec_ring_hdp_invalidate - emit an hdp invalidate - * - * @ring: amdgpu_ring pointer - * - * Emits an hdp invalidate. - */ -static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); - amdgpu_ring_write(ring, 1); -} - -/** * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer @@ -852,33 +838,18 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } -static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, - uint32_t data0, uint32_t data1) +static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) { struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); - amdgpu_ring_write(ring, data0); + amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); - amdgpu_ring_write(ring, data1); - amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); -} - -static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, - uint32_t data0, uint32_t data1, uint32_t mask) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); - amdgpu_ring_write(ring, data0); - amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); - amdgpu_ring_write(ring, data1); + amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); amdgpu_ring_write(ring, mask); @@ -888,40 +859,34 @@ static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, } static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; uint32_t data0, data1, mask; - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; - - data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; - data1 = upper_32_bits(pd_addr); - vcn_v1_0_dec_vm_reg_write(ring, data0, data1); - - data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; - data1 = lower_32_bits(pd_addr); - vcn_v1_0_dec_vm_reg_write(ring, data0, data1); + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; - vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); - - /* flush TLB */ - data0 = (hub->vm_inv_eng0_req + eng) << 2; - data1 = req; - vcn_v1_0_dec_vm_reg_write(ring, data0, data1); - - /* wait for flush */ - data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vmid; - mask = 1 << vmid; - vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); + vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, val); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); } /** @@ -1020,43 +985,34 @@ static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } +static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; - - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; - amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + /* wait for reg writes */ + vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + lower_32_bits(pd_addr), 0xffffffff); +} - /* flush TLB */ +static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vmid); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); } static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1133,15 +1089,16 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, .emit_frame_size = - 2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */ - 34 + /* vcn_v1_0_dec_ring_emit_vm_flush */ + 6 + 6 + /* hdp invalidate / flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v1_0_dec_ring_emit_vm_flush */ 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */ 6, .emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */ .emit_ib = vcn_v1_0_dec_ring_emit_ib, .emit_fence = vcn_v1_0_dec_ring_emit_fence, .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush, - .emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate, .test_ring = amdgpu_vcn_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, .insert_nop = vcn_v1_0_ring_insert_nop, @@ -1150,6 +1107,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, }; static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { @@ -1162,7 +1121,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .get_wptr = vcn_v1_0_enc_ring_get_wptr, .set_wptr = vcn_v1_0_enc_ring_set_wptr, .emit_frame_size = - 17 + /* vcn_v1_0_enc_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v1_0_enc_ring_emit_vm_flush */ 5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */ 1, /* vcn_v1_0_enc_ring_insert_end */ .emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */ @@ -1176,6 +1137,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, }; static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index ee14d78be2a9..cc8ce7e352a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -333,7 +333,7 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, entry->vmid_src = (dw[0] >> 31); entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); entry->timestamp_src = dw[2] >> 31; - entry->pas_id = dw[3] & 0xffff; + entry->pasid = dw[3] & 0xffff; entry->pasid_src = dw[3] >> 31; entry->src_data[0] = dw[4]; entry->src_data[1] = dw[5]; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index b7bdd04793d6..4c45db7f1157 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -24,7 +24,8 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" int vega10_reg_base_init(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 1e3e05a11f7a..61360a1552d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -856,6 +856,27 @@ static uint32_t vi_get_rev_id(struct amdgpu_device *adev) >> PCIE_EFUSE4__STRAP_BIF_ATI_REV_ID__SHIFT; } +static void vi_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + } +} + +static void vi_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_DEBUG0, 1); + RREG32(mmHDP_DEBUG0); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1); + } +} + static const struct amdgpu_asic_funcs vi_asic_funcs = { .read_disabled_bios = &vi_read_disabled_bios, @@ -867,6 +888,8 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, .get_config_memsize = &vi_get_config_memsize, + .flush_hdp = &vi_flush_hdp, + .invalidate_hdp = &vi_invalidate_hdp, }; #define CZ_REV_BRISTOL(rev) \ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 575d7aed5d32..0429fe332269 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -24,6 +24,8 @@ #ifndef __VI_H__ #define __VI_H__ +#define VI_FLUSH_GPU_TLB_NUM_WREG 3 + void vi_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int vi_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index c27c81cdeed3..3d14478913de 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -32,11 +32,12 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color #TODO: remove when Timing Sync feature is complete subdir-ccflags-y += -DBUILD_FEATURE_TIMING_SYNC=0 -DAL_LIBS = amdgpu_dm dc modules/freesync +DAL_LIBS = amdgpu_dm dc modules/freesync modules/color AMD_DAL = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/,$(DAL_LIBS))) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 2b72009844f8..af16973f2c41 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -25,12 +25,16 @@ -AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o +AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o ifneq ($(CONFIG_DRM_AMD_DC),) AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o endif +ifneq ($(CONFIG_DEBUG_FS),) +AMDGPUDM += amdgpu_dm_crc.o +endif + subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM)) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1ce4c98385e3..84f6fe9a448b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -61,7 +61,8 @@ #include "dcn/dcn_1_0_offset.h" #include "dcn/dcn_1_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #include "soc15_common.h" #endif @@ -319,6 +320,7 @@ static void dm_crtc_high_irq(void *interrupt_params) crtc_index = acrtc->crtc_id; drm_handle_vblank(adev->ddev, crtc_index); + amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); } static int dm_set_clockgating_state(void *handle, @@ -345,23 +347,43 @@ static void hotplug_notify_work_func(struct work_struct *work) } #if defined(CONFIG_DRM_AMD_DC_FBC) -#include "dal_asic_id.h" /* Allocate memory for FBC compressed data */ -/* TODO: Dynamic allocation */ -#define AMDGPU_FBC_SIZE (3840 * 2160 * 4) - -static void amdgpu_dm_initialize_fbc(struct amdgpu_device *adev) +static void amdgpu_dm_fbc_init(struct drm_connector *connector) { - int r; + struct drm_device *dev = connector->dev; + struct amdgpu_device *adev = dev->dev_private; struct dm_comressor_info *compressor = &adev->dm.compressor; + struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector); + struct drm_display_mode *mode; + unsigned long max_size = 0; + + if (adev->dm.dc->fbc_compressor == NULL) + return; + + if (aconn->dc_link->connector_signal != SIGNAL_TYPE_EDP) + return; + + if (compressor->bo_ptr) + return; + - if (!compressor->bo_ptr) { - r = amdgpu_bo_create_kernel(adev, AMDGPU_FBC_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &compressor->bo_ptr, - &compressor->gpu_addr, &compressor->cpu_addr); + list_for_each_entry(mode, &connector->modes, head) { + if (max_size < mode->htotal * mode->vtotal) + max_size = mode->htotal * mode->vtotal; + } + + if (max_size) { + int r = amdgpu_bo_create_kernel(adev, max_size * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &compressor->bo_ptr, + &compressor->gpu_addr, &compressor->cpu_addr); if (r) - DRM_ERROR("DM: Failed to initialize fbc\n"); + DRM_ERROR("DM: Failed to initialize FBC\n"); + else { + adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr; + DRM_INFO("DM: FBC alloc %lu\n", max_size*4); + } + } } @@ -381,12 +403,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) /* Zero all the fields */ memset(&init_data, 0, sizeof(init_data)); - /* initialize DAL's lock (for SYNC context use) */ - spin_lock_init(&adev->dm.dal_lock); - - /* initialize DAL's mutex */ - mutex_init(&adev->dm.dal_mutex); - if(amdgpu_dm_irq_init(adev)) { DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n"); goto error; @@ -397,7 +413,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.asic_id.pci_revision_id = adev->rev_id; init_data.asic_id.hw_internal_rev = adev->external_rev_id; - init_data.asic_id.vram_width = adev->mc.vram_width; + init_data.asic_id.vram_width = adev->gmc.vram_width; /* TODO: initialize init_data.asic_id.vram_type here!!!! */ init_data.asic_id.atombios_base_address = adev->mode_info.atom_context->bios; @@ -422,11 +438,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) else init_data.log_mask = DC_MIN_LOG_MASK; -#if defined(CONFIG_DRM_AMD_DC_FBC) - if (adev->family == FAMILY_CZ) - amdgpu_dm_initialize_fbc(adev); - init_data.fbc_gpu_addr = adev->dm.compressor.gpu_addr; -#endif + /* + * TODO debug why this doesn't work on Raven + */ + if (adev->flags & AMD_IS_APU && + adev->asic_type >= CHIP_CARRIZO && + adev->asic_type < CHIP_RAVEN) + init_data.flags.gpu_vm_support = true; + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); @@ -447,6 +466,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_DEBUG_DRIVER("amdgpu: freesync_module init done %p.\n", adev->dm.freesync_module); + amdgpu_dm_init_color_mod(); + if (amdgpu_dm_initialize_drm_device(adev)) { DRM_ERROR( "amdgpu: failed to initialize sw for display support.\n"); @@ -540,9 +561,9 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) static int dm_late_init(void *handle) { - struct drm_device *dev = ((struct amdgpu_device *)handle)->ddev; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return detect_mst_link_for_all_connectors(dev); + return detect_mst_link_for_all_connectors(adev->ddev); } static void s3_handle_mst(struct drm_device *dev, bool suspend) @@ -629,11 +650,13 @@ static int dm_resume(void *handle) { struct amdgpu_device *adev = handle; struct amdgpu_display_manager *dm = &adev->dm; + int ret = 0; /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); - return 0; + ret = amdgpu_dm_display_resume(adev); + return ret; } int amdgpu_dm_display_resume(struct amdgpu_device *adev) @@ -791,7 +814,7 @@ dm_atomic_state_alloc_free(struct drm_atomic_state *state) } static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = { - .fb_create = amdgpu_user_framebuffer_create, + .fb_create = amdgpu_display_user_framebuffer_create, .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = amdgpu_dm_atomic_check, .atomic_commit = amdgpu_dm_atomic_commit, @@ -1279,9 +1302,9 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) /* indicate support of immediate flip */ adev->ddev->mode_config.async_page_flip = true; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -1538,7 +1561,6 @@ static int amdgpu_notify_freesync(struct drm_device *dev, void *data, static const struct amdgpu_display_funcs dm_display_funcs = { .bandwidth_update = dm_bandwidth_update, /* called unconditionally */ .vblank_get_counter = dm_vblank_get_counter,/* called unconditionally */ - .vblank_wait = NULL, .backlight_set_level = dm_set_backlight_level,/* called unconditionally */ .backlight_get_level = @@ -1589,8 +1611,6 @@ static int dm_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->ddev->driver->driver_features |= DRIVER_ATOMIC; - switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: @@ -1924,32 +1944,6 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, } -static void fill_gamma_from_crtc_state(const struct drm_crtc_state *crtc_state, - struct dc_plane_state *plane_state) -{ - int i; - struct dc_gamma *gamma; - struct drm_color_lut *lut = - (struct drm_color_lut *) crtc_state->gamma_lut->data; - - gamma = dc_create_gamma(); - - if (gamma == NULL) { - WARN_ON(1); - return; - } - - gamma->type = GAMMA_RGB_256; - gamma->num_entries = GAMMA_RGB_256_ENTRIES; - for (i = 0; i < GAMMA_RGB_256_ENTRIES; i++) { - gamma->entries.red[i] = dal_fixed31_32_from_int(lut[i].red); - gamma->entries.green[i] = dal_fixed31_32_from_int(lut[i].green); - gamma->entries.blue[i] = dal_fixed31_32_from_int(lut[i].blue); - } - - plane_state->gamma_correction = gamma; -} - static int fill_plane_attributes(struct amdgpu_device *adev, struct dc_plane_state *dc_plane_state, struct drm_plane_state *plane_state, @@ -1977,14 +1971,13 @@ static int fill_plane_attributes(struct amdgpu_device *adev, if (input_tf == NULL) return -ENOMEM; - input_tf->type = TF_TYPE_PREDEFINED; - input_tf->tf = TRANSFER_FUNCTION_SRGB; - dc_plane_state->in_transfer_func = input_tf; - /* In case of gamma set, update gamma value */ - if (crtc_state->gamma_lut) - fill_gamma_from_crtc_state(crtc_state, dc_plane_state); + /* + * Always set input transfer function, since plane state is refreshed + * every time. + */ + ret = amdgpu_dm_set_degamma_lut(crtc_state, dc_plane_state); return ret; } @@ -2010,30 +2003,32 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode, dst.width = stream->timing.h_addressable; dst.height = stream->timing.v_addressable; - rmx_type = dm_state->scaling; - if (rmx_type == RMX_ASPECT || rmx_type == RMX_OFF) { - if (src.width * dst.height < - src.height * dst.width) { - /* height needs less upscaling/more downscaling */ - dst.width = src.width * - dst.height / src.height; - } else { - /* width needs less upscaling/more downscaling */ - dst.height = src.height * - dst.width / src.width; + if (dm_state) { + rmx_type = dm_state->scaling; + if (rmx_type == RMX_ASPECT || rmx_type == RMX_OFF) { + if (src.width * dst.height < + src.height * dst.width) { + /* height needs less upscaling/more downscaling */ + dst.width = src.width * + dst.height / src.height; + } else { + /* width needs less upscaling/more downscaling */ + dst.height = src.height * + dst.width / src.width; + } + } else if (rmx_type == RMX_CENTER) { + dst = src; } - } else if (rmx_type == RMX_CENTER) { - dst = src; - } - dst.x = (stream->timing.h_addressable - dst.width) / 2; - dst.y = (stream->timing.v_addressable - dst.height) / 2; + dst.x = (stream->timing.h_addressable - dst.width) / 2; + dst.y = (stream->timing.v_addressable - dst.height) / 2; - if (dm_state->underscan_enable) { - dst.x += dm_state->underscan_hborder / 2; - dst.y += dm_state->underscan_vborder / 2; - dst.width -= dm_state->underscan_hborder; - dst.height -= dm_state->underscan_vborder; + if (dm_state->underscan_enable) { + dst.x += dm_state->underscan_hborder / 2; + dst.y += dm_state->underscan_vborder / 2; + dst.width -= dm_state->underscan_hborder; + dst.height -= dm_state->underscan_vborder; + } } stream->src = src; @@ -2322,7 +2317,7 @@ static void set_master_stream(struct dc_stream_state *stream_set[], } } for (j = 0; j < stream_count; j++) { - if (stream_set[j] && j != master_stream) + if (stream_set[j]) stream_set[j]->triggered_crtc_reset.event_source = stream_set[master_stream]; } } @@ -2358,12 +2353,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (aconnector == NULL) { DRM_ERROR("aconnector is NULL!\n"); - goto drm_connector_null; - } - - if (dm_state == NULL) { - DRM_ERROR("dm_state is NULL!\n"); - goto dm_state_null; + return stream; } drm_connector = &aconnector->base; @@ -2375,18 +2365,18 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ if (aconnector->mst_port) { dm_dp_mst_dc_sink_create(drm_connector); - goto mst_dc_sink_create_done; + return stream; } if (create_fake_sink(aconnector)) - goto stream_create_fail; + return stream; } stream = dc_create_stream_for_sink(aconnector->dc_sink); if (stream == NULL) { DRM_ERROR("Failed to create stream for sink!\n"); - goto stream_create_fail; + return stream; } list_for_each_entry(preferred_mode, &aconnector->base.modes, head) { @@ -2412,9 +2402,12 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, - dm_state->scaling != RMX_OFF); + dm_state ? (dm_state->scaling != RMX_OFF) : false); } + if (!dm_state) + drm_mode_set_crtcinfo(&mode, 0); + fill_stream_properties_from_drm_display_mode(stream, &mode, &aconnector->base); update_stream_scaling_settings(&mode, dm_state, stream); @@ -2424,10 +2417,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_connector, aconnector->dc_sink); -stream_create_fail: -dm_state_null: -drm_connector_null: -mst_dc_sink_create_done: + update_stream_signal(stream); + return stream; } @@ -2504,6 +2495,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = dm_crtc_duplicate_state, .atomic_destroy_state = dm_crtc_destroy_state, + .set_crc_source = amdgpu_dm_crtc_set_crc_source, }; static enum drm_connector_status @@ -2779,6 +2771,7 @@ int amdgpu_dm_connector_mode_valid(struct drm_connector *connector, /* TODO: Unhardcode stream count */ struct dc_stream_state *stream; struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + enum dc_status dc_result = DC_OK; if ((mode->flags & DRM_MODE_FLAG_INTERLACE) || (mode->flags & DRM_MODE_FLAG_DBLSCAN)) @@ -2798,21 +2791,22 @@ int amdgpu_dm_connector_mode_valid(struct drm_connector *connector, goto fail; } - stream = dc_create_stream_for_sink(dc_sink); + stream = create_stream_for_sink(aconnector, mode, NULL); if (stream == NULL) { DRM_ERROR("Failed to create stream for sink!\n"); goto fail; } - drm_mode_set_crtcinfo(mode, 0); - fill_stream_properties_from_drm_display_mode(stream, mode, connector); - - stream->src.width = mode->hdisplay; - stream->src.height = mode->vdisplay; - stream->dst = stream->src; + dc_result = dc_validate_stream(adev->dm.dc, stream); - if (dc_validate_stream(adev->dm.dc, stream) == DC_OK) + if (dc_result == DC_OK) result = MODE_OK; + else + DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n", + mode->vdisplay, + mode->hdisplay, + mode->clock, + dc_result); dc_stream_release(stream); @@ -2954,11 +2948,13 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, { struct amdgpu_framebuffer *afb; struct drm_gem_object *obj; + struct amdgpu_device *adev; struct amdgpu_bo *rbo; uint64_t chroma_addr = 0; - int r; struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; unsigned int awidth; + uint32_t domain; + int r; dm_plane_state_old = to_dm_plane_state(plane->state); dm_plane_state_new = to_dm_plane_state(new_state); @@ -2972,12 +2968,17 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, obj = afb->obj; rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; - r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb->address); + if (plane->type != DRM_PLANE_TYPE_CURSOR) + domain = amdgpu_display_framebuffer_domains(adev); + else + domain = AMDGPU_GEM_DOMAIN_VRAM; + r = amdgpu_bo_pin(rbo, domain, &afb->address); amdgpu_bo_unreserve(rbo); @@ -3190,7 +3191,9 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, acrtc->base.enabled = false; dm->adev->mode_info.crtcs[crtc_index] = acrtc; - drm_mode_crtc_set_gamma_size(&acrtc->base, 256); + drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, + true, MAX_COLOR_LUT_ENTRIES); + drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LUT_ENTRIES); return 0; @@ -3366,9 +3369,12 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) struct edid *edid = amdgpu_dm_connector->edid; encoder = helper->best_encoder(connector); - amdgpu_dm_connector_ddc_get_modes(connector, edid); amdgpu_dm_connector_add_common_modes(encoder, connector); + +#if defined(CONFIG_DRM_AMD_DC_FBC) + amdgpu_dm_fbc_init(connector); +#endif return amdgpu_dm_connector->num_modes; } @@ -3641,7 +3647,7 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, * constant is the same as PFLIP */ int irq_type = - amdgpu_crtc_idx_to_irq_type( + amdgpu_display_crtc_idx_to_irq_type( adev, acrtc->crtc_id); @@ -3860,9 +3866,9 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, * targeted by the flip */ while ((acrtc->enabled && - (amdgpu_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id, 0, - &vpos, &hpos, NULL, NULL, - &crtc->hwmode) + (amdgpu_display_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id, + 0, &vpos, &hpos, NULL, + NULL, &crtc->hwmode) & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && (int)(target_vblank - @@ -4603,6 +4609,30 @@ next_crtc: /* Release extra reference */ if (new_stream) dc_stream_release(new_stream); + + /* + * We want to do dc stream updates that do not require a + * full modeset below. + */ + if (!enable || !aconnector || modereset_required(new_crtc_state)) + continue; + /* + * Given above conditions, the dc state cannot be NULL because: + * 1. We're attempting to enable a CRTC. Which has a... + * 2. Valid connector attached, and + * 3. User does not want to reset it (disable or mark inactive, + * which can happen on a CRTC that's already disabled). + * => It currently exists. + */ + BUG_ON(dm_new_crtc_state->stream == NULL); + + /* Color managment settings */ + if (dm_new_crtc_state->base.color_mgmt_changed) { + ret = amdgpu_dm_set_regamma_lut(dm_new_crtc_state); + if (ret) + goto fail; + amdgpu_dm_set_ctm(dm_new_crtc_state); + } } return ret; @@ -4711,7 +4741,6 @@ static int dm_update_planes_state(struct dc *dc, if (ret) return ret; - if (!dc_add_plane_to_context( dc, dm_new_crtc_state->stream, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2faa77a7eeda..aa7df5775545 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -85,8 +85,6 @@ struct amdgpu_display_manager { struct dal *dal; struct dc *dc; struct cgs_device *cgs_device; - /* lock to be used when DAL is called from SYNC IRQ context */ - spinlock_t dal_lock; struct amdgpu_device *adev; /*AMD base driver*/ struct drm_device *ddev; /*DRM base driver*/ @@ -119,17 +117,6 @@ struct amdgpu_display_manager { /* this spin lock synchronizes access to 'irq_handler_list_table' */ spinlock_t irq_handler_list_table_lock; - /* Timer-related data. */ - struct list_head timer_handler_list; - struct workqueue_struct *timer_workqueue; - - /* Use dal_mutex for any activity which is NOT syncronized by - * DRM mode setting locks. - * For example: amdgpu_dm_hpd_low_irq() calls into DAL *without* - * DRM mode setting locks being acquired. This is where dal_mutex - * is acquired before calling into DAL. */ - struct mutex dal_mutex; - struct backlight_device *backlight_dev; const struct dc_link *backlight_link; @@ -210,6 +197,9 @@ struct dm_plane_state { struct dm_crtc_state { struct drm_crtc_state base; struct dc_stream_state *stream; + + int crc_skip_count; + bool crc_enabled; }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) @@ -268,6 +258,24 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector, void amdgpu_dm_remove_sink_from_freesync_module(struct drm_connector *connector); +/* amdgpu_dm_crc.c */ +#ifdef CONFIG_DEBUG_FS +int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name, + size_t *values_cnt); +void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc); +#else +#define amdgpu_dm_crtc_set_crc_source NULL +#define amdgpu_dm_crtc_handle_crc_irq(x) +#endif + +#define MAX_COLOR_LUT_ENTRIES 256 + +void amdgpu_dm_init_color_mod(void); +int amdgpu_dm_set_degamma_lut(struct drm_crtc_state *crtc_state, + struct dc_plane_state *dc_plane_state); +void amdgpu_dm_set_ctm(struct dm_crtc_state *crtc); +int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc); + extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; #endif /* __AMDGPU_DM_H__ */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c new file mode 100644 index 000000000000..62bb72fe9aa5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -0,0 +1,228 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "amdgpu_mode.h" +#include "amdgpu_dm.h" +#include "modules/color/color_gamma.h" + +/* + * Initialize the color module. + * + * We're not using the full color module, only certain components. + * Only call setup functions for components that we need. + */ +void amdgpu_dm_init_color_mod(void) +{ + setup_x_points_distribution(); +} + + +/* + * Return true if the given lut is a linear mapping of values, i.e. it acts + * like a bypass LUT. + * + * It is considered linear if the lut represents: + * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in + * [0, MAX_COLOR_LUT_ENTRIES) + */ +static bool __is_lut_linear(struct drm_color_lut *lut) +{ + int i; + uint32_t max_os = 0xFF00; + uint32_t expected; + int delta; + + for (i = 0; i < MAX_COLOR_LUT_ENTRIES; i++) { + /* All color values should equal */ + if ((lut[i].red != lut[i].green) || (lut[i].green != lut[i].blue)) + return false; + + expected = i * max_os / (MAX_COLOR_LUT_ENTRIES-1); + + /* Allow a +/-1 error. */ + delta = lut[i].red - expected; + if (delta < -1 || 1 < delta) + return false; + } + return true; +} + +/** + * amdgpu_dm_set_regamma_lut: Set regamma lut for the given CRTC. + * @crtc: amdgpu_dm crtc state + * + * Update the underlying dc_stream_state's output transfer function (OTF) in + * preparation for hardware commit. If no lut is specified by user, we default + * to SRGB. + * + * RETURNS: + * 0 on success, -ENOMEM if memory cannot be allocated to calculate the OTF. + */ +int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc) +{ + struct drm_property_blob *blob = crtc->base.gamma_lut; + struct dc_stream_state *stream = crtc->stream; + struct drm_color_lut *lut; + struct dc_gamma *gamma; + enum dc_transfer_func_type old_type = stream->out_transfer_func->type; + + uint32_t r, g, b; + int i; + bool ret; + + if (!blob) { + /* By default, use the SRGB predefined curve.*/ + stream->out_transfer_func->type = TF_TYPE_PREDEFINED; + stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; + return 0; + } + + lut = (struct drm_color_lut *)blob->data; + + if (__is_lut_linear(lut)) { + /* Set to bypass if lut is set to linear */ + stream->out_transfer_func->type = TF_TYPE_BYPASS; + stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; + return 0; + } + + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; + + gamma->num_entries = MAX_COLOR_LUT_ENTRIES; + gamma->type = GAMMA_RGB_256; + + /* Truncate, and store in dc_gamma for output tf calculation */ + for (i = 0; i < gamma->num_entries; i++) { + r = drm_color_lut_extract(lut[i].red, 16); + g = drm_color_lut_extract(lut[i].green, 16); + b = drm_color_lut_extract(lut[i].blue, 16); + + gamma->entries.red[i] = dal_fixed31_32_from_int(r); + gamma->entries.green[i] = dal_fixed31_32_from_int(g); + gamma->entries.blue[i] = dal_fixed31_32_from_int(b); + } + + /* Call color module to translate into something DC understands. Namely + * a transfer function. + */ + stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; + ret = mod_color_calculate_regamma_params(stream->out_transfer_func, + gamma, true); + dc_gamma_release(&gamma); + if (!ret) { + stream->out_transfer_func->type = old_type; + DRM_ERROR("Out of memory when calculating regamma params\n"); + return -ENOMEM; + } + + return 0; +} + +/** + * amdgpu_dm_set_ctm: Set the color transform matrix for the given CRTC. + * @crtc: amdgpu_dm crtc state + * + * Update the underlying dc_stream_state's gamut remap matrix in preparation + * for hardware commit. If no matrix is specified by user, gamut remap will be + * disabled. + */ +void amdgpu_dm_set_ctm(struct dm_crtc_state *crtc) +{ + + struct drm_property_blob *blob = crtc->base.ctm; + struct dc_stream_state *stream = crtc->stream; + struct drm_color_ctm *ctm; + int i; + + if (!blob) { + stream->gamut_remap_matrix.enable_remap = false; + return; + } + + stream->gamut_remap_matrix.enable_remap = true; + ctm = (struct drm_color_ctm *)blob->data; + /* + * DRM gives a 3x3 matrix, but DC wants 3x4. Assuming we're operating + * with homogeneous coordinates, augment the matrix with 0's. + * + * The format provided is S31.32, which is the same as our fixed31_32. + */ + for (i = 0; i < 12; i++) { + /* Skip 4th element */ + if (i % 4 == 3) { + stream->gamut_remap_matrix.matrix[i] = dal_fixed31_32_zero; + continue; + } + /* csc[i] = ctm[i - floor(i/4)] */ + stream->gamut_remap_matrix.matrix[i].value = ctm->matrix[i - (i/4)]; + } +} + + +/** + * amdgpu_dm_set_degamma_lut: Set degamma lut for the given CRTC. + * @crtc: amdgpu_dm crtc state + * + * Update the underlying dc_stream_state's input transfer function (ITF) in + * preparation for hardware commit. If no lut is specified by user, we default + * to SRGB degamma. + * + * Currently, we only support degamma bypass, or preprogrammed SRGB degamma. + * Programmable degamma is not supported, and an attempt to do so will return + * -EINVAL. + * + * RETURNS: + * 0 on success, -EINVAL if custom degamma curve is given. + */ +int amdgpu_dm_set_degamma_lut(struct drm_crtc_state *crtc_state, + struct dc_plane_state *dc_plane_state) +{ + struct drm_property_blob *blob = crtc_state->degamma_lut; + struct drm_color_lut *lut; + + if (!blob) { + /* Default to SRGB */ + dc_plane_state->in_transfer_func->type = TF_TYPE_PREDEFINED; + dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_SRGB; + return 0; + } + + lut = (struct drm_color_lut *)blob->data; + if (__is_lut_linear(lut)) { + dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; + dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; + return 0; + } + + /* Otherwise, assume SRGB, since programmable degamma is not + * supported. + */ + dc_plane_state->in_transfer_func->type = TF_TYPE_PREDEFINED; + dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_SRGB; + return -EINVAL; +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c new file mode 100644 index 000000000000..52f2c01349e3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -0,0 +1,126 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include <drm/drm_crtc.h> + +#include "amdgpu.h" +#include "amdgpu_dm.h" +#include "dc.h" + +enum amdgpu_dm_pipe_crc_source { + AMDGPU_DM_PIPE_CRC_SOURCE_NONE = 0, + AMDGPU_DM_PIPE_CRC_SOURCE_AUTO, + AMDGPU_DM_PIPE_CRC_SOURCE_MAX, + AMDGPU_DM_PIPE_CRC_SOURCE_INVALID = -1, +}; + +static enum amdgpu_dm_pipe_crc_source dm_parse_crc_source(const char *source) +{ + if (!source || !strcmp(source, "none")) + return AMDGPU_DM_PIPE_CRC_SOURCE_NONE; + if (!strcmp(source, "auto")) + return AMDGPU_DM_PIPE_CRC_SOURCE_AUTO; + + return AMDGPU_DM_PIPE_CRC_SOURCE_INVALID; +} + +int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name, + size_t *values_cnt) +{ + struct dm_crtc_state *crtc_state = to_dm_crtc_state(crtc->state); + struct dc_stream_state *stream_state = crtc_state->stream; + + enum amdgpu_dm_pipe_crc_source source = dm_parse_crc_source(src_name); + + if (source < 0) { + DRM_DEBUG_DRIVER("Unknown CRC source %s for CRTC%d\n", + src_name, crtc->index); + return -EINVAL; + } + + /* When enabling CRC, we should also disable dithering. */ + if (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO) { + if (dc_stream_configure_crc(stream_state->ctx->dc, + stream_state, + true, true)) { + crtc_state->crc_enabled = true; + dc_stream_set_dither_option(stream_state, + DITHER_OPTION_TRUN8); + } + else + return -EINVAL; + } else { + if (dc_stream_configure_crc(stream_state->ctx->dc, + stream_state, + false, false)) { + crtc_state->crc_enabled = false; + dc_stream_set_dither_option(stream_state, + DITHER_OPTION_DEFAULT); + } + else + return -EINVAL; + } + + *values_cnt = 3; + /* Reset crc_skipped on dm state */ + crtc_state->crc_skip_count = 0; + return 0; +} + +/** + * amdgpu_dm_crtc_handle_crc_irq: Report to DRM the CRC on given CRTC. + * @crtc: DRM CRTC object. + * + * This function should be called at the end of a vblank, when the fb has been + * fully processed through the pipe. + */ +void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc) +{ + struct dm_crtc_state *crtc_state = to_dm_crtc_state(crtc->state); + struct dc_stream_state *stream_state = crtc_state->stream; + uint32_t crcs[3]; + + /* Early return if CRC capture is not enabled. */ + if (!crtc_state->crc_enabled) + return; + + /* + * Since flipping and crc enablement happen asynchronously, we - more + * often than not - will be returning an 'uncooked' crc on first frame. + * Probably because hw isn't ready yet. For added security, skip the + * first two CRC values. + */ + if (crtc_state->crc_skip_count < 2) { + crtc_state->crc_skip_count += 1; + return; + } + + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, + &crcs[0], &crcs[1], &crcs[2])) + return; + + drm_crtc_add_crc_entry(crtc, true, + drm_crtc_accurate_vblank_count(crtc), crcs); +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 1874b6cee6af..490017df371d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -51,11 +51,6 @@ struct amdgpu_dm_irq_handler_data { enum dc_irq_source irq_source; }; -struct amdgpu_dm_timer_handler_data { - struct handler_common_data hcd; - struct delayed_work d_work; -}; - #define DM_IRQ_TABLE_LOCK(adev, flags) \ spin_lock_irqsave(&adev->dm.irq_handler_list_table_lock, flags) @@ -169,62 +164,6 @@ static struct list_head *remove_irq_handler(struct amdgpu_device *adev, return hnd_list; } -/* If 'handler_in == NULL' then remove ALL handlers. */ -static void remove_timer_handler(struct amdgpu_device *adev, - struct amdgpu_dm_timer_handler_data *handler_in) -{ - struct amdgpu_dm_timer_handler_data *handler_temp; - struct list_head *handler_list; - struct list_head *entry, *tmp; - unsigned long irq_table_flags; - bool handler_removed = false; - - DM_IRQ_TABLE_LOCK(adev, irq_table_flags); - - handler_list = &adev->dm.timer_handler_list; - - list_for_each_safe(entry, tmp, handler_list) { - /* Note that list_for_each_safe() guarantees that - * handler_temp is NOT null. */ - handler_temp = list_entry(entry, - struct amdgpu_dm_timer_handler_data, hcd.list); - - if (handler_in == NULL || handler_in == handler_temp) { - list_del(&handler_temp->hcd.list); - DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags); - - DRM_DEBUG_KMS("DM_IRQ: removing timer handler: %p\n", - handler_temp); - - if (handler_in == NULL) { - /* Since it is still in the queue, it must - * be cancelled. */ - cancel_delayed_work_sync(&handler_temp->d_work); - } - - kfree(handler_temp); - handler_removed = true; - - DM_IRQ_TABLE_LOCK(adev, irq_table_flags); - } - - /* Remove ALL handlers. */ - if (handler_in == NULL) - continue; - - /* Remove a SPECIFIC handler. - * Found our handler - we can stop here. */ - if (handler_in == handler_temp) - break; - } - - DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags); - - if (handler_in != NULL && handler_removed == false) - DRM_ERROR("DM_IRQ: handler: %p is not in the list!\n", - handler_in); -} - static bool validate_irq_registration_params(struct dc_interrupt_params *int_params, void (*ih)(void *)) @@ -382,16 +321,6 @@ int amdgpu_dm_irq_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&adev->dm.irq_handler_list_high_tab[src]); } - INIT_LIST_HEAD(&adev->dm.timer_handler_list); - - /* allocate and initialize the workqueue for DM timer */ - adev->dm.timer_workqueue = create_singlethread_workqueue( - "dm_timer_queue"); - if (adev->dm.timer_workqueue == NULL) { - DRM_ERROR("DM_IRQ: unable to create timer queue!\n"); - return -1; - } - return 0; } @@ -410,11 +339,6 @@ void amdgpu_dm_irq_fini(struct amdgpu_device *adev) lh = &adev->dm.irq_handler_list_low_tab[src]; flush_work(&lh->work); } - - /* Cancel ALL timers and release handlers (if any). */ - remove_timer_handler(adev, NULL); - /* Release the queue itself. */ - destroy_workqueue(adev->dm.timer_workqueue); } int amdgpu_dm_irq_suspend(struct amdgpu_device *adev) @@ -683,10 +607,8 @@ static const struct amdgpu_irq_src_funcs dm_hpd_irq_funcs = { void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.num_crtc > 0) - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc; - else - adev->crtc_irq.num_types = 0; + + adev->crtc_irq.num_types = adev->mode_info.num_crtc; adev->crtc_irq.funcs = &dm_crtc_irq_funcs; adev->pageflip_irq.num_types = adev->mode_info.num_crtc; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index f3d87f418d2e..1e8a21b67df7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -174,12 +174,6 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { .atomic_get_property = amdgpu_dm_connector_atomic_get_property }; -static int dm_connector_update_modes(struct drm_connector *connector, - struct edid *edid) -{ - return drm_add_edid_modes(connector, edid); -} - void dm_dp_mst_dc_sink_create(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -189,6 +183,12 @@ void dm_dp_mst_dc_sink_create(struct drm_connector *connector) .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + /* + * TODO: Need to further figure out why ddc.algo is NULL while MST port exists + */ + if (!aconnector->port || !aconnector->port->aux.ddc.algo) + return; + edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); if (!edid) { @@ -222,7 +222,7 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) int ret = 0; if (!aconnector) - return dm_connector_update_modes(connector, NULL); + return drm_add_edid_modes(connector, NULL); if (!aconnector->edid) { struct edid *edid; @@ -258,7 +258,7 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) &aconnector->base, edid); } - ret = dm_connector_update_modes(connector, aconnector->edid); + ret = drm_add_edid_modes(connector, aconnector->edid); return ret; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 56e549249134..89342b48be6b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -71,15 +71,6 @@ bool dm_read_persistent_data(struct dc_context *ctx, /**** power component interfaces ****/ -bool dm_pp_pre_dce_clock_change( - struct dc_context *ctx, - struct dm_pp_gpu_clock_range *requested_state, - struct dm_pp_gpu_clock_range *actual_state) -{ - /*TODO*/ - return false; -} - bool dm_pp_apply_display_requirements( const struct dc_context *ctx, const struct dm_pp_display_configuration *pp_display_cfg) @@ -151,30 +142,6 @@ bool dm_pp_apply_display_requirements( return true; } -bool dc_service_get_system_clocks_range( - const struct dc_context *ctx, - struct dm_pp_gpu_clock_range *sys_clks) -{ - struct amdgpu_device *adev = ctx->driver_context; - - /* Default values, in case PPLib is not compiled-in. */ - sys_clks->mclk.max_khz = 800000; - sys_clks->mclk.min_khz = 800000; - - sys_clks->sclk.max_khz = 600000; - sys_clks->sclk.min_khz = 300000; - - if (adev->pm.dpm_enabled) { - sys_clks->mclk.max_khz = amdgpu_dpm_get_mclk(adev, false); - sys_clks->mclk.min_khz = amdgpu_dpm_get_mclk(adev, true); - - sys_clks->sclk.max_khz = amdgpu_dpm_get_sclk(adev, false); - sys_clks->sclk.min_khz = amdgpu_dpm_get_sclk(adev, true); - } - - return true; -} - static void get_default_clock_levels( enum dm_pp_clock_type clk_type, struct dm_pp_clock_levels *clks) diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 011a97f82fb6..8a9bba879207 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -593,3 +593,12 @@ uint32_t dal_fixed31_32_clamp_u0d10( { return clamp_ux_dy(arg.value, 0, 10, 1); } + +int32_t dal_fixed31_32_s4d19( + struct fixed31_32 arg) +{ + if (arg.value < 0) + return -(int32_t)ux_dy(dal_fixed31_32_abs(arg).value, 4, 19); + else + return ux_dy(arg.value, 4, 19); +} diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index c00e405b63e8..69c59e050a96 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -3795,14 +3795,11 @@ static const struct dc_vbios_funcs vbios_funcs = { .get_gpio_pin_info = bios_parser_get_gpio_pin_info, - .get_embedded_panel_info = bios_parser_get_embedded_panel_info, - - .get_gpio_pin_info = bios_parser_get_gpio_pin_info, - .get_encoder_cap_info = bios_parser_get_encoder_cap_info, /* bios scratch register communication */ .is_accelerated_mode = bios_is_accelerated_mode, + .get_vga_enabled_displays = bios_get_vga_enabled_displays, .set_scratch_critical_state = bios_parser_set_scratch_critical_state, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 1ee1717f2e6f..1689c670ca6f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1280,6 +1280,12 @@ static bool bios_parser_is_accelerated_mode( return bios_is_accelerated_mode(dcb); } +static uint32_t bios_parser_get_vga_enabled_displays( + struct dc_bios *bios) +{ + return bios_get_vga_enabled_displays(bios); +} + /** * bios_parser_set_scratch_critical_state @@ -1800,6 +1806,7 @@ static const struct dc_vbios_funcs vbios_funcs = { .is_accelerated_mode = bios_parser_is_accelerated_mode, + .get_vga_enabled_displays = bios_parser_get_vga_enabled_displays, .set_scratch_critical_state = bios_parser_set_scratch_critical_state, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index 5c9e5108c32c..d4589470985c 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -78,5 +78,13 @@ void bios_set_scratch_critical_state( REG_UPDATE(BIOS_SCRATCH_6, S6_CRITICAL_STATE, critial_state); } +uint32_t bios_get_vga_enabled_displays( + struct dc_bios *bios) +{ + uint32_t active_disp = 1; + if (bios->regs->BIOS_SCRATCH_3) /*follow up with other asic, todo*/ + active_disp = REG_READ(BIOS_SCRATCH_3) & 0XFFFF; + return active_disp; +} diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h index c0047efeb006..75a29e68fb27 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h @@ -34,6 +34,7 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset, bool bios_is_accelerated_mode(struct dc_bios *bios); void bios_set_scratch_acc_mode_change(struct dc_bios *bios); void bios_set_scratch_critical_state(struct dc_bios *bios, bool state); +uint32_t bios_get_vga_enabled_displays(struct dc_bios *bios); #define GET_IMAGE(type, offset) ((type *) bios_get_image(&bp->base, offset, sizeof(type))) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h index 1fab634b66be..4c3789df253d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h @@ -29,38 +29,7 @@ #include "dce80/command_table_helper_dce80.h" #include "dce110/command_table_helper_dce110.h" #include "dce112/command_table_helper_dce112.h" - -struct command_table_helper { - bool (*controller_id_to_atom)(enum controller_id id, uint8_t *atom_id); - uint8_t (*encoder_action_to_atom)( - enum bp_encoder_control_action action); - uint32_t (*encoder_mode_bp_to_atom)(enum signal_type s, - bool enable_dp_audio); - bool (*engine_bp_to_atom)(enum engine_id engine_id, - uint32_t *atom_engine_id); - void (*assign_control_parameter)( - const struct command_table_helper *h, - struct bp_encoder_control *control, - DIG_ENCODER_CONTROL_PARAMETERS_V2 *ctrl_param); - bool (*clock_source_id_to_atom)(enum clock_source_id id, - uint32_t *atom_pll_id); - bool (*clock_source_id_to_ref_clk_src)( - enum clock_source_id id, - uint32_t *ref_clk_src_id); - uint8_t (*transmitter_bp_to_atom)(enum transmitter t); - uint8_t (*encoder_id_to_atom)(enum encoder_id id); - uint8_t (*clock_source_id_to_atom_phy_clk_src_id)( - enum clock_source_id id); - uint8_t (*signal_type_to_atom_dig_mode)(enum signal_type s); - uint8_t (*hpd_sel_to_atom)(enum hpd_source_id id); - uint8_t (*dig_encoder_sel_to_atom)(enum engine_id engine_id); - uint8_t (*phy_id_to_atom)(enum transmitter t); - uint8_t (*disp_power_gating_action_to_atom)( - enum bp_pipe_control_action action); - bool (*dc_clock_type_to_atom)(enum bp_dce_clock_type id, - uint32_t *atom_clock_type); - uint8_t (*transmitter_color_depth_to_atom)(enum transmitter_color_depth id); -}; +#include "command_table_helper_struct.h" bool dal_bios_parser_init_cmd_tbl_helper(const struct command_table_helper **h, enum dce_version dce); diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.h index 9f587c91d843..785fcb20a1b9 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.h @@ -29,35 +29,7 @@ #include "dce80/command_table_helper_dce80.h" #include "dce110/command_table_helper_dce110.h" #include "dce112/command_table_helper2_dce112.h" - -struct command_table_helper { - bool (*controller_id_to_atom)(enum controller_id id, uint8_t *atom_id); - uint8_t (*encoder_action_to_atom)( - enum bp_encoder_control_action action); - uint32_t (*encoder_mode_bp_to_atom)(enum signal_type s, - bool enable_dp_audio); - bool (*engine_bp_to_atom)(enum engine_id engine_id, - uint32_t *atom_engine_id); - bool (*clock_source_id_to_atom)(enum clock_source_id id, - uint32_t *atom_pll_id); - bool (*clock_source_id_to_ref_clk_src)( - enum clock_source_id id, - uint32_t *ref_clk_src_id); - uint8_t (*transmitter_bp_to_atom)(enum transmitter t); - uint8_t (*encoder_id_to_atom)(enum encoder_id id); - uint8_t (*clock_source_id_to_atom_phy_clk_src_id)( - enum clock_source_id id); - uint8_t (*signal_type_to_atom_dig_mode)(enum signal_type s); - uint8_t (*hpd_sel_to_atom)(enum hpd_source_id id); - uint8_t (*dig_encoder_sel_to_atom)(enum engine_id engine_id); - uint8_t (*phy_id_to_atom)(enum transmitter t); - uint8_t (*disp_power_gating_action_to_atom)( - enum bp_pipe_control_action action); - bool (*dc_clock_type_to_atom)(enum bp_dce_clock_type id, - uint32_t *atom_clock_type); - uint8_t (*transmitter_color_depth_to_atom)( - enum transmitter_color_depth id); -}; +#include "command_table_helper_struct.h" bool dal_bios_parser_init_cmd_tbl_helper2(const struct command_table_helper **h, enum dce_version dce); diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper_struct.h b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper_struct.h new file mode 100644 index 000000000000..1f2c0a3f06f9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper_struct.h @@ -0,0 +1,66 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DAL_COMMAND_TABLE_HELPER_STRUCT_H__ +#define __DAL_COMMAND_TABLE_HELPER_STRUCT_H__ + +#include "dce80/command_table_helper_dce80.h" +#include "dce110/command_table_helper_dce110.h" +#include "dce112/command_table_helper_dce112.h" + +struct _DIG_ENCODER_CONTROL_PARAMETERS_V2; +struct command_table_helper { + bool (*controller_id_to_atom)(enum controller_id id, uint8_t *atom_id); + uint8_t (*encoder_action_to_atom)( + enum bp_encoder_control_action action); + uint32_t (*encoder_mode_bp_to_atom)(enum signal_type s, + bool enable_dp_audio); + bool (*engine_bp_to_atom)(enum engine_id engine_id, + uint32_t *atom_engine_id); + void (*assign_control_parameter)( + const struct command_table_helper *h, + struct bp_encoder_control *control, + struct _DIG_ENCODER_CONTROL_PARAMETERS_V2 *ctrl_param); + bool (*clock_source_id_to_atom)(enum clock_source_id id, + uint32_t *atom_pll_id); + bool (*clock_source_id_to_ref_clk_src)( + enum clock_source_id id, + uint32_t *ref_clk_src_id); + uint8_t (*transmitter_bp_to_atom)(enum transmitter t); + uint8_t (*encoder_id_to_atom)(enum encoder_id id); + uint8_t (*clock_source_id_to_atom_phy_clk_src_id)( + enum clock_source_id id); + uint8_t (*signal_type_to_atom_dig_mode)(enum signal_type s); + uint8_t (*hpd_sel_to_atom)(enum hpd_source_id id); + uint8_t (*dig_encoder_sel_to_atom)(enum engine_id engine_id); + uint8_t (*phy_id_to_atom)(enum transmitter t); + uint8_t (*disp_power_gating_action_to_atom)( + enum bp_pipe_control_action action); + bool (*dc_clock_type_to_atom)(enum bp_dce_clock_type id, + uint32_t *atom_clock_type); + uint8_t (*transmitter_color_depth_to_atom)(enum transmitter_color_depth id); +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 7959e382ed28..95f332ee3e7e 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -24,9 +24,17 @@ # It calculates Bandwidth and Watermarks values for HW programming # -CFLAGS_dcn_calcs.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_dcn_calc_auto.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_dcn_calc_math.o := -mhard-float -msse -mpreferred-stack-boundary=4 -Wno-tautological-compare +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align := -mpreferred-stack-boundary=4 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align := -mstack-alignment=16 +endif + +calcs_ccflags := -mhard-float -msse $(cc_stack_align) + +CFLAGS_dcn_calcs.o := $(calcs_ccflags) +CFLAGS_dcn_calc_auto.o := $(calcs_ccflags) +CFLAGS_dcn_calc_math.o := $(calcs_ccflags) -Wno-tautological-compare BW_CALCS = dce_calcs.o bw_fixed.o custom_float.o diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 2e11fac2a63d..6d38b8f43198 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -623,7 +623,7 @@ static void calculate_bandwidth( } else { /*graphics portrait tiling mode*/ - if ((data->graphics_micro_tile_mode == bw_def_rotated_micro_tiling)) { + if (data->graphics_micro_tile_mode == bw_def_rotated_micro_tiling) { data->orthogonal_rotation[i] = 0; } else { @@ -634,7 +634,7 @@ static void calculate_bandwidth( else { if ((i < 4)) { /*underlay landscape tiling mode is only supported*/ - if ((data->underlay_micro_tile_mode == bw_def_display_micro_tiling)) { + if (data->underlay_micro_tile_mode == bw_def_display_micro_tiling) { data->orthogonal_rotation[i] = 0; } else { @@ -643,7 +643,7 @@ static void calculate_bandwidth( } else { /*graphics landscape tiling mode*/ - if ((data->graphics_micro_tile_mode == bw_def_display_micro_tiling)) { + if (data->graphics_micro_tile_mode == bw_def_display_micro_tiling) { data->orthogonal_rotation[i] = 0; } else { @@ -947,14 +947,14 @@ static void calculate_bandwidth( } for (i = 0; i <= maximum_number_of_surfaces - 1; i++) { if (data->enable[i]) { - if ((data->number_of_displays == 1 && data->number_of_underlay_surfaces == 0)) { + if (data->number_of_displays == 1 && data->number_of_underlay_surfaces == 0) { /*set maximum chunk limit if only one graphic pipe is enabled*/ data->outstanding_chunk_request_limit[i] = bw_int_to_fixed(127); } else { data->outstanding_chunk_request_limit[i] = bw_ceil2(bw_div(data->adjusted_data_buffer_size[i], data->pipe_chunk_size_in_bytes[i]), bw_int_to_fixed(1)); /*clamp maximum chunk limit in the graphic display pipe*/ - if ((i >= 4)) { + if (i >= 4) { data->outstanding_chunk_request_limit[i] = bw_max2(bw_int_to_fixed(127), data->outstanding_chunk_request_limit[i]); } } @@ -1337,7 +1337,7 @@ static void calculate_bandwidth( /*if stutter and dram clock state change are gated before cursor then the cursor latency hiding does not limit stutter or dram clock state change*/ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) { if (data->enable[i]) { - if ((dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) { + if (dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1) { data->maximum_latency_hiding[i] = bw_add(data->minimum_latency_hiding[i], bw_mul(bw_frc_to_fixed(8, 10), data->total_dmifmc_urgent_latency)); } else { @@ -1396,7 +1396,7 @@ static void calculate_bandwidth( } /*determine the number of displays with margin to switch in the v_active region*/ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) { - if ((data->enable[k] == 1 && data->display_pstate_change_enable[k] == 1)) { + if (data->enable[k] == 1 && data->display_pstate_change_enable[k] == 1) { number_of_displays_enabled_with_margin = number_of_displays_enabled_with_margin + 1; } } @@ -1442,7 +1442,7 @@ static void calculate_bandwidth( data->nbp_state_change_enable = bw_def_no; } /*dram clock change is possible only in vblank if all displays are aligned and have no margin*/ - if ((number_of_aligned_displays_with_no_margin == number_of_displays_enabled)) { + if (number_of_aligned_displays_with_no_margin == number_of_displays_enabled) { nbp_state_change_enable_blank = bw_def_yes; } else { @@ -1470,7 +1470,7 @@ static void calculate_bandwidth( } } /*compute minimum time to read one chunk from the dmif buffer*/ - if ((number_of_displays_enabled > 2)) { + if (number_of_displays_enabled > 2) { data->chunk_request_delay = 0; } else { @@ -1804,7 +1804,7 @@ static void calculate_bandwidth( data->stutter_exit_watermark[i] = bw_add(bw_sub(vbios->stutter_self_refresh_exit_latency, data->total_dmifmc_urgent_latency), data->urgent_watermark[i]); data->stutter_entry_watermark[i] = bw_add(bw_sub(bw_add(vbios->stutter_self_refresh_exit_latency, vbios->stutter_self_refresh_entry_latency), data->total_dmifmc_urgent_latency), data->urgent_watermark[i]); /*unconditionally remove black out time from the nb p_state watermark*/ - if ((data->display_pstate_change_enable[i] == 1)) { + if (data->display_pstate_change_enable[i] == 1) { data->nbp_state_change_watermark[i] = bw_add(bw_add(vbios->nbp_state_change_latency, data->dmif_burst_time[data->y_clk_level][data->sclk_level]), bw_max2(data->line_source_pixels_transfer_time, data->dram_speed_change_line_source_transfer_time[i][data->y_clk_level][data->sclk_level])); } else { @@ -1816,7 +1816,7 @@ static void calculate_bandwidth( data->urgent_watermark[i] = bw_add(bw_add(bw_add(bw_add(bw_add(vbios->mcifwrmc_urgent_latency, data->mcifwr_burst_time[data->y_clk_level][data->sclk_level]), bw_max2(data->line_source_pixels_transfer_time, data->line_source_transfer_time[i][data->y_clk_level][data->sclk_level])), vbios->blackout_duration), data->chunk_request_time), data->cursor_request_time); data->stutter_exit_watermark[i] = bw_int_to_fixed(0); data->stutter_entry_watermark[i] = bw_int_to_fixed(0); - if ((data->display_pstate_change_enable[i] == 1)) { + if (data->display_pstate_change_enable[i] == 1) { data->nbp_state_change_watermark[i] = bw_add(bw_add(vbios->nbp_state_change_latency, data->mcifwr_burst_time[data->y_clk_level][data->sclk_level]), bw_max2(data->line_source_pixels_transfer_time, data->dram_speed_change_line_source_transfer_time[i][data->y_clk_level][data->sclk_level])); } else { @@ -2033,8 +2033,8 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, vbios.cursor_width = 32; vbios.average_compression_rate = 4; vbios.number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256; - vbios.blackout_duration = bw_int_to_fixed(18); /* us */ - vbios.maximum_blackout_recovery_time = bw_int_to_fixed(20); + vbios.blackout_duration = bw_int_to_fixed(0); /* us */ + vbios.maximum_blackout_recovery_time = bw_int_to_fixed(0); dceip.large_cursor = false; dceip.dmif_request_buffer_size = bw_int_to_fixed(768); @@ -2366,8 +2366,8 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, vbios.cursor_width = 32; vbios.average_compression_rate = 4; vbios.number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256; - vbios.blackout_duration = bw_int_to_fixed(18); /* us */ - vbios.maximum_blackout_recovery_time = bw_int_to_fixed(20); + vbios.blackout_duration = bw_int_to_fixed(0); /* us */ + vbios.maximum_blackout_recovery_time = bw_int_to_fixed(0); dceip.large_cursor = false; dceip.dmif_request_buffer_size = bw_int_to_fixed(768); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 331891c2c71a..c9aa686d16b9 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -486,6 +486,7 @@ static void split_stream_across_pipes( secondary_pipe->plane_res.ipp = pool->ipps[secondary_pipe->pipe_idx]; secondary_pipe->plane_res.xfm = pool->transforms[secondary_pipe->pipe_idx]; secondary_pipe->plane_res.dpp = pool->dpps[secondary_pipe->pipe_idx]; + secondary_pipe->plane_res.mpcc_inst = pool->dpps[secondary_pipe->pipe_idx]->inst; if (primary_pipe->bottom_pipe) { ASSERT(primary_pipe->bottom_pipe != secondary_pipe); secondary_pipe->bottom_pipe = primary_pipe->bottom_pipe; @@ -625,7 +626,7 @@ static bool dcn_bw_apply_registry_override(struct dc *dc) return updated; } -void hack_disable_optional_pipe_split(struct dcn_bw_internal_vars *v) +static void hack_disable_optional_pipe_split(struct dcn_bw_internal_vars *v) { /* * disable optional pipe split by lower dispclk bounding box @@ -634,7 +635,7 @@ void hack_disable_optional_pipe_split(struct dcn_bw_internal_vars *v) v->max_dispclk[0] = v->max_dppclk_vmin0p65; } -void hack_force_pipe_split(struct dcn_bw_internal_vars *v, +static void hack_force_pipe_split(struct dcn_bw_internal_vars *v, unsigned int pixel_rate_khz) { float pixel_rate_mhz = pixel_rate_khz / 1000; @@ -647,25 +648,20 @@ void hack_force_pipe_split(struct dcn_bw_internal_vars *v, v->max_dppclk[0] = pixel_rate_mhz; } -void hack_bounding_box(struct dcn_bw_internal_vars *v, +static void hack_bounding_box(struct dcn_bw_internal_vars *v, struct dc_debug *dbg, struct dc_state *context) { - if (dbg->pipe_split_policy == MPC_SPLIT_AVOID) { + if (dbg->pipe_split_policy == MPC_SPLIT_AVOID) hack_disable_optional_pipe_split(v); - } if (dbg->pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP && - context->stream_count >= 2) { + context->stream_count >= 2) hack_disable_optional_pipe_split(v); - } if (context->stream_count == 1 && - dbg->force_single_disp_pipe_split) { - struct dc_stream_state *stream0 = context->streams[0]; - - hack_force_pipe_split(v, stream0->timing.pix_clk_khz); - } + dbg->force_single_disp_pipe_split) + hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_khz); } bool dcn_validate_bandwidth( @@ -799,23 +795,10 @@ bool dcn_validate_bandwidth( v->phyclk_per_state[2] = v->phyclkv_nom0p8; v->phyclk_per_state[1] = v->phyclkv_mid0p72; v->phyclk_per_state[0] = v->phyclkv_min0p65; - - hack_bounding_box(v, &dc->debug, context); - - if (v->voltage_override == dcn_bw_v_max0p9) { - v->voltage_override_level = number_of_states - 1; - } else if (v->voltage_override == dcn_bw_v_nom0p8) { - v->voltage_override_level = number_of_states - 2; - } else if (v->voltage_override == dcn_bw_v_mid0p72) { - v->voltage_override_level = number_of_states - 3; - } else { - v->voltage_override_level = 0; - } v->synchronized_vblank = dcn_bw_no; v->ta_pscalculation = dcn_bw_override; v->allow_different_hratio_vratio = dcn_bw_yes; - for (i = 0, input_idx = 0; i < pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -948,8 +931,19 @@ bool dcn_validate_bandwidth( v->number_of_active_planes = input_idx; scaler_settings_calculation(v); + + hack_bounding_box(v, &dc->debug, context); + mode_support_and_system_configuration(v); + /* Unhack dppclk: dont bother with trying to pipe split if we cannot maintain dpm0 */ + if (v->voltage_level != 0 + && context->stream_count == 1 + && dc->debug.force_single_disp_pipe_split) { + v->max_dppclk[0] = v->max_dppclk_vmin0p65; + mode_support_and_system_configuration(v); + } + if (v->voltage_level == 0 && (dc->debug.sr_exit_time_dpm0_ns || dc->debug.sr_enter_plus_exit_time_dpm0_ns)) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 35e84ed031de..77a1bf233c3c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -29,6 +29,7 @@ #include "core_status.h" #include "core_types.h" #include "hw_sequencer.h" +#include "dce/dce_hwseq.h" #include "resource.h" @@ -38,6 +39,7 @@ #include "bios_parser_interface.h" #include "include/irq_service_interface.h" #include "transform.h" +#include "dmcu.h" #include "dpp.h" #include "timing_generator.h" #include "virtual/virtual_link_encoder.h" @@ -214,6 +216,130 @@ bool dc_stream_get_crtc_position(struct dc *dc, return ret; } +/** + * dc_stream_configure_crc: Configure CRC capture for the given stream. + * @dc: DC Object + * @stream: The stream to configure CRC on. + * @enable: Enable CRC if true, disable otherwise. + * @continuous: Capture CRC on every frame if true. Otherwise, only capture + * once. + * + * By default, only CRC0 is configured, and the entire frame is used to + * calculate the crc. + */ +bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, + bool enable, bool continuous) +{ + int i; + struct pipe_ctx *pipe; + struct crc_params param; + struct timing_generator *tg; + + for (i = 0; i < MAX_PIPES; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream == stream) + break; + } + /* Stream not found */ + if (i == MAX_PIPES) + return false; + + /* Always capture the full frame */ + param.windowa_x_start = 0; + param.windowa_y_start = 0; + param.windowa_x_end = pipe->stream->timing.h_addressable; + param.windowa_y_end = pipe->stream->timing.v_addressable; + param.windowb_x_start = 0; + param.windowb_y_start = 0; + param.windowb_x_end = pipe->stream->timing.h_addressable; + param.windowb_y_end = pipe->stream->timing.v_addressable; + + /* Default to the union of both windows */ + param.selection = UNION_WINDOW_A_B; + param.continuous_mode = continuous; + param.enable = enable; + + tg = pipe->stream_res.tg; + + /* Only call if supported */ + if (tg->funcs->configure_crc) + return tg->funcs->configure_crc(tg, ¶m); + dm_logger_write(dc->ctx->logger, LOG_WARNING, "CRC capture not supported."); + return false; +} + +/** + * dc_stream_get_crc: Get CRC values for the given stream. + * @dc: DC object + * @stream: The DC stream state of the stream to get CRCs from. + * @r_cr, g_y, b_cb: CRC values for the three channels are stored here. + * + * dc_stream_configure_crc needs to be called beforehand to enable CRCs. + * Return false if stream is not found, or if CRCs are not enabled. + */ +bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) +{ + int i; + struct pipe_ctx *pipe; + struct timing_generator *tg; + + for (i = 0; i < MAX_PIPES; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream == stream) + break; + } + /* Stream not found */ + if (i == MAX_PIPES) + return false; + + tg = pipe->stream_res.tg; + + if (tg->funcs->get_crc) + return tg->funcs->get_crc(tg, r_cr, g_y, b_cb); + dm_logger_write(dc->ctx->logger, LOG_WARNING, "CRC capture not supported."); + return false; +} + +void dc_stream_set_dither_option(struct dc_stream_state *stream, + enum dc_dither_option option) +{ + struct bit_depth_reduction_params params; + struct dc_link *link = stream->status.link; + struct pipe_ctx *pipes = NULL; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + if (link->dc->current_state->res_ctx.pipe_ctx[i].stream == + stream) { + pipes = &link->dc->current_state->res_ctx.pipe_ctx[i]; + break; + } + } + + if (!pipes) + return; + if (option > DITHER_OPTION_MAX) + return; + + stream->dither_option = option; + + memset(¶ms, 0, sizeof(params)); + resource_build_bit_depth_reduction_params(stream, ¶ms); + stream->bit_depth_params = params; + + if (pipes->plane_res.xfm && + pipes->plane_res.xfm->funcs->transform_set_pixel_storage_depth) { + pipes->plane_res.xfm->funcs->transform_set_pixel_storage_depth( + pipes->plane_res.xfm, + pipes->plane_res.scl_data.lb_params.depth, + &stream->bit_depth_params); + } + + pipes->stream_res.opp->funcs-> + opp_program_bit_depth_reduction(pipes->stream_res.opp, ¶ms); +} + void dc_stream_set_static_screen_events(struct dc *dc, struct dc_stream_state **streams, int num_streams, @@ -359,9 +485,6 @@ static bool construct(struct dc *dc, dc_version = resource_parse_asic_id(init_params->asic_id); dc_ctx->dce_version = dc_version; -#if defined(CONFIG_DRM_AMD_DC_FBC) - dc->ctx->fbc_gpu_addr = init_params->fbc_gpu_addr; -#endif /* Resource should construct all asic specific resources. * This should be the only place where we need to parse the asic id */ @@ -487,6 +610,12 @@ struct dc *dc_create(const struct dc_init_data *init_params) dc->caps.max_audios = dc->res_pool->audio_count; dc->caps.linear_pitch_alignment = 64; + /* Populate versioning information */ + dc->versions.dc_ver = DC_VER; + + if (dc->res_pool->dmcu != NULL) + dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version; + dc->config = init_params->flags; dm_logger_write(dc->ctx->logger, LOG_DC, @@ -524,11 +653,13 @@ static void enable_timing_multisync( if (!ctx->res_ctx.pipe_ctx[i].stream || !ctx->res_ctx.pipe_ctx[i].stream->triggered_crtc_reset.enabled) continue; + if (ctx->res_ctx.pipe_ctx[i].stream == ctx->res_ctx.pipe_ctx[i].stream->triggered_crtc_reset.event_source) + continue; multisync_pipes[multisync_count] = &ctx->res_ctx.pipe_ctx[i]; multisync_count++; } - if (multisync_count > 1) { + if (multisync_count > 0) { dc->hwss.enable_per_frame_crtc_position_reset( dc, multisync_count, multisync_pipes); } @@ -650,7 +781,6 @@ bool dc_enable_stereo( return ret; } - /* * Applies given context to HW and copy it into current context. * It's up to the user to release the src context afterwards. @@ -669,7 +799,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_streams[i] = context->streams[i]; if (!dcb->funcs->is_accelerated_mode(dcb)) - dc->hwss.enable_accelerated_mode(dc); + dc->hwss.enable_accelerated_mode(dc, context); /* re-program planes for existing stream, in case we need to * free up plane resource for later use @@ -973,12 +1103,18 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa if (u->plane_info->input_tf != u->surface->input_tf) update_flags->bits.input_tf_change = 1; + if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) + update_flags->bits.output_tf_change = 1; + if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) update_flags->bits.horizontal_mirror_change = 1; if (u->plane_info->rotation != u->surface->rotation) update_flags->bits.rotation_change = 1; + if (u->plane_info->format != u->surface->format) + update_flags->bits.pixel_format_change = 1; + if (u->plane_info->stereo_format != u->surface->stereo_format) update_flags->bits.stereo_format_change = 1; @@ -997,6 +1133,9 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa */ update_flags->bits.bpp_change = 1; + if (u->gamma && dce_use_lut(u->plane_info->format)) + update_flags->bits.gamma_change = 1; + if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, sizeof(union dc_tiling_info)) != 0) { update_flags->bits.swizzle_change = 1; @@ -1012,8 +1151,11 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa if (update_flags->bits.rotation_change || update_flags->bits.stereo_format_change + || update_flags->bits.pixel_format_change + || update_flags->bits.gamma_change || update_flags->bits.bpp_change - || update_flags->bits.bandwidth_change) + || update_flags->bits.bandwidth_change + || update_flags->bits.output_tf_change) return UPDATE_TYPE_FULL; return UPDATE_TYPE_MED; @@ -1092,12 +1234,12 @@ static enum surface_update_type det_surface_update(const struct dc *dc, elevate_update_type(&overall_type, type); if (u->in_transfer_func) - update_flags->bits.in_transfer_func = 1; + update_flags->bits.in_transfer_func_change = 1; if (u->input_csc_color_matrix) update_flags->bits.input_csc_change = 1; - if (update_flags->bits.in_transfer_func + if (update_flags->bits.in_transfer_func_change || update_flags->bits.input_csc_change) { type = UPDATE_TYPE_MED; elevate_update_type(&overall_type, type); @@ -1183,6 +1325,7 @@ static void commit_planes_for_stream(struct dc *dc, struct dc_state *context) { int i, j; + struct pipe_ctx *top_pipe_to_program = NULL; if (update_type == UPDATE_TYPE_FULL) { dc->hwss.set_bandwidth(dc, context, false); @@ -1202,13 +1345,17 @@ static void commit_planes_for_stream(struct dc *dc, for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; - if (update_type == UPDATE_TYPE_FAST || !pipe_ctx->plane_state) - continue; - if (!pipe_ctx->top_pipe && - pipe_ctx->stream && - pipe_ctx->stream == stream) { - struct dc_stream_status *stream_status = + pipe_ctx->stream && + pipe_ctx->stream == stream) { + struct dc_stream_status *stream_status = NULL; + + top_pipe_to_program = pipe_ctx; + + if (update_type == UPDATE_TYPE_FAST || !pipe_ctx->plane_state) + continue; + + stream_status = stream_get_status(context, pipe_ctx->stream); dc->hwss.apply_ctx_for_surface( @@ -1219,22 +1366,32 @@ static void commit_planes_for_stream(struct dc *dc, if (update_type == UPDATE_TYPE_FULL) context_timing_trace(dc, &context->res_ctx); - /* Perform requested Updates */ - for (i = 0; i < surface_count; i++) { - struct dc_plane_state *plane_state = srf_updates[i].surface; + /* Lock the top pipe while updating plane addrs, since freesync requires + * plane addr update event triggers to be synchronized. + * top_pipe_to_program is expected to never be NULL + */ + if (update_type == UPDATE_TYPE_FAST) { + dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true); - for (j = 0; j < dc->res_pool->pipe_count; j++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + /* Perform requested Updates */ + for (i = 0; i < surface_count; i++) { + struct dc_plane_state *plane_state = srf_updates[i].surface; - if (pipe_ctx->stream != stream) - continue; + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; - if (pipe_ctx->plane_state != plane_state) - continue; + if (pipe_ctx->stream != stream) + continue; - if (update_type == UPDATE_TYPE_FAST && srf_updates[i].flip_addr) + if (pipe_ctx->plane_state != plane_state) + continue; + + if (srf_updates[i].flip_addr) dc->hwss.update_plane_addr(dc, pipe_ctx); + } } + + dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); } if (stream && stream_update && update_type > UPDATE_TYPE_FAST) @@ -1487,12 +1644,17 @@ struct dc_sink *dc_link_add_remote_sink( &dc_sink->dc_edid, &dc_sink->edid_caps); - if (edid_status != EDID_OK) - goto fail; + /* + * Treat device as no EDID device if EDID + * parsing fails + */ + if (edid_status != EDID_OK) { + dc_sink->dc_edid.length = 0; + dm_error("Bad EDID, status%d!\n", edid_status); + } return dc_sink; -fail: - dc_link_remove_remote_sink(link, dc_sink); + fail_add_sink: dc_sink_release(dc_sink); return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index a37428271573..95955ade4012 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -126,6 +126,8 @@ static bool program_hpd_filter( int delay_on_connect_in_ms = 0; int delay_on_disconnect_in_ms = 0; + if (link->is_hpd_filter_disabled) + return false; /* Verify feature is supported */ switch (link->connector_signal) { case SIGNAL_TYPE_DVI_SINGLE_LINK: @@ -464,7 +466,7 @@ static void link_disconnect_sink(struct dc_link *link) link->dpcd_sink_count = 0; } -static void detect_dp( +static bool detect_dp( struct dc_link *link, struct display_sink_capability *sink_caps, bool *converter_disable_audio, @@ -478,7 +480,8 @@ static void detect_dp( if (sink_caps->transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) { sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; - detect_dp_sink_caps(link); + if (!detect_dp_sink_caps(link)) + return false; if (is_mst_supported(link)) { sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT_MST; @@ -529,7 +532,7 @@ static void detect_dp( * active dongle unplug processing for short irq */ link_disconnect_sink(link); - return; + return true; } if (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER) @@ -541,6 +544,8 @@ static void detect_dp( sink_caps, audio_support); } + + return true; } bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) @@ -604,11 +609,12 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) } case SIGNAL_TYPE_DISPLAY_PORT: { - detect_dp( + if (!detect_dp( link, &sink_caps, &converter_disable_audio, - aud_support, reason); + aud_support, reason)) + return false; /* Active dongle downstream unplug */ if (link->type == dc_connection_active_dongle @@ -677,8 +683,6 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) case EDID_NO_RESPONSE: dm_logger_write(link->ctx->logger, LOG_ERROR, "No EDID read.\n"); - return false; - default: break; } @@ -1248,6 +1252,12 @@ static enum dc_status enable_link_dp( pipe_ctx->clock_source->id, &link_settings); + if (stream->sink->edid_caps.panel_patch.dppowerup_delay > 0) { + int delay_dp_power_up_in_ms = stream->sink->edid_caps.panel_patch.dppowerup_delay; + + msleep(delay_dp_power_up_in_ms); + } + panel_mode = dp_get_panel_mode(link); dpcd_configure_panel_mode(link, panel_mode); @@ -1279,13 +1289,12 @@ static enum dc_status enable_link_edp( enum dc_status status; struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->sink->link; - + /*in case it is not on*/ link->dc->hwss.edp_power_control(link, true); link->dc->hwss.edp_wait_for_hpd_ready(link, true); status = enable_link_dp(state, pipe_ctx); - link->dc->hwss.edp_backlight_control(link, true); return status; } @@ -1749,8 +1758,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) link->link_enc, pipe_ctx->clock_source->id, display_color_depth, - pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A, - pipe_ctx->stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK, + pipe_ctx->stream->signal, stream->phy_pix_clk); if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) @@ -1788,9 +1796,21 @@ static enum dc_status enable_link( } if (pipe_ctx->stream_res.audio && status == DC_OK) { + struct dc *core_dc = pipe_ctx->stream->ctx->dc; /* notify audio driver for audio modes of monitor */ + struct pp_smu_funcs_rv *pp_smu = core_dc->res_pool->pp_smu; + unsigned int i, num_audio = 1; + for (i = 0; i < MAX_PIPES; i++) { + /*current_state not updated yet*/ + if (core_dc->current_state->res_ctx.pipe_ctx[i].stream_res.audio != NULL) + num_audio++; + } + pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio); + if (num_audio == 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ + pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); /* un-mute audio */ /* TODO: audio should be per stream rather than per link */ pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( @@ -2268,7 +2288,16 @@ void core_link_enable_stream( { struct dc *core_dc = pipe_ctx->stream->ctx->dc; - enum dc_status status = enable_link(state, pipe_ctx); + enum dc_status status; + + /* eDP lit up by bios already, no need to enable again. */ + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + core_dc->apply_edp_fast_boot_optimization) { + core_dc->apply_edp_fast_boot_optimization = false; + return; + } + + status = enable_link(state, pipe_ctx); if (status != DC_OK) { dm_logger_write(pipe_ctx->stream->ctx->logger, @@ -2298,9 +2327,8 @@ void core_link_enable_stream( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - core_dc->hwss.unblank_stream(pipe_ctx, - &pipe_ctx->stream->sink->link->cur_link_settings); + core_dc->hwss.unblank_stream(pipe_ctx, + &pipe_ctx->stream->sink->link->cur_link_settings); } void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) @@ -2310,8 +2338,7 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) - core_dc->hwss.edp_backlight_control(pipe_ctx->stream->sink->link, false); + core_dc->hwss.blank_stream(pipe_ctx); core_dc->hwss.disable_stream(pipe_ctx, option); @@ -2328,3 +2355,33 @@ void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) core_dc->hwss.set_avmute(pipe_ctx, enable); } +void dc_link_disable_hpd_filter(struct dc_link *link) +{ + struct gpio *hpd; + + if (!link->is_hpd_filter_disabled) { + link->is_hpd_filter_disabled = true; + /* Obtain HPD handle */ + hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service); + + if (!hpd) + return; + + /* Setup HPD filtering */ + if (dal_gpio_open(hpd, GPIO_MODE_INTERRUPT) == GPIO_RESULT_OK) { + struct gpio_hpd_config config; + + config.delay_on_connect = 0; + config.delay_on_disconnect = 0; + + dal_irq_setup_hpd_filter(hpd, &config); + + dal_gpio_close(hpd); + } else { + ASSERT_CRITICAL(false); + } + /* Release HPD handle */ + dal_gpio_destroy_irq(&hpd); + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 61e8c3e02d16..604fb0171ee3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -718,7 +718,7 @@ static enum link_training_result perform_channel_equalization_sequence( uint32_t retries_ch_eq; enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; union lane_align_status_updated dpcd_lane_status_updated = {{0}}; - union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}};; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}}; hw_tr_pattern = get_supported_tp(link); @@ -1465,7 +1465,7 @@ void decide_link_settings(struct dc_stream_state *stream, /* MST doesn't perform link training for now * TODO: add MST specific link training routine */ - if (is_mst_supported(link)) { + if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { *link_setting = link->verified_link_cap; return; } @@ -2235,13 +2235,14 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data, link->wa_flags.dp_keep_receiver_powered = false; } -static void retrieve_link_cap(struct dc_link *link) +static bool retrieve_link_cap(struct dc_link *link) { uint8_t dpcd_data[DP_TRAINING_AUX_RD_INTERVAL - DP_DPCD_REV + 1]; union down_stream_port_count down_strm_port_count; union edp_configuration_cap edp_config_cap; union dp_downstream_port_present ds_port = { 0 }; + enum dc_status status = DC_ERROR_UNEXPECTED; memset(dpcd_data, '\0', sizeof(dpcd_data)); memset(&down_strm_port_count, @@ -2249,11 +2250,16 @@ static void retrieve_link_cap(struct dc_link *link) memset(&edp_config_cap, '\0', sizeof(union edp_configuration_cap)); - core_link_read_dpcd( - link, - DP_DPCD_REV, - dpcd_data, - sizeof(dpcd_data)); + status = core_link_read_dpcd( + link, + DP_DPCD_REV, + dpcd_data, + sizeof(dpcd_data)); + + if (status != DC_OK) { + dm_error("%s: Read dpcd data failed.\n", __func__); + return false; + } { union training_aux_rd_interval aux_rd_interval; @@ -2315,11 +2321,13 @@ static void retrieve_link_cap(struct dc_link *link) /* Connectivity log: detection */ CONN_DATA_DETECT(link, dpcd_data, sizeof(dpcd_data), "Rx Caps: "); + + return true; } -void detect_dp_sink_caps(struct dc_link *link) +bool detect_dp_sink_caps(struct dc_link *link) { - retrieve_link_cap(link); + return retrieve_link_cap(link); /* dc init_hw has power encoder using default * signal for connector. For native DP, no diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 2096f2a179f2..bae9b0587e12 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -102,7 +102,7 @@ void dp_enable_link_phy( dp_receiver_power_ctrl(link, true); } -static bool edp_receiver_ready_T9(struct dc_link *link) +bool edp_receiver_ready_T9(struct dc_link *link) { unsigned int tries = 0; unsigned char sinkstatus = 0; @@ -123,6 +123,28 @@ static bool edp_receiver_ready_T9(struct dc_link *link) } while (++tries < 50); return result; } +bool edp_receiver_ready_T7(struct dc_link *link) +{ + unsigned int tries = 0; + unsigned char sinkstatus = 0; + unsigned char edpRev = 0; + enum dc_status result = DC_OK; + + result = core_link_read_dpcd(link, DP_EDP_DPCD_REV, &edpRev, sizeof(edpRev)); + if (result == DC_OK && edpRev < DP_EDP_12) + return true; + /* start from eDP version 1.2, SINK_STAUS indicate the sink is ready.*/ + do { + sinkstatus = 0; + result = core_link_read_dpcd(link, DP_SINK_STATUS, &sinkstatus, sizeof(sinkstatus)); + if (sinkstatus == 1) + break; + if (result != DC_OK) + break; + udelay(25); //MAx T7 is 50ms + } while (++tries < 300); + return result; +} void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) { @@ -130,7 +152,6 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) dp_receiver_power_ctrl(link, false); if (signal == SIGNAL_TYPE_EDP) { - edp_receiver_ready_T9(link); link->link_enc->funcs->disable_output(link->link_enc, signal); link->dc->hwss.edp_power_control(link, false); } else diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 95b8dd0e53c6..ce0e9e76eb35 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -35,6 +35,7 @@ #include "core_types.h" #include "set_mode_types.h" #include "virtual/virtual_stream_encoder.h" +#include "dpcd_defs.h" #include "dce80/dce80_resource.h" #include "dce100/dce100_resource.h" @@ -696,7 +697,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r /* Adjust for viewport end clip-off */ - if ((data->viewport.x + data->viewport.width) < (src.x + src.width)) { + if ((data->viewport.x + data->viewport.width) < (src.x + src.width) && !flip_horz_scan_dir) { int vp_clip = src.x + src.width - data->viewport.width - data->viewport.x; int int_part = dal_fixed31_32_floor( dal_fixed31_32_sub(data->inits.h, data->ratios.horz)); @@ -704,7 +705,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r int_part = int_part > 0 ? int_part : 0; data->viewport.width += int_part < vp_clip ? int_part : vp_clip; } - if ((data->viewport.y + data->viewport.height) < (src.y + src.height)) { + if ((data->viewport.y + data->viewport.height) < (src.y + src.height) && !flip_vert_scan_dir) { int vp_clip = src.y + src.height - data->viewport.height - data->viewport.y; int int_part = dal_fixed31_32_floor( dal_fixed31_32_sub(data->inits.v, data->ratios.vert)); @@ -712,7 +713,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r int_part = int_part > 0 ? int_part : 0; data->viewport.height += int_part < vp_clip ? int_part : vp_clip; } - if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div) { + if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div && !flip_horz_scan_dir) { int vp_clip = (src.x + src.width) / vpc_div - data->viewport_c.width - data->viewport_c.x; int int_part = dal_fixed31_32_floor( @@ -721,7 +722,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r int_part = int_part > 0 ? int_part : 0; data->viewport_c.width += int_part < vp_clip ? int_part : vp_clip; } - if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div) { + if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div && !flip_vert_scan_dir) { int vp_clip = (src.y + src.height) / vpc_div - data->viewport_c.height - data->viewport_c.y; int int_part = dal_fixed31_32_floor( @@ -1054,6 +1055,7 @@ static int acquire_first_split_pipe( pipe_ctx->plane_res.ipp = pool->ipps[i]; pipe_ctx->plane_res.dpp = pool->dpps[i]; pipe_ctx->stream_res.opp = pool->opps[i]; + pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst; pipe_ctx->pipe_idx = i; pipe_ctx->stream = stream; @@ -1360,9 +1362,6 @@ bool dc_is_stream_scaling_unchanged( return true; } -/* Maximum TMDS single link pixel clock 165MHz */ -#define TMDS_MAX_PIXEL_CLOCK_IN_KHZ 165000 - static void update_stream_engine_usage( struct resource_context *res_ctx, const struct resource_pool *pool, @@ -1409,6 +1408,8 @@ static int acquire_first_free_pipe( pipe_ctx->plane_res.xfm = pool->transforms[i]; pipe_ctx->plane_res.dpp = pool->dpps[i]; pipe_ctx->stream_res.opp = pool->opps[i]; + if (pool->dpps[i]) + pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst; pipe_ctx->pipe_idx = i; @@ -1555,6 +1556,9 @@ enum dc_status dc_remove_stream_from_ctx( dc->res_pool, del_pipe->clock_source); + if (dc->res_pool->funcs->remove_stream_from_ctx) + dc->res_pool->funcs->remove_stream_from_ctx(dc, new_ctx, stream); + memset(del_pipe, 0, sizeof(*del_pipe)); break; @@ -2431,7 +2435,8 @@ static void set_vsc_info_packet( unsigned int vscPacketRevision = 0; unsigned int i; - if (stream->sink->link->psr_enabled) { + /*VSC packet set to 2 when DP revision >= 1.2*/ + if (stream->sink->link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { vscPacketRevision = 2; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 261811e0c094..87a193ac2883 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -33,8 +33,7 @@ /******************************************************************************* * Private functions ******************************************************************************/ -#define TMDS_MAX_PIXEL_CLOCK_IN_KHZ_UPMOST 297000 -static void update_stream_signal(struct dc_stream_state *stream) +void update_stream_signal(struct dc_stream_state *stream) { struct dc_sink *dc_sink = stream->sink; @@ -45,8 +44,9 @@ static void update_stream_signal(struct dc_stream_state *stream) stream->signal = dc_sink->sink_signal; if (dc_is_dvi_signal(stream->signal)) { - if (stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK_IN_KHZ_UPMOST && - stream->sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) + if (stream->ctx->dc->caps.dual_link_dvi && + stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK && + stream->sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK; else stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK; @@ -193,6 +193,7 @@ bool dc_stream_set_cursor_attributes( core_dc = stream->ctx->dc; res_ctx = &core_dc->current_state->res_ctx; + stream->cursor_attributes = *attributes; for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; @@ -203,34 +204,8 @@ bool dc_stream_set_cursor_attributes( continue; - if (pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes != NULL) - pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes( - pipe_ctx->plane_res.ipp, attributes); - - if (pipe_ctx->plane_res.hubp != NULL && - pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes != NULL) - pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes( - pipe_ctx->plane_res.hubp, attributes); - - if (pipe_ctx->plane_res.mi != NULL && - pipe_ctx->plane_res.mi->funcs->set_cursor_attributes != NULL) - pipe_ctx->plane_res.mi->funcs->set_cursor_attributes( - pipe_ctx->plane_res.mi, attributes); - - - if (pipe_ctx->plane_res.xfm != NULL && - pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes != NULL) - pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes( - pipe_ctx->plane_res.xfm, attributes); - - if (pipe_ctx->plane_res.dpp != NULL && - pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes != NULL) - pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes( - pipe_ctx->plane_res.dpp, attributes->color_format); + core_dc->hwss.set_cursor_attribute(pipe_ctx); } - - stream->cursor_attributes = *attributes; - return true; } @@ -254,21 +229,10 @@ bool dc_stream_set_cursor_position( core_dc = stream->ctx->dc; res_ctx = &core_dc->current_state->res_ctx; + stream->cursor_position = *position; for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp; - struct mem_input *mi = pipe_ctx->plane_res.mi; - struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dc_cursor_position pos_cpy = *position; - struct dc_cursor_mi_param param = { - .pixel_clk_khz = stream->timing.pix_clk_khz, - .ref_clk_khz = core_dc->res_pool->ref_clock_inKhz, - .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x, - .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width, - .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz - }; if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.mi && !pipe_ctx->plane_res.hubp) || @@ -276,33 +240,9 @@ bool dc_stream_set_cursor_position( (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp)) continue; - if (pipe_ctx->plane_state->address.type - == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) - pos_cpy.enable = false; - - if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state) - pos_cpy.enable = false; - - - if (ipp != NULL && ipp->funcs->ipp_cursor_set_position != NULL) - ipp->funcs->ipp_cursor_set_position(ipp, &pos_cpy, ¶m); - - if (mi != NULL && mi->funcs->set_cursor_position != NULL) - mi->funcs->set_cursor_position(mi, &pos_cpy, ¶m); - - if (!hubp) - continue; - - if (hubp->funcs->set_cursor_position != NULL) - hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); - - if (dpp != NULL && dpp->funcs->set_cursor_position != NULL) - dpp->funcs->set_cursor_position(dpp, &pos_cpy, ¶m, hubp->curs_attr.width); - + core_dc->hwss.set_cursor_position(pipe_ctx); } - stream->cursor_position = *position; - return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e2e3c9df79ea..5bb0e5defaf4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -38,7 +38,7 @@ #include "inc/compressor.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.1.27" +#define DC_VER "3.1.34" #define MAX_SURFACES 3 #define MAX_STREAMS 6 @@ -48,6 +48,18 @@ /******************************************************************************* * Display Core Interfaces ******************************************************************************/ +struct dmcu_version { + unsigned int date; + unsigned int month; + unsigned int year; + unsigned int interface_version; +}; + +struct dc_versions { + const char *dc_ver; + struct dmcu_version dmcu_version; +}; + struct dc_caps { uint32_t max_streams; uint32_t max_links; @@ -62,6 +74,7 @@ struct dc_caps { bool dcc_const_color; bool dynamic_audio; bool is_apu; + bool dual_link_dvi; }; struct dc_dcc_surface_param { @@ -94,6 +107,7 @@ struct dc_surface_dcc_cap { }; struct dc_static_screen_events { + bool force_trigger; bool cursor_update; bool surface_update; bool overlay_update; @@ -211,11 +225,15 @@ struct dc_debug { bool disable_stereo_support; bool vsr_support; bool performance_trace; + bool az_endpoint_mute_only; + bool always_use_regamma; + bool p010_mpo_support; }; struct dc_state; struct resource_pool; struct dce_hwseq; struct dc { + struct dc_versions versions; struct dc_caps caps; struct dc_cap_funcs cap_funcs; struct dc_config config; @@ -252,6 +270,8 @@ struct dc { bool optimized_required; + bool apply_edp_fast_boot_optimization; + /* FBC compressor */ #if defined(CONFIG_DRM_AMD_DC_FBC) struct compressor *fbc_compressor; @@ -288,9 +308,6 @@ struct dc_init_data { struct dc_config flags; uint32_t log_mask; -#if defined(CONFIG_DRM_AMD_DC_FBC) - uint64_t fbc_gpu_addr; -#endif }; struct dc *dc_create(const struct dc_init_data *init_params); @@ -369,6 +386,8 @@ struct dc_transfer_func { struct dc_transfer_func_distributed_points tf_pts; enum dc_transfer_func_type type; enum dc_transfer_func_predefined tf; + /* FP16 1.0 reference level in nits, default is 80 nits, only for PQ*/ + uint32_t sdr_ref_white_level; struct dc_context *ctx; }; @@ -397,12 +416,15 @@ union surface_update_flags { uint32_t swizzle_change:1; uint32_t scaling_change:1; uint32_t position_change:1; - uint32_t in_transfer_func:1; + uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; + uint32_t output_tf_change:1; + uint32_t pixel_format_change:1; /* Full updates */ uint32_t new_plane:1; uint32_t bpp_change:1; + uint32_t gamma_change:1; uint32_t bandwidth_change:1; uint32_t clock_change:1; uint32_t stereo_format_change:1; @@ -429,6 +451,7 @@ struct dc_plane_state { struct dc_bias_and_scale *bias_and_scale; struct csc_transform input_csc_color_matrix; struct fixed31_32 coeff_reduction_factor; + uint32_t sdr_white_level; // TODO: No longer used, remove struct dc_hdr_static_metadata hdr_static_ctx; @@ -465,6 +488,7 @@ struct dc_plane_info { enum plane_stereo_format stereo_format; enum dc_color_space color_space; enum color_transfer_func input_tf; + unsigned int sdr_white_level; bool horizontal_mirror; bool visible; bool per_pixel_alpha; @@ -489,10 +513,8 @@ struct dc_surface_update { /* following updates require alloc/sleep/spin that is not isr safe, * null means no updates */ - /* gamma TO BE REMOVED */ struct dc_gamma *gamma; enum color_transfer_func color_input_tf; - enum color_transfer_func color_output_tf; struct dc_transfer_func *in_transfer_func; struct csc_transform *input_csc_color_matrix; diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 273d80a4ebce..d9b84ec7954c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -111,6 +111,8 @@ struct dc_vbios_funcs { struct dc_bios *bios); bool (*is_accelerated_mode)( struct dc_bios *bios); + uint32_t (*get_vga_enabled_displays)( + struct dc_bios *bios); void (*get_bios_event_info)( struct dc_bios *bios, struct bios_event_info *info); @@ -199,6 +201,7 @@ struct dc_vbios_funcs { }; struct bios_registers { + uint32_t BIOS_SCRATCH_3; uint32_t BIOS_SCRATCH_6; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 03029f72dc3f..e91ac6811990 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -413,12 +413,14 @@ struct dc_cursor_mi_param { enum { GAMMA_RGB_256_ENTRIES = 256, GAMMA_RGB_FLOAT_1024_ENTRIES = 1024, - GAMMA_MAX_ENTRIES = 1024 + GAMMA_CS_TFM_1D_ENTRIES = 4096, + GAMMA_MAX_ENTRIES = 4096 }; enum dc_gamma_type { GAMMA_RGB_256 = 1, - GAMMA_RGB_FLOAT_1024 = 2 + GAMMA_RGB_FLOAT_1024 = 2, + GAMMA_CS_TFM_1D = 3, }; struct dc_gamma { @@ -434,6 +436,8 @@ struct dc_gamma { /* private to DC core */ struct dc_context *ctx; + + bool is_identity; }; /* Used by both ipp amd opp functions*/ diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index f11a734da1db..ac0f617b43c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -64,6 +64,8 @@ struct dc_link { enum signal_type connector_signal; enum dc_irq_source irq_source_hpd; enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ + bool is_hpd_filter_disabled; + /* caps is the same as reported_link_cap. link_traing use * reported_link_cap. Will clean up. TODO */ @@ -195,6 +197,8 @@ bool dc_link_dp_set_test_pattern( const unsigned char *p_custom_pattern, unsigned int cust_pattern_size); +void dc_link_disable_hpd_filter(struct dc_link *link); + /* * DPCD access interfaces */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 01c60f11b2bd..78a2bbe0b272 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -65,6 +65,7 @@ struct dc_stream_state { enum dc_dither_option dither_option; enum view_3d_format view_format; + enum color_transfer_func output_tf; bool ignore_msa_timing_param; /* TODO: custom INFO packets */ @@ -103,6 +104,7 @@ struct dc_stream_update { struct rect dst; struct dc_transfer_func *out_transfer_func; struct dc_hdr_static_metadata *hdr_static_metadata; + enum color_transfer_func color_output_tf; }; bool dc_is_stream_unchanged( @@ -152,7 +154,7 @@ struct dc_stream_state *dc_get_stream_at_index(struct dc *dc, uint8_t i); uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream); /* TODO: Return parsed values rather than direct register read - * This has a dependency on the caller (amdgpu_get_crtc_scanoutpos) + * This has a dependency on the caller (amdgpu_display_get_crtc_scanoutpos) * being refactored properly to be dce-specific */ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream, @@ -237,6 +239,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream( */ struct dc_stream_state *dc_create_stream_for_sink(struct dc_sink *dc_sink); +void update_stream_signal(struct dc_stream_state *stream); + void dc_stream_retain(struct dc_stream_state *dc_stream); void dc_stream_release(struct dc_stream_state *dc_stream); @@ -267,11 +271,25 @@ bool dc_stream_get_crtc_position(struct dc *dc, unsigned int *v_pos, unsigned int *nom_v_pos); +bool dc_stream_configure_crc(struct dc *dc, + struct dc_stream_state *stream, + bool enable, + bool continuous); + +bool dc_stream_get_crc(struct dc *dc, + struct dc_stream_state *stream, + uint32_t *r_cr, + uint32_t *g_y, + uint32_t *b_cb); + void dc_stream_set_static_screen_events(struct dc *dc, struct dc_stream_state **stream, int num_streams, const struct dc_static_screen_events *events); +void dc_stream_set_dither_option(struct dc_stream_state *stream, + enum dc_dither_option option); + bool dc_stream_adjust_vmin_vmax(struct dc *dc, struct dc_stream_state **stream, diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 9faddfae241d..8811b6f86bff 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -193,6 +193,10 @@ union display_content_support { } bits; }; +struct dc_panel_patch { + unsigned int dppowerup_delay; +}; + struct dc_edid_caps { /* sink identification */ uint16_t manufacturer_id; @@ -219,6 +223,8 @@ struct dc_edid_caps { bool edid_hdmi; bool hdr_supported; + + struct dc_panel_patch panel_patch; }; struct view { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index b48190f54907..b231bd53613e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -323,6 +323,15 @@ static bool dce_abm_immediate_disable(struct abm *abm) /* notifyDMCUMsg */ REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); + abm->stored_backlight_registers.BL_PWM_CNTL = + REG_READ(BL_PWM_CNTL); + abm->stored_backlight_registers.BL_PWM_CNTL2 = + REG_READ(BL_PWM_CNTL2); + abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL = + REG_READ(BL_PWM_PERIOD_CNTL); + + REG_GET(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, + &abm->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 0df9ecb2710c..e366bfd7cf6f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -359,7 +359,10 @@ void dce_aud_az_enable(struct audio *audio) AUDIO_ENABLED); AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); - value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); + set_reg_field_value(value, 0, + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + CLOCK_GATING_DISABLE); + AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); dm_logger_write(CTX->logger, LOG_HW_AUDIO, "\n\t========= AUDIO:dce_aud_az_enable: index: %u data: 0x%x\n", @@ -372,6 +375,10 @@ void dce_aud_az_disable(struct audio *audio) struct dce_audio *aud = DCE_AUD(audio); value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); + set_reg_field_value(value, 1, + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + CLOCK_GATING_DISABLE); + AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); set_reg_field_value(value, 0, AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, @@ -716,6 +723,11 @@ void dce_aud_az_configure( DESCRIPTION17); AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_SINK_INFO8, value); + value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); + set_reg_field_value(value, 0, + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + CLOCK_GATING_DISABLE); + AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); } /* @@ -897,6 +909,10 @@ void dce_aud_hw_init( REG_UPDATE_2(AZALIA_F0_CODEC_FUNCTION_PARAMETER_POWER_STATES, CLKSTOP, 1, EPSS, 1); + set_reg_field_value(value, 0, + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + CLOCK_GATING_DISABLE); + AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); } static const struct audio_funcs funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 31280d252753..5036b674f68b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -908,19 +908,9 @@ static bool dce110_program_pix_clk( #if defined(CONFIG_DRM_AMD_DC_DCN1_0) if (IS_FPGA_MAXIMUS_DC(clock_source->ctx->dce_environment)) { unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0; - unsigned dp_dto_ref_kHz = 600000; - /* DPREF clock from FPGA TODO: Does FPGA have this value? */ + unsigned dp_dto_ref_kHz = 700000; unsigned clock_kHz = pll_settings->actual_pix_clk; - /* For faster simulation, if mode pixe clock less than 290MHz, - * pixel clock can be hard coded to 290Mhz. For 4K mode, pixel clock - * is greater than 500Mhz, need real pixel clock - * clock_kHz = 290000; - */ - /* TODO: un-hardcode when we can set display clock properly*/ - /*clock_kHz = pix_clk_params->requested_pix_clk;*/ - clock_kHz = 290000; - /* Set DTO values: phase = target clock, modulo = reference clock */ REG_WRITE(PHASE[inst], clock_kHz); REG_WRITE(MODULO[inst], dp_dto_ref_kHz); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c index 9e98a5f39a6d..046658c8498a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c @@ -34,6 +34,7 @@ #include "dcn_calcs.h" #endif #include "core_types.h" +#include "dc_types.h" #define TO_DCE_CLOCKS(clocks)\ @@ -292,7 +293,10 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state( low_req_clk = i + 1; if (low_req_clk > clk->max_clks_state) { dm_logger_write(clk->ctx->logger, LOG_WARNING, - "%s: clocks unsupported", __func__); + "%s: clocks unsupported disp_clk %d pix_clk %d", + __func__, + req_clocks->display_clk_khz, + req_clocks->pixel_clk_khz); low_req_clk = DM_PP_CLOCKS_STATE_INVALID; } @@ -415,9 +419,12 @@ static int dce112_set_clock( bp->funcs->set_dce_clock(bp, &dce_clk_params); - if (clk_dce->dfs_bypass_disp_clk != actual_clock) - dmcu->funcs->set_psr_wait_loop(dmcu, - actual_clock / 1000 / 7); + if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { + if (clk_dce->dfs_bypass_disp_clk != actual_clock) + dmcu->funcs->set_psr_wait_loop(dmcu, + actual_clock / 1000 / 7); + } + clk_dce->dfs_bypass_disp_clk = actual_clock; return actual_clock; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index f663adb33584..2ee3d9bf1062 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -360,7 +360,7 @@ static void dcn10_get_dmcu_version(struct dmcu *dmcu) dmcu->dmcu_version.year = ((REG_READ(DMCU_IRAM_RD_DATA) << 8) | REG_READ(DMCU_IRAM_RD_DATA)); dmcu->dmcu_version.month = REG_READ(DMCU_IRAM_RD_DATA); - dmcu->dmcu_version.day = REG_READ(DMCU_IRAM_RD_DATA); + dmcu->dmcu_version.date = REG_READ(DMCU_IRAM_RD_DATA); /* Disable write access to IRAM to allow dynamic sleep state */ REG_UPDATE_2(DMCU_RAM_ACCESS_CTRL, @@ -521,6 +521,9 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait) if (dmcu->dmcu_state != DMCU_RUNNING) return; + dcn10_get_dmcu_psr_state(dmcu, &psr_state); + if (psr_state == 0 && !enable) + return; /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, dmcu_wait_reg_ready_interval, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c index d2e66b1bc0ef..4b8e7ce2de8c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c @@ -197,9 +197,9 @@ void dce_crtc_switch_to_clk_src(struct dce_hwseq *hws, } /* Only use LUT for 8 bit formats */ -bool dce_use_lut(const struct dc_plane_state *plane_state) +bool dce_use_lut(enum surface_pixel_format format) { - switch (plane_state->format) { + switch (format) { case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: return true; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index b73db9e78437..3336428b1fed 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -140,42 +140,8 @@ BL_REG_LIST() #define HWSEQ_DCN_REG_LIST()\ - SRII(DCHUBP_CNTL, HUBP, 0), \ - SRII(DCHUBP_CNTL, HUBP, 1), \ - SRII(DCHUBP_CNTL, HUBP, 2), \ - SRII(DCHUBP_CNTL, HUBP, 3), \ - SRII(HUBP_CLK_CNTL, HUBP, 0), \ - SRII(HUBP_CLK_CNTL, HUBP, 1), \ - SRII(HUBP_CLK_CNTL, HUBP, 2), \ - SRII(HUBP_CLK_CNTL, HUBP, 3), \ - SRII(DPP_CONTROL, DPP_TOP, 0), \ - SRII(DPP_CONTROL, DPP_TOP, 1), \ - SRII(DPP_CONTROL, DPP_TOP, 2), \ - SRII(DPP_CONTROL, DPP_TOP, 3), \ - SRII(OPP_PIPE_CONTROL, OPP_PIPE, 0), \ - SRII(OPP_PIPE_CONTROL, OPP_PIPE, 1), \ - SRII(OPP_PIPE_CONTROL, OPP_PIPE, 2), \ - SRII(OPP_PIPE_CONTROL, OPP_PIPE, 3), \ SR(REFCLK_CNTL), \ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A),\ - SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A),\ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B),\ - SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B),\ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C),\ - SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C),\ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D),\ - SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D),\ - SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL),\ - SR(DCHUBBUB_ARB_DRAM_STATE_CNTL),\ - SR(DCHUBBUB_ARB_SAT_LEVEL),\ - SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND),\ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DCHUBBUB_TEST_DEBUG_INDEX), \ - SR(DCHUBBUB_TEST_DEBUG_DATA), \ SR(DIO_MEM_PWR_CTRL), \ SR(DCCG_GATE_DISABLE_CNTL), \ SR(DCCG_GATE_DISABLE_CNTL2), \ @@ -195,22 +161,10 @@ MMHUB_SR(MC_VM_SYSTEM_APERTURE_LOW_ADDR),\ MMHUB_SR(MC_VM_SYSTEM_APERTURE_HIGH_ADDR) -#define HWSEQ_SR_WATERMARK_REG_LIST()\ - SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A),\ - SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A),\ - SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B),\ - SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B),\ - SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C),\ - SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C),\ - SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D),\ - SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D) - #define HWSEQ_DCN1_REG_LIST()\ HWSEQ_DCN_REG_LIST(), \ - HWSEQ_SR_WATERMARK_REG_LIST(), \ HWSEQ_PIXEL_RATE_REG_LIST(OTG), \ HWSEQ_PHYPLL_REG_LIST(OTG), \ - SR(DCHUBBUB_SDPIF_FB_TOP),\ SR(DCHUBBUB_SDPIF_FB_BASE),\ SR(DCHUBBUB_SDPIF_FB_OFFSET),\ SR(DCHUBBUB_SDPIF_AGP_BASE),\ @@ -260,39 +214,9 @@ struct dce_hwseq_registers { uint32_t DCHUB_AGP_BOT; uint32_t DCHUB_AGP_TOP; - uint32_t DCHUBP_CNTL[4]; - uint32_t HUBP_CLK_CNTL[4]; - uint32_t DPP_CONTROL[4]; - uint32_t OPP_PIPE_CONTROL[4]; uint32_t REFCLK_CNTL; - uint32_t DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A; - uint32_t DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A; - uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A; - uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A; - uint32_t DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A; - uint32_t DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B; - uint32_t DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B; - uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B; - uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B; - uint32_t DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B; - uint32_t DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C; - uint32_t DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C; - uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C; - uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C; - uint32_t DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C; - uint32_t DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D; - uint32_t DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D; - uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D; - uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D; - uint32_t DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D; - uint32_t DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL; - uint32_t DCHUBBUB_ARB_SAT_LEVEL; - uint32_t DCHUBBUB_ARB_DF_REQ_OUTSTAND; + uint32_t DCHUBBUB_GLOBAL_TIMER_CNTL; - uint32_t DCHUBBUB_ARB_DRAM_STATE_CNTL; - uint32_t DCHUBBUB_TEST_DEBUG_INDEX; - uint32_t DCHUBBUB_TEST_DEBUG_DATA; - uint32_t DCHUBBUB_SDPIF_FB_TOP; uint32_t DCHUBBUB_SDPIF_FB_BASE; uint32_t DCHUBBUB_SDPIF_FB_OFFSET; uint32_t DCHUBBUB_SDPIF_AGP_BASE; @@ -433,31 +357,17 @@ struct dce_hwseq_registers { #define HWSEQ_DCN_MASK_SH_LIST(mask_sh)\ HWSEQ_PIXEL_RATE_MASK_SH_LIST(mask_sh, OTG0_),\ HWS_SF1(OTG0_, PHYPLL_PIXEL_RATE_CNTL, PHYPLL_PIXEL_RATE_SOURCE, mask_sh), \ - HWS_SF(HUBP0_, DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh), \ - HWS_SF(HUBP0_, HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh), \ - HWS_SF(DPP_TOP0_, DPP_CONTROL, DPP_CLOCK_ENABLE, mask_sh), \ - HWS_SF(OPP_PIPE0_, OPP_PIPE_CONTROL, OPP_PIPE_CLOCK_EN, mask_sh),\ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_DONE_INTERRUPT_DISABLE, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_SAT_LEVEL, DCHUBBUB_ARB_SAT_LEVEL, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, mask_sh), \ HWS_SF(, DCFCLK_CNTL, DCFCLK_GATE_DIS, mask_sh) #define HWSEQ_DCN1_MASK_SH_LIST(mask_sh)\ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ HWS_SF1(OTG0_, PHYPLL_PIXEL_RATE_CNTL, PIXEL_RATE_PLL_SOURCE, mask_sh), \ - HWS_SF(, DCHUBBUB_SDPIF_FB_TOP, SDPIF_FB_TOP, mask_sh), \ HWS_SF(, DCHUBBUB_SDPIF_FB_BASE, SDPIF_FB_BASE, mask_sh), \ HWS_SF(, DCHUBBUB_SDPIF_FB_OFFSET, SDPIF_FB_OFFSET, mask_sh), \ HWS_SF(, DCHUBBUB_SDPIF_AGP_BASE, SDPIF_AGP_BASE, mask_sh), \ HWS_SF(, DCHUBBUB_SDPIF_AGP_BOT, SDPIF_AGP_BOT, mask_sh), \ HWS_SF(, DCHUBBUB_SDPIF_AGP_TOP, SDPIF_AGP_TOP, mask_sh), \ - HWS_SF(DPP_TOP0_, DPP_CONTROL, DPPCLK_RATE_CONTROL, mask_sh), \ /* todo: get these from GVM instead of reading registers ourselves */\ HWS_SF(, VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, PAGE_DIRECTORY_ENTRY_HI32, mask_sh),\ HWS_SF(, VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, PAGE_DIRECTORY_ENTRY_LO32, mask_sh),\ @@ -532,8 +442,6 @@ struct dce_hwseq_registers { type HUBP_VTG_SEL; \ type HUBP_CLOCK_ENABLE; \ type DPP_CLOCK_ENABLE; \ - type DPPCLK_RATE_CONTROL; \ - type SDPIF_FB_TOP;\ type SDPIF_FB_BASE;\ type SDPIF_FB_OFFSET;\ type SDPIF_AGP_BASE;\ @@ -546,14 +454,6 @@ struct dce_hwseq_registers { type AGP_BOT;\ type AGP_TOP;\ type DCHUBBUB_GLOBAL_TIMER_ENABLE; \ - type DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST;\ - type DCHUBBUB_ARB_WATERMARK_CHANGE_DONE_INTERRUPT_DISABLE;\ - type DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE;\ - type DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE;\ - type DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE;\ - type DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE;\ - type DCHUBBUB_ARB_SAT_LEVEL;\ - type DCHUBBUB_ARB_MIN_REQ_OUTSTAND;\ type OPP_PIPE_CLOCK_EN;\ type IP_REQUEST_EN; \ type DOMAIN0_POWER_FORCEON; \ @@ -619,5 +519,5 @@ void dce_crtc_switch_to_clk_src(struct dce_hwseq *hws, struct clock_source *clk_src, unsigned int tg_inst); -bool dce_use_lut(const struct dc_plane_state *plane_state); +bool dce_use_lut(enum surface_pixel_format format); #endif /*__DCE_HWSEQ_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index a266e3f5e75f..11f50588b3f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -82,13 +82,6 @@ #define DCE110_DIG_FE_SOURCE_SELECT_DIGF 0x20 #define DCE110_DIG_FE_SOURCE_SELECT_DIGG 0x40 -/* Minimum pixel clock, in KHz. For TMDS signal is 25.00 MHz */ -#define TMDS_MIN_PIXEL_CLOCK 25000 -/* Maximum pixel clock, in KHz. For TMDS signal is 165.00 MHz */ -#define TMDS_MAX_PIXEL_CLOCK 165000 -/* For current ASICs pixel clock - 600MHz */ -#define MAX_ENCODER_CLOCK 600000 - enum { DP_MST_UPDATE_MAX_RETRY = 50 }; @@ -828,6 +821,9 @@ void dce110_link_encoder_hw_init( cntl.coherent = false; cntl.hpd_sel = enc110->base.hpd_source; + if (enc110->base.connector.id == CONNECTOR_ID_EDP) + cntl.signal = SIGNAL_TYPE_EDP; + result = link_transmitter_control(enc110, &cntl); if (result != BP_RESULT_OK) { @@ -904,8 +900,7 @@ void dce110_link_encoder_enable_tmds_output( struct link_encoder *enc, enum clock_source_id clock_source, enum dc_color_depth color_depth, - bool hdmi, - bool dual_link, + enum signal_type signal, uint32_t pixel_clock) { struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc); @@ -919,16 +914,12 @@ void dce110_link_encoder_enable_tmds_output( cntl.engine_id = enc->preferred_engine; cntl.transmitter = enc110->base.transmitter; cntl.pll_id = clock_source; - if (hdmi) { - cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A; - cntl.lanes_number = 4; - } else if (dual_link) { - cntl.signal = SIGNAL_TYPE_DVI_DUAL_LINK; + cntl.signal = signal; + if (cntl.signal == SIGNAL_TYPE_DVI_DUAL_LINK) cntl.lanes_number = 8; - } else { - cntl.signal = SIGNAL_TYPE_DVI_SINGLE_LINK; + else cntl.lanes_number = 4; - } + cntl.hpd_sel = enc110->base.hpd_source; cntl.pixel_clock = pixel_clock; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h index 8ca9afe47a2b..0ec3433d34b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h @@ -210,8 +210,7 @@ void dce110_link_encoder_enable_tmds_output( struct link_encoder *enc, enum clock_source_id clock_source, enum dc_color_depth color_depth, - bool hdmi, - bool dual_link, + enum signal_type signal, uint32_t pixel_clock); /* enables DP PHY output */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 83bae207371d..8146b9079d51 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -920,6 +920,7 @@ static void dce110_stream_encoder_dp_blank( { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); uint32_t retries = 0; + uint32_t reg1 = 0; uint32_t max_retries = DP_BLANK_MAX_RETRY * 10; /* Note: For CZ, we are changing driver default to disable @@ -928,7 +929,10 @@ static void dce110_stream_encoder_dp_blank( * handful of panels that cannot handle disable stream at * HBLANK and will result in a white line flash across the * screen on stream disable. */ - + REG_GET(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, ®1); + if ((reg1 & 0x1) == 0) + /*stream not enabled*/ + return; /* Specify the video stream disable point * (2 = start of the next vertical blank) */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c index 0f662e6ee9bd..ad411dac5639 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c @@ -618,80 +618,48 @@ static void program_bit_depth_reduction( enum dc_color_depth depth, const struct bit_depth_reduction_params *bit_depth_params) { - enum dcp_bit_depth_reduction_mode depth_reduction_mode; - enum dcp_spatial_dither_mode spatial_dither_mode; - bool frame_random_enable; - bool rgb_random_enable; - bool highpass_random_enable; + enum dcp_out_trunc_round_depth trunc_round_depth; + enum dcp_out_trunc_round_mode trunc_mode; + bool spatial_dither_enable; ASSERT(depth < COLOR_DEPTH_121212); /* Invalid clamp bit depth */ - if (bit_depth_params->flags.SPATIAL_DITHER_ENABLED) { - depth_reduction_mode = DCP_BIT_DEPTH_REDUCTION_MODE_DITHER; - frame_random_enable = true; - rgb_random_enable = true; - highpass_random_enable = true; - - } else { - depth_reduction_mode = DCP_BIT_DEPTH_REDUCTION_MODE_DISABLED; - frame_random_enable = false; - rgb_random_enable = false; - highpass_random_enable = false; + spatial_dither_enable = bit_depth_params->flags.SPATIAL_DITHER_ENABLED; + /* Default to 12 bit truncation without rounding */ + trunc_round_depth = DCP_OUT_TRUNC_ROUND_DEPTH_12BIT; + trunc_mode = DCP_OUT_TRUNC_ROUND_MODE_TRUNCATE; + + if (bit_depth_params->flags.TRUNCATE_ENABLED) { + /* Don't enable dithering if truncation is enabled */ + spatial_dither_enable = false; + trunc_mode = bit_depth_params->flags.TRUNCATE_MODE ? + DCP_OUT_TRUNC_ROUND_MODE_ROUND : + DCP_OUT_TRUNC_ROUND_MODE_TRUNCATE; + + if (bit_depth_params->flags.TRUNCATE_DEPTH == 0 || + bit_depth_params->flags.TRUNCATE_DEPTH == 1) + trunc_round_depth = DCP_OUT_TRUNC_ROUND_DEPTH_8BIT; + else if (bit_depth_params->flags.TRUNCATE_DEPTH == 2) + trunc_round_depth = DCP_OUT_TRUNC_ROUND_DEPTH_10BIT; + else { + /* + * Invalid truncate/round depth. Setting here to 12bit + * to prevent use-before-initialize errors. + */ + trunc_round_depth = DCP_OUT_TRUNC_ROUND_DEPTH_12BIT; + BREAK_TO_DEBUGGER(); + } } - spatial_dither_mode = DCP_SPATIAL_DITHER_MODE_A_AA_A; - set_clamp(xfm_dce, depth); - - switch (depth_reduction_mode) { - case DCP_BIT_DEPTH_REDUCTION_MODE_DITHER: - /* Spatial Dither: Set round/truncate to bypass (12bit), - * enable Dither (30bpp) */ - set_round(xfm_dce, - DCP_OUT_TRUNC_ROUND_MODE_TRUNCATE, - DCP_OUT_TRUNC_ROUND_DEPTH_12BIT); - - set_dither(xfm_dce, true, spatial_dither_mode, - DCP_SPATIAL_DITHER_DEPTH_30BPP, frame_random_enable, - rgb_random_enable, highpass_random_enable); - break; - case DCP_BIT_DEPTH_REDUCTION_MODE_ROUND: - /* Round: Enable round (10bit), disable Dither */ - set_round(xfm_dce, - DCP_OUT_TRUNC_ROUND_MODE_ROUND, - DCP_OUT_TRUNC_ROUND_DEPTH_10BIT); - - set_dither(xfm_dce, false, spatial_dither_mode, - DCP_SPATIAL_DITHER_DEPTH_30BPP, frame_random_enable, - rgb_random_enable, highpass_random_enable); - break; - case DCP_BIT_DEPTH_REDUCTION_MODE_TRUNCATE: /* Truncate */ - /* Truncate: Enable truncate (10bit), disable Dither */ - set_round(xfm_dce, - DCP_OUT_TRUNC_ROUND_MODE_TRUNCATE, - DCP_OUT_TRUNC_ROUND_DEPTH_10BIT); - - set_dither(xfm_dce, false, spatial_dither_mode, - DCP_SPATIAL_DITHER_DEPTH_30BPP, frame_random_enable, - rgb_random_enable, highpass_random_enable); - break; - - case DCP_BIT_DEPTH_REDUCTION_MODE_DISABLED: /* Disabled */ - /* Truncate: Set round/truncate to bypass (12bit), - * disable Dither */ - set_round(xfm_dce, - DCP_OUT_TRUNC_ROUND_MODE_TRUNCATE, - DCP_OUT_TRUNC_ROUND_DEPTH_12BIT); - - set_dither(xfm_dce, false, spatial_dither_mode, - DCP_SPATIAL_DITHER_DEPTH_30BPP, frame_random_enable, - rgb_random_enable, highpass_random_enable); - break; - default: - /* Invalid DCP Depth reduction mode */ - BREAK_TO_DEBUGGER(); - break; - } + set_round(xfm_dce, trunc_mode, trunc_round_depth); + set_dither(xfm_dce, + spatial_dither_enable, + DCP_SPATIAL_DITHER_MODE_A_AA_A, + DCP_SPATIAL_DITHER_DEPTH_30BPP, + bit_depth_params->flags.FRAME_RANDOM, + bit_depth_params->flags.RGB_RANDOM, + bit_depth_params->flags.HIGHPASS_RANDOM); } static int dce_transform_get_max_num_of_supported_lines( @@ -879,6 +847,7 @@ static void dce_transform_set_gamut_remap( const struct xfm_grph_csc_adjustment *adjust) { struct dce_transform *xfm_dce = TO_DCE_TRANSFORM(xfm); + int i = 0; if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) /* Bypass if type is bypass or hw */ @@ -887,20 +856,8 @@ static void dce_transform_set_gamut_remap( struct fixed31_32 arr_matrix[GAMUT_MATRIX_SIZE]; uint16_t arr_reg_val[GAMUT_MATRIX_SIZE]; - arr_matrix[0] = adjust->temperature_matrix[0]; - arr_matrix[1] = adjust->temperature_matrix[1]; - arr_matrix[2] = adjust->temperature_matrix[2]; - arr_matrix[3] = dal_fixed31_32_zero; - - arr_matrix[4] = adjust->temperature_matrix[3]; - arr_matrix[5] = adjust->temperature_matrix[4]; - arr_matrix[6] = adjust->temperature_matrix[5]; - arr_matrix[7] = dal_fixed31_32_zero; - - arr_matrix[8] = adjust->temperature_matrix[6]; - arr_matrix[9] = adjust->temperature_matrix[7]; - arr_matrix[10] = adjust->temperature_matrix[8]; - arr_matrix[11] = dal_fixed31_32_zero; + for (i = 0; i < GAMUT_MATRIX_SIZE; i++) + arr_matrix[i] = adjust->temperature_matrix[i]; convert_float_matrix( arr_reg_val, arr_matrix, GAMUT_MATRIX_SIZE); @@ -1126,7 +1083,7 @@ void dce110_opp_set_csc_adjustment( CSC_COLOR_MODE_GRAPHICS_OUTPUT_CSC; program_color_matrix( - xfm_dce, tbl_entry, GRAPHICS_CSC_ADJUST_TYPE_SW); + xfm_dce, tbl_entry, GRPH_COLOR_MATRIX_SW); /* We did everything ,now program DxOUTPUT_CSC_CONTROL */ configure_graphics_mode(xfm_dce, config, GRAPHICS_CSC_ADJUST_TYPE_SW, diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c index 469af0587604..41f83ecd7469 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c @@ -69,7 +69,7 @@ static const struct dce100_hw_seq_reg_offsets reg_offsets[] = { ******************************************************************************/ /***************************PIPE_CONTROL***********************************/ -static bool dce100_enable_display_power_gating( +bool dce100_enable_display_power_gating( struct dc *dc, uint8_t controller_id, struct dc_bios *dcb, diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h index cb5384ef46c3..c6ec0ed6ec3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h @@ -38,5 +38,9 @@ void dce100_set_bandwidth( struct dc_state *context, bool decrease_allowed); +bool dce100_enable_display_power_gating(struct dc *dc, uint8_t controller_id, + struct dc_bios *dcb, + enum pipe_gating_control power_gating); + #endif /* __DC_HWSS_DCE100_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 3ea43e2a9450..3bdbed80f7f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -849,9 +849,11 @@ static bool construct( *************************************************/ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; pool->base.pipe_count = res_cap.num_timing_generator; + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 40; dc->caps.max_cursor_size = 128; + dc->caps.dual_link_dvi = true; for (i = 0; i < pool->base.pipe_count; i++) { pool->base.timing_generators[i] = diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 86cdd7b4811f..0422c72a7579 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -57,6 +57,8 @@ #include "dce/dce_11_0_sh_mask.h" #include "custom_float.h" +#include "atomfirmware.h" + /* * All values are in milliseconds; * For eDP, after power-up/power/down, @@ -275,7 +277,7 @@ dce110_set_input_transfer_func(struct pipe_ctx *pipe_ctx, build_prescale_params(&prescale_params, plane_state); ipp->funcs->ipp_program_prescale(ipp, &prescale_params); - if (plane_state->gamma_correction && dce_use_lut(plane_state)) + if (plane_state->gamma_correction && dce_use_lut(plane_state->format)) ipp->funcs->ipp_program_input_lut(ipp, plane_state->gamma_correction); if (tf == NULL) { @@ -407,6 +409,10 @@ static bool convert_to_custom_float(struct pwl_result_data *rgb_resulted, return true; } +#define MAX_LOW_POINT 25 +#define NUMBER_REGIONS 16 +#define NUMBER_SW_SEGMENTS 16 + static bool dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, struct pwl_params *regamma_params) @@ -421,8 +427,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, struct fixed31_32 y1_min; struct fixed31_32 y3_max; - int32_t segment_start, segment_end; - uint32_t i, j, k, seg_distr[16], increment, start_index, hw_points; + int32_t region_start, region_end; + uint32_t i, j, k, seg_distr[NUMBER_REGIONS], increment, start_index, hw_points; if (output_tf == NULL || regamma_params == NULL || output_tf->type == TF_TYPE_BYPASS) return false; @@ -437,34 +443,20 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, /* 16 segments * segments are from 2^-11 to 2^5 */ - segment_start = -11; - segment_end = 5; - - seg_distr[0] = 2; - seg_distr[1] = 2; - seg_distr[2] = 2; - seg_distr[3] = 2; - seg_distr[4] = 2; - seg_distr[5] = 2; - seg_distr[6] = 3; - seg_distr[7] = 4; - seg_distr[8] = 4; - seg_distr[9] = 4; - seg_distr[10] = 4; - seg_distr[11] = 5; - seg_distr[12] = 5; - seg_distr[13] = 5; - seg_distr[14] = 5; - seg_distr[15] = 5; + region_start = -11; + region_end = region_start + NUMBER_REGIONS; + + for (i = 0; i < NUMBER_REGIONS; i++) + seg_distr[i] = 4; } else { /* 10 segments * segment is from 2^-10 to 2^0 */ - segment_start = -10; - segment_end = 0; + region_start = -10; + region_end = 0; - seg_distr[0] = 3; + seg_distr[0] = 4; seg_distr[1] = 4; seg_distr[2] = 4; seg_distr[3] = 4; @@ -472,8 +464,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, seg_distr[5] = 4; seg_distr[6] = 4; seg_distr[7] = 4; - seg_distr[8] = 5; - seg_distr[9] = 5; + seg_distr[8] = 4; + seg_distr[9] = 4; seg_distr[10] = -1; seg_distr[11] = -1; seg_distr[12] = -1; @@ -488,10 +480,12 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, } j = 0; - for (k = 0; k < (segment_end - segment_start); k++) { - increment = 32 / (1 << seg_distr[k]); - start_index = (segment_start + k + 25) * 32; - for (i = start_index; i < start_index + 32; i += increment) { + for (k = 0; k < (region_end - region_start); k++) { + increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); + start_index = (region_start + k + MAX_LOW_POINT) * + NUMBER_SW_SEGMENTS; + for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; + i += increment) { if (j == hw_points - 1) break; rgb_resulted[j].red = output_tf->tf_pts.red[i]; @@ -502,15 +496,15 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, } /* last point */ - start_index = (segment_end + 25) * 32; + start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(segment_start)); + dal_fixed31_32_from_int(region_start)); arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(segment_end)); + dal_fixed31_32_from_int(region_end)); y_r = rgb_resulted[0].red; y_g = rgb_resulted[0].green; @@ -625,7 +619,7 @@ static enum dc_status bios_parser_crtc_source_select( const struct dc_sink *sink = pipe_ctx->stream->sink; crtc_source_select.engine_id = pipe_ctx->stream_res.stream_enc->id; - crtc_source_select.controller_id = pipe_ctx->pipe_idx + 1; + crtc_source_select.controller_id = pipe_ctx->stream_res.tg->inst + 1; /*TODO: Need to un-hardcode color depth, dp_audio and account for * the case where signal and sink signal is different (translator * encoder)*/ @@ -914,6 +908,7 @@ void hwss_edp_backlight_control( /*todo: unhardcode*/ cntl.lanes_number = LANE_COUNT_FOUR; cntl.hpd_sel = link->link_enc->hpd_source; + cntl.signal = SIGNAL_TYPE_EDP; /* For eDP, the following delays might need to be considered * after link training completed: @@ -926,7 +921,13 @@ void hwss_edp_backlight_control( * Enable it in the future if necessary. */ /* dc_service_sleep_in_milliseconds(50); */ + /*edp 1.2*/ + if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) + edp_receiver_ready_T7(link); link_transmitter_control(ctx->dc_bios, &cntl); + /*edp 1.2*/ + if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) + edp_receiver_ready_T9(link); } void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) @@ -946,7 +947,11 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( pipe_ctx->stream_res.stream_enc, true); if (pipe_ctx->stream_res.audio) { - pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); + if (option != KEEP_ACQUIRED_RESOURCE || + !dc->debug.az_endpoint_mute_only) { + /*only disalbe az_endpoint if power down or free*/ + pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); + } if (dc_is_dp_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable( @@ -969,9 +974,6 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) */ } - /* blank at encoder level */ - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->dp_blank(pipe_ctx->stream_res.stream_enc); link->link_enc->funcs->connect_dig_be_to_fe( link->link_enc, @@ -984,12 +986,30 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings) { struct encoder_unblank_param params = { { 0 } }; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->sink->link; /* only 3 items below are used by unblank */ params.pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz; params.link_settings.link_rate = link_settings->link_rate; - pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(pipe_ctx->stream_res.stream_enc, ¶ms); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(pipe_ctx->stream_res.stream_enc, ¶ms); + + if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) + link->dc->hwss.edp_backlight_control(link, true); +} +void dce110_blank_stream(struct pipe_ctx *pipe_ctx) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->sink->link; + + if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) + link->dc->hwss.edp_backlight_control(link, false); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->dp_blank(pipe_ctx->stream_res.stream_enc); } @@ -1091,7 +1111,7 @@ static void build_audio_output( audio_output->pll_info.dto_source = translate_to_dto_source( - pipe_ctx->pipe_idx + 1); + pipe_ctx->stream_res.tg->inst + 1); /* TODO hard code to enable for now. Need get from stream */ audio_output->pll_info.ss_enabled = true; @@ -1407,6 +1427,31 @@ static void disable_vga_and_power_gate_all_controllers( } } +static struct dc_link *get_link_for_edp_not_in_use( + struct dc *dc, + struct dc_state *context) +{ + int i; + struct dc_link *link = NULL; + + /* check if eDP panel is suppose to be set mode, if yes, no need to disable */ + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->signal == SIGNAL_TYPE_EDP) + return NULL; + } + + /* check if there is an eDP panel not in use */ + for (i = 0; i < dc->link_count; i++) { + if (dc->links[i]->local_sink && + dc->links[i]->local_sink->sink_signal == SIGNAL_TYPE_EDP) { + link = dc->links[i]; + break; + } + } + + return link; +} + /** * When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need: * 1. Power down all DC HW blocks @@ -1414,11 +1459,37 @@ static void disable_vga_and_power_gate_all_controllers( * 3. Enable power gating for controller * 4. Set acc_mode_change bit (VBIOS will clear this bit when going to FSDOS) */ -void dce110_enable_accelerated_mode(struct dc *dc) +void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) { - power_down_all_hw_blocks(dc); + struct dc_bios *dcb = dc->ctx->dc_bios; - disable_vga_and_power_gate_all_controllers(dc); + /* vbios already light up eDP, so we can leverage vbios and skip eDP + * programming + */ + bool can_eDP_fast_boot_optimize = + (dcb->funcs->get_vga_enabled_displays(dc->ctx->dc_bios) == ATOM_DISPLAY_LCD1_ACTIVE); + + /* if OS doesn't light up eDP and eDP link is available, we want to disable */ + struct dc_link *edp_link_to_turnoff = NULL; + + if (can_eDP_fast_boot_optimize) { + edp_link_to_turnoff = get_link_for_edp_not_in_use(dc, context); + + if (!edp_link_to_turnoff) + dc->apply_edp_fast_boot_optimization = true; + } + + if (!dc->apply_edp_fast_boot_optimization) { + if (edp_link_to_turnoff) { + /*turn off backlight before DP_blank and encoder powered down*/ + dc->hwss.edp_backlight_control(edp_link_to_turnoff, false); + } + /*resume from S3, no vbios posting, no need to power down again*/ + power_down_all_hw_blocks(dc); + disable_vga_and_power_gate_all_controllers(dc); + if (edp_link_to_turnoff) + dc->hwss.edp_power_control(edp_link_to_turnoff, false); + } bios_set_scratch_acc_mode_change(dc->ctx->dc_bios); } @@ -1439,7 +1510,7 @@ static uint32_t compute_pstate_blackout_duration( return total_dest_line_time_ns; } -void dce110_set_displaymarks( +static void dce110_set_displaymarks( const struct dc *dc, struct dc_state *context) { @@ -1553,6 +1624,8 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx, value |= 0x80; if (events->cursor_update) value |= 0x2; + if (events->force_trigger) + value |= 0x1; #if defined(CONFIG_DRM_AMD_DC_FBC) value |= 0x84; @@ -1690,9 +1763,13 @@ static void apply_min_clocks( * Check if FBC can be enabled */ static bool should_enable_fbc(struct dc *dc, - struct dc_state *context) + struct dc_state *context, + uint32_t *pipe_idx) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0]; + uint32_t i; + struct pipe_ctx *pipe_ctx = NULL; + struct resource_context *res_ctx = &context->res_ctx; + ASSERT(dc->fbc_compressor); @@ -1704,6 +1781,14 @@ static bool should_enable_fbc(struct dc *dc, if (context->stream_count != 1) return false; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (res_ctx->pipe_ctx[i].stream) { + pipe_ctx = &res_ctx->pipe_ctx[i]; + *pipe_idx = i; + break; + } + } + /* Only supports eDP */ if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP) return false; @@ -1729,11 +1814,14 @@ static bool should_enable_fbc(struct dc *dc, static void enable_fbc(struct dc *dc, struct dc_state *context) { - if (should_enable_fbc(dc, context)) { + uint32_t pipe_idx = 0; + + if (should_enable_fbc(dc, context, &pipe_idx)) { /* Program GRPH COMPRESSED ADDRESS and PITCH */ struct compr_addr_and_pitch_params params = {0, 0, 0}; struct compressor *compr = dc->fbc_compressor; - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; + params.source_view_width = pipe_ctx->stream->timing.h_addressable; params.source_view_height = pipe_ctx->stream->timing.v_addressable; @@ -1748,36 +1836,6 @@ static void enable_fbc(struct dc *dc, } #endif -static enum dc_status apply_ctx_to_hw_fpga( - struct dc *dc, - struct dc_state *context) -{ - enum dc_status status = DC_ERROR_UNEXPECTED; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *pipe_ctx_old = - &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->stream == NULL) - continue; - - if (pipe_ctx->stream == pipe_ctx_old->stream) - continue; - - status = apply_single_controller_ctx_to_hw( - pipe_ctx, - context, - dc); - - if (status != DC_OK) - return status; - } - - return DC_OK; -} - static void dce110_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context) @@ -1847,11 +1905,6 @@ enum dc_status dce110_apply_ctx_to_hw( if (context->stream_count <= 0) return DC_OK; - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - apply_ctx_to_hw_fpga(dc, context); - return DC_OK; - } - /* Apply new context */ dcb->funcs->set_scratch_critical_state(dcb, true); @@ -2134,13 +2187,14 @@ static void program_surface_visibility(const struct dc *dc, } else if (!pipe_ctx->plane_state->visible) blank_target = true; - dce_set_blender_mode(dc->hwseq, pipe_ctx->pipe_idx, blender_mode); + dce_set_blender_mode(dc->hwseq, pipe_ctx->stream_res.tg->inst, blender_mode); pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, blank_target); } static void program_gamut_remap(struct pipe_ctx *pipe_ctx) { + int i = 0; struct xfm_grph_csc_adjustment adjust; memset(&adjust, 0, sizeof(adjust)); adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; @@ -2148,33 +2202,10 @@ static void program_gamut_remap(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - adjust.temperature_matrix[0] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[0]; - adjust.temperature_matrix[1] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[1]; - adjust.temperature_matrix[2] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[2]; - adjust.temperature_matrix[3] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[4]; - adjust.temperature_matrix[4] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[5]; - adjust.temperature_matrix[5] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[6]; - adjust.temperature_matrix[6] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[8]; - adjust.temperature_matrix[7] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[9]; - adjust.temperature_matrix[8] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[10]; + + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) + adjust.temperature_matrix[i] = + pipe_ctx->stream->gamut_remap_matrix.matrix[i]; } pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust); @@ -2198,7 +2229,7 @@ static void set_plane_config( memset(&tbl_entry, 0, sizeof(tbl_entry)); adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; - dce_enable_fe_clock(dc->hwseq, pipe_ctx->pipe_idx, true); + dce_enable_fe_clock(dc->hwseq, mi->inst, true); set_default_colors(pipe_ctx); if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) { @@ -2215,33 +2246,10 @@ static void set_plane_config( if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - adjust.temperature_matrix[0] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[0]; - adjust.temperature_matrix[1] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[1]; - adjust.temperature_matrix[2] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[2]; - adjust.temperature_matrix[3] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[4]; - adjust.temperature_matrix[4] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[5]; - adjust.temperature_matrix[5] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[6]; - adjust.temperature_matrix[6] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[8]; - adjust.temperature_matrix[7] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[9]; - adjust.temperature_matrix[8] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[10]; + + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) + adjust.temperature_matrix[i] = + pipe_ctx->stream->gamut_remap_matrix.matrix[i]; } pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust); @@ -2286,7 +2294,7 @@ static void update_plane_addr(const struct dc *dc, plane_state->status.requested_address = plane_state->address; } -void dce110_update_pending_status(struct pipe_ctx *pipe_ctx) +static void dce110_update_pending_status(struct pipe_ctx *pipe_ctx) { struct dc_plane_state *plane_state = pipe_ctx->plane_state; @@ -2527,7 +2535,7 @@ void dce110_fill_display_configs( num_cfgs++; cfg->signal = pipe_ctx->stream->signal; - cfg->pipe_idx = pipe_ctx->pipe_idx; + cfg->pipe_idx = pipe_ctx->stream_res.tg->inst; cfg->src_height = stream->src.height; cfg->src_width = stream->src.width; cfg->ddi_channel_mapping = @@ -2680,7 +2688,6 @@ static void dce110_program_front_end_for_pipe( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct xfm_grph_csc_adjustment adjust; struct out_csc_color_matrix tbl_entry; - struct pipe_ctx *cur_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx]; unsigned int i; memset(&tbl_entry, 0, sizeof(tbl_entry)); @@ -2691,7 +2698,7 @@ static void dce110_program_front_end_for_pipe( memset(&adjust, 0, sizeof(adjust)); adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; - dce_enable_fe_clock(dc->hwseq, pipe_ctx->pipe_idx, true); + dce_enable_fe_clock(dc->hwseq, mi->inst, true); set_default_colors(pipe_ctx); if (pipe_ctx->stream->csc_color_matrix.enable_adjustment @@ -2709,33 +2716,10 @@ static void dce110_program_front_end_for_pipe( if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - adjust.temperature_matrix[0] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[0]; - adjust.temperature_matrix[1] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[1]; - adjust.temperature_matrix[2] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[2]; - adjust.temperature_matrix[3] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[4]; - adjust.temperature_matrix[4] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[5]; - adjust.temperature_matrix[5] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[6]; - adjust.temperature_matrix[6] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[8]; - adjust.temperature_matrix[7] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[9]; - adjust.temperature_matrix[8] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[10]; + + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) + adjust.temperature_matrix[i] = + pipe_ctx->stream->gamut_remap_matrix.matrix[i]; } pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust); @@ -2772,10 +2756,13 @@ static void dce110_program_front_end_for_pipe( plane_state->rotation); /* Moved programming gamma from dc to hwss */ - if (cur_pipe_ctx->plane_state != pipe_ctx->plane_state) { + if (pipe_ctx->plane_state->update_flags.bits.full_update || + pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change) dc->hwss.set_input_transfer_func(pipe_ctx, pipe_ctx->plane_state); + + if (pipe_ctx->plane_state->update_flags.bits.full_update) dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream); - } dm_logger_write(dc->ctx->logger, LOG_SURFACE, "Pipe:%d 0x%x: addr hi:0x%x, " @@ -2872,7 +2859,8 @@ static void dce110_apply_ctx_for_surface( static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx) { - int fe_idx = pipe_ctx->pipe_idx; + int fe_idx = pipe_ctx->plane_res.mi ? + pipe_ctx->plane_res.mi->inst : pipe_ctx->pipe_idx; /* Do not power down fe when stream is active on dce*/ if (dc->current_state->res_ctx.pipe_ctx[fe_idx].stream) @@ -2915,6 +2903,49 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx, } } +void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) +{ + struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; + struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp; + struct mem_input *mi = pipe_ctx->plane_res.mi; + struct dc_cursor_mi_param param = { + .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz, + .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz, + .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x, + .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width, + .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz + }; + + if (pipe_ctx->plane_state->address.type + == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) + pos_cpy.enable = false; + + if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state) + pos_cpy.enable = false; + + if (ipp->funcs->ipp_cursor_set_position) + ipp->funcs->ipp_cursor_set_position(ipp, &pos_cpy, ¶m); + if (mi->funcs->set_cursor_position) + mi->funcs->set_cursor_position(mi, &pos_cpy, ¶m); +} + +void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) +{ + struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes; + + if (pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes) + pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes( + pipe_ctx->plane_res.ipp, attributes); + + if (pipe_ctx->plane_res.mi->funcs->set_cursor_attributes) + pipe_ctx->plane_res.mi->funcs->set_cursor_attributes( + pipe_ctx->plane_res.mi, attributes); + + if (pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes) + pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes( + pipe_ctx->plane_res.xfm, attributes); +} + static void ready_shared_resources(struct dc *dc, struct dc_state *context) {} static void optimize_shared_resources(struct dc *dc) {} @@ -2938,6 +2969,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .enable_stream = dce110_enable_stream, .disable_stream = dce110_disable_stream, .unblank_stream = dce110_unblank_stream, + .blank_stream = dce110_blank_stream, .enable_display_pipe_clock_gating = enable_display_pipe_clock_gating, .enable_display_power_gating = dce110_enable_display_power_gating, .disable_plane = dce110_power_down_fe, @@ -2957,6 +2989,8 @@ static const struct hw_sequencer_funcs dce110_funcs = { .edp_backlight_control = hwss_edp_backlight_control, .edp_power_control = hwss_edp_power_control, .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready, + .set_cursor_position = dce110_set_cursor_position, + .set_cursor_attribute = dce110_set_cursor_attribute }; void dce110_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h index fc637647f643..5d7e9f516827 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h @@ -39,11 +39,7 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context); -void dce110_set_display_clock(struct dc_state *context); -void dce110_set_displaymarks( - const struct dc *dc, - struct dc_state *context); void dce110_enable_stream(struct pipe_ctx *pipe_ctx); @@ -52,15 +48,14 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option); void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); +void dce110_blank_stream(struct pipe_ctx *pipe_ctx); void dce110_update_info_frame(struct pipe_ctx *pipe_ctx); void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable); -void dce110_enable_accelerated_mode(struct dc *dc); +void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context); void dce110_power_down(struct dc *dc); -void dce110_update_pending_status(struct pipe_ctx *pipe_ctx); - void dce110_fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c index feb397b5c1a3..4245e1f818a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c @@ -727,7 +727,7 @@ void dce110_opp_v_set_csc_adjustment( CSC_COLOR_MODE_GRAPHICS_OUTPUT_CSC; program_color_matrix_v( - xfm_dce, tbl_entry, GRAPHICS_CSC_ADJUST_TYPE_SW); + xfm_dce, tbl_entry, GRPH_COLOR_MATRIX_SW); /* We did everything ,now program DxOUTPUT_CSC_CONTROL */ configure_graphics_mode_v(xfm_dce, config, GRAPHICS_CSC_ADJUST_TYPE_SW, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 7c4779578fb7..c4e877ac95d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -700,7 +700,7 @@ static void get_pixel_clock_parameters( pixel_clk_params->requested_pix_clk = stream->timing.pix_clk_khz; pixel_clk_params->encoder_object_id = stream->sink->link->link_enc->id; pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->pipe_idx + 1; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; /* TODO: un-hardcode*/ pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * LINK_RATE_REF_FREQ_IN_KHZ; @@ -973,7 +973,7 @@ static struct pipe_ctx *dce110_acquire_underlay( dc->hwss.enable_display_power_gating( dc, - pipe_ctx->pipe_idx, + pipe_ctx->stream_res.tg->inst, dcb, PIPE_GATING_CONTROL_DISABLE); /* @@ -1152,7 +1152,7 @@ static bool construct( pool->base.pipe_count = pool->base.res_cap->num_timing_generator; pool->base.underlay_pipe_index = pool->base.pipe_count; - + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 150; dc->caps.i2c_speed_in_khz = 100; dc->caps.max_cursor_size = 128; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 25ca72139e5f..be7153924a70 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -2077,6 +2077,125 @@ bool dce110_arm_vert_intr(struct timing_generator *tg, uint8_t width) return true; } +static bool dce110_is_tg_enabled(struct timing_generator *tg) +{ + uint32_t addr = 0; + uint32_t value = 0; + uint32_t field = 0; + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + + addr = CRTC_REG(mmCRTC_CONTROL); + value = dm_read_reg(tg->ctx, addr); + field = get_reg_field_value(value, CRTC_CONTROL, + CRTC_CURRENT_MASTER_EN_STATE); + return field == 1; +} + +bool dce110_configure_crc(struct timing_generator *tg, + const struct crc_params *params) +{ + uint32_t cntl_addr = 0; + uint32_t addr = 0; + uint32_t value; + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + + /* Cannot configure crc on a CRTC that is disabled */ + if (!dce110_is_tg_enabled(tg)) + return false; + + cntl_addr = CRTC_REG(mmCRTC_CRC_CNTL); + + /* First, disable CRC before we configure it. */ + dm_write_reg(tg->ctx, cntl_addr, 0); + + if (!params->enable) + return true; + + /* Program frame boundaries */ + /* Window A x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL); + set_reg_field_value(value, params->windowa_x_start, + CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_START); + set_reg_field_value(value, params->windowa_x_end, + CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window A y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL); + set_reg_field_value(value, params->windowa_y_start, + CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_START); + set_reg_field_value(value, params->windowa_y_end, + CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL); + set_reg_field_value(value, params->windowb_x_start, + CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_START); + set_reg_field_value(value, params->windowb_x_end, + CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL); + set_reg_field_value(value, params->windowb_y_start, + CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_START); + set_reg_field_value(value, params->windowb_y_end, + CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Set crc mode and selection, and enable. Only using CRC0*/ + value = 0; + set_reg_field_value(value, params->continuous_mode ? 1 : 0, + CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); + set_reg_field_value(value, params->selection, + CRTC_CRC_CNTL, CRTC_CRC0_SELECT); + set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); + dm_write_reg(tg->ctx, cntl_addr, value); + + return true; +} + +bool dce110_get_crc(struct timing_generator *tg, + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) +{ + uint32_t addr = 0; + uint32_t value = 0; + uint32_t field = 0; + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + + addr = CRTC_REG(mmCRTC_CRC_CNTL); + value = dm_read_reg(tg->ctx, addr); + field = get_reg_field_value(value, CRTC_CRC_CNTL, CRTC_CRC_EN); + + /* Early return if CRC is not enabled for this CRTC */ + if (!field) + return false; + + addr = CRTC_REG(mmCRTC_CRC0_DATA_RG); + value = dm_read_reg(tg->ctx, addr); + *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR); + *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y); + + addr = CRTC_REG(mmCRTC_CRC0_DATA_B); + value = dm_read_reg(tg->ctx, addr); + *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB); + + return true; +} + static const struct timing_generator_funcs dce110_tg_funcs = { .validate_timing = dce110_tg_validate_timing, .program_timing = dce110_tg_program_timing, @@ -2112,6 +2231,9 @@ static const struct timing_generator_funcs dce110_tg_funcs = { dce110_timing_generator_set_static_screen_control, .set_test_pattern = dce110_timing_generator_set_test_pattern, .arm_vert_intr = dce110_arm_vert_intr, + .is_tg_enabled = dce110_is_tg_enabled, + .configure_crc = dce110_configure_crc, + .get_crc = dce110_get_crc, }; void dce110_timing_generator_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 232747c7c60b..734d4965dab1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -276,4 +276,10 @@ void dce110_tg_set_colors(struct timing_generator *tg, bool dce110_arm_vert_intr( struct timing_generator *tg, uint8_t width); +bool dce110_configure_crc(struct timing_generator *tg, + const struct crc_params *params); + +bool dce110_get_crc(struct timing_generator *tg, + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); + #endif /* __DC_TIMING_GENERATOR_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 663e0a047a4b..c0757dd6c03c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -1100,9 +1100,12 @@ static bool construct( *************************************************/ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 100; dc->caps.max_cursor_size = 128; + dc->caps.dual_link_dvi = true; + /************************************************* * Create resources * diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c index 75d029742f96..e96ff86d2fc3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c @@ -33,7 +33,8 @@ #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #include "reg_helper.h" #define CTX \ diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 57cd67359567..4659a4bfabaa 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -56,7 +56,8 @@ #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #include "nbio/nbio_6_1_offset.h" #include "reg_helper.h" @@ -830,11 +831,14 @@ static bool construct( /* TODO: Fill more data from GreenlandAsicCapability.cpp */ pool->base.pipe_count = res_cap.num_timing_generator; + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 100; dc->caps.max_cursor_size = 128; + dc->caps.dual_link_dvi = true; + dc->debug = debug_defaults; /************************************************* diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 0aa60e5727e0..7bee78172d85 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -27,7 +27,8 @@ #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #include "dc_types.h" #include "dc_bios_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile index bc388aa4b2f5..666fcb2bdbba 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. -DCE80 = dce80_timing_generator.o dce80_compressor.o dce80_hw_sequencer.o \ +DCE80 = dce80_timing_generator.o dce80_hw_sequencer.o \ dce80_resource.o AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80)) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_compressor.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_compressor.c deleted file mode 100644 index 951f2caba9b3..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_compressor.c +++ /dev/null @@ -1,834 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "dce/dce_8_0_d.h" -#include "dce/dce_8_0_sh_mask.h" -#include "gmc/gmc_7_1_sh_mask.h" -#include "gmc/gmc_7_1_d.h" - -#include "include/logger_interface.h" -#include "dce80_compressor.h" - -#define DCP_REG(reg)\ - (reg + cp80->offsets.dcp_offset) -#define DMIF_REG(reg)\ - (reg + cp80->offsets.dmif_offset) - -static const struct dce80_compressor_reg_offsets reg_offsets[] = { -{ - .dcp_offset = (mmDCP0_GRPH_CONTROL - mmDCP0_GRPH_CONTROL), - .dmif_offset = (mmDMIF_PG0_DPG_PIPE_DPM_CONTROL - - mmDMIF_PG0_DPG_PIPE_DPM_CONTROL), -}, -{ - .dcp_offset = (mmDCP1_GRPH_CONTROL - mmDCP0_GRPH_CONTROL), - .dmif_offset = (mmDMIF_PG1_DPG_PIPE_DPM_CONTROL - - mmDMIF_PG0_DPG_PIPE_DPM_CONTROL), -}, -{ - .dcp_offset = (mmDCP2_GRPH_CONTROL - mmDCP0_GRPH_CONTROL), - .dmif_offset = (mmDMIF_PG2_DPG_PIPE_DPM_CONTROL - - mmDMIF_PG0_DPG_PIPE_DPM_CONTROL), -}, -{ - .dcp_offset = (mmDCP3_GRPH_CONTROL - mmDCP0_GRPH_CONTROL), - .dmif_offset = (mmDMIF_PG3_DPG_PIPE_DPM_CONTROL - - mmDMIF_PG0_DPG_PIPE_DPM_CONTROL), -}, -{ - .dcp_offset = (mmDCP4_GRPH_CONTROL - mmDCP0_GRPH_CONTROL), - .dmif_offset = (mmDMIF_PG4_DPG_PIPE_DPM_CONTROL - - mmDMIF_PG0_DPG_PIPE_DPM_CONTROL), -}, -{ - .dcp_offset = (mmDCP5_GRPH_CONTROL - mmDCP0_GRPH_CONTROL), - .dmif_offset = (mmDMIF_PG5_DPG_PIPE_DPM_CONTROL - - mmDMIF_PG0_DPG_PIPE_DPM_CONTROL), -} -}; - -static const uint32_t dce8_one_lpt_channel_max_resolution = 2048 * 1200; - -enum fbc_idle_force { - /* Bit 0 - Display registers updated */ - FBC_IDLE_FORCE_DISPLAY_REGISTER_UPDATE = 0x00000001, - - /* Bit 2 - FBC_GRPH_COMP_EN register updated */ - FBC_IDLE_FORCE_GRPH_COMP_EN = 0x00000002, - /* Bit 3 - FBC_SRC_SEL register updated */ - FBC_IDLE_FORCE_SRC_SEL_CHANGE = 0x00000004, - /* Bit 4 - FBC_MIN_COMPRESSION register updated */ - FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE = 0x00000008, - /* Bit 5 - FBC_ALPHA_COMP_EN register updated */ - FBC_IDLE_FORCE_ALPHA_COMP_EN = 0x00000010, - /* Bit 6 - FBC_ZERO_ALPHA_CHUNK_SKIP_EN register updated */ - FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN = 0x00000020, - /* Bit 7 - FBC_FORCE_COPY_TO_COMP_BUF register updated */ - FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF = 0x00000040, - - /* Bit 24 - Memory write to region 0 defined by MC registers. */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION0 = 0x01000000, - /* Bit 25 - Memory write to region 1 defined by MC registers */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION1 = 0x02000000, - /* Bit 26 - Memory write to region 2 defined by MC registers */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION2 = 0x04000000, - /* Bit 27 - Memory write to region 3 defined by MC registers. */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION3 = 0x08000000, - - /* Bit 28 - Memory write from any client other than MCIF */ - FBC_IDLE_FORCE_MEMORY_WRITE_OTHER_THAN_MCIF = 0x10000000, - /* Bit 29 - CG statics screen signal is inactive */ - FBC_IDLE_FORCE_CG_STATIC_SCREEN_IS_INACTIVE = 0x20000000, -}; - -static uint32_t lpt_size_alignment(struct dce80_compressor *cp80) -{ - /*LPT_ALIGNMENT (in bytes) = ROW_SIZE * #BANKS * # DRAM CHANNELS. */ - return cp80->base.raw_size * cp80->base.banks_num * - cp80->base.dram_channels_num; -} - -static uint32_t lpt_memory_control_config(struct dce80_compressor *cp80, - uint32_t lpt_control) -{ - /*LPT MC Config */ - if (cp80->base.options.bits.LPT_MC_CONFIG == 1) { - /* POSSIBLE VALUES for LPT NUM_PIPES (DRAM CHANNELS): - * 00 - 1 CHANNEL - * 01 - 2 CHANNELS - * 02 - 4 OR 6 CHANNELS - * (Only for discrete GPU, N/A for CZ) - * 03 - 8 OR 12 CHANNELS - * (Only for discrete GPU, N/A for CZ) */ - switch (cp80->base.dram_channels_num) { - case 2: - set_reg_field_value( - lpt_control, - 1, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_NUM_PIPES); - break; - case 1: - set_reg_field_value( - lpt_control, - 0, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_NUM_PIPES); - break; - default: - dm_logger_write( - cp80->base.ctx->logger, LOG_WARNING, - "%s: Invalid LPT NUM_PIPES!!!", - __func__); - break; - } - - /* The mapping for LPT NUM_BANKS is in - * GRPH_CONTROL.GRPH_NUM_BANKS register field - * Specifies the number of memory banks for tiling - * purposes. Only applies to 2D and 3D tiling modes. - * POSSIBLE VALUES: - * 00 - DCP_GRPH_NUM_BANKS_2BANK: ADDR_SURF_2_BANK - * 01 - DCP_GRPH_NUM_BANKS_4BANK: ADDR_SURF_4_BANK - * 02 - DCP_GRPH_NUM_BANKS_8BANK: ADDR_SURF_8_BANK - * 03 - DCP_GRPH_NUM_BANKS_16BANK: ADDR_SURF_16_BANK */ - switch (cp80->base.banks_num) { - case 16: - set_reg_field_value( - lpt_control, - 3, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_NUM_BANKS); - break; - case 8: - set_reg_field_value( - lpt_control, - 2, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_NUM_BANKS); - break; - case 4: - set_reg_field_value( - lpt_control, - 1, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_NUM_BANKS); - break; - case 2: - set_reg_field_value( - lpt_control, - 0, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_NUM_BANKS); - break; - default: - dm_logger_write( - cp80->base.ctx->logger, LOG_WARNING, - "%s: Invalid LPT NUM_BANKS!!!", - __func__); - break; - } - - /* The mapping is in DMIF_ADDR_CALC. - * ADDR_CONFIG_PIPE_INTERLEAVE_SIZE register field for - * Carrizo specifies the memory interleave per pipe. - * It effectively specifies the location of pipe bits in - * the memory address. - * POSSIBLE VALUES: - * 00 - ADDR_CONFIG_PIPE_INTERLEAVE_256B: 256 byte - * interleave - * 01 - ADDR_CONFIG_PIPE_INTERLEAVE_512B: 512 byte - * interleave - */ - switch (cp80->base.channel_interleave_size) { - case 256: /*256B */ - set_reg_field_value( - lpt_control, - 0, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_PIPE_INTERLEAVE_SIZE); - break; - case 512: /*512B */ - set_reg_field_value( - lpt_control, - 1, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_PIPE_INTERLEAVE_SIZE); - break; - default: - dm_logger_write( - cp80->base.ctx->logger, LOG_WARNING, - "%s: Invalid LPT INTERLEAVE_SIZE!!!", - __func__); - break; - } - - /* The mapping for LOW_POWER_TILING_ROW_SIZE is in - * DMIF_ADDR_CALC.ADDR_CONFIG_ROW_SIZE register field - * for Carrizo. Specifies the size of dram row in bytes. - * This should match up with NOOFCOLS field in - * MC_ARB_RAMCFG (ROW_SIZE = 4 * 2 ^^ columns). - * This register DMIF_ADDR_CALC is not used by the - * hardware as it is only used for addrlib assertions. - * POSSIBLE VALUES: - * 00 - ADDR_CONFIG_1KB_ROW: Treat 1KB as DRAM row - * boundary - * 01 - ADDR_CONFIG_2KB_ROW: Treat 2KB as DRAM row - * boundary - * 02 - ADDR_CONFIG_4KB_ROW: Treat 4KB as DRAM row - * boundary */ - switch (cp80->base.raw_size) { - case 4096: /*4 KB */ - set_reg_field_value( - lpt_control, - 2, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_ROW_SIZE); - break; - case 2048: - set_reg_field_value( - lpt_control, - 1, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_ROW_SIZE); - break; - case 1024: - set_reg_field_value( - lpt_control, - 0, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_ROW_SIZE); - break; - default: - dm_logger_write( - cp80->base.ctx->logger, LOG_WARNING, - "%s: Invalid LPT ROW_SIZE!!!", - __func__); - break; - } - } else { - dm_logger_write( - cp80->base.ctx->logger, LOG_WARNING, - "%s: LPT MC Configuration is not provided", - __func__); - } - - return lpt_control; -} - -static bool is_source_bigger_than_epanel_size( - struct dce80_compressor *cp80, - uint32_t source_view_width, - uint32_t source_view_height) -{ - if (cp80->base.embedded_panel_h_size != 0 && - cp80->base.embedded_panel_v_size != 0 && - ((source_view_width * source_view_height) > - (cp80->base.embedded_panel_h_size * - cp80->base.embedded_panel_v_size))) - return true; - - return false; -} - -static uint32_t align_to_chunks_number_per_line( - struct dce80_compressor *cp80, - uint32_t pixels) -{ - return 256 * ((pixels + 255) / 256); -} - -static void wait_for_fbc_state_changed( - struct dce80_compressor *cp80, - bool enabled) -{ - uint8_t counter = 0; - uint32_t addr = mmFBC_STATUS; - uint32_t value; - - while (counter < 10) { - value = dm_read_reg(cp80->base.ctx, addr); - if (get_reg_field_value( - value, - FBC_STATUS, - FBC_ENABLE_STATUS) == enabled) - break; - udelay(10); - counter++; - } - - if (counter == 10) { - dm_logger_write( - cp80->base.ctx->logger, LOG_WARNING, - "%s: wait counter exceeded, changes to HW not applied", - __func__); - } -} - -void dce80_compressor_power_up_fbc(struct compressor *compressor) -{ - uint32_t value; - uint32_t addr; - - addr = mmFBC_CNTL; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value(value, 0, FBC_CNTL, FBC_GRPH_COMP_EN); - set_reg_field_value(value, 1, FBC_CNTL, FBC_EN); - set_reg_field_value(value, 2, FBC_CNTL, FBC_COHERENCY_MODE); - dm_write_reg(compressor->ctx, addr, value); - - addr = mmFBC_COMP_MODE; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value(value, 1, FBC_COMP_MODE, FBC_RLE_EN); - set_reg_field_value(value, 1, FBC_COMP_MODE, FBC_DPCM4_RGB_EN); - set_reg_field_value(value, 1, FBC_COMP_MODE, FBC_IND_EN); - dm_write_reg(compressor->ctx, addr, value); - - addr = mmFBC_COMP_CNTL; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value(value, 1, FBC_COMP_CNTL, FBC_DEPTH_RGB08_EN); - dm_write_reg(compressor->ctx, addr, value); - /*FBC_MIN_COMPRESSION 0 ==> 2:1 */ - /* 1 ==> 4:1 */ - /* 2 ==> 8:1 */ - /* 0xF ==> 1:1 */ - set_reg_field_value(value, 0xF, FBC_COMP_CNTL, FBC_MIN_COMPRESSION); - dm_write_reg(compressor->ctx, addr, value); - compressor->min_compress_ratio = FBC_COMPRESS_RATIO_1TO1; - - value = 0; - dm_write_reg(compressor->ctx, mmFBC_IND_LUT0, value); - - value = 0xFFFFFF; - dm_write_reg(compressor->ctx, mmFBC_IND_LUT1, value); -} - -void dce80_compressor_enable_fbc( - struct compressor *compressor, - uint32_t paths_num, - struct compr_addr_and_pitch_params *params) -{ - struct dce80_compressor *cp80 = TO_DCE80_COMPRESSOR(compressor); - - if (compressor->options.bits.FBC_SUPPORT && - (compressor->options.bits.DUMMY_BACKEND == 0) && - (!dce80_compressor_is_fbc_enabled_in_hw(compressor, NULL)) && - (!is_source_bigger_than_epanel_size( - cp80, - params->source_view_width, - params->source_view_height))) { - - uint32_t addr; - uint32_t value; - - /* Before enabling FBC first need to enable LPT if applicable - * LPT state should always be changed (enable/disable) while FBC - * is disabled */ - if (compressor->options.bits.LPT_SUPPORT && (paths_num < 2) && - (params->source_view_width * - params->source_view_height <= - dce8_one_lpt_channel_max_resolution)) { - dce80_compressor_enable_lpt(compressor); - } - - addr = mmFBC_CNTL; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value(value, 1, FBC_CNTL, FBC_GRPH_COMP_EN); - set_reg_field_value( - value, - params->inst, - FBC_CNTL, FBC_SRC_SEL); - dm_write_reg(compressor->ctx, addr, value); - - /* Keep track of enum controller_id FBC is attached to */ - compressor->is_enabled = true; - compressor->attached_inst = params->inst; - cp80->offsets = reg_offsets[params->inst]; - - /*Toggle it as there is bug in HW */ - set_reg_field_value(value, 0, FBC_CNTL, FBC_GRPH_COMP_EN); - dm_write_reg(compressor->ctx, addr, value); - set_reg_field_value(value, 1, FBC_CNTL, FBC_GRPH_COMP_EN); - dm_write_reg(compressor->ctx, addr, value); - - wait_for_fbc_state_changed(cp80, true); - } -} - -void dce80_compressor_disable_fbc(struct compressor *compressor) -{ - struct dce80_compressor *cp80 = TO_DCE80_COMPRESSOR(compressor); - - if (compressor->options.bits.FBC_SUPPORT && - dce80_compressor_is_fbc_enabled_in_hw(compressor, NULL)) { - uint32_t reg_data; - /* Turn off compression */ - reg_data = dm_read_reg(compressor->ctx, mmFBC_CNTL); - set_reg_field_value(reg_data, 0, FBC_CNTL, FBC_GRPH_COMP_EN); - dm_write_reg(compressor->ctx, mmFBC_CNTL, reg_data); - - /* Reset enum controller_id to undefined */ - compressor->attached_inst = 0; - compressor->is_enabled = false; - - /* Whenever disabling FBC make sure LPT is disabled if LPT - * supported */ - if (compressor->options.bits.LPT_SUPPORT) - dce80_compressor_disable_lpt(compressor); - - wait_for_fbc_state_changed(cp80, false); - } -} - -bool dce80_compressor_is_fbc_enabled_in_hw( - struct compressor *compressor, - uint32_t *inst) -{ - /* Check the hardware register */ - uint32_t value; - - value = dm_read_reg(compressor->ctx, mmFBC_STATUS); - if (get_reg_field_value(value, FBC_STATUS, FBC_ENABLE_STATUS)) { - if (inst != NULL) - *inst = compressor->attached_inst; - return true; - } - - value = dm_read_reg(compressor->ctx, mmFBC_CNTL); - if (get_reg_field_value(value, FBC_CNTL, FBC_GRPH_COMP_EN)) { - if (inst != NULL) - *inst = compressor->attached_inst; - return true; - } - - return false; -} - -bool dce80_compressor_is_lpt_enabled_in_hw(struct compressor *compressor) -{ - /* Check the hardware register */ - uint32_t value = dm_read_reg(compressor->ctx, - mmLOW_POWER_TILING_CONTROL); - - return get_reg_field_value( - value, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_ENABLE); -} - -void dce80_compressor_program_compressed_surface_address_and_pitch( - struct compressor *compressor, - struct compr_addr_and_pitch_params *params) -{ - struct dce80_compressor *cp80 = TO_DCE80_COMPRESSOR(compressor); - uint32_t value = 0; - uint32_t fbc_pitch = 0; - uint32_t compressed_surf_address_low_part = - compressor->compr_surface_address.addr.low_part; - - /* Clear content first. */ - dm_write_reg( - compressor->ctx, - DCP_REG(mmGRPH_COMPRESS_SURFACE_ADDRESS_HIGH), - 0); - dm_write_reg(compressor->ctx, - DCP_REG(mmGRPH_COMPRESS_SURFACE_ADDRESS), 0); - - if (compressor->options.bits.LPT_SUPPORT) { - uint32_t lpt_alignment = lpt_size_alignment(cp80); - - if (lpt_alignment != 0) { - compressed_surf_address_low_part = - ((compressed_surf_address_low_part - + (lpt_alignment - 1)) / lpt_alignment) - * lpt_alignment; - } - } - - /* Write address, HIGH has to be first. */ - dm_write_reg(compressor->ctx, - DCP_REG(mmGRPH_COMPRESS_SURFACE_ADDRESS_HIGH), - compressor->compr_surface_address.addr.high_part); - dm_write_reg(compressor->ctx, - DCP_REG(mmGRPH_COMPRESS_SURFACE_ADDRESS), - compressed_surf_address_low_part); - - fbc_pitch = align_to_chunks_number_per_line( - cp80, - params->source_view_width); - - if (compressor->min_compress_ratio == FBC_COMPRESS_RATIO_1TO1) - fbc_pitch = fbc_pitch / 8; - else - dm_logger_write( - compressor->ctx->logger, LOG_WARNING, - "%s: Unexpected DCE8 compression ratio", - __func__); - - /* Clear content first. */ - dm_write_reg(compressor->ctx, DCP_REG(mmGRPH_COMPRESS_PITCH), 0); - - /* Write FBC Pitch. */ - set_reg_field_value( - value, - fbc_pitch, - GRPH_COMPRESS_PITCH, - GRPH_COMPRESS_PITCH); - dm_write_reg(compressor->ctx, DCP_REG(mmGRPH_COMPRESS_PITCH), value); - -} - -void dce80_compressor_disable_lpt(struct compressor *compressor) -{ - struct dce80_compressor *cp80 = TO_DCE80_COMPRESSOR(compressor); - uint32_t value; - uint32_t addr; - uint32_t inx; - - /* Disable all pipes LPT Stutter */ - for (inx = 0; inx < 3; inx++) { - value = - dm_read_reg( - compressor->ctx, - DMIF_REG(mmDPG_PIPE_STUTTER_CONTROL_NONLPTCH)); - set_reg_field_value( - value, - 0, - DPG_PIPE_STUTTER_CONTROL_NONLPTCH, - STUTTER_ENABLE_NONLPTCH); - dm_write_reg( - compressor->ctx, - DMIF_REG(mmDPG_PIPE_STUTTER_CONTROL_NONLPTCH), - value); - } - - /* Disable LPT */ - addr = mmLOW_POWER_TILING_CONTROL; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value( - value, - 0, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_ENABLE); - dm_write_reg(compressor->ctx, addr, value); - - /* Clear selection of Channel(s) containing Compressed Surface */ - addr = mmGMCON_LPT_TARGET; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value( - value, - 0xFFFFFFFF, - GMCON_LPT_TARGET, - STCTRL_LPT_TARGET); - dm_write_reg(compressor->ctx, mmGMCON_LPT_TARGET, value); -} - -void dce80_compressor_enable_lpt(struct compressor *compressor) -{ - struct dce80_compressor *cp80 = TO_DCE80_COMPRESSOR(compressor); - uint32_t value; - uint32_t addr; - uint32_t value_control; - uint32_t channels; - - /* Enable LPT Stutter from Display pipe */ - value = dm_read_reg(compressor->ctx, - DMIF_REG(mmDPG_PIPE_STUTTER_CONTROL_NONLPTCH)); - set_reg_field_value( - value, - 1, - DPG_PIPE_STUTTER_CONTROL_NONLPTCH, - STUTTER_ENABLE_NONLPTCH); - dm_write_reg(compressor->ctx, - DMIF_REG(mmDPG_PIPE_STUTTER_CONTROL_NONLPTCH), value); - - /* Selection of Channel(s) containing Compressed Surface: 0xfffffff - * will disable LPT. - * STCTRL_LPT_TARGETn corresponds to channel n. */ - addr = mmLOW_POWER_TILING_CONTROL; - value_control = dm_read_reg(compressor->ctx, addr); - channels = get_reg_field_value(value_control, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_MODE); - - addr = mmGMCON_LPT_TARGET; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value( - value, - channels + 1, /* not mentioned in programming guide, - but follow DCE8.1 */ - GMCON_LPT_TARGET, - STCTRL_LPT_TARGET); - dm_write_reg(compressor->ctx, addr, value); - - /* Enable LPT */ - addr = mmLOW_POWER_TILING_CONTROL; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value( - value, - 1, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_ENABLE); - dm_write_reg(compressor->ctx, addr, value); -} - -void dce80_compressor_program_lpt_control( - struct compressor *compressor, - struct compr_addr_and_pitch_params *params) -{ - struct dce80_compressor *cp80 = TO_DCE80_COMPRESSOR(compressor); - uint32_t rows_per_channel; - uint32_t lpt_alignment; - uint32_t source_view_width; - uint32_t source_view_height; - uint32_t lpt_control = 0; - - if (!compressor->options.bits.LPT_SUPPORT) - return; - - lpt_control = dm_read_reg(compressor->ctx, - mmLOW_POWER_TILING_CONTROL); - - /* POSSIBLE VALUES for Low Power Tiling Mode: - * 00 - Use channel 0 - * 01 - Use Channel 0 and 1 - * 02 - Use Channel 0,1,2,3 - * 03 - reserved */ - switch (compressor->lpt_channels_num) { - /* case 2: - * Use Channel 0 & 1 / Not used for DCE 11 */ - case 1: - /*Use Channel 0 for LPT for DCE 11 */ - set_reg_field_value( - lpt_control, - 0, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_MODE); - break; - default: - dm_logger_write( - compressor->ctx->logger, LOG_WARNING, - "%s: Invalid selected DRAM channels for LPT!!!", - __func__); - break; - } - - lpt_control = lpt_memory_control_config(cp80, lpt_control); - - /* Program LOW_POWER_TILING_ROWS_PER_CHAN field which depends on - * FBC compressed surface pitch. - * LOW_POWER_TILING_ROWS_PER_CHAN = Roundup ((Surface Height * - * Surface Pitch) / (Row Size * Number of Channels * - * Number of Banks)). */ - rows_per_channel = 0; - lpt_alignment = lpt_size_alignment(cp80); - source_view_width = - align_to_chunks_number_per_line( - cp80, - params->source_view_width); - source_view_height = (params->source_view_height + 1) & (~0x1); - - if (lpt_alignment != 0) { - rows_per_channel = source_view_width * source_view_height * 4; - rows_per_channel = - (rows_per_channel % lpt_alignment) ? - (rows_per_channel / lpt_alignment + 1) : - rows_per_channel / lpt_alignment; - } - - set_reg_field_value( - lpt_control, - rows_per_channel, - LOW_POWER_TILING_CONTROL, - LOW_POWER_TILING_ROWS_PER_CHAN); - - dm_write_reg(compressor->ctx, - mmLOW_POWER_TILING_CONTROL, lpt_control); -} - -/* - * DCE 11 Frame Buffer Compression Implementation - */ - -void dce80_compressor_set_fbc_invalidation_triggers( - struct compressor *compressor, - uint32_t fbc_trigger) -{ - /* Disable region hit event, FBC_MEMORY_REGION_MASK = 0 (bits 16-19) - * for DCE 11 regions cannot be used - does not work with S/G - */ - uint32_t addr = mmFBC_CLIENT_REGION_MASK; - uint32_t value = dm_read_reg(compressor->ctx, addr); - - set_reg_field_value( - value, - 0, - FBC_CLIENT_REGION_MASK, - FBC_MEMORY_REGION_MASK); - dm_write_reg(compressor->ctx, addr, value); - - /* Setup events when to clear all CSM entries (effectively marking - * current compressed data invalid) - * For DCE 11 CSM metadata 11111 means - "Not Compressed" - * Used as the initial value of the metadata sent to the compressor - * after invalidation, to indicate that the compressor should attempt - * to compress all chunks on the current pass. Also used when the chunk - * is not successfully written to memory. - * When this CSM value is detected, FBC reads from the uncompressed - * buffer. Set events according to passed in value, these events are - * valid for DCE8: - * - bit 0 - display register updated - * - bit 28 - memory write from any client except from MCIF - * - bit 29 - CG static screen signal is inactive - * In addition, DCE8.1 also needs to set new DCE8.1 specific events - * that are used to trigger invalidation on certain register changes, - * for example enabling of Alpha Compression may trigger invalidation of - * FBC once bit is set. These events are as follows: - * - Bit 2 - FBC_GRPH_COMP_EN register updated - * - Bit 3 - FBC_SRC_SEL register updated - * - Bit 4 - FBC_MIN_COMPRESSION register updated - * - Bit 5 - FBC_ALPHA_COMP_EN register updated - * - Bit 6 - FBC_ZERO_ALPHA_CHUNK_SKIP_EN register updated - * - Bit 7 - FBC_FORCE_COPY_TO_COMP_BUF register updated - */ - addr = mmFBC_IDLE_FORCE_CLEAR_MASK; - value = dm_read_reg(compressor->ctx, addr); - set_reg_field_value( - value, - fbc_trigger | - FBC_IDLE_FORCE_GRPH_COMP_EN | - FBC_IDLE_FORCE_SRC_SEL_CHANGE | - FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE | - FBC_IDLE_FORCE_ALPHA_COMP_EN | - FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN | - FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF, - FBC_IDLE_FORCE_CLEAR_MASK, - FBC_IDLE_FORCE_CLEAR_MASK); - dm_write_reg(compressor->ctx, addr, value); -} - -void dce80_compressor_construct(struct dce80_compressor *compressor, - struct dc_context *ctx) -{ - struct dc_bios *bp = ctx->dc_bios; - struct embedded_panel_info panel_info; - - compressor->base.options.raw = 0; - compressor->base.options.bits.FBC_SUPPORT = true; - compressor->base.options.bits.LPT_SUPPORT = true; - /* For DCE 11 always use one DRAM channel for LPT */ - compressor->base.lpt_channels_num = 1; - compressor->base.options.bits.DUMMY_BACKEND = false; - - /* Check if this system has more than 1 DRAM channel; if only 1 then LPT - * should not be supported */ - if (compressor->base.memory_bus_width == 64) - compressor->base.options.bits.LPT_SUPPORT = false; - - compressor->base.options.bits.CLK_GATING_DISABLED = false; - - compressor->base.ctx = ctx; - compressor->base.embedded_panel_h_size = 0; - compressor->base.embedded_panel_v_size = 0; - compressor->base.memory_bus_width = ctx->asic_id.vram_width; - compressor->base.allocated_size = 0; - compressor->base.preferred_requested_size = 0; - compressor->base.min_compress_ratio = FBC_COMPRESS_RATIO_INVALID; - compressor->base.banks_num = 0; - compressor->base.raw_size = 0; - compressor->base.channel_interleave_size = 0; - compressor->base.dram_channels_num = 0; - compressor->base.lpt_channels_num = 0; - compressor->base.attached_inst = 0; - compressor->base.is_enabled = false; - - if (BP_RESULT_OK == - bp->funcs->get_embedded_panel_info(bp, &panel_info)) { - compressor->base.embedded_panel_h_size = - panel_info.lcd_timing.horizontal_addressable; - compressor->base.embedded_panel_v_size = - panel_info.lcd_timing.vertical_addressable; - } -} - -struct compressor *dce80_compressor_create(struct dc_context *ctx) -{ - struct dce80_compressor *cp80 = - kzalloc(sizeof(struct dce80_compressor), GFP_KERNEL); - - if (!cp80) - return NULL; - - dce80_compressor_construct(cp80, ctx); - return &cp80->base; -} - -void dce80_compressor_destroy(struct compressor **compressor) -{ - kfree(TO_DCE80_COMPRESSOR(*compressor)); - *compressor = NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_compressor.h b/drivers/gpu/drm/amd/display/dc/dce80/dce80_compressor.h deleted file mode 100644 index cca58b044402..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_compressor.h +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_COMPRESSOR_DCE80_H__ -#define __DC_COMPRESSOR_DCE80_H__ - -#include "../inc/compressor.h" - -#define TO_DCE80_COMPRESSOR(compressor)\ - container_of(compressor, struct dce80_compressor, base) - -struct dce80_compressor_reg_offsets { - uint32_t dcp_offset; - uint32_t dmif_offset; -}; - -struct dce80_compressor { - struct compressor base; - struct dce80_compressor_reg_offsets offsets; -}; - -struct compressor *dce80_compressor_create(struct dc_context *ctx); - -void dce80_compressor_construct(struct dce80_compressor *cp80, - struct dc_context *ctx); - -void dce80_compressor_destroy(struct compressor **cp); - -/* FBC RELATED */ -void dce80_compressor_power_up_fbc(struct compressor *cp); - -void dce80_compressor_enable_fbc(struct compressor *cp, uint32_t paths_num, - struct compr_addr_and_pitch_params *params); - -void dce80_compressor_disable_fbc(struct compressor *cp); - -void dce80_compressor_set_fbc_invalidation_triggers(struct compressor *cp, - uint32_t fbc_trigger); - -void dce80_compressor_program_compressed_surface_address_and_pitch( - struct compressor *cp, - struct compr_addr_and_pitch_params *params); - -bool dce80_compressor_is_fbc_enabled_in_hw(struct compressor *cp, - uint32_t *fbc_mapped_crtc_id); - -/* LPT RELATED */ -void dce80_compressor_enable_lpt(struct compressor *cp); - -void dce80_compressor_disable_lpt(struct compressor *cp); - -void dce80_compressor_program_lpt_control(struct compressor *cp, - struct compr_addr_and_pitch_params *params); - -bool dce80_compressor_is_lpt_enabled_in_hw(struct compressor *cp); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c index ccfcf1c0eeb3..6c6a1a16af19 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c @@ -70,47 +70,11 @@ static const struct dce80_hw_seq_reg_offsets reg_offsets[] = { /***************************PIPE_CONTROL***********************************/ -static bool dce80_enable_display_power_gating( - struct dc *dc, - uint8_t controller_id, - struct dc_bios *dcb, - enum pipe_gating_control power_gating) -{ - enum bp_result bp_result = BP_RESULT_OK; - enum bp_pipe_control_action cntl; - struct dc_context *ctx = dc->ctx; - - if (power_gating == PIPE_GATING_CONTROL_INIT) - cntl = ASIC_PIPE_INIT; - else if (power_gating == PIPE_GATING_CONTROL_ENABLE) - cntl = ASIC_PIPE_ENABLE; - else - cntl = ASIC_PIPE_DISABLE; - - if (!(power_gating == PIPE_GATING_CONTROL_INIT && controller_id != 0)){ - - bp_result = dcb->funcs->enable_disp_power_gating( - dcb, controller_id + 1, cntl); - - /* Revert MASTER_UPDATE_MODE to 0 because bios sets it 2 - * by default when command table is called - */ - dm_write_reg(ctx, - HW_REG_CRTC(mmMASTER_UPDATE_MODE, controller_id), - 0); - } - - if (bp_result == BP_RESULT_OK) - return true; - else - return false; -} - void dce80_hw_sequencer_construct(struct dc *dc) { dce110_hw_sequencer_construct(dc); - dc->hwss.enable_display_power_gating = dce80_enable_display_power_gating; + dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating; dc->hwss.pipe_control_lock = dce_pipe_control_lock; dc->hwss.set_bandwidth = dce100_set_bandwidth; } diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 8f2bd56f3461..a36c14d3d9a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -790,9 +790,11 @@ static bool dce80_construct( *************************************************/ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; pool->base.pipe_count = res_cap.num_timing_generator; + pool->base.timing_generator_count = res_cap.num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 40; dc->caps.max_cursor_size = 128; + dc->caps.dual_link_dvi = true; /************************************************* * Create resources * @@ -954,6 +956,7 @@ static bool dce81_construct( *************************************************/ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; pool->base.pipe_count = res_cap_81.num_timing_generator; + pool->base.timing_generator_count = res_cap_81.num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 40; dc->caps.max_cursor_size = 128; @@ -1119,6 +1122,7 @@ static bool dce83_construct( *************************************************/ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; pool->base.pipe_count = res_cap_83.num_timing_generator; + pool->base.timing_generator_count = res_cap_83.num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 40; dc->caps.max_cursor_size = 128; diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 265894851493..3ba4712a35ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -84,7 +84,7 @@ static const struct dce110_timing_generator_offsets reg_offsets[] = { #define DCP_REG(reg) (reg + tg110->offsets.dcp) #define DMIF_REG(reg) (reg + tg110->offsets.dmif) -void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_khz) +static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_khz) { uint64_t pix_dur; uint32_t addr = mmDMIF_PG0_DPG_PIPE_ARBITRATION_CONTROL1 @@ -115,6 +115,68 @@ static void program_timing(struct timing_generator *tg, dce110_tg_program_timing(tg, timing, use_vbios); } +static void dce80_timing_generator_enable_advanced_request( + struct timing_generator *tg, + bool enable, + const struct dc_crtc_timing *timing) +{ + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + uint32_t addr = CRTC_REG(mmCRTC_START_LINE_CONTROL); + uint32_t value = dm_read_reg(tg->ctx, addr); + + if (enable) { + set_reg_field_value( + value, + 0, + CRTC_START_LINE_CONTROL, + CRTC_LEGACY_REQUESTOR_EN); + } else { + set_reg_field_value( + value, + 1, + CRTC_START_LINE_CONTROL, + CRTC_LEGACY_REQUESTOR_EN); + } + + if ((timing->v_sync_width + timing->v_front_porch) <= 3) { + set_reg_field_value( + value, + 3, + CRTC_START_LINE_CONTROL, + CRTC_ADVANCED_START_LINE_POSITION); + set_reg_field_value( + value, + 0, + CRTC_START_LINE_CONTROL, + CRTC_PREFETCH_EN); + } else { + set_reg_field_value( + value, + 4, + CRTC_START_LINE_CONTROL, + CRTC_ADVANCED_START_LINE_POSITION); + set_reg_field_value( + value, + 1, + CRTC_START_LINE_CONTROL, + CRTC_PREFETCH_EN); + } + + set_reg_field_value( + value, + 1, + CRTC_START_LINE_CONTROL, + CRTC_PROGRESSIVE_START_LINE_EARLY); + + set_reg_field_value( + value, + 1, + CRTC_START_LINE_CONTROL, + CRTC_INTERLACE_START_LINE_EARLY); + + dm_write_reg(tg->ctx, addr, value); +} + static const struct timing_generator_funcs dce80_tg_funcs = { .validate_timing = dce110_tg_validate_timing, .program_timing = program_timing, @@ -150,6 +212,8 @@ static const struct timing_generator_funcs dce80_tg_funcs = { /* DCE8.0 overrides */ .enable_advanced_request = dce80_timing_generator_enable_advanced_request, + .configure_crc = dce110_configure_crc, + .get_crc = dce110_get_crc, }; void dce80_timing_generator_construct( @@ -176,64 +240,3 @@ void dce80_timing_generator_construct( tg110->min_h_back_porch = 4; } -void dce80_timing_generator_enable_advanced_request( - struct timing_generator *tg, - bool enable, - const struct dc_crtc_timing *timing) -{ - struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); - uint32_t addr = CRTC_REG(mmCRTC_START_LINE_CONTROL); - uint32_t value = dm_read_reg(tg->ctx, addr); - - if (enable) { - set_reg_field_value( - value, - 0, - CRTC_START_LINE_CONTROL, - CRTC_LEGACY_REQUESTOR_EN); - } else { - set_reg_field_value( - value, - 1, - CRTC_START_LINE_CONTROL, - CRTC_LEGACY_REQUESTOR_EN); - } - - if ((timing->v_sync_width + timing->v_front_porch) <= 3) { - set_reg_field_value( - value, - 3, - CRTC_START_LINE_CONTROL, - CRTC_ADVANCED_START_LINE_POSITION); - set_reg_field_value( - value, - 0, - CRTC_START_LINE_CONTROL, - CRTC_PREFETCH_EN); - } else { - set_reg_field_value( - value, - 4, - CRTC_START_LINE_CONTROL, - CRTC_ADVANCED_START_LINE_POSITION); - set_reg_field_value( - value, - 1, - CRTC_START_LINE_CONTROL, - CRTC_PREFETCH_EN); - } - - set_reg_field_value( - value, - 1, - CRTC_START_LINE_CONTROL, - CRTC_PROGRESSIVE_START_LINE_EARLY); - - set_reg_field_value( - value, - 1, - CRTC_START_LINE_CONTROL, - CRTC_INTERLACE_START_LINE_EARLY); - - dm_write_reg(tg->ctx, addr, value); -} diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.h index 9cebb24c94c8..8ff1b06bcd8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.h @@ -36,10 +36,4 @@ void dce80_timing_generator_construct( uint32_t instance, const struct dce110_timing_generator_offsets *offsets); -/******** HW programming ************/ -void dce80_timing_generator_enable_advanced_request( - struct timing_generator *tg, - bool enable, - const struct dc_crtc_timing *timing); - #endif /* __DC_TIMING_GENERATOR_DCE80_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 53ba3600ee6a..b3db6397d353 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -232,10 +232,11 @@ bool cm_helper_convert_to_custom_float( return true; } - +/* driver uses 32 regions or less, but DCN HW has 34, extra 2 are set to 0 */ #define MAX_REGIONS_NUMBER 34 #define MAX_LOW_POINT 25 -#define NUMBER_SEGMENTS 32 +#define NUMBER_REGIONS 32 +#define NUMBER_SW_SEGMENTS 16 bool cm_helper_translate_curve_to_hw_format( const struct dc_transfer_func *output_tf, @@ -251,7 +252,7 @@ bool cm_helper_translate_curve_to_hw_format( struct fixed31_32 y1_min; struct fixed31_32 y3_max; - int32_t segment_start, segment_end; + int32_t region_start, region_end; int32_t i; uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; @@ -271,11 +272,11 @@ bool cm_helper_translate_curve_to_hw_format( /* 32 segments * segments are from 2^-25 to 2^7 */ - for (i = 0; i < 32 ; i++) + for (i = 0; i < NUMBER_REGIONS ; i++) seg_distr[i] = 3; - segment_start = -25; - segment_end = 7; + region_start = -MAX_LOW_POINT; + region_end = NUMBER_REGIONS - MAX_LOW_POINT; } else { /* 10 segments * segment is from 2^-10 to 2^0 @@ -289,14 +290,14 @@ bool cm_helper_translate_curve_to_hw_format( seg_distr[5] = 4; seg_distr[6] = 4; seg_distr[7] = 4; - seg_distr[8] = 5; - seg_distr[9] = 5; + seg_distr[8] = 4; + seg_distr[9] = 4; - segment_start = -10; - segment_end = 0; + region_start = -10; + region_end = 0; } - for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++) + for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) seg_distr[i] = -1; for (k = 0; k < MAX_REGIONS_NUMBER; k++) { @@ -305,10 +306,12 @@ bool cm_helper_translate_curve_to_hw_format( } j = 0; - for (k = 0; k < (segment_end - segment_start); k++) { - increment = NUMBER_SEGMENTS / (1 << seg_distr[k]); - start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS; - for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) { + for (k = 0; k < (region_end - region_start); k++) { + increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); + start_index = (region_start + k + MAX_LOW_POINT) * + NUMBER_SW_SEGMENTS; + for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; + i += increment) { if (j == hw_points - 1) break; rgb_resulted[j].red = output_tf->tf_pts.red[i]; @@ -319,15 +322,15 @@ bool cm_helper_translate_curve_to_hw_format( } /* last point */ - start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS; + start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(segment_start)); + dal_fixed31_32_from_int(region_start)); arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(segment_end)); + dal_fixed31_32_from_int(region_end)); y_r = rgb_resulted[0].red; y_g = rgb_resulted[0].green; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index f2a08b156cf0..8725cab9ec00 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -196,7 +196,7 @@ static void dpp1_cm_set_regamma_pwl( case OPP_REGAMMA_SRGB: re_mode = 1; break; - case OPP_REGAMMA_3_6: + case OPP_REGAMMA_XVYCC: re_mode = 2; break; case OPP_REGAMMA_USER: @@ -424,6 +424,26 @@ void dpp1_set_cursor_position( } +void dpp1_dppclk_control( + struct dpp *dpp_base, + bool dppclk_div, + bool enable) +{ + struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); + + if (enable) { + if (dpp->tf_mask->DPPCLK_RATE_CONTROL) { + REG_UPDATE_2(DPP_CONTROL, + DPPCLK_RATE_CONTROL, dppclk_div, + DPP_CLOCK_ENABLE, 1); + } else { + ASSERT(dppclk_div == false); + REG_UPDATE(DPP_CONTROL, DPP_CLOCK_ENABLE, 1); + } + } else + REG_UPDATE(DPP_CONTROL, DPP_CLOCK_ENABLE, 0); +} + static const struct dpp_funcs dcn10_dpp_funcs = { .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, @@ -445,6 +465,7 @@ static const struct dpp_funcs dcn10_dpp_funcs = { .dpp_full_bypass = dpp1_full_bypass, .set_cursor_attributes = dpp1_set_cursor_attributes, .set_cursor_position = dpp1_set_cursor_position, + .dpp_dppclk_control = dpp1_dppclk_control, }; static struct dpp_caps dcn10_dpp_cap = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index f56ee4d08d89..07003d9c6bba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -112,7 +112,8 @@ SRI(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI(CURSOR0_CONTROL, CNVC_CUR, id), \ SRI(CURSOR0_COLOR0, CNVC_CUR, id), \ - SRI(CURSOR0_COLOR1, CNVC_CUR, id) + SRI(CURSOR0_COLOR1, CNVC_CUR, id), \ + SRI(DPP_CONTROL, DPP_TOP, id) @@ -306,7 +307,8 @@ TF_SF(CNVC_CUR0_CURSOR0_CONTROL, CUR0_EXPANSION_MODE, mask_sh), \ TF_SF(CNVC_CUR0_CURSOR0_CONTROL, CUR0_ENABLE, mask_sh), \ TF_SF(CNVC_CUR0_CURSOR0_COLOR0, CUR0_COLOR0, mask_sh), \ - TF_SF(CNVC_CUR0_CURSOR0_COLOR1, CUR0_COLOR1, mask_sh) + TF_SF(CNVC_CUR0_CURSOR0_COLOR1, CUR0_COLOR1, mask_sh), \ + TF_SF(DPP_TOP0_DPP_CONTROL, DPP_CLOCK_ENABLE, mask_sh) #define TF_REG_LIST_SH_MASK_DCN10(mask_sh)\ TF_REG_LIST_SH_MASK_DCN(mask_sh),\ @@ -410,7 +412,8 @@ TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_MODE, mask_sh), \ TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_PITCH, mask_sh), \ TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_LINES_PER_CHUNK, mask_sh), \ - TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh) + TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \ + TF_SF(DPP_TOP0_DPP_CONTROL, DPPCLK_RATE_CONTROL, mask_sh) #define TF_REG_FIELD_LIST(type) \ type EXT_OVERSCAN_LEFT; \ @@ -1007,7 +1010,9 @@ type CM_BYPASS; \ type FORMAT_CONTROL__ALPHA_EN; \ type CUR0_COLOR0; \ - type CUR0_COLOR1; + type CUR0_COLOR1; \ + type DPPCLK_RATE_CONTROL; \ + type DPP_CLOCK_ENABLE; struct dcn_dpp_shift { TF_REG_FIELD_LIST(uint8_t) @@ -1252,7 +1257,8 @@ struct dcn_dpp_mask { uint32_t CURSOR_CONTROL; \ uint32_t CURSOR0_CONTROL; \ uint32_t CURSOR0_COLOR0; \ - uint32_t CURSOR0_COLOR1; + uint32_t CURSOR0_COLOR1; \ + uint32_t DPP_CONTROL; struct dcn_dpp_registers { DPP_COMMON_REG_VARIABLE_LIST @@ -1287,6 +1293,12 @@ void dpp1_set_cursor_attributes( struct dpp *dpp_base, enum dc_cursor_color_format color_format); +void dpp1_set_cursor_position( + struct dpp *dpp_base, + const struct dc_cursor_position *pos, + const struct dc_cursor_mi_param *param, + uint32_t width); + bool dpp1_dscl_is_lb_conf_valid( int ceil_vratio, int num_partitions, @@ -1397,6 +1409,11 @@ void dpp1_cnv_setup ( void dpp1_full_bypass(struct dpp *dpp_base); +void dpp1_dppclk_control( + struct dpp *dpp_base, + bool dppclk_div, + bool enable); + void dpp1_construct(struct dcn10_dpp *dpp1, struct dc_context *ctx, uint32_t inst, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index a5b099023652..bd3fcdfb79c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -193,6 +193,7 @@ void dpp1_cm_set_gamut_remap( const struct dpp_grph_csc_adjustment *adjust) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); + int i = 0; if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) /* Bypass if type is bypass or hw */ @@ -201,20 +202,8 @@ void dpp1_cm_set_gamut_remap( struct fixed31_32 arr_matrix[12]; uint16_t arr_reg_val[12]; - arr_matrix[0] = adjust->temperature_matrix[0]; - arr_matrix[1] = adjust->temperature_matrix[1]; - arr_matrix[2] = adjust->temperature_matrix[2]; - arr_matrix[3] = dal_fixed31_32_zero; - - arr_matrix[4] = adjust->temperature_matrix[3]; - arr_matrix[5] = adjust->temperature_matrix[4]; - arr_matrix[6] = adjust->temperature_matrix[5]; - arr_matrix[7] = dal_fixed31_32_zero; - - arr_matrix[8] = adjust->temperature_matrix[6]; - arr_matrix[9] = adjust->temperature_matrix[7]; - arr_matrix[10] = adjust->temperature_matrix[8]; - arr_matrix[11] = dal_fixed31_32_zero; + for (i = 0; i < 12; i++) + arr_matrix[i] = adjust->temperature_matrix[i]; convert_float_matrix( arr_reg_val, arr_matrix, 12); @@ -309,6 +298,32 @@ static void dpp1_cm_get_reg_field( reg->masks.exp_resion_start_segment = dpp->tf_mask->CM_RGAM_RAMB_EXP_REGION_START_SEGMENT_B; } +static void dpp1_cm_get_degamma_reg_field( + struct dcn10_dpp *dpp, + struct xfer_func_reg *reg) +{ + reg->shifts.exp_region0_lut_offset = dpp->tf_shift->CM_DGAM_RAMA_EXP_REGION0_LUT_OFFSET; + reg->masks.exp_region0_lut_offset = dpp->tf_mask->CM_DGAM_RAMA_EXP_REGION0_LUT_OFFSET; + reg->shifts.exp_region0_num_segments = dpp->tf_shift->CM_DGAM_RAMA_EXP_REGION0_NUM_SEGMENTS; + reg->masks.exp_region0_num_segments = dpp->tf_mask->CM_DGAM_RAMA_EXP_REGION0_NUM_SEGMENTS; + reg->shifts.exp_region1_lut_offset = dpp->tf_shift->CM_DGAM_RAMA_EXP_REGION1_LUT_OFFSET; + reg->masks.exp_region1_lut_offset = dpp->tf_mask->CM_DGAM_RAMA_EXP_REGION1_LUT_OFFSET; + reg->shifts.exp_region1_num_segments = dpp->tf_shift->CM_DGAM_RAMA_EXP_REGION1_NUM_SEGMENTS; + reg->masks.exp_region1_num_segments = dpp->tf_mask->CM_DGAM_RAMA_EXP_REGION1_NUM_SEGMENTS; + + reg->shifts.field_region_end = dpp->tf_shift->CM_DGAM_RAMB_EXP_REGION_END_B; + reg->masks.field_region_end = dpp->tf_mask->CM_DGAM_RAMB_EXP_REGION_END_B; + reg->shifts.field_region_end_slope = dpp->tf_shift->CM_DGAM_RAMB_EXP_REGION_END_SLOPE_B; + reg->masks.field_region_end_slope = dpp->tf_mask->CM_DGAM_RAMB_EXP_REGION_END_SLOPE_B; + reg->shifts.field_region_end_base = dpp->tf_shift->CM_DGAM_RAMB_EXP_REGION_END_BASE_B; + reg->masks.field_region_end_base = dpp->tf_mask->CM_DGAM_RAMB_EXP_REGION_END_BASE_B; + reg->shifts.field_region_linear_slope = dpp->tf_shift->CM_DGAM_RAMB_EXP_REGION_LINEAR_SLOPE_B; + reg->masks.field_region_linear_slope = dpp->tf_mask->CM_DGAM_RAMB_EXP_REGION_LINEAR_SLOPE_B; + reg->shifts.exp_region_start = dpp->tf_shift->CM_DGAM_RAMB_EXP_REGION_START_B; + reg->masks.exp_region_start = dpp->tf_mask->CM_DGAM_RAMB_EXP_REGION_START_B; + reg->shifts.exp_resion_start_segment = dpp->tf_shift->CM_DGAM_RAMB_EXP_REGION_START_SEGMENT_B; + reg->masks.exp_resion_start_segment = dpp->tf_mask->CM_DGAM_RAMB_EXP_REGION_START_SEGMENT_B; +} void dpp1_cm_set_output_csc_adjustment( struct dpp *dpp_base, const uint16_t *regval) @@ -513,7 +528,7 @@ void dpp1_program_degamma_lutb_settings( struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); struct xfer_func_reg gam_regs; - dpp1_cm_get_reg_field(dpp, &gam_regs); + dpp1_cm_get_degamma_reg_field(dpp, &gam_regs); gam_regs.start_cntl_b = REG(CM_DGAM_RAMB_START_CNTL_B); gam_regs.start_cntl_g = REG(CM_DGAM_RAMB_START_CNTL_G); @@ -542,7 +557,7 @@ void dpp1_program_degamma_luta_settings( struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); struct xfer_func_reg gam_regs; - dpp1_cm_get_reg_field(dpp, &gam_regs); + dpp1_cm_get_degamma_reg_field(dpp, &gam_regs); gam_regs.start_cntl_b = REG(CM_DGAM_RAMA_START_CNTL_B); gam_regs.start_cntl_g = REG(CM_DGAM_RAMA_START_CNTL_G); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 585b33384002..39b72f696ae9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -73,6 +73,9 @@ static void hubp1_disconnect(struct hubp *hubp) REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 1); + + REG_UPDATE(CURSOR_CONTROL, + CURSOR_ENABLE, 0); } static void hubp1_set_hubp_blank_en(struct hubp *hubp, bool blank) @@ -296,8 +299,9 @@ bool hubp1_program_surface_flip_and_addr( if (address->grph.addr.quad_part == 0) break; - REG_UPDATE(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface); + REG_UPDATE_2(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, address->tmz_surface, + PRIMARY_META_SURFACE_TMZ, address->tmz_surface); if (address->grph.meta_addr.quad_part != 0) { REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, @@ -322,8 +326,11 @@ bool hubp1_program_surface_flip_and_addr( || address->video_progressive.chroma_addr.quad_part == 0) break; - REG_UPDATE(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface); + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, address->tmz_surface, + PRIMARY_SURFACE_TMZ_C, address->tmz_surface, + PRIMARY_META_SURFACE_TMZ, address->tmz_surface, + PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface); if (address->video_progressive.luma_meta_addr.quad_part != 0) { REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, @@ -365,8 +372,11 @@ bool hubp1_program_surface_flip_and_addr( if (address->grph_stereo.right_addr.quad_part == 0) break; - REG_UPDATE(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface); + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, address->tmz_surface, + PRIMARY_SURFACE_TMZ_C, address->tmz_surface, + PRIMARY_META_SURFACE_TMZ, address->tmz_surface, + PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface); if (address->grph_stereo.right_meta_addr.quad_part != 0) { @@ -909,6 +919,21 @@ void hubp1_cursor_set_position( /* TODO Handle surface pixel formats other than 4:4:4 */ } +void hubp1_clk_cntl(struct hubp *hubp, bool enable) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + uint32_t clk_enable = enable ? 1 : 0; + + REG_UPDATE(HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, clk_enable); +} + +void hubp1_vtg_sel(struct hubp *hubp, uint32_t otg_inst) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCHUBP_CNTL, HUBP_VTG_SEL, otg_inst); +} + static struct hubp_funcs dcn10_hubp_funcs = { .hubp_program_surface_flip_and_addr = hubp1_program_surface_flip_and_addr, @@ -925,6 +950,8 @@ static struct hubp_funcs dcn10_hubp_funcs = { .set_cursor_attributes = hubp1_cursor_set_attributes, .set_cursor_position = hubp1_cursor_set_position, .hubp_disconnect = hubp1_disconnect, + .hubp_clk_cntl = hubp1_clk_cntl, + .hubp_vtg_sel = hubp1_vtg_sel, }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 33e91d9c010f..4a3703e12ea1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -96,7 +96,8 @@ SRI(DCN_SURF0_TTU_CNTL1, HUBPREQ, id),\ SRI(DCN_SURF1_TTU_CNTL0, HUBPREQ, id),\ SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\ - SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) + SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id),\ + SRI(HUBP_CLK_CNTL, HUBP, id) #define HUBP_REG_LIST_DCN10(id)\ HUBP_REG_LIST_DCN(id),\ @@ -230,7 +231,8 @@ uint32_t CURSOR_CONTROL; \ uint32_t CURSOR_POSITION; \ uint32_t CURSOR_HOT_SPOT; \ - uint32_t CURSOR_DST_OFFSET + uint32_t CURSOR_DST_OFFSET; \ + uint32_t HUBP_CLK_CNTL #define HUBP_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix @@ -240,6 +242,7 @@ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_BANKS, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\ @@ -293,6 +296,9 @@ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_C, SURFACE_EARLIEST_INUSE_ADDRESS_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, SURFACE_EARLIEST_INUSE_ADDRESS_HIGH_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_TMZ, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_TMZ_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_META_SURFACE_TMZ, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_META_SURFACE_TMZ_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_DCC_EN, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_DCC_IND_64B_BLK, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\ @@ -352,7 +358,8 @@ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, mask_sh),\ - HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, mask_sh) + HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, mask_sh),\ + HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh) #define HUBP_MASK_SH_LIST_DCN10(mask_sh)\ HUBP_MASK_SH_LIST_DCN(mask_sh),\ @@ -398,6 +405,7 @@ type HUBP_BLANK_EN;\ type HUBP_TTU_DISABLE;\ type HUBP_NO_OUTSTANDING_REQ;\ + type HUBP_VTG_SEL;\ type HUBP_UNDERFLOW_STATUS;\ type NUM_PIPES;\ type NUM_BANKS;\ @@ -452,6 +460,13 @@ type SURFACE_EARLIEST_INUSE_ADDRESS_C;\ type SURFACE_EARLIEST_INUSE_ADDRESS_HIGH_C;\ type PRIMARY_SURFACE_TMZ;\ + type PRIMARY_SURFACE_TMZ_C;\ + type SECONDARY_SURFACE_TMZ;\ + type SECONDARY_SURFACE_TMZ_C;\ + type PRIMARY_META_SURFACE_TMZ;\ + type PRIMARY_META_SURFACE_TMZ_C;\ + type SECONDARY_META_SURFACE_TMZ;\ + type SECONDARY_META_SURFACE_TMZ_C;\ type PRIMARY_SURFACE_DCC_EN;\ type PRIMARY_SURFACE_DCC_IND_64B_BLK;\ type DET_BUF_PLANE1_BASE_ADDRESS;\ @@ -524,6 +539,7 @@ type VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_LSB;\ type ENABLE_L1_TLB;\ type SYSTEM_ACCESS_MODE;\ + type HUBP_CLOCK_ENABLE;\ type MC_VM_SYSTEM_APERTURE_DEFAULT_SYSTEM;\ type MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB;\ type MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB;\ @@ -653,6 +669,9 @@ void min_set_viewport(struct hubp *hubp, const struct rect *viewport, const struct rect *viewport_c); +void hubp1_clk_cntl(struct hubp *hubp, bool enable); +void hubp1_vtg_sel(struct hubp *hubp, uint32_t otg_inst); + void dcn10_hubp_construct( struct dcn10_hubp *hubp1, struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 82572863acab..29dc37fbdb26 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -133,7 +133,7 @@ void dcn10_log_hw_state(struct dc *dc) DTN_INFO("[%d]:\t %xh \t %xh \t %d \t %d \t " "%xh \t %xh \t %xh \t " "%d \t %d \t %d \t %xh \t", - i, + hubp->inst, s.pixel_format, s.inuse_addr_hi, s.viewport_width, @@ -155,7 +155,7 @@ void dcn10_log_hw_state(struct dc *dc) DTN_INFO("OTG:\t v_bs \t v_be \t v_ss \t v_se \t vpol \t vmax \t vmin \t " "h_bs \t h_be \t h_ss \t h_se \t hpol \t htot \t vtot \t underflow\n"); - for (i = 0; i < pool->res_cap->num_timing_generator; i++) { + for (i = 0; i < pool->timing_generator_count; i++) { struct timing_generator *tg = pool->timing_generators[i]; struct dcn_otg_state s = {0}; @@ -168,7 +168,7 @@ void dcn10_log_hw_state(struct dc *dc) DTN_INFO("[%d]:\t %d \t %d \t %d \t %d \t " "%d \t %d \t %d \t %d \t %d \t %d \t " "%d \t %d \t %d \t %d \t %d \t ", - i, + tg->inst, s.v_blank_start, s.v_blank_end, s.v_sync_a_start, @@ -193,26 +193,6 @@ void dcn10_log_hw_state(struct dc *dc) DTN_INFO_END(); } -static void enable_dppclk( - struct dce_hwseq *hws, - uint8_t plane_id, - uint32_t requested_pix_clk, - bool dppclk_div) -{ - dm_logger_write(hws->ctx->logger, LOG_SURFACE, - "dppclk_rate_control for pipe %d programed to %d\n", - plane_id, - dppclk_div); - - if (hws->shifts->DPPCLK_RATE_CONTROL) - REG_UPDATE_2(DPP_CONTROL[plane_id], - DPPCLK_RATE_CONTROL, dppclk_div, - DPP_CLOCK_ENABLE, 1); - else - REG_UPDATE(DPP_CONTROL[plane_id], - DPP_CLOCK_ENABLE, 1); -} - static void enable_power_gating_plane( struct dce_hwseq *hws, bool enable) @@ -597,29 +577,22 @@ static void dcn10_verify_allow_pstate_change_high(struct dc *dc) /* trigger HW to start disconnect plane from stream on the next vsync */ static void plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) { - int fe_idx = pipe_ctx->pipe_idx; - struct hubp *hubp = dc->res_pool->hubps[fe_idx]; + struct hubp *hubp = pipe_ctx->plane_res.hubp; + int dpp_id = pipe_ctx->plane_res.dpp->inst; struct mpc *mpc = dc->res_pool->mpc; - int opp_id; struct mpc_tree *mpc_tree_params; struct mpcc *mpcc_to_remove = NULL; + struct output_pixel_processor *opp = pipe_ctx->stream_res.opp; - /* look at tree rather than mi here to know if we already reset */ - for (opp_id = 0; opp_id < dc->res_pool->pipe_count; opp_id++) { - struct output_pixel_processor *opp = dc->res_pool->opps[opp_id]; - - mpc_tree_params = &(opp->mpc_tree_params); - mpcc_to_remove = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, fe_idx); - if (mpcc_to_remove != NULL) - break; - } + mpc_tree_params = &(opp->mpc_tree_params); + mpcc_to_remove = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, dpp_id); /*Already reset*/ - if (opp_id == dc->res_pool->pipe_count) + if (mpcc_to_remove == NULL) return; mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove); - dc->res_pool->opps[opp_id]->mpcc_disconnect_pending[fe_idx] = true; + opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; dc->optimized_required = true; @@ -630,21 +603,21 @@ static void plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) dcn10_verify_allow_pstate_change_high(dc); } -static void plane_atomic_power_down(struct dc *dc, int fe_idx) +static void plane_atomic_power_down(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; - struct dpp *dpp = dc->res_pool->dpps[fe_idx]; + struct dpp *dpp = pipe_ctx->plane_res.dpp; if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - dpp_pg_control(hws, fe_idx, false); - hubp_pg_control(hws, fe_idx, false); + dpp_pg_control(hws, dpp->inst, false); + hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, false); dpp->funcs->dpp_reset(dpp); REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); dm_logger_write(dc->ctx->logger, LOG_DEBUG, - "Power gated front end %d\n", fe_idx); + "Power gated front end %d\n", pipe_ctx->pipe_idx); } } @@ -653,26 +626,25 @@ static void plane_atomic_power_down(struct dc *dc, int fe_idx) */ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) { - int fe_idx = pipe_ctx->pipe_idx; - struct dce_hwseq *hws = dc->hwseq; - struct hubp *hubp = dc->res_pool->hubps[fe_idx]; + struct hubp *hubp = pipe_ctx->plane_res.hubp; + struct dpp *dpp = pipe_ctx->plane_res.dpp; int opp_id = hubp->opp_id; dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx); - REG_UPDATE(HUBP_CLK_CNTL[fe_idx], - HUBP_CLOCK_ENABLE, 0); - REG_UPDATE(DPP_CONTROL[fe_idx], - DPP_CLOCK_ENABLE, 0); + hubp->funcs->hubp_clk_cntl(hubp, false); - if (opp_id != 0xf && dc->res_pool->opps[opp_id]->mpc_tree_params.opp_list == NULL) - REG_UPDATE(OPP_PIPE_CONTROL[opp_id], - OPP_PIPE_CLOCK_EN, 0); + dpp->funcs->dpp_dppclk_control(dpp, false, false); + + if (opp_id != 0xf && pipe_ctx->stream_res.opp->mpc_tree_params.opp_list == NULL) + pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control( + pipe_ctx->stream_res.opp, + false); hubp->power_gated = true; dc->optimized_required = false; /* We're powering off, no need to optimize */ - plane_atomic_power_down(dc, fe_idx); + plane_atomic_power_down(dc, pipe_ctx); pipe_ctx->stream = NULL; memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res)); @@ -684,7 +656,7 @@ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) { - if (dc->res_pool->hubps[pipe_ctx->pipe_idx]->power_gated) + if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated) return; plane_atomic_disable(dc, pipe_ctx); @@ -720,26 +692,25 @@ static void dcn10_init_hw(struct dc *dc) } enable_power_gating_plane(dc->hwseq, true); - return; - } - /* end of FPGA. Below if real ASIC */ + } else { - if (!dcb->funcs->is_accelerated_mode(dcb)) { - bios_golden_init(dc); - disable_vga(dc->hwseq); - } + if (!dcb->funcs->is_accelerated_mode(dcb)) { + bios_golden_init(dc); + disable_vga(dc->hwseq); + } - for (i = 0; i < dc->link_count; i++) { - /* Power up AND update implementation according to the - * required signal (which may be different from the - * default signal on connector). - */ - struct dc_link *link = dc->links[i]; + for (i = 0; i < dc->link_count; i++) { + /* Power up AND update implementation according to the + * required signal (which may be different from the + * default signal on connector). + */ + struct dc_link *link = dc->links[i]; - if (link->link_enc->connector.id == CONNECTOR_ID_EDP) - dc->hwss.edp_power_control(link, true); + if (link->link_enc->connector.id == CONNECTOR_ID_EDP) + dc->hwss.edp_power_control(link, true); - link->link_enc->funcs->hw_init(link->link_enc); + link->link_enc->funcs->hw_init(link->link_enc); + } } for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -768,18 +739,21 @@ static void dcn10_init_hw(struct dc *dc) struct timing_generator *tg = dc->res_pool->timing_generators[i]; struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = dc->res_pool->hubps[i]; + struct dpp *dpp = dc->res_pool->dpps[i]; pipe_ctx->stream_res.tg = tg; pipe_ctx->pipe_idx = i; pipe_ctx->plane_res.hubp = hubp; - hubp->mpcc_id = i; + pipe_ctx->plane_res.dpp = dpp; + pipe_ctx->plane_res.mpcc_inst = dpp->inst; + hubp->mpcc_id = dpp->inst; hubp->opp_id = 0xf; hubp->power_gated = false; dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst; dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL; - dc->res_pool->opps[i]->mpcc_disconnect_pending[i] = true; + dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; plane_atomic_disconnect(dc, pipe_ctx); @@ -804,6 +778,10 @@ static void dcn10_init_hw(struct dc *dc) tg->funcs->tg_init(tg); } + /* end of FPGA. Below if real ASIC */ + if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + return; + for (i = 0; i < dc->res_pool->audio_count; i++) { struct audio *audio = dc->res_pool->audios[i]; @@ -922,7 +900,10 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx, if (plane_state->in_transfer_func) tf = plane_state->in_transfer_func; - if (plane_state->gamma_correction && dce_use_lut(plane_state)) + if (plane_state->gamma_correction && + plane_state->gamma_correction->is_identity) + dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS); + else if (plane_state->gamma_correction && dce_use_lut(plane_state->format)) dpp_base->funcs->dpp_program_input_lut(dpp_base, plane_state->gamma_correction); if (tf == NULL) @@ -993,8 +974,6 @@ static void dcn10_pipe_control_lock( struct pipe_ctx *pipe, bool lock) { - struct hubp *hubp = NULL; - hubp = dc->res_pool->hubps[pipe->pipe_idx]; /* use TG master update lock to lock everything on the TG * therefore only top pipe need to lock */ @@ -1097,7 +1076,7 @@ static void dcn10_enable_per_frame_crtc_position_reset( DC_SYNC_INFO("Waiting for trigger\n"); - for (i = 1; i < group_size; i++) + for (i = 0; i < group_size; i++) wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[i]->stream_res.tg); DC_SYNC_INFO("Multi-display sync is complete\n"); @@ -1323,15 +1302,15 @@ static void dcn10_enable_plane( undo_DEGVIDCN10_253_wa(dc); power_on_plane(dc->hwseq, - pipe_ctx->pipe_idx); + pipe_ctx->plane_res.hubp->inst); /* enable DCFCLK current DCHUB */ - REG_UPDATE(HUBP_CLK_CNTL[pipe_ctx->pipe_idx], - HUBP_CLOCK_ENABLE, 1); + pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl(pipe_ctx->plane_res.hubp, true); /* make sure OPP_PIPE_CLOCK_EN = 1 */ - REG_UPDATE(OPP_PIPE_CONTROL[pipe_ctx->stream_res.tg->inst], - OPP_PIPE_CLOCK_EN, 1); + pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control( + pipe_ctx->stream_res.opp, + true); /* TODO: enable/disable in dm as per update type. if (plane_state) { @@ -1380,6 +1359,7 @@ static void dcn10_enable_plane( static void program_gamut_remap(struct pipe_ctx *pipe_ctx) { + int i = 0; struct dpp_grph_csc_adjustment adjust; memset(&adjust, 0, sizeof(adjust)); adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; @@ -1387,33 +1367,9 @@ static void program_gamut_remap(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - adjust.temperature_matrix[0] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[0]; - adjust.temperature_matrix[1] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[1]; - adjust.temperature_matrix[2] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[2]; - adjust.temperature_matrix[3] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[4]; - adjust.temperature_matrix[4] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[5]; - adjust.temperature_matrix[5] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[6]; - adjust.temperature_matrix[6] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[8]; - adjust.temperature_matrix[7] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[9]; - adjust.temperature_matrix[8] = - pipe_ctx->stream-> - gamut_remap_matrix.matrix[10]; + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) + adjust.temperature_matrix[i] = + pipe_ctx->stream->gamut_remap_matrix.matrix[i]; } pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp, &adjust); @@ -1474,7 +1430,7 @@ static bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx) return false; } -static bool is_rgb_cspace(enum dc_color_space output_color_space) +bool is_rgb_cspace(enum dc_color_space output_color_space) { switch (output_color_space) { case COLOR_SPACE_SRGB: @@ -1702,7 +1658,6 @@ static void update_dchubp_dpp( struct pipe_ctx *pipe_ctx, struct dc_state *context) { - struct dce_hwseq *hws = dc->hwseq; struct hubp *hubp = pipe_ctx->plane_res.hubp; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_plane_state *plane_state = pipe_ctx->plane_state; @@ -1710,11 +1665,11 @@ static void update_dchubp_dpp( /* depends on DML calculation, DPP clock value may change dynamically */ if (plane_state->update_flags.bits.full_update) { - enable_dppclk( - dc->hwseq, - pipe_ctx->pipe_idx, - pipe_ctx->stream_res.pix_clk_params.requested_pix_clk, - context->bw.dcn.calc_clk.dppclk_div); + dpp->funcs->dpp_dppclk_control( + dpp, + context->bw.dcn.calc_clk.dppclk_div, + true); + dc->current_state->bw.dcn.cur_clk.dppclk_div = context->bw.dcn.calc_clk.dppclk_div; context->bw.dcn.cur_clk.dppclk_div = context->bw.dcn.calc_clk.dppclk_div; @@ -1725,7 +1680,7 @@ static void update_dchubp_dpp( * VTG is 1:1 mapping with OTG. Each pipe HUBP will select which VTG */ if (plane_state->update_flags.bits.full_update) { - REG_UPDATE(DCHUBP_CNTL[pipe_ctx->pipe_idx], HUBP_VTG_SEL, pipe_ctx->stream_res.tg->inst); + hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst); hubp->funcs->hubp_setup( hubp, @@ -1761,6 +1716,11 @@ static void update_dchubp_dpp( &pipe_ctx->plane_res.scl_data.viewport_c); } + if (pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { + dc->hwss.set_cursor_position(pipe_ctx); + dc->hwss.set_cursor_attribute(pipe_ctx); + } + if (plane_state->update_flags.bits.full_update) { /*gamut remap*/ program_gamut_remap(pipe_ctx); @@ -1773,6 +1733,7 @@ static void update_dchubp_dpp( } if (plane_state->update_flags.bits.full_update || + plane_state->update_flags.bits.pixel_format_change || plane_state->update_flags.bits.horizontal_mirror_change || plane_state->update_flags.bits.rotation_change || plane_state->update_flags.bits.swizzle_change || @@ -1822,15 +1783,14 @@ static void program_all_pipe_in_tree( } if (pipe_ctx->plane_state != NULL) { - struct pipe_ctx *cur_pipe_ctx = - &dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx]; - if (pipe_ctx->plane_state->update_flags.bits.full_update) dcn10_enable_plane(dc, pipe_ctx, context); update_dchubp_dpp(dc, pipe_ctx, context); - if (cur_pipe_ctx->plane_state != pipe_ctx->plane_state) + if (pipe_ctx->plane_state->update_flags.bits.full_update || + pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change) dc->hwss.set_input_transfer_func(pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish @@ -1940,7 +1900,7 @@ static void dcn10_apply_ctx_for_surface( tg = top_pipe_to_program->stream_res.tg; - tg->funcs->lock(tg); + dcn10_pipe_control_lock(dc, top_pipe_to_program, true); if (num_planes == 0) { @@ -1989,7 +1949,7 @@ static void dcn10_apply_ctx_for_surface( if (num_planes > 0) program_all_pipe_in_tree(dc, top_pipe_to_program, context); - tg->funcs->unlock(tg); + dcn10_pipe_control_lock(dc, top_pipe_to_program, false); if (num_planes == 0) false_optc_underflow_wa(dc, stream, tg); @@ -2184,6 +2144,8 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx, value |= 0x80; if (events->cursor_update) value |= 0x2; + if (events->force_trigger) + value |= 0x1; for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> @@ -2256,12 +2218,24 @@ static void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc) return; } +static struct hubp *get_hubp_by_inst(struct resource_pool *res_pool, int mpcc_inst) +{ + int i; + + for (i = 0; i < res_pool->pipe_count; i++) { + if (res_pool->hubps[i]->inst == mpcc_inst) + return res_pool->hubps[i]; + } + ASSERT(false); + return NULL; +} + static void dcn10_wait_for_mpcc_disconnect( struct dc *dc, struct resource_pool *res_pool, struct pipe_ctx *pipe_ctx) { - int i; + int mpcc_inst; if (dc->debug.sanity_checks) { dcn10_verify_allow_pstate_change_high(dc); @@ -2270,11 +2244,13 @@ static void dcn10_wait_for_mpcc_disconnect( if (!pipe_ctx->stream_res.opp) return; - for (i = 0; i < MAX_PIPES; i++) { - if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[i]) { - res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, i); - pipe_ctx->stream_res.opp->mpcc_disconnect_pending[i] = false; - res_pool->hubps[i]->funcs->set_blank(res_pool->hubps[i], true); + for (mpcc_inst = 0; mpcc_inst < MAX_PIPES; mpcc_inst++) { + if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) { + struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst); + + res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); + pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; + hubp->funcs->set_blank(hubp, true); /*dm_logger_write(dc->ctx->logger, LOG_ERROR, "[debug_mpo: wait_for_mpcc finished waiting on mpcc %d]\n", i);*/ @@ -2296,7 +2272,7 @@ static bool dcn10_dummy_display_power_gating( return true; } -void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx) +static void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx) { struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct timing_generator *tg = pipe_ctx->stream_res.tg; @@ -2316,12 +2292,46 @@ void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx) } } -void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data) +static void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data) { if (hws->ctx->dc->res_pool->hubbub != NULL) hubbub1_update_dchub(hws->ctx->dc->res_pool->hubbub, dh_data); } +static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) +{ + struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; + struct hubp *hubp = pipe_ctx->plane_res.hubp; + struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dc_cursor_mi_param param = { + .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz, + .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz, + .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x, + .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width, + .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz + }; + + if (pipe_ctx->plane_state->address.type + == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) + pos_cpy.enable = false; + + if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state) + pos_cpy.enable = false; + + hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); + dpp->funcs->set_cursor_position(dpp, &pos_cpy, ¶m, hubp->curs_attr.width); +} + +static void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx) +{ + struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes; + + pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes( + pipe_ctx->plane_res.hubp, attributes); + pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes( + pipe_ctx->plane_res.dpp, attributes->color_format); +} + static const struct hw_sequencer_funcs dcn10_funcs = { .program_gamut_remap = program_gamut_remap, .program_csc_matrix = program_csc_matrix, @@ -2342,6 +2352,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .enable_stream = dce110_enable_stream, .disable_stream = dce110_disable_stream, .unblank_stream = dce110_unblank_stream, + .blank_stream = dce110_blank_stream, .enable_display_power_gating = dcn10_dummy_display_power_gating, .disable_plane = dcn10_disable_plane, .pipe_control_lock = dcn10_pipe_control_lock, @@ -2362,6 +2373,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .edp_backlight_control = hwss_edp_backlight_control, .edp_power_control = hwss_edp_power_control, .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index b9d326082717..6c526b5095d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -35,5 +35,6 @@ extern void fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg); +bool is_rgb_cspace(enum dc_color_space output_color_space); #endif /* __DC_HWSS_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h index d7b5bd20352a..819b749c6e31 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h @@ -33,7 +33,6 @@ #define IPP_REG_LIST_DCN(id) \ SRI(FORMAT_CONTROL, CNVC_CFG, id), \ - SRI(DPP_CONTROL, DPP_TOP, id), \ SRI(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI(CURSOR0_CONTROL, CNVC_CUR, id), \ SRI(CURSOR0_COLOR0, CNVC_CUR, id), \ @@ -130,7 +129,6 @@ struct dcn10_ipp_mask { }; struct dcn10_ipp_registers { - uint32_t DPP_CONTROL; uint32_t CURSOR_SETTINS; uint32_t CURSOR_SETTINGS; uint32_t CNVC_SURFACE_PIXEL_FORMAT; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c index f6ba0eef4489..77a1a9d541a4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c @@ -367,6 +367,14 @@ void opp1_program_oppbuf( } +void opp1_pipe_clock_control(struct output_pixel_processor *opp, bool enable) +{ + struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp); + uint32_t regval = enable ? 1 : 0; + + REG_UPDATE(OPP_PIPE_CONTROL, OPP_PIPE_CLOCK_EN, regval); +} + /*****************************************/ /* Constructor, Destructor */ /*****************************************/ @@ -382,6 +390,7 @@ static struct opp_funcs dcn10_opp_funcs = { .opp_program_fmt = opp1_program_fmt, .opp_program_bit_depth_reduction = opp1_program_bit_depth_reduction, .opp_program_stereo = opp1_program_stereo, + .opp_pipe_clock_control = opp1_pipe_clock_control, .opp_destroy = opp1_destroy }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h index bc5058af6266..0f10adea000c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h @@ -44,7 +44,8 @@ SRI(FMT_MAP420_MEMORY_CONTROL, FMT, id), \ SRI(OPPBUF_CONTROL, OPPBUF, id),\ SRI(OPPBUF_3D_PARAMETERS_0, OPPBUF, id), \ - SRI(OPPBUF_3D_PARAMETERS_1, OPPBUF, id) + SRI(OPPBUF_3D_PARAMETERS_1, OPPBUF, id), \ + SRI(OPP_PIPE_CONTROL, OPP_PIPE, id) #define OPP_REG_LIST_DCN10(id) \ OPP_REG_LIST_DCN(id) @@ -61,7 +62,8 @@ uint32_t OPPBUF_CONTROL; \ uint32_t OPPBUF_CONTROL1; \ uint32_t OPPBUF_3D_PARAMETERS_0; \ - uint32_t OPPBUF_3D_PARAMETERS_1 + uint32_t OPPBUF_3D_PARAMETERS_1; \ + uint32_t OPP_PIPE_CONTROL #define OPP_MASK_SH_LIST_DCN(mask_sh) \ OPP_SF(FMT0_FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, mask_sh), \ @@ -89,7 +91,8 @@ OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, mask_sh),\ OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, mask_sh),\ OPP_SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE1_SIZE, mask_sh), \ - OPP_SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE2_SIZE, mask_sh) + OPP_SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE2_SIZE, mask_sh), \ + OPP_SF(OPP_PIPE0_OPP_PIPE_CONTROL, OPP_PIPE_CLOCK_EN, mask_sh) #define OPP_MASK_SH_LIST_DCN10(mask_sh) \ OPP_MASK_SH_LIST_DCN(mask_sh), \ @@ -125,7 +128,8 @@ type OPPBUF_OVERLAP_PIXEL_NUM;\ type OPPBUF_NUM_SEGMENT_PADDED_PIXELS; \ type OPPBUF_3D_VACT_SPACE1_SIZE; \ - type OPPBUF_3D_VACT_SPACE2_SIZE + type OPPBUF_3D_VACT_SPACE2_SIZE; \ + type OPP_PIPE_CLOCK_EN struct dcn10_opp_registers { OPP_COMMON_REG_VARIABLE_LIST; @@ -176,6 +180,8 @@ void opp1_program_stereo( bool enable, const struct dc_crtc_timing *timing); +void opp1_pipe_clock_control(struct output_pixel_processor *opp, bool enable); + void opp1_destroy(struct output_pixel_processor **opp); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index a3c7c2012f05..014543235df8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -131,7 +131,6 @@ struct dcn_optc_registers { uint32_t OTG_GSL_WINDOW_X; uint32_t OTG_GSL_WINDOW_Y; uint32_t OTG_VUPDATE_KEEPOUT; - uint32_t OTG_DSC_START_POSITION; }; #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ @@ -241,7 +240,7 @@ struct dcn_optc_registers { SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_DATA, mask_sh),\ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SRC_SEL, mask_sh) -#define TG_REG_FIELD_LIST(type) \ +#define TG_REG_FIELD_LIST_DCN1_0(type) \ type VSTARTUP_START;\ type VUPDATE_OFFSET;\ type VUPDATE_WIDTH;\ @@ -352,10 +351,11 @@ struct dcn_optc_registers { type OTG_MASTER_UPDATE_LOCK_GSL_EN;\ type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET;\ type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET;\ - type OTG_DSC_START_POSITION_X;\ - type OTG_DSC_START_POSITION_LINE_NUM;\ type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN; +#define TG_REG_FIELD_LIST(type) \ + TG_REG_FIELD_LIST_DCN1_0(type) + struct dcn_optc_shift { TG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 44825e2c9ebb..c4a564cb56b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -50,7 +50,8 @@ #include "dcn10_hubp.h" #include "dcn10_hubbub.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #include "dcn/dcn_1_0_offset.h" #include "dcn/dcn_1_0_sh_mask.h" @@ -365,6 +366,7 @@ static const struct dcn_optc_mask tg_mask = { static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), NBIO_SR(BIOS_SCRATCH_6) }; @@ -450,6 +452,7 @@ static const struct dc_debug debug_defaults_drv = { .disable_stereo_support = true, .vsr_support = true, .performance_trace = false, + .az_endpoint_mute_only = true, }; static const struct dc_debug debug_defaults_diags = { @@ -818,7 +821,7 @@ static void get_pixel_clock_parameters( pixel_clk_params->requested_pix_clk = stream->timing.pix_clk_khz; pixel_clk_params->encoder_object_id = stream->sink->link->link_enc->id; pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->pipe_idx + 1; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; /* TODO: un-hardcode*/ pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * LINK_RATE_REF_FREQ_IN_KHZ; @@ -965,6 +968,7 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; return idle_pipe; } @@ -1316,13 +1320,11 @@ static bool construct( } } - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - pool->base.display_clock = dce120_disp_clk_create(ctx); - if (pool->base.display_clock == NULL) { - dm_error("DC: failed to create display clock!\n"); - BREAK_TO_DEBUGGER(); - goto fail; - } + pool->base.display_clock = dce120_disp_clk_create(ctx); + if (pool->base.display_clock == NULL) { + dm_error("DC: failed to create display clock!\n"); + BREAK_TO_DEBUGGER(); + goto fail; } pool->base.dmcu = dcn10_dmcu_create(ctx, @@ -1445,6 +1447,7 @@ static bool construct( /* valid pipe num */ pool->base.pipe_count = j; + pool->base.timing_generator_count = j; /* within dml lib, it is hard code to 4. If ASIC pipe is fused, * the value may be changed diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index bbfa83252fc1..eac4bfe12257 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -91,7 +91,8 @@ struct pp_smu_funcs_rv { /* which SMU message? are reader and writer WM separate SMU msg? */ void (*set_wm_ranges)(struct pp_smu *pp, struct pp_smu_wm_range_sets *ranges); - + /* PME w/a */ + void (*set_pme_wa_enable)(struct pp_smu *pp); }; #if 0 diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 225b7bfb09a9..22e7ee7dcd26 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -192,37 +192,6 @@ unsigned int generic_reg_wait(const struct dc_context *ctx, * Power Play (PP) interfaces **************************************/ -/* DAL calls this function to notify PP about clocks it needs for the Mode Set. - * This is done *before* it changes DCE clock. - * - * If required clock is higher than current, then PP will increase the voltage. - * - * If required clock is lower than current, then PP will defer reduction of - * voltage until the call to dc_service_pp_post_dce_clock_change(). - * - * \input - Contains clocks needed for Mode Set. - * - * \output - Contains clocks adjusted by PP which DAL should use for Mode Set. - * Valid only if function returns zero. - * - * \returns true - call is successful - * false - call failed - */ -bool dm_pp_pre_dce_clock_change( - struct dc_context *ctx, - struct dm_pp_gpu_clock_range *requested_state, - struct dm_pp_gpu_clock_range *actual_state); - -/* The returned clocks range are 'static' system clocks which will be used for - * mode validation purposes. - * - * \returns true - call is successful - * false - call failed - */ -bool dc_service_get_system_clocks_range( - const struct dc_context *ctx, - struct dm_pp_gpu_clock_range *sys_clks); - /* Gets valid clocks levels from pplib * * input: clk_type - display clk / sclk / mem clk diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3488af2b5786..f83a608f93e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -24,19 +24,23 @@ # It provides the general basic services required by other DAL # subcomponents. -CFLAGS_display_mode_vba.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_display_mode_lib.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_display_pipe_clocks.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_display_rq_dlg_calc.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_dml1_display_rq_dlg_calc.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_display_rq_dlg_helpers.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_soc_bounding_box.o := -mhard-float -msse -mpreferred-stack-boundary=4 -CFLAGS_dml_common_defs.o := -mhard-float -msse -mpreferred-stack-boundary=4 +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align := -mpreferred-stack-boundary=4 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align := -mstack-alignment=16 +endif +dml_ccflags := -mhard-float -msse $(cc_stack_align) -DML = display_mode_lib.o display_rq_dlg_calc.o \ - display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ - soc_bounding_box.o dml_common_defs.o display_mode_vba.o +CFLAGS_display_mode_lib.o := $(dml_ccflags) +CFLAGS_display_pipe_clocks.o := $(dml_ccflags) +CFLAGS_dml1_display_rq_dlg_calc.o := $(dml_ccflags) +CFLAGS_display_rq_dlg_helpers.o := $(dml_ccflags) +CFLAGS_soc_bounding_box.o := $(dml_ccflags) +CFLAGS_dml_common_defs.o := $(dml_ccflags) + +DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ + soc_bounding_box.o dml_common_defs.o AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 26f4f2a3d90d..3c2abcb8a1b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -28,8 +28,6 @@ #include "dml_common_defs.h" #include "soc_bounding_box.h" -#include "display_mode_vba.h" -#include "display_rq_dlg_calc.h" #include "dml1_display_rq_dlg_calc.h" enum dml_project { @@ -41,7 +39,6 @@ struct display_mode_lib { struct _vcs_dpi_ip_params_st ip; struct _vcs_dpi_soc_bounding_box_st soc; enum dml_project project; - struct vba_vars_st vba; struct dal_logger *logger; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index aeebd8bee628..09affa16cc43 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -140,7 +140,6 @@ struct _vcs_dpi_ip_params_st { unsigned int max_hscl_taps; unsigned int max_vscl_taps; unsigned int xfc_supported; - unsigned int ptoi_supported; unsigned int xfc_fill_constant_bytes; double dispclk_ramp_margin_percent; double xfc_fill_bw_overhead_percent; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c deleted file mode 100644 index 260e113fcc02..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ /dev/null @@ -1,6085 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "display_mode_lib.h" -#include "display_mode_vba.h" - -#include "dml_inline_defs.h" - -/* - * NOTE: - * This file is gcc-parseable HW gospel, coming straight from HW engineers. - * - * It doesn't adhere to Linux kernel style and sometimes will do things in odd - * ways. Unless there is something clearly wrong with it the code should - * remain as-is as it provides us with a guarantee from HW that it is correct. - */ - -#define BPP_INVALID 0 -#define BPP_BLENDED_PIPE 0xffffffff -static const unsigned int NumberOfStates = DC__VOLTAGE_STATES; - -static void fetch_socbb_params(struct display_mode_lib *mode_lib); -static void fetch_ip_params(struct display_mode_lib *mode_lib); -static void fetch_pipe_params(struct display_mode_lib *mode_lib); -static void recalculate_params( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes); -static void recalculate(struct display_mode_lib *mode_lib); -static double adjust_ReturnBW( - struct display_mode_lib *mode_lib, - double ReturnBW, - bool DCCEnabledAnyPlane, - double ReturnBandwidthToDCN); -static void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib); -static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); -static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( - struct display_mode_lib *mode_lib); -static unsigned int dscceComputeDelay( - unsigned int bpc, - double bpp, - unsigned int sliceWidth, - unsigned int numSlices, - enum output_format_class pixelFormat); -static unsigned int dscComputeDelay(enum output_format_class pixelFormat); -// Super monster function with some 45 argument -static bool CalculatePrefetchSchedule( - struct display_mode_lib *mode_lib, - double DPPCLK, - double DISPCLK, - double PixelClock, - double DCFClkDeepSleep, - unsigned int DSCDelay, - unsigned int DPPPerPlane, - bool ScalerEnabled, - unsigned int NumberOfCursors, - double DPPCLKDelaySubtotal, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCFormater, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, - unsigned int ScalerRecoutWidth, - enum output_format_class OutputFormat, - unsigned int VBlank, - unsigned int HTotal, - unsigned int MaxInterDCNTileRepeaters, - unsigned int VStartup, - unsigned int PageTableLevels, - bool VirtualMemoryEnable, - bool DynamicMetadataEnable, - unsigned int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, - bool DCCEnable, - double UrgentLatency, - double UrgentExtraLatency, - double TCalc, - unsigned int PDEAndMetaPTEBytesFrame, - unsigned int MetaRowByte, - unsigned int PixelPTEBytesPerRow, - double PrefetchSourceLinesY, - unsigned int SwathWidthY, - double BytePerPixelDETY, - double VInitPreFillY, - unsigned int MaxNumSwathY, - double PrefetchSourceLinesC, - double BytePerPixelDETC, - double VInitPreFillC, - unsigned int MaxNumSwathC, - unsigned int SwathHeightY, - unsigned int SwathHeightC, - double TWait, - bool XFCEnabled, - double XFCRemoteSurfaceFlipDelay, - bool InterlaceEnable, - bool ProgressiveToInterlaceUnitInOPP, - double *DSTXAfterScaler, - double *DSTYAfterScaler, - double *DestinationLinesForPrefetch, - double *PrefetchBandwidth, - double *DestinationLinesToRequestVMInVBlank, - double *DestinationLinesToRequestRowInVBlank, - double *VRatioPrefetchY, - double *VRatioPrefetchC, - double *RequiredPrefetchPixDataBW, - unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, - double *Tno_bw, - unsigned int *VUpdateOffsetPix, - unsigned int *VUpdateWidthPix, - unsigned int *VReadyOffsetPix); -static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); -static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); -static double CalculatePrefetchSourceLines( - struct display_mode_lib *mode_lib, - double VRatio, - double vtaps, - bool Interlace, - bool ProgressiveToInterlaceUnitInOPP, - unsigned int SwathHeight, - unsigned int ViewportYStart, - double *VInitPreFill, - unsigned int *MaxNumSwath); -static unsigned int CalculateVMAndRowBytes( - struct display_mode_lib *mode_lib, - bool DCCEnable, - unsigned int BlockHeight256Bytes, - unsigned int BlockWidth256Bytes, - enum source_format_class SourcePixelFormat, - unsigned int SurfaceTiling, - unsigned int BytePerPixel, - enum scan_direction_class ScanDirection, - unsigned int ViewportWidth, - unsigned int ViewportHeight, - unsigned int SwathWidthY, - bool VirtualMemoryEnable, - unsigned int VMMPageSize, - unsigned int PTEBufferSizeInRequests, - unsigned int PDEProcessingBufIn64KBReqs, - unsigned int Pitch, - unsigned int DCCMetaPitch, - unsigned int *MacroTileWidth, - unsigned int *MetaRowByte, - unsigned int *PixelPTEBytesPerRow, - bool *PTEBufferSizeNotExceeded, - unsigned int *dpte_row_height, - unsigned int *meta_row_height); -static double CalculateTWait( - unsigned int PrefetchMode, - double DRAMClockChangeLatency, - double UrgentLatency, - double SREnterPlusExitTime); -static double CalculateRemoteSurfaceFlipDelay( - struct display_mode_lib *mode_lib, - double VRatio, - double SwathWidth, - double Bpp, - double LineTime, - double XFCTSlvVupdateOffset, - double XFCTSlvVupdateWidth, - double XFCTSlvVreadyOffset, - double XFCXBUFLatencyTolerance, - double XFCFillBWOverhead, - double XFCSlvChunkSize, - double XFCBusTransportTime, - double TCalc, - double TWait, - double *SrcActiveDrainRate, - double *TInitXFill, - double *TslvChk); -static double CalculateWriteBackDISPCLK( - enum source_format_class WritebackPixelFormat, - double PixelClock, - double WritebackHRatio, - double WritebackVRatio, - unsigned int WritebackLumaHTaps, - unsigned int WritebackLumaVTaps, - unsigned int WritebackChromaHTaps, - unsigned int WritebackChromaVTaps, - double WritebackDestinationWidth, - unsigned int HTotal, - unsigned int WritebackChromaLineBufferWidth); -static void CalculateActiveRowBandwidth( - bool VirtualMemoryEnable, - enum source_format_class SourcePixelFormat, - double VRatio, - bool DCCEnable, - double LineTime, - unsigned int MetaRowByteLuma, - unsigned int MetaRowByteChroma, - unsigned int meta_row_height_luma, - unsigned int meta_row_height_chroma, - unsigned int PixelPTEBytesPerRowLuma, - unsigned int PixelPTEBytesPerRowChroma, - unsigned int dpte_row_height_luma, - unsigned int dpte_row_height_chroma, - double *meta_row_bw, - double *dpte_row_bw, - double *qual_row_bw); -static void CalculateFlipSchedule( - struct display_mode_lib *mode_lib, - double UrgentExtraLatency, - double UrgentLatency, - unsigned int MaxPageTableLevels, - bool VirtualMemoryEnable, - double BandwidthAvailableForImmediateFlip, - unsigned int TotImmediateFlipBytes, - enum source_format_class SourcePixelFormat, - unsigned int ImmediateFlipBytes, - double LineTime, - double Tno_bw, - double VRatio, - double PDEAndMetaPTEBytesFrame, - unsigned int MetaRowByte, - unsigned int PixelPTEBytesPerRow, - bool DCCEnable, - unsigned int dpte_row_height, - unsigned int meta_row_height, - double qual_row_bw, - double *DestinationLinesToRequestVMInImmediateFlip, - double *DestinationLinesToRequestRowInImmediateFlip, - double *final_flip_bw, - bool *ImmediateFlipSupportedForPipe); -static double CalculateWriteBackDelay( - enum source_format_class WritebackPixelFormat, - double WritebackHRatio, - double WritebackVRatio, - unsigned int WritebackLumaHTaps, - unsigned int WritebackLumaVTaps, - unsigned int WritebackChromaHTaps, - unsigned int WritebackChromaVTaps, - unsigned int WritebackDestinationWidth); -static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct display_mode_lib *mode_lib); -static unsigned int CursorBppEnumToBits(enum cursor_bpp ebpp); -static void ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); - -void set_prefetch_mode( - struct display_mode_lib *mode_lib, - bool cstate_en, - bool pstate_en, - bool ignore_viewport_pos, - bool immediate_flip_support) -{ - unsigned int prefetch_mode; - - if (cstate_en && pstate_en) - prefetch_mode = 0; - else if (cstate_en) - prefetch_mode = 1; - else - prefetch_mode = 2; - if (prefetch_mode != mode_lib->vba.PrefetchMode - || ignore_viewport_pos != mode_lib->vba.IgnoreViewportPositioning - || immediate_flip_support != mode_lib->vba.ImmediateFlipSupport) { - DTRACE( - " Prefetch mode has changed from %i to %i. Recalculating.", - prefetch_mode, - mode_lib->vba.PrefetchMode); - mode_lib->vba.PrefetchMode = prefetch_mode; - mode_lib->vba.IgnoreViewportPositioning = ignore_viewport_pos; - mode_lib->vba.ImmediateFlipSupport = immediate_flip_support; - recalculate(mode_lib); - } -} - -unsigned int dml_get_voltage_level( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes) -{ - bool need_recalculate = memcmp(&mode_lib->soc, &mode_lib->vba.soc, sizeof(mode_lib->vba.soc)) != 0 - || memcmp(&mode_lib->ip, &mode_lib->vba.ip, sizeof(mode_lib->vba.ip)) != 0 - || num_pipes != mode_lib->vba.cache_num_pipes - || memcmp(pipes, mode_lib->vba.cache_pipes, - sizeof(display_e2e_pipe_params_st) * num_pipes) != 0; - - mode_lib->vba.soc = mode_lib->soc; - mode_lib->vba.ip = mode_lib->ip; - memcpy(mode_lib->vba.cache_pipes, pipes, sizeof(*pipes) * num_pipes); - mode_lib->vba.cache_num_pipes = num_pipes; - - if (need_recalculate && pipes[0].clks_cfg.dppclk_mhz != 0) - recalculate(mode_lib); - else { - fetch_socbb_params(mode_lib); - fetch_ip_params(mode_lib); - fetch_pipe_params(mode_lib); - } - ModeSupportAndSystemConfigurationFull(mode_lib); - - return mode_lib->vba.VoltageLevel; -} - -#define dml_get_attr_func(attr, var) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes) \ -{ \ - recalculate_params(mode_lib, pipes, num_pipes); \ - return var; \ -} - -dml_get_attr_func(clk_dcf_deepsleep, mode_lib->vba.DCFClkDeepSleep); -dml_get_attr_func(wm_urgent, mode_lib->vba.UrgentWatermark); -dml_get_attr_func(wm_memory_trip, mode_lib->vba.MemoryTripWatermark); -dml_get_attr_func(wm_writeback_urgent, mode_lib->vba.WritebackUrgentWatermark); -dml_get_attr_func(wm_stutter_exit, mode_lib->vba.StutterExitWatermark); -dml_get_attr_func(wm_stutter_enter_exit, mode_lib->vba.StutterEnterPlusExitWatermark); -dml_get_attr_func(wm_dram_clock_change, mode_lib->vba.DRAMClockChangeWatermark); -dml_get_attr_func(wm_writeback_dram_clock_change, mode_lib->vba.WritebackDRAMClockChangeWatermark); -dml_get_attr_func(wm_xfc_underflow, mode_lib->vba.UrgentWatermark); // xfc_underflow maps to urgent -dml_get_attr_func(stutter_efficiency, mode_lib->vba.StutterEfficiency); -dml_get_attr_func(stutter_efficiency_no_vblank, mode_lib->vba.StutterEfficiencyNotIncludingVBlank); -dml_get_attr_func(urgent_latency, mode_lib->vba.MinUrgentLatencySupportUs); -dml_get_attr_func(urgent_extra_latency, mode_lib->vba.UrgentExtraLatency); -dml_get_attr_func(nonurgent_latency, mode_lib->vba.NonUrgentLatencyTolerance); -dml_get_attr_func( - dram_clock_change_latency, - mode_lib->vba.MinActiveDRAMClockChangeLatencySupported); -dml_get_attr_func(dispclk_calculated, mode_lib->vba.DISPCLK_calculated); -dml_get_attr_func(total_data_read_bw, mode_lib->vba.TotalDataReadBandwidth); -dml_get_attr_func(return_bw, mode_lib->vba.ReturnBW); -dml_get_attr_func(tcalc, mode_lib->vba.TCalc); - -#define dml_get_pipe_attr_func(attr, var) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe) \ -{\ - unsigned int which_plane; \ - recalculate_params(mode_lib, pipes, num_pipes); \ - which_plane = mode_lib->vba.pipe_plane[which_pipe]; \ - return var[which_plane]; \ -} - -dml_get_pipe_attr_func(dsc_delay, mode_lib->vba.DSCDelay); -dml_get_pipe_attr_func(dppclk_calculated, mode_lib->vba.DPPCLK_calculated); -dml_get_pipe_attr_func(dscclk_calculated, mode_lib->vba.DSCCLK_calculated); -dml_get_pipe_attr_func(min_ttu_vblank, mode_lib->vba.MinTTUVBlank); -dml_get_pipe_attr_func(vratio_prefetch_l, mode_lib->vba.VRatioPrefetchY); -dml_get_pipe_attr_func(vratio_prefetch_c, mode_lib->vba.VRatioPrefetchC); -dml_get_pipe_attr_func(dst_x_after_scaler, mode_lib->vba.DSTXAfterScaler); -dml_get_pipe_attr_func(dst_y_after_scaler, mode_lib->vba.DSTYAfterScaler); -dml_get_pipe_attr_func(dst_y_per_vm_vblank, mode_lib->vba.DestinationLinesToRequestVMInVBlank); -dml_get_pipe_attr_func(dst_y_per_row_vblank, mode_lib->vba.DestinationLinesToRequestRowInVBlank); -dml_get_pipe_attr_func(dst_y_prefetch, mode_lib->vba.DestinationLinesForPrefetch); -dml_get_pipe_attr_func(dst_y_per_vm_flip, mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip); -dml_get_pipe_attr_func( - dst_y_per_row_flip, - mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip); - -dml_get_pipe_attr_func(xfc_transfer_delay, mode_lib->vba.XFCTransferDelay); -dml_get_pipe_attr_func(xfc_precharge_delay, mode_lib->vba.XFCPrechargeDelay); -dml_get_pipe_attr_func(xfc_remote_surface_flip_latency, mode_lib->vba.XFCRemoteSurfaceFlipLatency); -dml_get_pipe_attr_func(xfc_prefetch_margin, mode_lib->vba.XFCPrefetchMargin); - -unsigned int get_vstartup_calculated( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes, - unsigned int which_pipe) -{ - unsigned int which_plane; - - recalculate_params(mode_lib, pipes, num_pipes); - which_plane = mode_lib->vba.pipe_plane[which_pipe]; - return mode_lib->vba.VStartup[which_plane]; -} - -double get_total_immediate_flip_bytes( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes) -{ - recalculate_params(mode_lib, pipes, num_pipes); - return mode_lib->vba.TotImmediateFlipBytes; -} - -double get_total_immediate_flip_bw( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes) -{ - recalculate_params(mode_lib, pipes, num_pipes); - return mode_lib->vba.ImmediateFlipBW; -} - -double get_total_prefetch_bw( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes) -{ - unsigned int k; - double total_prefetch_bw = 0.0; - - recalculate_params(mode_lib, pipes, num_pipes); - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) - total_prefetch_bw += mode_lib->vba.PrefetchBandwidth[k]; - return total_prefetch_bw; -} - -static void fetch_socbb_params(struct display_mode_lib *mode_lib) -{ - soc_bounding_box_st *soc = &mode_lib->vba.soc; - unsigned int i; - - // SOC Bounding Box Parameters - mode_lib->vba.ReturnBusWidth = soc->return_bus_width_bytes; - mode_lib->vba.NumberOfChannels = soc->num_chans; - mode_lib->vba.PercentOfIdealDRAMAndFabricBWReceivedAfterUrgLatency = - soc->ideal_dram_bw_after_urgent_percent; // there's always that one bastard variable that's so long it throws everything out of alignment! - mode_lib->vba.UrgentLatency = soc->urgent_latency_us; - mode_lib->vba.RoundTripPingLatencyCycles = soc->round_trip_ping_latency_dcfclk_cycles; - mode_lib->vba.UrgentOutOfOrderReturnPerChannel = - soc->urgent_out_of_order_return_per_channel_bytes; - mode_lib->vba.WritebackLatency = soc->writeback_latency_us; - mode_lib->vba.SRExitTime = soc->sr_exit_time_us; - mode_lib->vba.SREnterPlusExitTime = soc->sr_enter_plus_exit_time_us; - mode_lib->vba.DRAMClockChangeLatency = soc->dram_clock_change_latency_us; - mode_lib->vba.Downspreading = soc->downspread_percent; - mode_lib->vba.DRAMChannelWidth = soc->dram_channel_width_bytes; // new! - mode_lib->vba.FabricDatapathToDCNDataReturn = soc->fabric_datapath_to_dcn_data_return_bytes; // new! - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading = soc->dcn_downspread_percent; // new - mode_lib->vba.DISPCLKDPPCLKVCOSpeed = soc->dispclk_dppclk_vco_speed_mhz; // new - mode_lib->vba.VMMPageSize = soc->vmm_page_size_bytes; - // Set the voltage scaling clocks as the defaults. Most of these will - // be set to different values by the test - for (i = 0; i < DC__VOLTAGE_STATES; i++) - if (soc->clock_limits[i].state == mode_lib->vba.VoltageLevel) - break; - - mode_lib->vba.DCFCLK = soc->clock_limits[i].dcfclk_mhz; - mode_lib->vba.SOCCLK = soc->clock_limits[i].socclk_mhz; - mode_lib->vba.DRAMSpeed = soc->clock_limits[i].dram_speed_mhz; - mode_lib->vba.FabricClock = soc->clock_limits[i].fabricclk_mhz; - - mode_lib->vba.XFCBusTransportTime = soc->xfc_bus_transport_time_us; - mode_lib->vba.XFCXBUFLatencyTolerance = soc->xfc_xbuf_latency_tolerance_us; - - mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp = false; - mode_lib->vba.MaxHSCLRatio = 4; - mode_lib->vba.MaxVSCLRatio = 4; - mode_lib->vba.MaxNumWriteback = 0; /*TODO*/ - mode_lib->vba.WritebackLumaAndChromaScalingSupported = true; - mode_lib->vba.Cursor64BppSupport = true; - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.DCFCLKPerState[i] = soc->clock_limits[i].dcfclk_mhz; - mode_lib->vba.FabricClockPerState[i] = soc->clock_limits[i].fabricclk_mhz; - mode_lib->vba.SOCCLKPerState[i] = soc->clock_limits[i].socclk_mhz; - mode_lib->vba.PHYCLKPerState[i] = soc->clock_limits[i].phyclk_mhz; - mode_lib->vba.MaxDppclk[i] = soc->clock_limits[i].dppclk_mhz; - mode_lib->vba.MaxDSCCLK[i] = soc->clock_limits[i].dscclk_mhz; - mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mhz; - mode_lib->vba.MaxDispclk[i] = soc->clock_limits[i].dispclk_mhz; - } -} - -static void fetch_ip_params(struct display_mode_lib *mode_lib) -{ - ip_params_st *ip = &mode_lib->vba.ip; - - // IP Parameters - mode_lib->vba.MaxNumDPP = ip->max_num_dpp; - mode_lib->vba.MaxNumOTG = ip->max_num_otg; - mode_lib->vba.CursorChunkSize = ip->cursor_chunk_size; - mode_lib->vba.CursorBufferSize = ip->cursor_buffer_size; - - mode_lib->vba.MaxDCHUBToPSCLThroughput = ip->max_dchub_pscl_bw_pix_per_clk; - mode_lib->vba.MaxPSCLToLBThroughput = ip->max_pscl_lb_bw_pix_per_clk; - mode_lib->vba.ROBBufferSizeInKByte = ip->rob_buffer_size_kbytes; - mode_lib->vba.DETBufferSizeInKByte = ip->det_buffer_size_kbytes; - mode_lib->vba.PixelChunkSizeInKByte = ip->pixel_chunk_size_kbytes; - mode_lib->vba.MetaChunkSize = ip->meta_chunk_size_kbytes; - mode_lib->vba.PTEChunkSize = ip->pte_chunk_size_kbytes; - mode_lib->vba.WritebackChunkSize = ip->writeback_chunk_size_kbytes; - mode_lib->vba.LineBufferSize = ip->line_buffer_size_bits; - mode_lib->vba.MaxLineBufferLines = ip->max_line_buffer_lines; - mode_lib->vba.PTEBufferSizeInRequests = ip->dpte_buffer_size_in_pte_reqs; - mode_lib->vba.DPPOutputBufferPixels = ip->dpp_output_buffer_pixels; - mode_lib->vba.OPPOutputBufferLines = ip->opp_output_buffer_lines; - mode_lib->vba.WritebackInterfaceLumaBufferSize = ip->writeback_luma_buffer_size_kbytes; - mode_lib->vba.WritebackInterfaceChromaBufferSize = ip->writeback_chroma_buffer_size_kbytes; - mode_lib->vba.WritebackChromaLineBufferWidth = - ip->writeback_chroma_line_buffer_width_pixels; - mode_lib->vba.MaxPageTableLevels = ip->max_page_table_levels; - mode_lib->vba.MaxInterDCNTileRepeaters = ip->max_inter_dcn_tile_repeaters; - mode_lib->vba.NumberOfDSC = ip->num_dsc; - mode_lib->vba.ODMCapability = ip->odm_capable; - mode_lib->vba.DISPCLKRampingMargin = ip->dispclk_ramp_margin_percent; - - mode_lib->vba.XFCSupported = ip->xfc_supported; - mode_lib->vba.XFCFillBWOverhead = ip->xfc_fill_bw_overhead_percent; - mode_lib->vba.XFCFillConstant = ip->xfc_fill_constant_bytes; - mode_lib->vba.DPPCLKDelaySubtotal = ip->dppclk_delay_subtotal; - mode_lib->vba.DPPCLKDelaySCL = ip->dppclk_delay_scl; - mode_lib->vba.DPPCLKDelaySCLLBOnly = ip->dppclk_delay_scl_lb_only; - mode_lib->vba.DPPCLKDelayCNVCFormater = ip->dppclk_delay_cnvc_formatter; - mode_lib->vba.DPPCLKDelayCNVCCursor = ip->dppclk_delay_cnvc_cursor; - mode_lib->vba.DISPCLKDelaySubtotal = ip->dispclk_delay_subtotal; - - mode_lib->vba.ProgressiveToInterlaceUnitInOPP = ip->ptoi_supported; - - mode_lib->vba.PDEProcessingBufIn64KBReqs = ip->pde_proc_buffer_size_64k_reqs; -} - -static void fetch_pipe_params(struct display_mode_lib *mode_lib) -{ - display_e2e_pipe_params_st *pipes = mode_lib->vba.cache_pipes; - ip_params_st *ip = &mode_lib->vba.ip; - - unsigned int OTGInstPlane[DC__NUM_DPP__MAX]; - unsigned int j, k; - bool PlaneVisited[DC__NUM_DPP__MAX]; - bool visited[DC__NUM_DPP__MAX]; - - // Convert Pipes to Planes - for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) - visited[k] = false; - - mode_lib->vba.NumberOfActivePlanes = 0; - for (j = 0; j < mode_lib->vba.cache_num_pipes; ++j) { - display_pipe_source_params_st *src = &pipes[j].pipe.src; - display_pipe_dest_params_st *dst = &pipes[j].pipe.dest; - scaler_ratio_depth_st *scl = &pipes[j].pipe.scale_ratio_depth; - scaler_taps_st *taps = &pipes[j].pipe.scale_taps; - display_output_params_st *dout = &pipes[j].dout; - display_clocks_and_cfg_st *clks = &pipes[j].clks_cfg; - - if (visited[j]) - continue; - visited[j] = true; - - mode_lib->vba.pipe_plane[j] = mode_lib->vba.NumberOfActivePlanes; - - mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes] = 1; - mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] = - (enum scan_direction_class) (src->source_scan); - mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] = - src->viewport_width; - mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] = - src->viewport_height; - mode_lib->vba.ViewportYStartY[mode_lib->vba.NumberOfActivePlanes] = - src->viewport_y_y; - mode_lib->vba.ViewportYStartC[mode_lib->vba.NumberOfActivePlanes] = - src->viewport_y_c; - mode_lib->vba.PitchY[mode_lib->vba.NumberOfActivePlanes] = src->data_pitch; - mode_lib->vba.PitchC[mode_lib->vba.NumberOfActivePlanes] = src->data_pitch_c; - mode_lib->vba.DCCMetaPitchY[mode_lib->vba.NumberOfActivePlanes] = src->meta_pitch; - mode_lib->vba.HRatio[mode_lib->vba.NumberOfActivePlanes] = scl->hscl_ratio; - mode_lib->vba.VRatio[mode_lib->vba.NumberOfActivePlanes] = scl->vscl_ratio; - mode_lib->vba.ScalerEnabled[mode_lib->vba.NumberOfActivePlanes] = scl->scl_enable; - mode_lib->vba.Interlace[mode_lib->vba.NumberOfActivePlanes] = dst->interlaced; - if (mode_lib->vba.Interlace[mode_lib->vba.NumberOfActivePlanes]) - mode_lib->vba.VRatio[mode_lib->vba.NumberOfActivePlanes] *= 2.0; - mode_lib->vba.htaps[mode_lib->vba.NumberOfActivePlanes] = taps->htaps; - mode_lib->vba.vtaps[mode_lib->vba.NumberOfActivePlanes] = taps->vtaps; - mode_lib->vba.HTAPsChroma[mode_lib->vba.NumberOfActivePlanes] = taps->htaps_c; - mode_lib->vba.VTAPsChroma[mode_lib->vba.NumberOfActivePlanes] = taps->vtaps_c; - mode_lib->vba.HTotal[mode_lib->vba.NumberOfActivePlanes] = dst->htotal; - mode_lib->vba.VTotal[mode_lib->vba.NumberOfActivePlanes] = dst->vtotal; - mode_lib->vba.DCCEnable[mode_lib->vba.NumberOfActivePlanes] = - src->dcc_use_global ? - ip->dcc_supported : src->dcc && ip->dcc_supported; - mode_lib->vba.DCCRate[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate; - mode_lib->vba.SourcePixelFormat[mode_lib->vba.NumberOfActivePlanes] = - (enum source_format_class) (src->source_format); - mode_lib->vba.HActive[mode_lib->vba.NumberOfActivePlanes] = dst->hactive; - mode_lib->vba.VActive[mode_lib->vba.NumberOfActivePlanes] = dst->vactive; - mode_lib->vba.SurfaceTiling[mode_lib->vba.NumberOfActivePlanes] = - (enum dm_swizzle_mode) (src->sw_mode); - mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] = - dst->recout_width; // TODO: or should this be full_recout_width???...maybe only when in hsplit mode? - mode_lib->vba.ODMCombineEnabled[mode_lib->vba.NumberOfActivePlanes] = - dst->odm_combine; - mode_lib->vba.OutputFormat[mode_lib->vba.NumberOfActivePlanes] = - (enum output_format_class) (dout->output_format); - mode_lib->vba.Output[mode_lib->vba.NumberOfActivePlanes] = - (enum output_encoder_class) (dout->output_type); - mode_lib->vba.OutputBpp[mode_lib->vba.NumberOfActivePlanes] = dout->output_bpp; - mode_lib->vba.OutputLinkDPLanes[mode_lib->vba.NumberOfActivePlanes] = - dout->dp_lanes; - mode_lib->vba.DSCEnabled[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_enable; - mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] = - dout->dsc_slices; - mode_lib->vba.DSCInputBitPerComponent[mode_lib->vba.NumberOfActivePlanes] = - dout->opp_input_bpc == 0 ? 12 : dout->opp_input_bpc; - mode_lib->vba.WritebackEnable[mode_lib->vba.NumberOfActivePlanes] = dout->wb_enable; - mode_lib->vba.WritebackSourceHeight[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_src_height; - mode_lib->vba.WritebackDestinationWidth[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_dst_width; - mode_lib->vba.WritebackDestinationHeight[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_dst_height; - mode_lib->vba.WritebackPixelFormat[mode_lib->vba.NumberOfActivePlanes] = - (enum source_format_class) (dout->wb.wb_pixel_format); - mode_lib->vba.WritebackLumaHTaps[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_htaps_luma; - mode_lib->vba.WritebackLumaVTaps[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_vtaps_luma; - mode_lib->vba.WritebackChromaHTaps[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_htaps_chroma; - mode_lib->vba.WritebackChromaVTaps[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_vtaps_chroma; - mode_lib->vba.WritebackHRatio[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_hratio; - mode_lib->vba.WritebackVRatio[mode_lib->vba.NumberOfActivePlanes] = - dout->wb.wb_vratio; - - mode_lib->vba.DynamicMetadataEnable[mode_lib->vba.NumberOfActivePlanes] = - src->dynamic_metadata_enable; - mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[mode_lib->vba.NumberOfActivePlanes] = - src->dynamic_metadata_lines_before_active; - mode_lib->vba.DynamicMetadataTransmittedBytes[mode_lib->vba.NumberOfActivePlanes] = - src->dynamic_metadata_xmit_bytes; - - mode_lib->vba.XFCEnabled[mode_lib->vba.NumberOfActivePlanes] = src->xfc_enable - && ip->xfc_supported; - mode_lib->vba.XFCSlvChunkSize = src->xfc_params.xfc_slv_chunk_size_bytes; - mode_lib->vba.XFCTSlvVupdateOffset = src->xfc_params.xfc_tslv_vupdate_offset_us; - mode_lib->vba.XFCTSlvVupdateWidth = src->xfc_params.xfc_tslv_vupdate_width_us; - mode_lib->vba.XFCTSlvVreadyOffset = src->xfc_params.xfc_tslv_vready_offset_us; - mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; - mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz; - if (ip->is_line_buffer_bpp_fixed) - mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] = - ip->line_buffer_fixed_bpp; - else { - unsigned int lb_depth; - - switch (scl->lb_depth) { - case dm_lb_6: - lb_depth = 18; - break; - case dm_lb_8: - lb_depth = 24; - break; - case dm_lb_10: - lb_depth = 30; - break; - case dm_lb_12: - lb_depth = 36; - break; - case dm_lb_16: - lb_depth = 48; - break; - default: - lb_depth = 36; - } - mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] = lb_depth; - } - mode_lib->vba.NumberOfCursors[mode_lib->vba.NumberOfActivePlanes] = 0; - // The DML spreadsheet assumes that the two cursors utilize the same amount of bandwidth. We'll - // calculate things a little more accurately - for (k = 0; k < DC__NUM_CURSOR__MAX; ++k) { - switch (k) { - case 0: - mode_lib->vba.CursorBPP[mode_lib->vba.NumberOfActivePlanes][0] = - CursorBppEnumToBits( - (enum cursor_bpp) (src->cur0_bpp)); - mode_lib->vba.CursorWidth[mode_lib->vba.NumberOfActivePlanes][0] = - src->cur0_src_width; - if (src->cur0_src_width > 0) - mode_lib->vba.NumberOfCursors[mode_lib->vba.NumberOfActivePlanes]++; - break; - case 1: - mode_lib->vba.CursorBPP[mode_lib->vba.NumberOfActivePlanes][1] = - CursorBppEnumToBits( - (enum cursor_bpp) (src->cur1_bpp)); - mode_lib->vba.CursorWidth[mode_lib->vba.NumberOfActivePlanes][1] = - src->cur1_src_width; - if (src->cur1_src_width > 0) - mode_lib->vba.NumberOfCursors[mode_lib->vba.NumberOfActivePlanes]++; - break; - default: - dml_print( - "ERROR: Number of cursors specified exceeds supported maximum\n") - ; - } - } - - OTGInstPlane[mode_lib->vba.NumberOfActivePlanes] = dst->otg_inst; - - if (dst->odm_combine && !src->is_hsplit) - dml_print( - "ERROR: ODM Combine is specified but is_hsplit has not be specified for pipe %i\n", - j); - - if (src->is_hsplit) { - for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) { - display_pipe_source_params_st *src_k = &pipes[k].pipe.src; - display_output_params_st *dout_k = &pipes[k].dout; - - if (src_k->is_hsplit && !visited[k] - && src->hsplit_grp == src_k->hsplit_grp) { - mode_lib->vba.pipe_plane[k] = - mode_lib->vba.NumberOfActivePlanes; - mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++; - if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] - == dm_horz) - mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] += - src_k->viewport_width; - else - mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] += - src_k->viewport_height; - - mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] += - dout_k->dsc_slices; - visited[k] = true; - } - } - } - - mode_lib->vba.NumberOfActivePlanes++; - } - - // handle overlays through dml_ml->vba.BlendingAndTiming - // dml_ml->vba.BlendingAndTiming tells you which instance to look at to get timing, the so called 'master' - - for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) - PlaneVisited[j] = false; - - for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { - for (k = j + 1; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (!PlaneVisited[k] && OTGInstPlane[j] == OTGInstPlane[k]) { - // doesn't matter, so choose the smaller one - mode_lib->vba.BlendingAndTiming[j] = j; - PlaneVisited[j] = true; - mode_lib->vba.BlendingAndTiming[k] = j; - PlaneVisited[k] = true; - } - } - - if (!PlaneVisited[j]) { - mode_lib->vba.BlendingAndTiming[j] = j; - PlaneVisited[j] = true; - } - } - - // TODO: dml_ml->vba.ODMCombineEnabled => 2 * dml_ml->vba.DPPPerPlane...actually maybe not since all pipes are specified - // Do we want the dscclk to automatically be halved? Guess not since the value is specified - - mode_lib->vba.SynchronizedVBlank = pipes[0].pipe.dest.synchronized_vblank_all_planes; - for (k = 1; k < mode_lib->vba.cache_num_pipes; ++k) - ASSERT(mode_lib->vba.SynchronizedVBlank == pipes[k].pipe.dest.synchronized_vblank_all_planes); - - mode_lib->vba.VirtualMemoryEnable = false; - mode_lib->vba.OverridePageTableLevels = 0; - - for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) { - mode_lib->vba.VirtualMemoryEnable = mode_lib->vba.VirtualMemoryEnable - || !!pipes[k].pipe.src.vm; - mode_lib->vba.OverridePageTableLevels = - (pipes[k].pipe.src.vm_levels_force_en - && mode_lib->vba.OverridePageTableLevels - < pipes[k].pipe.src.vm_levels_force) ? - pipes[k].pipe.src.vm_levels_force : - mode_lib->vba.OverridePageTableLevels; - } - - if (mode_lib->vba.OverridePageTableLevels) - mode_lib->vba.MaxPageTableLevels = mode_lib->vba.OverridePageTableLevels; - - mode_lib->vba.VirtualMemoryEnable = mode_lib->vba.VirtualMemoryEnable && !!ip->pte_enable; - - mode_lib->vba.FabricAndDRAMBandwidth = dml_min( - mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels - * mode_lib->vba.DRAMChannelWidth, - mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) - / 1000.0; - - // TODO: Must be consistent across all pipes - // DCCProgrammingAssumesScanDirectionUnknown = src.dcc_scan_dir_unknown; -} - -static void recalculate(struct display_mode_lib *mode_lib) -{ - ModeSupportAndSystemConfiguration(mode_lib); - PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); - DisplayPipeConfiguration(mode_lib); - DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); -} - -// in wm mode we pull the parameters needed from the display_e2e_pipe_params_st structs -// rather than working them out as in recalculate_ms -static void recalculate_params( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes) -{ - // This is only safe to use memcmp because there are non-POD types in struct display_mode_lib - if (memcmp(&mode_lib->soc, &mode_lib->vba.soc, sizeof(mode_lib->vba.soc)) != 0 - || memcmp(&mode_lib->ip, &mode_lib->vba.ip, sizeof(mode_lib->vba.ip)) != 0 - || num_pipes != mode_lib->vba.cache_num_pipes - || memcmp( - pipes, - mode_lib->vba.cache_pipes, - sizeof(display_e2e_pipe_params_st) * num_pipes) != 0) { - mode_lib->vba.soc = mode_lib->soc; - mode_lib->vba.ip = mode_lib->ip; - memcpy(mode_lib->vba.cache_pipes, pipes, sizeof(*pipes) * num_pipes); - mode_lib->vba.cache_num_pipes = num_pipes; - recalculate(mode_lib); - } -} - -static void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib) -{ - soc_bounding_box_st *soc = &mode_lib->vba.soc; - unsigned int i, k; - unsigned int total_pipes = 0; - - mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage; - for (i = 1; i < mode_lib->vba.cache_num_pipes; ++i) - ASSERT(mode_lib->vba.VoltageLevel == -1 || mode_lib->vba.VoltageLevel == mode_lib->vba.cache_pipes[i].clks_cfg.voltage); - - mode_lib->vba.DCFCLK = mode_lib->vba.cache_pipes[0].clks_cfg.dcfclk_mhz; - mode_lib->vba.SOCCLK = mode_lib->vba.cache_pipes[0].clks_cfg.socclk_mhz; - - if (mode_lib->vba.cache_pipes[0].clks_cfg.dispclk_mhz > 0.0) - mode_lib->vba.DISPCLK = mode_lib->vba.cache_pipes[0].clks_cfg.dispclk_mhz; - else - mode_lib->vba.DISPCLK = soc->clock_limits[mode_lib->vba.VoltageLevel].dispclk_mhz; - - fetch_socbb_params(mode_lib); - fetch_ip_params(mode_lib); - fetch_pipe_params(mode_lib); - - // Total Available Pipes Support Check - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) - total_pipes += mode_lib->vba.DPPPerPlane[k]; - ASSERT(total_pipes <= DC__NUM_DPP__MAX); -} - -static double adjust_ReturnBW( - struct display_mode_lib *mode_lib, - double ReturnBW, - bool DCCEnabledAnyPlane, - double ReturnBandwidthToDCN) -{ - double CriticalCompression; - - if (DCCEnabledAnyPlane - && ReturnBandwidthToDCN - > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0) - ReturnBW = - dml_min( - ReturnBW, - ReturnBandwidthToDCN * 4 - * (1.0 - - mode_lib->vba.UrgentLatency - / ((mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024 - / ReturnBandwidthToDCN - - mode_lib->vba.DCFCLK - * mode_lib->vba.ReturnBusWidth - / 4) - + mode_lib->vba.UrgentLatency)); - - CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK - * mode_lib->vba.UrgentLatency - / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatency - + (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024); - - if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0) - ReturnBW = - dml_min( - ReturnBW, - 4.0 * ReturnBandwidthToDCN - * (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024 - * mode_lib->vba.ReturnBusWidth - * mode_lib->vba.DCFCLK - * mode_lib->vba.UrgentLatency - / dml_pow( - (ReturnBandwidthToDCN - * mode_lib->vba.UrgentLatency - + (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024), - 2)); - - return ReturnBW; -} - -static unsigned int dscceComputeDelay( - unsigned int bpc, - double bpp, - unsigned int sliceWidth, - unsigned int numSlices, - enum output_format_class pixelFormat) -{ - // valid bpc = source bits per component in the set of {8, 10, 12} - // valid bpp = increments of 1/16 of a bit - // min = 6/7/8 in N420/N422/444, respectively - // max = such that compression is 1:1 - //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) - //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} - //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} - - // fixed value - unsigned int rcModelSize = 8192; - - // N422/N420 operate at 2 pixels per clock - unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l, - Delay, pixels; - - if (pixelFormat == dm_n422 || pixelFormat == dm_420) - pixelsPerClock = 2; - // #all other modes operate at 1 pixel per clock - else - pixelsPerClock = 1; - - //initial transmit delay as per PPS - initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); - - //compute ssm delay - if (bpc == 8) - D = 81; - else if (bpc == 10) - D = 89; - else - D = 113; - - //divide by pixel per cycle to compute slice width as seen by DSC - w = sliceWidth / pixelsPerClock; - - //422 mode has an additional cycle of delay - if (pixelFormat == dm_s422) - s = 1; - else - s = 0; - - //main calculation for the dscce - ix = initalXmitDelay + 45; - wx = (w + 2) / 3; - p = 3 * wx - w; - l0 = ix / w; - a = ix + p * l0; - ax = (a + 2) / 3 + D + 6 + 1; - l = (ax + wx - 1) / wx; - if ((ix % w) == 0 && p != 0) - lstall = 1; - else - lstall = 0; - Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22; - - //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels - pixels = Delay * 3 * pixelsPerClock; - return pixels; -} - -static unsigned int dscComputeDelay(enum output_format_class pixelFormat) -{ - unsigned int Delay = 0; - - if (pixelFormat == dm_420) { - // sfr - Delay = Delay + 2; - // dsccif - Delay = Delay + 0; - // dscc - input deserializer - Delay = Delay + 3; - // dscc gets pixels every other cycle - Delay = Delay + 2; - // dscc - input cdc fifo - Delay = Delay + 12; - // dscc gets pixels every other cycle - Delay = Delay + 13; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output cdc fifo - Delay = Delay + 7; - // dscc gets pixels every other cycle - Delay = Delay + 3; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output serializer - Delay = Delay + 1; - // sft - Delay = Delay + 1; - } else if (pixelFormat == dm_n422) { - // sfr - Delay = Delay + 2; - // dsccif - Delay = Delay + 1; - // dscc - input deserializer - Delay = Delay + 5; - // dscc - input cdc fifo - Delay = Delay + 25; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output cdc fifo - Delay = Delay + 10; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output serializer - Delay = Delay + 1; - // sft - Delay = Delay + 1; - } else { - // sfr - Delay = Delay + 2; - // dsccif - Delay = Delay + 0; - // dscc - input deserializer - Delay = Delay + 3; - // dscc - input cdc fifo - Delay = Delay + 12; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output cdc fifo - Delay = Delay + 7; - // dscc - output serializer - Delay = Delay + 1; - // dscc - cdc uncertainty - Delay = Delay + 2; - // sft - Delay = Delay + 1; - } - - return Delay; -} - -static bool CalculatePrefetchSchedule( - struct display_mode_lib *mode_lib, - double DPPCLK, - double DISPCLK, - double PixelClock, - double DCFClkDeepSleep, - unsigned int DSCDelay, - unsigned int DPPPerPlane, - bool ScalerEnabled, - unsigned int NumberOfCursors, - double DPPCLKDelaySubtotal, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCFormater, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, - unsigned int ScalerRecoutWidth, - enum output_format_class OutputFormat, - unsigned int VBlank, - unsigned int HTotal, - unsigned int MaxInterDCNTileRepeaters, - unsigned int VStartup, - unsigned int PageTableLevels, - bool VirtualMemoryEnable, - bool DynamicMetadataEnable, - unsigned int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, - bool DCCEnable, - double UrgentLatency, - double UrgentExtraLatency, - double TCalc, - unsigned int PDEAndMetaPTEBytesFrame, - unsigned int MetaRowByte, - unsigned int PixelPTEBytesPerRow, - double PrefetchSourceLinesY, - unsigned int SwathWidthY, - double BytePerPixelDETY, - double VInitPreFillY, - unsigned int MaxNumSwathY, - double PrefetchSourceLinesC, - double BytePerPixelDETC, - double VInitPreFillC, - unsigned int MaxNumSwathC, - unsigned int SwathHeightY, - unsigned int SwathHeightC, - double TWait, - bool XFCEnabled, - double XFCRemoteSurfaceFlipDelay, - bool InterlaceEnable, - bool ProgressiveToInterlaceUnitInOPP, - double *DSTXAfterScaler, - double *DSTYAfterScaler, - double *DestinationLinesForPrefetch, - double *PrefetchBandwidth, - double *DestinationLinesToRequestVMInVBlank, - double *DestinationLinesToRequestRowInVBlank, - double *VRatioPrefetchY, - double *VRatioPrefetchC, - double *RequiredPrefetchPixDataBW, - unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, - double *Tno_bw, - unsigned int *VUpdateOffsetPix, - unsigned int *VUpdateWidthPix, - unsigned int *VReadyOffsetPix) -{ - bool MyError = false; - unsigned int DPPCycles, DISPCLKCycles; - double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime; - double Tdm, LineTime, Tsetup; - double dst_y_prefetch_equ; - double Tsw_oto; - double prefetch_bw_oto; - double Tvm_oto; - double Tr0_oto; - double Tpre_oto; - double dst_y_prefetch_oto; - double TimeForFetchingMetaPTE = 0; - double TimeForFetchingRowInVBlank = 0; - double LinesToRequestPrefetchPixelData = 0; - - if (ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; - else - DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; - - DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor; - - DISPCLKCycles = DISPCLKDelaySubtotal; - - if (DPPCLK == 0.0 || DISPCLK == 0.0) - return true; - - *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK - + DSCDelay; - - if (DPPPerPlane > 1) - *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; - - if (OutputFormat == dm_420 || (InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) - *DSTYAfterScaler = 1; - else - *DSTYAfterScaler = 0; - - DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1); - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal)); - - *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); - TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK); - *VUpdateWidthPix = (14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) - * PixelClock; - - *VReadyOffsetPix = dml_max( - 150.0 / DPPCLK, - TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) - * PixelClock; - - Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; - - LineTime = (double) HTotal / PixelClock; - - if (DynamicMetadataEnable) { - double Tdmbf, Tdmec, Tdmsks; - - Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); - Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; - Tdmec = LineTime; - if (DynamicMetadataLinesBeforeActiveRequired == 0) - Tdmsks = VBlank * LineTime / 2.0; - else - Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; - if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) - Tdmsks = Tdmsks / 2; - if (VStartup * LineTime - < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { - MyError = true; - *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait - + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime; - } else - *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0; - } else - Tdm = 0; - - if (VirtualMemoryEnable) { - if (PageTableLevels == 4) - *Tno_bw = UrgentExtraLatency + UrgentLatency; - else if (PageTableLevels == 3) - *Tno_bw = UrgentExtraLatency; - else - *Tno_bw = 0; - } else if (DCCEnable) - *Tno_bw = LineTime; - else - *Tno_bw = LineTime / 4; - - dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime - - (Tsetup + Tdm) / LineTime - - (*DSTYAfterScaler + *DSTXAfterScaler / HTotal); - - Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; - - prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow - + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) - + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2)) - / Tsw_oto; - - if (VirtualMemoryEnable == true) { - Tvm_oto = - dml_max( - *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto, - dml_max( - UrgentExtraLatency - + UrgentLatency - * (PageTableLevels - - 1), - LineTime / 4.0)); - } else - Tvm_oto = LineTime / 4.0; - - if ((VirtualMemoryEnable == true || DCCEnable == true)) { - Tr0_oto = dml_max( - (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto, - dml_max(UrgentLatency, dml_max(LineTime - Tvm_oto, LineTime / 4))); - } else - Tr0_oto = LineTime - Tvm_oto; - - Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto; - - dst_y_prefetch_oto = Tpre_oto / LineTime; - - if (dst_y_prefetch_oto < dst_y_prefetch_equ) - *DestinationLinesForPrefetch = dst_y_prefetch_oto; - else - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - - *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1) - / 4; - - dml_print("DML: VStartup: %d\n", VStartup); - dml_print("DML: TCalc: %f\n", TCalc); - dml_print("DML: TWait: %f\n", TWait); - dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); - dml_print("DML: LineTime: %f\n", LineTime); - dml_print("DML: Tsetup: %f\n", Tsetup); - dml_print("DML: Tdm: %f\n", Tdm); - dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler); - dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler); - dml_print("DML: HTotal: %d\n", HTotal); - - *PrefetchBandwidth = 0; - *DestinationLinesToRequestVMInVBlank = 0; - *DestinationLinesToRequestRowInVBlank = 0; - *VRatioPrefetchY = 0; - *VRatioPrefetchC = 0; - *RequiredPrefetchPixDataBW = 0; - if (*DestinationLinesForPrefetch > 1) { - *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte - + 2 * PixelPTEBytesPerRow - + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) - + PrefetchSourceLinesC * SwathWidthY / 2 - * dml_ceil(BytePerPixelDETC, 2)) - / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); - if (VirtualMemoryEnable) { - TimeForFetchingMetaPTE = - dml_max( - *Tno_bw - + (double) PDEAndMetaPTEBytesFrame - / *PrefetchBandwidth, - dml_max( - UrgentExtraLatency - + UrgentLatency - * (PageTableLevels - - 1), - LineTime / 4)); - } else { - if (NumberOfCursors > 0 || XFCEnabled) - TimeForFetchingMetaPTE = LineTime / 4; - else - TimeForFetchingMetaPTE = 0.0; - } - - if ((VirtualMemoryEnable == true || DCCEnable == true)) { - TimeForFetchingRowInVBlank = - dml_max( - (MetaRowByte + PixelPTEBytesPerRow) - / *PrefetchBandwidth, - dml_max( - UrgentLatency, - dml_max( - LineTime - - TimeForFetchingMetaPTE, - LineTime - / 4.0))); - } else { - if (NumberOfCursors > 0 || XFCEnabled) - TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE; - else - TimeForFetchingRowInVBlank = 0.0; - } - - *DestinationLinesToRequestVMInVBlank = dml_floor( - 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125), - 1) / 4.0; - - *DestinationLinesToRequestRowInVBlank = dml_floor( - 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125), - 1) / 4.0; - - LinesToRequestPrefetchPixelData = - *DestinationLinesForPrefetch - - ((NumberOfCursors > 0 || VirtualMemoryEnable - || DCCEnable) ? - (*DestinationLinesToRequestVMInVBlank - + *DestinationLinesToRequestRowInVBlank) : - 0.0); - - if (LinesToRequestPrefetchPixelData > 0) { - - *VRatioPrefetchY = (double) PrefetchSourceLinesY - / LinesToRequestPrefetchPixelData; - *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); - if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { - *VRatioPrefetchY = - dml_max( - (double) PrefetchSourceLinesY - / LinesToRequestPrefetchPixelData, - (double) MaxNumSwathY - * SwathHeightY - / (LinesToRequestPrefetchPixelData - - (VInitPreFillY - - 3.0) - / 2.0)); - *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); - } else { - MyError = true; - *VRatioPrefetchY = 0; - } - } - - *VRatioPrefetchC = (double) PrefetchSourceLinesC - / LinesToRequestPrefetchPixelData; - *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); - - if ((SwathHeightC > 4)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { - *VRatioPrefetchC = - dml_max( - *VRatioPrefetchC, - (double) MaxNumSwathC - * SwathHeightC - / (LinesToRequestPrefetchPixelData - - (VInitPreFillC - - 3.0) - / 2.0)); - *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); - } else { - MyError = true; - *VRatioPrefetchC = 0; - } - } - - *RequiredPrefetchPixDataBW = - DPPPerPlane - * ((double) PrefetchSourceLinesY - / LinesToRequestPrefetchPixelData - * dml_ceil( - BytePerPixelDETY, - 1) - + (double) PrefetchSourceLinesC - / LinesToRequestPrefetchPixelData - * dml_ceil( - BytePerPixelDETC, - 2) - / 2) - * SwathWidthY / LineTime; - } else { - MyError = true; - *VRatioPrefetchY = 0; - *VRatioPrefetchC = 0; - *RequiredPrefetchPixDataBW = 0; - } - - } else { - MyError = true; - } - - if (MyError) { - *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; - *DestinationLinesToRequestVMInVBlank = 0; - *DestinationLinesToRequestRowInVBlank = 0; - *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; - *VRatioPrefetchY = 0; - *VRatioPrefetchC = 0; - *RequiredPrefetchPixDataBW = 0; - } - - return MyError; -} - -static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) -{ - return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); -} - -static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) -{ - return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); -} - -static double CalculatePrefetchSourceLines( - struct display_mode_lib *mode_lib, - double VRatio, - double vtaps, - bool Interlace, - bool ProgressiveToInterlaceUnitInOPP, - unsigned int SwathHeight, - unsigned int ViewportYStart, - double *VInitPreFill, - unsigned int *MaxNumSwath) -{ - unsigned int MaxPartialSwath; - - if (ProgressiveToInterlaceUnitInOPP) - *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); - else - *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); - - if (!mode_lib->vba.IgnoreViewportPositioning) { - - *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; - - if (*VInitPreFill > 1.0) - MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; - else - MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) - % SwathHeight; - MaxPartialSwath = dml_max(1U, MaxPartialSwath); - - } else { - - if (ViewportYStart != 0) - dml_print( - "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); - - *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); - - if (*VInitPreFill > 1.0) - MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; - else - MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) - % SwathHeight; - } - - return *MaxNumSwath * SwathHeight + MaxPartialSwath; -} - -static unsigned int CalculateVMAndRowBytes( - struct display_mode_lib *mode_lib, - bool DCCEnable, - unsigned int BlockHeight256Bytes, - unsigned int BlockWidth256Bytes, - enum source_format_class SourcePixelFormat, - unsigned int SurfaceTiling, - unsigned int BytePerPixel, - enum scan_direction_class ScanDirection, - unsigned int ViewportWidth, - unsigned int ViewportHeight, - unsigned int SwathWidth, - bool VirtualMemoryEnable, - unsigned int VMMPageSize, - unsigned int PTEBufferSizeInRequests, - unsigned int PDEProcessingBufIn64KBReqs, - unsigned int Pitch, - unsigned int DCCMetaPitch, - unsigned int *MacroTileWidth, - unsigned int *MetaRowByte, - unsigned int *PixelPTEBytesPerRow, - bool *PTEBufferSizeNotExceeded, - unsigned int *dpte_row_height, - unsigned int *meta_row_height) -{ - unsigned int MetaRequestHeight; - unsigned int MetaRequestWidth; - unsigned int MetaSurfWidth; - unsigned int MetaSurfHeight; - unsigned int MPDEBytesFrame; - unsigned int MetaPTEBytesFrame; - unsigned int DCCMetaSurfaceBytes; - - unsigned int MacroTileSizeBytes; - unsigned int MacroTileHeight; - unsigned int DPDE0BytesFrame; - unsigned int ExtraDPDEBytesFrame; - unsigned int PDEAndMetaPTEBytesFrame; - - if (DCCEnable == true) { - MetaRequestHeight = 8 * BlockHeight256Bytes; - MetaRequestWidth = 8 * BlockWidth256Bytes; - if (ScanDirection == dm_horz) { - *meta_row_height = MetaRequestHeight; - MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth) - + MetaRequestWidth; - *MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0; - } else { - *meta_row_height = MetaRequestWidth; - MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight) - + MetaRequestHeight; - *MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0; - } - if (ScanDirection == dm_horz) { - DCCMetaSurfaceBytes = DCCMetaPitch - * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) - + 64 * BlockHeight256Bytes) * BytePerPixel - / 256; - } else { - DCCMetaSurfaceBytes = DCCMetaPitch - * (dml_ceil( - (double) ViewportHeight - 1, - 64 * BlockHeight256Bytes) - + 64 * BlockHeight256Bytes) * BytePerPixel - / 256; - } - if (VirtualMemoryEnable == true) { - MetaPTEBytesFrame = (dml_ceil( - (double) (DCCMetaSurfaceBytes - VMMPageSize) - / (8 * VMMPageSize), - 1) + 1) * 64; - MPDEBytesFrame = 128 * (mode_lib->vba.MaxPageTableLevels - 1); - } else { - MetaPTEBytesFrame = 0; - MPDEBytesFrame = 0; - } - } else { - MetaPTEBytesFrame = 0; - MPDEBytesFrame = 0; - *MetaRowByte = 0; - } - - if (SurfaceTiling == dm_sw_linear) { - MacroTileSizeBytes = 256; - MacroTileHeight = 1; - } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x - || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { - MacroTileSizeBytes = 4096; - MacroTileHeight = 4 * BlockHeight256Bytes; - } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t - || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d - || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x - || SurfaceTiling == dm_sw_64kb_r_x) { - MacroTileSizeBytes = 65536; - MacroTileHeight = 16 * BlockHeight256Bytes; - } else { - MacroTileSizeBytes = 262144; - MacroTileHeight = 32 * BlockHeight256Bytes; - } - *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; - - if (VirtualMemoryEnable == true && mode_lib->vba.MaxPageTableLevels > 1) { - if (ScanDirection == dm_horz) { - DPDE0BytesFrame = - 64 - * (dml_ceil( - ((Pitch - * (dml_ceil( - ViewportHeight - - 1, - MacroTileHeight) - + MacroTileHeight) - * BytePerPixel) - - MacroTileSizeBytes) - / (8 - * 2097152), - 1) + 1); - } else { - DPDE0BytesFrame = - 64 - * (dml_ceil( - ((Pitch - * (dml_ceil( - (double) SwathWidth - - 1, - MacroTileHeight) - + MacroTileHeight) - * BytePerPixel) - - MacroTileSizeBytes) - / (8 - * 2097152), - 1) + 1); - } - ExtraDPDEBytesFrame = 128 * (mode_lib->vba.MaxPageTableLevels - 2); - } else { - DPDE0BytesFrame = 0; - ExtraDPDEBytesFrame = 0; - } - - PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame - + ExtraDPDEBytesFrame; - - if (VirtualMemoryEnable == true) { - unsigned int PTERequestSize; - unsigned int PixelPTEReqHeight; - unsigned int PixelPTEReqWidth; - double FractionOfPTEReturnDrop; - unsigned int EffectivePDEProcessingBufIn64KBReqs; - - if (SurfaceTiling == dm_sw_linear) { - PixelPTEReqHeight = 1; - PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; - PTERequestSize = 64; - FractionOfPTEReturnDrop = 0; - } else if (MacroTileSizeBytes == 4096) { - PixelPTEReqHeight = MacroTileHeight; - PixelPTEReqWidth = 8 * *MacroTileWidth; - PTERequestSize = 64; - if (ScanDirection == dm_horz) - FractionOfPTEReturnDrop = 0; - else - FractionOfPTEReturnDrop = 7 / 8; - } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { - PixelPTEReqHeight = 16 * BlockHeight256Bytes; - PixelPTEReqWidth = 16 * BlockWidth256Bytes; - PTERequestSize = 128; - FractionOfPTEReturnDrop = 0; - } else { - PixelPTEReqHeight = MacroTileHeight; - PixelPTEReqWidth = 8 * *MacroTileWidth; - PTERequestSize = 64; - FractionOfPTEReturnDrop = 0; - } - - if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) - EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2; - else - EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs; - - if (SurfaceTiling == dm_sw_linear) { - *dpte_row_height = - dml_min( - 128, - 1 - << (unsigned int) dml_floor( - dml_log2( - dml_min( - (double) PTEBufferSizeInRequests - * PixelPTEReqWidth, - EffectivePDEProcessingBufIn64KBReqs - * 65536.0 - / BytePerPixel) - / Pitch), - 1)); - *PixelPTEBytesPerRow = PTERequestSize - * (dml_ceil( - (double) (Pitch * *dpte_row_height - 1) - / PixelPTEReqWidth, - 1) + 1); - } else if (ScanDirection == dm_horz) { - *dpte_row_height = PixelPTEReqHeight; - *PixelPTEBytesPerRow = PTERequestSize - * (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1) - + 1); - } else { - *dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth); - *PixelPTEBytesPerRow = PTERequestSize - * (dml_ceil( - ((double) SwathWidth - 1) - / PixelPTEReqHeight, - 1) + 1); - } - if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) - <= 64 * PTEBufferSizeInRequests) { - *PTEBufferSizeNotExceeded = true; - } else { - *PTEBufferSizeNotExceeded = false; - } - } else { - *PixelPTEBytesPerRow = 0; - *PTEBufferSizeNotExceeded = true; - } - - return PDEAndMetaPTEBytesFrame; -} - -static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( - struct display_mode_lib *mode_lib) -{ - unsigned int j, k; - - mode_lib->vba.WritebackDISPCLK = 0.0; - mode_lib->vba.DISPCLKWithRamping = 0; - mode_lib->vba.DISPCLKWithoutRamping = 0; - mode_lib->vba.GlobalDPPCLK = 0.0; - - // dml_ml->vba.DISPCLK and dml_ml->vba.DPPCLK Calculation - // - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.WritebackEnable[k]) { - mode_lib->vba.WritebackDISPCLK = - dml_max( - mode_lib->vba.WritebackDISPCLK, - CalculateWriteBackDISPCLK( - mode_lib->vba.WritebackPixelFormat[k], - mode_lib->vba.PixelClock[k], - mode_lib->vba.WritebackHRatio[k], - mode_lib->vba.WritebackVRatio[k], - mode_lib->vba.WritebackLumaHTaps[k], - mode_lib->vba.WritebackLumaVTaps[k], - mode_lib->vba.WritebackChromaHTaps[k], - mode_lib->vba.WritebackChromaVTaps[k], - mode_lib->vba.WritebackDestinationWidth[k], - mode_lib->vba.HTotal[k], - mode_lib->vba.WritebackChromaLineBufferWidth)); - } - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.HRatio[k] > 1) { - mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput - * mode_lib->vba.HRatio[k] - / dml_ceil( - mode_lib->vba.htaps[k] - / 6.0, - 1)); - } else { - mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput); - } - - mode_lib->vba.DPPCLKUsingSingleDPPLuma = - mode_lib->vba.PixelClock[k] - * dml_max( - mode_lib->vba.vtaps[k] / 6.0 - * dml_min( - 1.0, - mode_lib->vba.HRatio[k]), - dml_max( - mode_lib->vba.HRatio[k] - * mode_lib->vba.VRatio[k] - / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k], - 1.0)); - - if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) - && mode_lib->vba.DPPCLKUsingSingleDPPLuma - < 2 * mode_lib->vba.PixelClock[k]) { - mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; - } - - if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 - && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { - mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0; - mode_lib->vba.DPPCLKUsingSingleDPP[k] = - mode_lib->vba.DPPCLKUsingSingleDPPLuma; - } else { - if (mode_lib->vba.HRatio[k] > 1) { - mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = - dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput - * mode_lib->vba.HRatio[k] - / 2 - / dml_ceil( - mode_lib->vba.HTAPsChroma[k] - / 6.0, - 1.0)); - } else { - mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput); - } - mode_lib->vba.DPPCLKUsingSingleDPPChroma = - mode_lib->vba.PixelClock[k] - * dml_max( - mode_lib->vba.VTAPsChroma[k] - / 6.0 - * dml_min( - 1.0, - mode_lib->vba.HRatio[k] - / 2), - dml_max( - mode_lib->vba.HRatio[k] - * mode_lib->vba.VRatio[k] - / 4 - / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k], - 1.0)); - - if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) - && mode_lib->vba.DPPCLKUsingSingleDPPChroma - < 2 * mode_lib->vba.PixelClock[k]) { - mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 - * mode_lib->vba.PixelClock[k]; - } - - mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max( - mode_lib->vba.DPPCLKUsingSingleDPPLuma, - mode_lib->vba.DPPCLKUsingSingleDPPChroma); - } - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.BlendingAndTiming[k] != k) - continue; - if (mode_lib->vba.ODMCombineEnabled[k]) { - mode_lib->vba.DISPCLKWithRamping = - dml_max( - mode_lib->vba.DISPCLKWithRamping, - mode_lib->vba.PixelClock[k] / 2 - * (1 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100) - * (1 - + mode_lib->vba.DISPCLKRampingMargin - / 100)); - mode_lib->vba.DISPCLKWithoutRamping = - dml_max( - mode_lib->vba.DISPCLKWithoutRamping, - mode_lib->vba.PixelClock[k] / 2 - * (1 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100)); - } else if (!mode_lib->vba.ODMCombineEnabled[k]) { - mode_lib->vba.DISPCLKWithRamping = - dml_max( - mode_lib->vba.DISPCLKWithRamping, - mode_lib->vba.PixelClock[k] - * (1 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100) - * (1 - + mode_lib->vba.DISPCLKRampingMargin - / 100)); - mode_lib->vba.DISPCLKWithoutRamping = - dml_max( - mode_lib->vba.DISPCLKWithoutRamping, - mode_lib->vba.PixelClock[k] - * (1 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100)); - } - } - - mode_lib->vba.DISPCLKWithRamping = dml_max( - mode_lib->vba.DISPCLKWithRamping, - mode_lib->vba.WritebackDISPCLK); - mode_lib->vba.DISPCLKWithoutRamping = dml_max( - mode_lib->vba.DISPCLKWithoutRamping, - mode_lib->vba.WritebackDISPCLK); - - ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); - mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( - mode_lib->vba.DISPCLKWithRamping, - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( - mode_lib->vba.DISPCLKWithoutRamping, - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( - mode_lib->vba.soc.clock_limits[NumberOfStates - 1].dispclk_mhz, - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity - > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { - mode_lib->vba.DISPCLK_calculated = - mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; - } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity - > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { - mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; - } else { - mode_lib->vba.DISPCLK_calculated = - mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; - } - DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k] - / mode_lib->vba.DPPPerPlane[k] - * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); - mode_lib->vba.GlobalDPPCLK = dml_max( - mode_lib->vba.GlobalDPPCLK, - mode_lib->vba.DPPCLK_calculated[k]); - } - mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( - mode_lib->vba.GlobalDPPCLK, - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 - * dml_ceil( - mode_lib->vba.DPPCLK_calculated[k] * 255 - / mode_lib->vba.GlobalDPPCLK, - 1); - DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); - } - - // Urgent Watermark - mode_lib->vba.DCCEnabledAnyPlane = false; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) - if (mode_lib->vba.DCCEnable[k]) - mode_lib->vba.DCCEnabledAnyPlane = true; - - mode_lib->vba.ReturnBandwidthToDCN = dml_min( - mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, - mode_lib->vba.FabricAndDRAMBandwidth * 1000) - * mode_lib->vba.PercentOfIdealDRAMAndFabricBWReceivedAfterUrgLatency / 100; - - mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN; - mode_lib->vba.ReturnBW = adjust_ReturnBW( - mode_lib, - mode_lib->vba.ReturnBW, - mode_lib->vba.DCCEnabledAnyPlane, - mode_lib->vba.ReturnBandwidthToDCN); - - // Let's do this calculation again?? - mode_lib->vba.ReturnBandwidthToDCN = dml_min( - mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, - mode_lib->vba.FabricAndDRAMBandwidth * 1000); - mode_lib->vba.ReturnBW = adjust_ReturnBW( - mode_lib, - mode_lib->vba.ReturnBW, - mode_lib->vba.DCCEnabledAnyPlane, - mode_lib->vba.ReturnBandwidthToDCN); - - DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); - DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); - DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - bool MainPlaneDoesODMCombine = false; - - if (mode_lib->vba.SourceScan[k] == dm_horz) - mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; - else - mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - - if (mode_lib->vba.ODMCombineEnabled[k] == true) - MainPlaneDoesODMCombine = true; - for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) - if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) - MainPlaneDoesODMCombine = true; - - if (MainPlaneDoesODMCombine == true) - mode_lib->vba.SwathWidthY[k] = dml_min( - (double) mode_lib->vba.SwathWidthSingleDPPY[k], - dml_round( - mode_lib->vba.HActive[k] / 2.0 - * mode_lib->vba.HRatio[k])); - else - mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k] - / mode_lib->vba.DPPPerPlane[k]; - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { - mode_lib->vba.BytePerPixelDETY[k] = 8; - mode_lib->vba.BytePerPixelDETC[k] = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { - mode_lib->vba.BytePerPixelDETY[k] = 4; - mode_lib->vba.BytePerPixelDETC[k] = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { - mode_lib->vba.BytePerPixelDETY[k] = 2; - mode_lib->vba.BytePerPixelDETC[k] = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { - mode_lib->vba.BytePerPixelDETY[k] = 1; - mode_lib->vba.BytePerPixelDETC[k] = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { - mode_lib->vba.BytePerPixelDETY[k] = 1; - mode_lib->vba.BytePerPixelDETC[k] = 2; - } else { // dm_420_10 - mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0; - mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0; - } - } - - mode_lib->vba.TotalDataReadBandwidth = 0.0; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] - * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) - / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) - * mode_lib->vba.VRatio[k]; - mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] - / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) - / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) - * mode_lib->vba.VRatio[k] / 2; - DTRACE( - " read_bw[%i] = %fBps", - k, - mode_lib->vba.ReadBandwidthPlaneLuma[k] - + mode_lib->vba.ReadBandwidthPlaneChroma[k]); - mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k] - + mode_lib->vba.ReadBandwidthPlaneChroma[k]; - } - - mode_lib->vba.TotalDCCActiveDPP = 0; - mode_lib->vba.TotalActiveDPP = 0; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP - + mode_lib->vba.DPPPerPlane[k]; - if (mode_lib->vba.DCCEnable[k]) - mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP - + mode_lib->vba.DPPPerPlane[k]; - } - - mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = - (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK - + mode_lib->vba.UrgentOutOfOrderReturnPerChannel - * mode_lib->vba.NumberOfChannels - / mode_lib->vba.ReturnBW; - - mode_lib->vba.LastPixelOfLineExtraWatermark = 0; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - double DataFabricLineDeliveryTimeLuma, DataFabricLineDeliveryTimeChroma; - - if (mode_lib->vba.VRatio[k] <= 1.0) - mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = - (double) mode_lib->vba.SwathWidthY[k] - * mode_lib->vba.DPPPerPlane[k] - / mode_lib->vba.HRatio[k] - / mode_lib->vba.PixelClock[k]; - else - mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = - (double) mode_lib->vba.SwathWidthY[k] - / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] - / mode_lib->vba.DPPCLK[k]; - - DataFabricLineDeliveryTimeLuma = mode_lib->vba.SwathWidthSingleDPPY[k] - * mode_lib->vba.SwathHeightY[k] - * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) - / (mode_lib->vba.ReturnBW * mode_lib->vba.ReadBandwidthPlaneLuma[k] - / mode_lib->vba.TotalDataReadBandwidth); - mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max( - mode_lib->vba.LastPixelOfLineExtraWatermark, - DataFabricLineDeliveryTimeLuma - - mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]); - - if (mode_lib->vba.BytePerPixelDETC[k] == 0) - mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = 0.0; - else if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) - mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = - mode_lib->vba.SwathWidthY[k] / 2.0 - * mode_lib->vba.DPPPerPlane[k] - / (mode_lib->vba.HRatio[k] / 2.0) - / mode_lib->vba.PixelClock[k]; - else - mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = - mode_lib->vba.SwathWidthY[k] / 2.0 - / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] - / mode_lib->vba.DPPCLK[k]; - - DataFabricLineDeliveryTimeChroma = mode_lib->vba.SwathWidthSingleDPPY[k] / 2.0 - * mode_lib->vba.SwathHeightC[k] - * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) - / (mode_lib->vba.ReturnBW - * mode_lib->vba.ReadBandwidthPlaneChroma[k] - / mode_lib->vba.TotalDataReadBandwidth); - mode_lib->vba.LastPixelOfLineExtraWatermark = - dml_max( - mode_lib->vba.LastPixelOfLineExtraWatermark, - DataFabricLineDeliveryTimeChroma - - mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); - } - - mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency - + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte - + mode_lib->vba.TotalDCCActiveDPP - * mode_lib->vba.MetaChunkSize) * 1024.0 - / mode_lib->vba.ReturnBW; - - if (mode_lib->vba.VirtualMemoryEnable) - mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP - * mode_lib->vba.PTEChunkSize * 1024.0 / mode_lib->vba.ReturnBW; - - mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatency - + mode_lib->vba.LastPixelOfLineExtraWatermark - + mode_lib->vba.UrgentExtraLatency; - - DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency); - DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark); - - mode_lib->vba.MemoryTripWatermark = mode_lib->vba.UrgentLatency; - - mode_lib->vba.TotalActiveWriteback = 0; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.WritebackEnable[k]) - mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; - } - - if (mode_lib->vba.TotalActiveWriteback <= 1) - mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency; - else - mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency - + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 - / mode_lib->vba.SOCCLK; - - DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark); - - // NB P-State/DRAM Clock Change Watermark - mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency - + mode_lib->vba.UrgentWatermark; - - DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark); - - DTRACE(" calculating wb pstate watermark"); - DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback); - DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK); - - if (mode_lib->vba.TotalActiveWriteback <= 1) - mode_lib->vba.WritebackDRAMClockChangeWatermark = - mode_lib->vba.DRAMClockChangeLatency - + mode_lib->vba.WritebackLatency; - else - mode_lib->vba.WritebackDRAMClockChangeWatermark = - mode_lib->vba.DRAMClockChangeLatency - + mode_lib->vba.WritebackLatency - + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 - / mode_lib->vba.SOCCLK; - - DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark); - - // Stutter Efficiency - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k] - / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k]; - mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor( - mode_lib->vba.LinesInDETY[k], - mode_lib->vba.SwathHeightY[k]); - mode_lib->vba.FullDETBufferingTimeY[k] = - mode_lib->vba.LinesInDETYRoundedDownToSwath[k] - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - / mode_lib->vba.VRatio[k]; - if (mode_lib->vba.BytePerPixelDETC[k] > 0) { - mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k] - / mode_lib->vba.BytePerPixelDETC[k] - / (mode_lib->vba.SwathWidthY[k] / 2); - mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor( - mode_lib->vba.LinesInDETC[k], - mode_lib->vba.SwathHeightC[k]); - mode_lib->vba.FullDETBufferingTimeC[k] = - mode_lib->vba.LinesInDETCRoundedDownToSwath[k] - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - / (mode_lib->vba.VRatio[k] / 2); - } else { - mode_lib->vba.LinesInDETC[k] = 0; - mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0; - mode_lib->vba.FullDETBufferingTimeC[k] = 999999; - } - } - - mode_lib->vba.MinFullDETBufferingTime = 999999.0; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.FullDETBufferingTimeY[k] - < mode_lib->vba.MinFullDETBufferingTime) { - mode_lib->vba.MinFullDETBufferingTime = - mode_lib->vba.FullDETBufferingTimeY[k]; - mode_lib->vba.FrameTimeForMinFullDETBufferingTime = - (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]; - } - if (mode_lib->vba.FullDETBufferingTimeC[k] - < mode_lib->vba.MinFullDETBufferingTime) { - mode_lib->vba.MinFullDETBufferingTime = - mode_lib->vba.FullDETBufferingTimeC[k]; - mode_lib->vba.FrameTimeForMinFullDETBufferingTime = - (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]; - } - } - - mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.DCCEnable[k]) { - mode_lib->vba.AverageReadBandwidthGBytePerSecond = - mode_lib->vba.AverageReadBandwidthGBytePerSecond - + mode_lib->vba.ReadBandwidthPlaneLuma[k] - / mode_lib->vba.DCCRate[k] - / 1000 - + mode_lib->vba.ReadBandwidthPlaneChroma[k] - / mode_lib->vba.DCCRate[k] - / 1000; - } else { - mode_lib->vba.AverageReadBandwidthGBytePerSecond = - mode_lib->vba.AverageReadBandwidthGBytePerSecond - + mode_lib->vba.ReadBandwidthPlaneLuma[k] - / 1000 - + mode_lib->vba.ReadBandwidthPlaneChroma[k] - / 1000; - } - if (mode_lib->vba.DCCEnable[k]) { - mode_lib->vba.AverageReadBandwidthGBytePerSecond = - mode_lib->vba.AverageReadBandwidthGBytePerSecond - + mode_lib->vba.ReadBandwidthPlaneLuma[k] - / 1000 / 256 - + mode_lib->vba.ReadBandwidthPlaneChroma[k] - / 1000 / 256; - } - if (mode_lib->vba.VirtualMemoryEnable) { - mode_lib->vba.AverageReadBandwidthGBytePerSecond = - mode_lib->vba.AverageReadBandwidthGBytePerSecond - + mode_lib->vba.ReadBandwidthPlaneLuma[k] - / 1000 / 512 - + mode_lib->vba.ReadBandwidthPlaneChroma[k] - / 1000 / 512; - } - } - - mode_lib->vba.PartOfBurstThatFitsInROB = - dml_min( - mode_lib->vba.MinFullDETBufferingTime - * mode_lib->vba.TotalDataReadBandwidth, - mode_lib->vba.ROBBufferSizeInKByte * 1024 - * mode_lib->vba.TotalDataReadBandwidth - / (mode_lib->vba.AverageReadBandwidthGBytePerSecond - * 1000)); - mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB - * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000) - / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW - + (mode_lib->vba.MinFullDETBufferingTime - * mode_lib->vba.TotalDataReadBandwidth - - mode_lib->vba.PartOfBurstThatFitsInROB) - / (mode_lib->vba.DCFCLK * 64); - if (mode_lib->vba.TotalActiveWriteback == 0) { - mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 - - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) - / mode_lib->vba.MinFullDETBufferingTime) * 100; - } else { - mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; - } - - mode_lib->vba.SmallestVBlank = 999999; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { - mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] - - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]; - } else { - mode_lib->vba.VBlankTime = 0; - } - mode_lib->vba.SmallestVBlank = dml_min( - mode_lib->vba.SmallestVBlank, - mode_lib->vba.VBlankTime); - } - - mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 - * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime - - mode_lib->vba.SmallestVBlank) - + mode_lib->vba.SmallestVBlank) - / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; - - // dml_ml->vba.DCFCLK Deep Sleep - mode_lib->vba.DCFClkDeepSleep = 8.0; - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) { - if (mode_lib->vba.BytePerPixelDETC[k] > 0) { - mode_lib->vba.DCFCLKDeepSleepPerPlane = - dml_max( - 1.1 * mode_lib->vba.SwathWidthY[k] - * dml_ceil( - mode_lib->vba.BytePerPixelDETY[k], - 1) / 32 - / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], - 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0 - * dml_ceil( - mode_lib->vba.BytePerPixelDETC[k], - 2) / 32 - / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); - } else - mode_lib->vba.DCFCLKDeepSleepPerPlane = 1.1 * mode_lib->vba.SwathWidthY[k] - * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0 - / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]; - mode_lib->vba.DCFCLKDeepSleepPerPlane = dml_max( - mode_lib->vba.DCFCLKDeepSleepPerPlane, - mode_lib->vba.PixelClock[k] / 16.0); - mode_lib->vba.DCFClkDeepSleep = dml_max( - mode_lib->vba.DCFClkDeepSleep, - mode_lib->vba.DCFCLKDeepSleepPerPlane); - - DTRACE( - " dcfclk_deepsleep_per_plane[%i] = %fMHz", - k, - mode_lib->vba.DCFCLKDeepSleepPerPlane); - } - - DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFClkDeepSleep); - - // Stutter Watermark - mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime - + mode_lib->vba.LastPixelOfLineExtraWatermark - + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFClkDeepSleep; - mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime - + mode_lib->vba.LastPixelOfLineExtraWatermark - + mode_lib->vba.UrgentExtraLatency; - - DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark); - DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark); - - // Urgent Latency Supported - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.EffectiveDETPlusLBLinesLuma = - dml_floor( - mode_lib->vba.LinesInDETY[k] - + dml_min( - mode_lib->vba.LinesInDETY[k] - * mode_lib->vba.DPPCLK[k] - * mode_lib->vba.BytePerPixelDETY[k] - * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] - / (mode_lib->vba.ReturnBW - / mode_lib->vba.DPPPerPlane[k]), - (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), - mode_lib->vba.SwathHeightY[k]); - - mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma - * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) - / mode_lib->vba.VRatio[k] - - mode_lib->vba.EffectiveDETPlusLBLinesLuma - * mode_lib->vba.SwathWidthY[k] - * mode_lib->vba.BytePerPixelDETY[k] - / (mode_lib->vba.ReturnBW - / mode_lib->vba.DPPPerPlane[k]); - - if (mode_lib->vba.BytePerPixelDETC[k] > 0) { - mode_lib->vba.EffectiveDETPlusLBLinesChroma = - dml_floor( - mode_lib->vba.LinesInDETC[k] - + dml_min( - mode_lib->vba.LinesInDETC[k] - * mode_lib->vba.DPPCLK[k] - * mode_lib->vba.BytePerPixelDETC[k] - * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] - / (mode_lib->vba.ReturnBW - / mode_lib->vba.DPPPerPlane[k]), - (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), - mode_lib->vba.SwathHeightC[k]); - mode_lib->vba.UrgentLatencySupportUsChroma = - mode_lib->vba.EffectiveDETPlusLBLinesChroma - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - / (mode_lib->vba.VRatio[k] / 2) - - mode_lib->vba.EffectiveDETPlusLBLinesChroma - * (mode_lib->vba.SwathWidthY[k] - / 2) - * mode_lib->vba.BytePerPixelDETC[k] - / (mode_lib->vba.ReturnBW - / mode_lib->vba.DPPPerPlane[k]); - mode_lib->vba.UrgentLatencySupportUs[k] = dml_min( - mode_lib->vba.UrgentLatencySupportUsLuma, - mode_lib->vba.UrgentLatencySupportUsChroma); - } else { - mode_lib->vba.UrgentLatencySupportUs[k] = - mode_lib->vba.UrgentLatencySupportUsLuma; - } - } - - mode_lib->vba.MinUrgentLatencySupportUs = 999999; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.MinUrgentLatencySupportUs = dml_min( - mode_lib->vba.MinUrgentLatencySupportUs, - mode_lib->vba.UrgentLatencySupportUs[k]); - } - - // Non-Urgent Latency Tolerance - mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs - - mode_lib->vba.UrgentWatermark; - - // DSCCLK - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { - mode_lib->vba.DSCCLK_calculated[k] = 0.0; - } else { - if (mode_lib->vba.OutputFormat[k] == dm_420 - || mode_lib->vba.OutputFormat[k] == dm_n422) - mode_lib->vba.DSCFormatFactor = 2; - else - mode_lib->vba.DSCFormatFactor = 1; - if (mode_lib->vba.ODMCombineEnabled[k]) - mode_lib->vba.DSCCLK_calculated[k] = - mode_lib->vba.PixelClockBackEnd[k] / 6 - / mode_lib->vba.DSCFormatFactor - / (1 - - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100); - else - mode_lib->vba.DSCCLK_calculated[k] = - mode_lib->vba.PixelClockBackEnd[k] / 3 - / mode_lib->vba.DSCFormatFactor - / (1 - - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100); - } - } - - // DSC Delay - // TODO - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - double bpp = mode_lib->vba.OutputBpp[k]; - unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; - - if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { - if (!mode_lib->vba.ODMCombineEnabled[k]) { - mode_lib->vba.DSCDelay[k] = - dscceComputeDelay( - mode_lib->vba.DSCInputBitPerComponent[k], - bpp, - dml_ceil( - (double) mode_lib->vba.HActive[k] - / mode_lib->vba.NumberOfDSCSlices[k], - 1), - slices, - mode_lib->vba.OutputFormat[k]) - + dscComputeDelay( - mode_lib->vba.OutputFormat[k]); - } else { - mode_lib->vba.DSCDelay[k] = - 2 - * (dscceComputeDelay( - mode_lib->vba.DSCInputBitPerComponent[k], - bpp, - dml_ceil( - (double) mode_lib->vba.HActive[k] - / mode_lib->vba.NumberOfDSCSlices[k], - 1), - slices / 2.0, - mode_lib->vba.OutputFormat[k]) - + dscComputeDelay( - mode_lib->vba.OutputFormat[k])); - } - mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k] - * mode_lib->vba.PixelClock[k] - / mode_lib->vba.PixelClockBackEnd[k]; - } else { - mode_lib->vba.DSCDelay[k] = 0; - } - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) - for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes - if (j != k && mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.DSCEnabled[j]) - mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j]; - - // Prefetch - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - unsigned int PDEAndMetaPTEBytesFrameY; - unsigned int PixelPTEBytesPerRowY; - unsigned int MetaRowByteY; - unsigned int MetaRowByteC; - unsigned int PDEAndMetaPTEBytesFrameC; - unsigned int PixelPTEBytesPerRowC; - - Calculate256BBlockSizes( - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.SurfaceTiling[k], - dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), - dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2), - &mode_lib->vba.BlockHeight256BytesY[k], - &mode_lib->vba.BlockHeight256BytesC[k], - &mode_lib->vba.BlockWidth256BytesY[k], - &mode_lib->vba.BlockWidth256BytesC[k]); - PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( - mode_lib, - mode_lib->vba.DCCEnable[k], - mode_lib->vba.BlockHeight256BytesY[k], - mode_lib->vba.BlockWidth256BytesY[k], - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.SurfaceTiling[k], - dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), - mode_lib->vba.SourceScan[k], - mode_lib->vba.ViewportWidth[k], - mode_lib->vba.ViewportHeight[k], - mode_lib->vba.SwathWidthY[k], - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.VMMPageSize, - mode_lib->vba.PTEBufferSizeInRequests, - mode_lib->vba.PDEProcessingBufIn64KBReqs, - mode_lib->vba.PitchY[k], - mode_lib->vba.DCCMetaPitchY[k], - &mode_lib->vba.MacroTileWidthY[k], - &MetaRowByteY, - &PixelPTEBytesPerRowY, - &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel], - &mode_lib->vba.dpte_row_height[k], - &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( - mode_lib, - mode_lib->vba.VRatio[k], - mode_lib->vba.vtaps[k], - mode_lib->vba.Interlace[k], - mode_lib->vba.ProgressiveToInterlaceUnitInOPP, - mode_lib->vba.SwathHeightY[k], - mode_lib->vba.ViewportYStartY[k], - &mode_lib->vba.VInitPreFillY[k], - &mode_lib->vba.MaxNumSwathY[k]); - - if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { - PDEAndMetaPTEBytesFrameC = - CalculateVMAndRowBytes( - mode_lib, - mode_lib->vba.DCCEnable[k], - mode_lib->vba.BlockHeight256BytesC[k], - mode_lib->vba.BlockWidth256BytesC[k], - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.SurfaceTiling[k], - dml_ceil( - mode_lib->vba.BytePerPixelDETC[k], - 2), - mode_lib->vba.SourceScan[k], - mode_lib->vba.ViewportWidth[k] / 2, - mode_lib->vba.ViewportHeight[k] / 2, - mode_lib->vba.SwathWidthY[k] / 2, - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.VMMPageSize, - mode_lib->vba.PTEBufferSizeInRequests, - mode_lib->vba.PDEProcessingBufIn64KBReqs, - mode_lib->vba.PitchC[k], - 0, - &mode_lib->vba.MacroTileWidthC[k], - &MetaRowByteC, - &PixelPTEBytesPerRowC, - &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel], - &mode_lib->vba.dpte_row_height_chroma[k], - &mode_lib->vba.meta_row_height_chroma[k]); - mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( - mode_lib, - mode_lib->vba.VRatio[k] / 2, - mode_lib->vba.VTAPsChroma[k], - mode_lib->vba.Interlace[k], - mode_lib->vba.ProgressiveToInterlaceUnitInOPP, - mode_lib->vba.SwathHeightC[k], - mode_lib->vba.ViewportYStartC[k], - &mode_lib->vba.VInitPreFillC[k], - &mode_lib->vba.MaxNumSwathC[k]); - } else { - PixelPTEBytesPerRowC = 0; - PDEAndMetaPTEBytesFrameC = 0; - MetaRowByteC = 0; - mode_lib->vba.MaxNumSwathC[k] = 0; - mode_lib->vba.PrefetchSourceLinesC[k] = 0; - } - - mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; - mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY - + PDEAndMetaPTEBytesFrameC; - mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC; - - CalculateActiveRowBandwidth( - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.VRatio[k], - mode_lib->vba.DCCEnable[k], - mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], - MetaRowByteY, - MetaRowByteC, - mode_lib->vba.meta_row_height[k], - mode_lib->vba.meta_row_height_chroma[k], - PixelPTEBytesPerRowY, - PixelPTEBytesPerRowC, - mode_lib->vba.dpte_row_height[k], - mode_lib->vba.dpte_row_height_chroma[k], - &mode_lib->vba.meta_row_bw[k], - &mode_lib->vba.dpte_row_bw[k], - &mode_lib->vba.qual_row_bw[k]); - } - - mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFClkDeepSleep; - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.BlendingAndTiming[k] == k) { - if (mode_lib->vba.WritebackEnable[k] == true) { - mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = - mode_lib->vba.WritebackLatency - + CalculateWriteBackDelay( - mode_lib->vba.WritebackPixelFormat[k], - mode_lib->vba.WritebackHRatio[k], - mode_lib->vba.WritebackVRatio[k], - mode_lib->vba.WritebackLumaHTaps[k], - mode_lib->vba.WritebackLumaVTaps[k], - mode_lib->vba.WritebackChromaHTaps[k], - mode_lib->vba.WritebackChromaVTaps[k], - mode_lib->vba.WritebackDestinationWidth[k]) - / mode_lib->vba.DISPCLK; - } else - mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; - for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { - if (mode_lib->vba.BlendingAndTiming[j] == k - && mode_lib->vba.WritebackEnable[j] == true) { - mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = - dml_max( - mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k], - mode_lib->vba.WritebackLatency - + CalculateWriteBackDelay( - mode_lib->vba.WritebackPixelFormat[j], - mode_lib->vba.WritebackHRatio[j], - mode_lib->vba.WritebackVRatio[j], - mode_lib->vba.WritebackLumaHTaps[j], - mode_lib->vba.WritebackLumaVTaps[j], - mode_lib->vba.WritebackChromaHTaps[j], - mode_lib->vba.WritebackChromaVTaps[j], - mode_lib->vba.WritebackDestinationWidth[j]) - / mode_lib->vba.DISPCLK); - } - } - } - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) - for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) - if (mode_lib->vba.BlendingAndTiming[k] == j) - mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = - mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j]; - - mode_lib->vba.VStartupLines = 13; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.MaxVStartupLines[k] = - mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - - dml_max( - 1.0, - dml_ceil( - mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] - / (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]), - 1)); - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) - mode_lib->vba.MaximumMaxVStartupLines = dml_max( - mode_lib->vba.MaximumMaxVStartupLines, - mode_lib->vba.MaxVStartupLines[k]); - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.cursor_bw[k] = 0.0; - for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j) - mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j] - * mode_lib->vba.CursorBPP[k][j] / 8.0 - / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) - * mode_lib->vba.VRatio[k]; - } - - do { - double MaxTotalRDBandwidth = 0; - bool DestinationLineTimesForPrefetchLessThan2 = false; - bool VRatioPrefetchMoreThan4 = false; - bool prefetch_vm_bw_valid = true; - bool prefetch_row_bw_valid = true; - double TWait = CalculateTWait( - mode_lib->vba.PrefetchMode, - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.XFCEnabled[k] == true) { - mode_lib->vba.XFCRemoteSurfaceFlipDelay = - CalculateRemoteSurfaceFlipDelay( - mode_lib, - mode_lib->vba.VRatio[k], - mode_lib->vba.SwathWidthY[k], - dml_ceil( - mode_lib->vba.BytePerPixelDETY[k], - 1), - mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k], - mode_lib->vba.XFCTSlvVupdateOffset, - mode_lib->vba.XFCTSlvVupdateWidth, - mode_lib->vba.XFCTSlvVreadyOffset, - mode_lib->vba.XFCXBUFLatencyTolerance, - mode_lib->vba.XFCFillBWOverhead, - mode_lib->vba.XFCSlvChunkSize, - mode_lib->vba.XFCBusTransportTime, - mode_lib->vba.TCalc, - TWait, - &mode_lib->vba.SrcActiveDrainRate, - &mode_lib->vba.TInitXFill, - &mode_lib->vba.TslvChk); - } else { - mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; - } - mode_lib->vba.ErrorResult[k] = - CalculatePrefetchSchedule( - mode_lib, - mode_lib->vba.DPPCLK[k], - mode_lib->vba.DISPCLK, - mode_lib->vba.PixelClock[k], - mode_lib->vba.DCFClkDeepSleep, - mode_lib->vba.DSCDelay[k], - mode_lib->vba.DPPPerPlane[k], - mode_lib->vba.ScalerEnabled[k], - mode_lib->vba.NumberOfCursors[k], - mode_lib->vba.DPPCLKDelaySubtotal, - mode_lib->vba.DPPCLKDelaySCL, - mode_lib->vba.DPPCLKDelaySCLLBOnly, - mode_lib->vba.DPPCLKDelayCNVCFormater, - mode_lib->vba.DPPCLKDelayCNVCCursor, - mode_lib->vba.DISPCLKDelaySubtotal, - (unsigned int) (mode_lib->vba.SwathWidthY[k] - / mode_lib->vba.HRatio[k]), - mode_lib->vba.OutputFormat[k], - mode_lib->vba.VTotal[k] - - mode_lib->vba.VActive[k], - mode_lib->vba.HTotal[k], - mode_lib->vba.MaxInterDCNTileRepeaters, - dml_min( - mode_lib->vba.VStartupLines, - mode_lib->vba.MaxVStartupLines[k]), - mode_lib->vba.MaxPageTableLevels, - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.DynamicMetadataEnable[k], - mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], - mode_lib->vba.DynamicMetadataTransmittedBytes[k], - mode_lib->vba.DCCEnable[k], - mode_lib->vba.UrgentLatency, - mode_lib->vba.UrgentExtraLatency, - mode_lib->vba.TCalc, - mode_lib->vba.PDEAndMetaPTEBytesFrame[k], - mode_lib->vba.MetaRowByte[k], - mode_lib->vba.PixelPTEBytesPerRow[k], - mode_lib->vba.PrefetchSourceLinesY[k], - mode_lib->vba.SwathWidthY[k], - mode_lib->vba.BytePerPixelDETY[k], - mode_lib->vba.VInitPreFillY[k], - mode_lib->vba.MaxNumSwathY[k], - mode_lib->vba.PrefetchSourceLinesC[k], - mode_lib->vba.BytePerPixelDETC[k], - mode_lib->vba.VInitPreFillC[k], - mode_lib->vba.MaxNumSwathC[k], - mode_lib->vba.SwathHeightY[k], - mode_lib->vba.SwathHeightC[k], - TWait, - mode_lib->vba.XFCEnabled[k], - mode_lib->vba.XFCRemoteSurfaceFlipDelay, - mode_lib->vba.Interlace[k], - mode_lib->vba.ProgressiveToInterlaceUnitInOPP, - &mode_lib->vba.DSTXAfterScaler[k], - &mode_lib->vba.DSTYAfterScaler[k], - &mode_lib->vba.DestinationLinesForPrefetch[k], - &mode_lib->vba.PrefetchBandwidth[k], - &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k], - &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k], - &mode_lib->vba.VRatioPrefetchY[k], - &mode_lib->vba.VRatioPrefetchC[k], - &mode_lib->vba.RequiredPrefetchPixDataBW[k], - &mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, - &mode_lib->vba.Tno_bw[k], - &mode_lib->vba.VUpdateOffsetPix[k], - &mode_lib->vba.VUpdateWidthPix[k], - &mode_lib->vba.VReadyOffsetPix[k]); - if (mode_lib->vba.BlendingAndTiming[k] == k) { - mode_lib->vba.VStartup[k] = dml_min( - mode_lib->vba.VStartupLines, - mode_lib->vba.MaxVStartupLines[k]); - if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata - != 0) { - mode_lib->vba.VStartup[k] = - mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; - } - } else { - mode_lib->vba.VStartup[k] = - dml_min( - mode_lib->vba.VStartupLines, - mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); - } - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - - if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0) - mode_lib->vba.prefetch_vm_bw[k] = 0; - else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) { - mode_lib->vba.prefetch_vm_bw[k] = - (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k] - / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - } else { - mode_lib->vba.prefetch_vm_bw[k] = 0; - prefetch_vm_bw_valid = false; - } - if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k] - == 0) - mode_lib->vba.prefetch_row_bw[k] = 0; - else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) { - mode_lib->vba.prefetch_row_bw[k] = - (double) (mode_lib->vba.MetaRowByte[k] - + mode_lib->vba.PixelPTEBytesPerRow[k]) - / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - } else { - mode_lib->vba.prefetch_row_bw[k] = 0; - prefetch_row_bw_valid = false; - } - - MaxTotalRDBandwidth = - MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k] - + dml_max( - mode_lib->vba.prefetch_vm_bw[k], - dml_max( - mode_lib->vba.prefetch_row_bw[k], - dml_max( - mode_lib->vba.ReadBandwidthPlaneLuma[k] - + mode_lib->vba.ReadBandwidthPlaneChroma[k], - mode_lib->vba.RequiredPrefetchPixDataBW[k]) - + mode_lib->vba.meta_row_bw[k] - + mode_lib->vba.dpte_row_bw[k])); - - if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2) - DestinationLineTimesForPrefetchLessThan2 = true; - if (mode_lib->vba.VRatioPrefetchY[k] > 4 - || mode_lib->vba.VRatioPrefetchC[k] > 4) - VRatioPrefetchMoreThan4 = true; - } - - if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid - && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4 - && !DestinationLineTimesForPrefetchLessThan2) - mode_lib->vba.PrefetchModeSupported = true; - else { - mode_lib->vba.PrefetchModeSupported = false; - dml_print( - "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); - } - - if (mode_lib->vba.PrefetchModeSupported == true) { - double final_flip_bw[DC__NUM_DPP__MAX]; - unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; - double total_dcn_read_bw_with_flip = 0; - - mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.BandwidthAvailableForImmediateFlip - - mode_lib->vba.cursor_bw[k] - - dml_max( - mode_lib->vba.ReadBandwidthPlaneLuma[k] - + mode_lib->vba.ReadBandwidthPlaneChroma[k] - + mode_lib->vba.qual_row_bw[k], - mode_lib->vba.PrefetchBandwidth[k]); - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - ImmediateFlipBytes[k] = 0; - if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 - && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { - ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesFrame[k] - + mode_lib->vba.MetaRowByte[k] - + mode_lib->vba.PixelPTEBytesPerRow[k]; - } - } - mode_lib->vba.TotImmediateFlipBytes = 0; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 - && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { - mode_lib->vba.TotImmediateFlipBytes = - mode_lib->vba.TotImmediateFlipBytes - + ImmediateFlipBytes[k]; - } - } - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - CalculateFlipSchedule( - mode_lib, - mode_lib->vba.UrgentExtraLatency, - mode_lib->vba.UrgentLatency, - mode_lib->vba.MaxPageTableLevels, - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.BandwidthAvailableForImmediateFlip, - mode_lib->vba.TotImmediateFlipBytes, - mode_lib->vba.SourcePixelFormat[k], - ImmediateFlipBytes[k], - mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k], - mode_lib->vba.VRatio[k], - mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesFrame[k], - mode_lib->vba.MetaRowByte[k], - mode_lib->vba.PixelPTEBytesPerRow[k], - mode_lib->vba.DCCEnable[k], - mode_lib->vba.dpte_row_height[k], - mode_lib->vba.meta_row_height[k], - mode_lib->vba.qual_row_bw[k], - &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], - &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], - &final_flip_bw[k], - &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); - } - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - total_dcn_read_bw_with_flip = - total_dcn_read_bw_with_flip - + mode_lib->vba.cursor_bw[k] - + dml_max( - mode_lib->vba.prefetch_vm_bw[k], - dml_max( - mode_lib->vba.prefetch_row_bw[k], - final_flip_bw[k] - + dml_max( - mode_lib->vba.ReadBandwidthPlaneLuma[k] - + mode_lib->vba.ReadBandwidthPlaneChroma[k], - mode_lib->vba.RequiredPrefetchPixDataBW[k]))); - } - mode_lib->vba.ImmediateFlipSupported = true; - if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { - mode_lib->vba.ImmediateFlipSupported = false; - } - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { - mode_lib->vba.ImmediateFlipSupported = false; - } - } - } else { - mode_lib->vba.ImmediateFlipSupported = false; - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.ErrorResult[k]) { - mode_lib->vba.PrefetchModeSupported = false; - dml_print( - "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); - } - } - - mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; - } while (!((mode_lib->vba.PrefetchModeSupported - && (!mode_lib->vba.ImmediateFlipSupport - || mode_lib->vba.ImmediateFlipSupported)) - || mode_lib->vba.MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); - - //Display Pipeline Delivery Time in Prefetch - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.VRatioPrefetchY[k] <= 1) { - mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = - mode_lib->vba.SwathWidthY[k] * mode_lib->vba.DPPPerPlane[k] - / mode_lib->vba.HRatio[k] - / mode_lib->vba.PixelClock[k]; - } else { - mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = - mode_lib->vba.SwathWidthY[k] - / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] - / mode_lib->vba.DPPCLK[k]; - } - if (mode_lib->vba.BytePerPixelDETC[k] == 0) { - mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; - } else { - if (mode_lib->vba.VRatioPrefetchC[k] <= 1) { - mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = - mode_lib->vba.SwathWidthY[k] - * mode_lib->vba.DPPPerPlane[k] - / mode_lib->vba.HRatio[k] - / mode_lib->vba.PixelClock[k]; - } else { - mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = - mode_lib->vba.SwathWidthY[k] - / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] - / mode_lib->vba.DPPCLK[k]; - } - } - } - - // Min TTUVBlank - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.PrefetchMode == 0) { - mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = true; - mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; - mode_lib->vba.MinTTUVBlank[k] = dml_max( - mode_lib->vba.DRAMClockChangeWatermark, - dml_max( - mode_lib->vba.StutterEnterPlusExitWatermark, - mode_lib->vba.UrgentWatermark)); - } else if (mode_lib->vba.PrefetchMode == 1) { - mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; - mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; - mode_lib->vba.MinTTUVBlank[k] = dml_max( - mode_lib->vba.StutterEnterPlusExitWatermark, - mode_lib->vba.UrgentWatermark); - } else { - mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; - mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = false; - mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; - } - if (!mode_lib->vba.DynamicMetadataEnable[k]) - mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.TCalc - + mode_lib->vba.MinTTUVBlank[k]; - } - - // DCC Configuration - mode_lib->vba.ActiveDPPs = 0; - // NB P-State/DRAM Clock Change Support - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.ActiveDPPs = mode_lib->vba.ActiveDPPs + mode_lib->vba.DPPPerPlane[k]; - } - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - double EffectiveLBLatencyHidingY; - double EffectiveLBLatencyHidingC; - double DPPOutputBufferLinesY; - double DPPOutputBufferLinesC; - double DPPOPPBufferingY; - double MaxDETBufferingTimeY; - double ActiveDRAMClockChangeLatencyMarginY; - - mode_lib->vba.LBLatencyHidingSourceLinesY = - dml_min( - mode_lib->vba.MaxLineBufferLines, - (unsigned int) dml_floor( - (double) mode_lib->vba.LineBufferSize - / mode_lib->vba.LBBitPerPixel[k] - / (mode_lib->vba.SwathWidthY[k] - / dml_max( - mode_lib->vba.HRatio[k], - 1.0)), - 1)) - (mode_lib->vba.vtaps[k] - 1); - - mode_lib->vba.LBLatencyHidingSourceLinesC = - dml_min( - mode_lib->vba.MaxLineBufferLines, - (unsigned int) dml_floor( - (double) mode_lib->vba.LineBufferSize - / mode_lib->vba.LBBitPerPixel[k] - / (mode_lib->vba.SwathWidthY[k] - / 2.0 - / dml_max( - mode_lib->vba.HRatio[k] - / 2, - 1.0)), - 1)) - - (mode_lib->vba.VTAPsChroma[k] - 1); - - EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY - / mode_lib->vba.VRatio[k] - * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); - - EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC - / (mode_lib->vba.VRatio[k] / 2) - * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); - - if (mode_lib->vba.SwathWidthY[k] > 2 * mode_lib->vba.DPPOutputBufferPixels) { - DPPOutputBufferLinesY = mode_lib->vba.DPPOutputBufferPixels - / mode_lib->vba.SwathWidthY[k]; - } else if (mode_lib->vba.SwathWidthY[k] > mode_lib->vba.DPPOutputBufferPixels) { - DPPOutputBufferLinesY = 0.5; - } else { - DPPOutputBufferLinesY = 1; - } - - if (mode_lib->vba.SwathWidthY[k] / 2 > 2 * mode_lib->vba.DPPOutputBufferPixels) { - DPPOutputBufferLinesC = mode_lib->vba.DPPOutputBufferPixels - / (mode_lib->vba.SwathWidthY[k] / 2); - } else if (mode_lib->vba.SwathWidthY[k] / 2 > mode_lib->vba.DPPOutputBufferPixels) { - DPPOutputBufferLinesC = 0.5; - } else { - DPPOutputBufferLinesC = 1; - } - - DPPOPPBufferingY = (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) - * (DPPOutputBufferLinesY + mode_lib->vba.OPPOutputBufferLines); - MaxDETBufferingTimeY = mode_lib->vba.FullDETBufferingTimeY[k] - + (mode_lib->vba.LinesInDETY[k] - - mode_lib->vba.LinesInDETYRoundedDownToSwath[k]) - / mode_lib->vba.SwathHeightY[k] - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - - ActiveDRAMClockChangeLatencyMarginY = DPPOPPBufferingY + EffectiveLBLatencyHidingY - + MaxDETBufferingTimeY - mode_lib->vba.DRAMClockChangeWatermark; - - if (mode_lib->vba.ActiveDPPs > 1) { - ActiveDRAMClockChangeLatencyMarginY = - ActiveDRAMClockChangeLatencyMarginY - - (1 - 1 / (mode_lib->vba.ActiveDPPs - 1)) - * mode_lib->vba.SwathHeightY[k] - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - } - - if (mode_lib->vba.BytePerPixelDETC[k] > 0) { - double DPPOPPBufferingC = (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - * (DPPOutputBufferLinesC - + mode_lib->vba.OPPOutputBufferLines); - double MaxDETBufferingTimeC = - mode_lib->vba.FullDETBufferingTimeC[k] - + (mode_lib->vba.LinesInDETC[k] - - mode_lib->vba.LinesInDETCRoundedDownToSwath[k]) - / mode_lib->vba.SwathHeightC[k] - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - double ActiveDRAMClockChangeLatencyMarginC = DPPOPPBufferingC - + EffectiveLBLatencyHidingC + MaxDETBufferingTimeC - - mode_lib->vba.DRAMClockChangeWatermark; - - if (mode_lib->vba.ActiveDPPs > 1) { - ActiveDRAMClockChangeLatencyMarginC = - ActiveDRAMClockChangeLatencyMarginC - - (1 - - 1 - / (mode_lib->vba.ActiveDPPs - - 1)) - * mode_lib->vba.SwathHeightC[k] - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - } - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( - ActiveDRAMClockChangeLatencyMarginY, - ActiveDRAMClockChangeLatencyMarginC); - } else { - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = - ActiveDRAMClockChangeLatencyMarginY; - } - - if (mode_lib->vba.WritebackEnable[k]) { - double WritebackDRAMClockChangeLatencyMargin; - - if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { - WritebackDRAMClockChangeLatencyMargin = - (double) (mode_lib->vba.WritebackInterfaceLumaBufferSize - + mode_lib->vba.WritebackInterfaceChromaBufferSize) - / (mode_lib->vba.WritebackDestinationWidth[k] - * mode_lib->vba.WritebackDestinationHeight[k] - / (mode_lib->vba.WritebackSourceHeight[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - * 4) - - mode_lib->vba.WritebackDRAMClockChangeWatermark; - } else if (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { - WritebackDRAMClockChangeLatencyMargin = - dml_min( - (double) mode_lib->vba.WritebackInterfaceLumaBufferSize - * 8.0 / 10, - 2.0 - * mode_lib->vba.WritebackInterfaceChromaBufferSize - * 8 / 10) - / (mode_lib->vba.WritebackDestinationWidth[k] - * mode_lib->vba.WritebackDestinationHeight[k] - / (mode_lib->vba.WritebackSourceHeight[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k])) - - mode_lib->vba.WritebackDRAMClockChangeWatermark; - } else { - WritebackDRAMClockChangeLatencyMargin = - dml_min( - (double) mode_lib->vba.WritebackInterfaceLumaBufferSize, - 2.0 - * mode_lib->vba.WritebackInterfaceChromaBufferSize) - / (mode_lib->vba.WritebackDestinationWidth[k] - * mode_lib->vba.WritebackDestinationHeight[k] - / (mode_lib->vba.WritebackSourceHeight[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k])) - - mode_lib->vba.WritebackDRAMClockChangeWatermark; - } - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], - WritebackDRAMClockChangeLatencyMargin); - } - } - - mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] - < mode_lib->vba.MinActiveDRAMClockChangeMargin) { - mode_lib->vba.MinActiveDRAMClockChangeMargin = - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; - } - } - - mode_lib->vba.MinActiveDRAMClockChangeLatencySupported = - mode_lib->vba.MinActiveDRAMClockChangeMargin - + mode_lib->vba.DRAMClockChangeLatency; - - if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { - mode_lib->vba.DRAMClockChangeSupport = dm_dram_clock_change_vactive; - } else { - if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { - mode_lib->vba.DRAMClockChangeSupport = dm_dram_clock_change_vblank; - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) { - mode_lib->vba.DRAMClockChangeSupport = - dm_dram_clock_change_unsupported; - } - } - } else { - mode_lib->vba.DRAMClockChangeSupport = dm_dram_clock_change_unsupported; - } - } - - //XFC Parameters: - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.XFCEnabled[k] == true) { - double TWait; - - mode_lib->vba.XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; - mode_lib->vba.XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; - mode_lib->vba.XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; - TWait = CalculateTWait( - mode_lib->vba.PrefetchMode, - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( - mode_lib, - mode_lib->vba.VRatio[k], - mode_lib->vba.SwathWidthY[k], - dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), - mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], - mode_lib->vba.XFCTSlvVupdateOffset, - mode_lib->vba.XFCTSlvVupdateWidth, - mode_lib->vba.XFCTSlvVreadyOffset, - mode_lib->vba.XFCXBUFLatencyTolerance, - mode_lib->vba.XFCFillBWOverhead, - mode_lib->vba.XFCSlvChunkSize, - mode_lib->vba.XFCBusTransportTime, - mode_lib->vba.TCalc, - TWait, - &mode_lib->vba.SrcActiveDrainRate, - &mode_lib->vba.TInitXFill, - &mode_lib->vba.TslvChk); - mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = - dml_floor( - mode_lib->vba.XFCRemoteSurfaceFlipDelay - / (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]), - 1); - mode_lib->vba.XFCTransferDelay[k] = - dml_ceil( - mode_lib->vba.XFCBusTransportTime - / (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]), - 1); - mode_lib->vba.XFCPrechargeDelay[k] = - dml_ceil( - (mode_lib->vba.XFCBusTransportTime - + mode_lib->vba.TInitXFill - + mode_lib->vba.TslvChk) - / (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]), - 1); - mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance - * mode_lib->vba.SrcActiveDrainRate; - mode_lib->vba.FinalFillMargin = - (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] - + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k] - * mode_lib->vba.SrcActiveDrainRate - + mode_lib->vba.XFCFillConstant; - mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay - * mode_lib->vba.SrcActiveDrainRate - + mode_lib->vba.FinalFillMargin; - mode_lib->vba.RemainingFillLevel = dml_max( - 0.0, - mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); - mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel - / (mode_lib->vba.SrcActiveDrainRate - * mode_lib->vba.XFCFillBWOverhead / 100); - mode_lib->vba.XFCPrefetchMargin[k] = - mode_lib->vba.XFCRemoteSurfaceFlipDelay - + mode_lib->vba.TFinalxFill - + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] - + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]; - } else { - mode_lib->vba.XFCSlaveVUpdateOffset[k] = 0; - mode_lib->vba.XFCSlaveVupdateWidth[k] = 0; - mode_lib->vba.XFCSlaveVReadyOffset[k] = 0; - mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = 0; - mode_lib->vba.XFCPrechargeDelay[k] = 0; - mode_lib->vba.XFCTransferDelay[k] = 0; - mode_lib->vba.XFCPrefetchMargin[k] = 0; - } - } -} - -static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) -{ - double BytePerPixDETY; - double BytePerPixDETC; - double Read256BytesBlockHeightY; - double Read256BytesBlockHeightC; - double Read256BytesBlockWidthY; - double Read256BytesBlockWidthC; - double MaximumSwathHeightY; - double MaximumSwathHeightC; - double MinimumSwathHeightY; - double MinimumSwathHeightC; - double SwathWidth; - double SwathWidthGranularityY; - double SwathWidthGranularityC; - double RoundedUpMaxSwathSizeBytesY; - double RoundedUpMaxSwathSizeBytesC; - unsigned int j, k; - - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - bool MainPlaneDoesODMCombine = false; - - if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { - BytePerPixDETY = 8; - BytePerPixDETC = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { - BytePerPixDETY = 4; - BytePerPixDETC = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { - BytePerPixDETY = 2; - BytePerPixDETC = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { - BytePerPixDETY = 1; - BytePerPixDETC = 0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { - BytePerPixDETY = 1; - BytePerPixDETC = 2; - } else { - BytePerPixDETY = 4.0 / 3.0; - BytePerPixDETC = 8.0 / 3.0; - } - - if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { - if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { - Read256BytesBlockHeightY = 1; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { - Read256BytesBlockHeightY = 4; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { - Read256BytesBlockHeightY = 8; - } else { - Read256BytesBlockHeightY = 16; - } - Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) - / Read256BytesBlockHeightY; - Read256BytesBlockHeightC = 0; - Read256BytesBlockWidthC = 0; - } else { - if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { - Read256BytesBlockHeightY = 1; - Read256BytesBlockHeightC = 1; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { - Read256BytesBlockHeightY = 16; - Read256BytesBlockHeightC = 8; - } else { - Read256BytesBlockHeightY = 8; - Read256BytesBlockHeightC = 8; - } - Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) - / Read256BytesBlockHeightY; - Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) - / Read256BytesBlockHeightC; - } - - if (mode_lib->vba.SourceScan[k] == dm_horz) { - MaximumSwathHeightY = Read256BytesBlockHeightY; - MaximumSwathHeightC = Read256BytesBlockHeightC; - } else { - MaximumSwathHeightY = Read256BytesBlockWidthY; - MaximumSwathHeightC = Read256BytesBlockWidthC; - } - - if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { - if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear - || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 - && (mode_lib->vba.SurfaceTiling[k] - == dm_sw_4kb_s - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_4kb_s_x - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_64kb_s - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_64kb_s_t - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_64kb_s_x - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_var_s - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_var_s_x) - && mode_lib->vba.SourceScan[k] == dm_horz)) { - MinimumSwathHeightY = MaximumSwathHeightY; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 - && mode_lib->vba.SourceScan[k] != dm_horz) { - MinimumSwathHeightY = MaximumSwathHeightY; - } else { - MinimumSwathHeightY = MaximumSwathHeightY / 2.0; - } - MinimumSwathHeightC = MaximumSwathHeightC; - } else { - if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { - MinimumSwathHeightY = MaximumSwathHeightY; - MinimumSwathHeightC = MaximumSwathHeightC; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 - && mode_lib->vba.SourceScan[k] == dm_horz) { - MinimumSwathHeightY = MaximumSwathHeightY / 2.0; - MinimumSwathHeightC = MaximumSwathHeightC; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 - && mode_lib->vba.SourceScan[k] == dm_horz) { - MinimumSwathHeightC = MaximumSwathHeightC / 2.0; - MinimumSwathHeightY = MaximumSwathHeightY; - } else { - MinimumSwathHeightY = MaximumSwathHeightY; - MinimumSwathHeightC = MaximumSwathHeightC; - } - } - - if (mode_lib->vba.SourceScan[k] == dm_horz) { - SwathWidth = mode_lib->vba.ViewportWidth[k]; - } else { - SwathWidth = mode_lib->vba.ViewportHeight[k]; - } - - if (mode_lib->vba.ODMCombineEnabled[k] == true) { - MainPlaneDoesODMCombine = true; - } - for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { - if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { - MainPlaneDoesODMCombine = true; - } - } - - if (MainPlaneDoesODMCombine == true) { - SwathWidth = dml_min( - SwathWidth, - mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); - } else { - SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; - } - - SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; - RoundedUpMaxSwathSizeBytesY = (dml_ceil( - (double) (SwathWidth - 1), - SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY - * MaximumSwathHeightY; - if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { - RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) - + 256; - } - if (MaximumSwathHeightC > 0) { - SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) - / MaximumSwathHeightC; - RoundedUpMaxSwathSizeBytesC = (dml_ceil( - (double) (SwathWidth / 2.0 - 1), - SwathWidthGranularityC) + SwathWidthGranularityC) - * BytePerPixDETC * MaximumSwathHeightC; - if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { - RoundedUpMaxSwathSizeBytesC = dml_ceil( - RoundedUpMaxSwathSizeBytesC, - 256) + 256; - } - } else - RoundedUpMaxSwathSizeBytesC = 0.0; - - if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC - <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { - mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; - mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; - } else { - mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; - mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; - } - - if (mode_lib->vba.SwathHeightC[k] == 0) { - mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte * 1024; - mode_lib->vba.DETBufferSizeC[k] = 0; - } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) { - mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 / 2; - mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 / 2; - } else { - mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 * 2 / 3; - mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 / 3; - } - } -} - -bool Calculate256BBlockSizes( - enum source_format_class SourcePixelFormat, - enum dm_swizzle_mode SurfaceTiling, - unsigned int BytePerPixelY, - unsigned int BytePerPixelC, - unsigned int *BlockHeight256BytesY, - unsigned int *BlockHeight256BytesC, - unsigned int *BlockWidth256BytesY, - unsigned int *BlockWidth256BytesC) -{ - if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 - || SourcePixelFormat == dm_444_16 - || SourcePixelFormat == dm_444_8)) { - if (SurfaceTiling == dm_sw_linear) { - *BlockHeight256BytesY = 1; - } else if (SourcePixelFormat == dm_444_64) { - *BlockHeight256BytesY = 4; - } else if (SourcePixelFormat == dm_444_8) { - *BlockHeight256BytesY = 16; - } else { - *BlockHeight256BytesY = 8; - } - *BlockWidth256BytesY = 256 / BytePerPixelY / *BlockHeight256BytesY; - *BlockHeight256BytesC = 0; - *BlockWidth256BytesC = 0; - } else { - if (SurfaceTiling == dm_sw_linear) { - *BlockHeight256BytesY = 1; - *BlockHeight256BytesC = 1; - } else if (SourcePixelFormat == dm_420_8) { - *BlockHeight256BytesY = 16; - *BlockHeight256BytesC = 8; - } else { - *BlockHeight256BytesY = 8; - *BlockHeight256BytesC = 8; - } - *BlockWidth256BytesY = 256 / BytePerPixelY / *BlockHeight256BytesY; - *BlockWidth256BytesC = 256 / BytePerPixelC / *BlockHeight256BytesC; - } - return true; -} - -static double CalculateTWait( - unsigned int PrefetchMode, - double DRAMClockChangeLatency, - double UrgentLatency, - double SREnterPlusExitTime) -{ - if (PrefetchMode == 0) { - return dml_max( - DRAMClockChangeLatency + UrgentLatency, - dml_max(SREnterPlusExitTime, UrgentLatency)); - } else if (PrefetchMode == 1) { - return dml_max(SREnterPlusExitTime, UrgentLatency); - } else { - return UrgentLatency; - } -} - -static double CalculateRemoteSurfaceFlipDelay( - struct display_mode_lib *mode_lib, - double VRatio, - double SwathWidth, - double Bpp, - double LineTime, - double XFCTSlvVupdateOffset, - double XFCTSlvVupdateWidth, - double XFCTSlvVreadyOffset, - double XFCXBUFLatencyTolerance, - double XFCFillBWOverhead, - double XFCSlvChunkSize, - double XFCBusTransportTime, - double TCalc, - double TWait, - double *SrcActiveDrainRate, - double *TInitXFill, - double *TslvChk) -{ - double TSlvSetup, AvgfillRate, result; - - *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime; - TSlvSetup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset; - *TInitXFill = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100); - AvgfillRate = *SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100); - *TslvChk = XFCSlvChunkSize / AvgfillRate; - dml_print( - "DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", - *SrcActiveDrainRate); - dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", TSlvSetup); - dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill); - dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AvgfillRate); - dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk); - result = 2 * XFCBusTransportTime + TSlvSetup + TCalc + TWait + *TslvChk + *TInitXFill; // TODO: This doesn't seem to match programming guide - dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", result); - return result; -} - -static double CalculateWriteBackDISPCLK( - enum source_format_class WritebackPixelFormat, - double PixelClock, - double WritebackHRatio, - double WritebackVRatio, - unsigned int WritebackLumaHTaps, - unsigned int WritebackLumaVTaps, - unsigned int WritebackChromaHTaps, - unsigned int WritebackChromaVTaps, - double WritebackDestinationWidth, - unsigned int HTotal, - unsigned int WritebackChromaLineBufferWidth) -{ - double CalculateWriteBackDISPCLK = - 1.01 * PixelClock - * dml_max( - dml_ceil(WritebackLumaHTaps / 4.0, 1) - / WritebackHRatio, - dml_max( - (WritebackLumaVTaps - * dml_ceil( - 1.0 - / WritebackVRatio, - 1) - * dml_ceil( - WritebackDestinationWidth - / 4.0, - 1) - + dml_ceil( - WritebackDestinationWidth - / 4.0, - 1)) - / (double) HTotal - + dml_ceil( - 1.0 - / WritebackVRatio, - 1) - * (dml_ceil( - WritebackLumaVTaps - / 4.0, - 1) - + 4.0) - / (double) HTotal, - dml_ceil( - 1.0 - / WritebackVRatio, - 1) - * WritebackDestinationWidth - / (double) HTotal)); - if (WritebackPixelFormat != dm_444_32) { - CalculateWriteBackDISPCLK = - dml_max( - CalculateWriteBackDISPCLK, - 1.01 * PixelClock - * dml_max( - dml_ceil( - WritebackChromaHTaps - / 2.0, - 1) - / (2 - * WritebackHRatio), - dml_max( - (WritebackChromaVTaps - * dml_ceil( - 1 - / (2 - * WritebackVRatio), - 1) - * dml_ceil( - WritebackDestinationWidth - / 2.0 - / 2.0, - 1) - + dml_ceil( - WritebackDestinationWidth - / 2.0 - / WritebackChromaLineBufferWidth, - 1)) - / HTotal - + dml_ceil( - 1 - / (2 - * WritebackVRatio), - 1) - * (dml_ceil( - WritebackChromaVTaps - / 4.0, - 1) - + 4) - / HTotal, - dml_ceil( - 1.0 - / (2 - * WritebackVRatio), - 1) - * WritebackDestinationWidth - / 2.0 - / HTotal))); - } - return CalculateWriteBackDISPCLK; -} - -static double CalculateWriteBackDelay( - enum source_format_class WritebackPixelFormat, - double WritebackHRatio, - double WritebackVRatio, - unsigned int WritebackLumaHTaps, - unsigned int WritebackLumaVTaps, - unsigned int WritebackChromaHTaps, - unsigned int WritebackChromaVTaps, - unsigned int WritebackDestinationWidth) -{ - double CalculateWriteBackDelay = - dml_max( - dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, - WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) - * dml_ceil( - WritebackDestinationWidth - / 4.0, - 1) - + dml_ceil(1.0 / WritebackVRatio, 1) - * (dml_ceil( - WritebackLumaVTaps - / 4.0, - 1) + 4)); - - if (WritebackPixelFormat != dm_444_32) { - CalculateWriteBackDelay = - dml_max( - CalculateWriteBackDelay, - dml_max( - dml_ceil( - WritebackChromaHTaps - / 2.0, - 1) - / (2 - * WritebackHRatio), - WritebackChromaVTaps - * dml_ceil( - 1 - / (2 - * WritebackVRatio), - 1) - * dml_ceil( - WritebackDestinationWidth - / 2.0 - / 2.0, - 1) - + dml_ceil( - 1 - / (2 - * WritebackVRatio), - 1) - * (dml_ceil( - WritebackChromaVTaps - / 4.0, - 1) - + 4))); - } - return CalculateWriteBackDelay; -} - -static void CalculateActiveRowBandwidth( - bool VirtualMemoryEnable, - enum source_format_class SourcePixelFormat, - double VRatio, - bool DCCEnable, - double LineTime, - unsigned int MetaRowByteLuma, - unsigned int MetaRowByteChroma, - unsigned int meta_row_height_luma, - unsigned int meta_row_height_chroma, - unsigned int PixelPTEBytesPerRowLuma, - unsigned int PixelPTEBytesPerRowChroma, - unsigned int dpte_row_height_luma, - unsigned int dpte_row_height_chroma, - double *meta_row_bw, - double *dpte_row_bw, - double *qual_row_bw) -{ - if (DCCEnable != true) { - *meta_row_bw = 0; - } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { - *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) - + VRatio / 2 * MetaRowByteChroma - / (meta_row_height_chroma * LineTime); - } else { - *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); - } - - if (VirtualMemoryEnable != true) { - *dpte_row_bw = 0; - } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { - *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) - + VRatio / 2 * PixelPTEBytesPerRowChroma - / (dpte_row_height_chroma * LineTime); - } else { - *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); - } - - if ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)) { - *qual_row_bw = *meta_row_bw + *dpte_row_bw; - } else { - *qual_row_bw = 0; - } -} - -static void CalculateFlipSchedule( - struct display_mode_lib *mode_lib, - double UrgentExtraLatency, - double UrgentLatency, - unsigned int MaxPageTableLevels, - bool VirtualMemoryEnable, - double BandwidthAvailableForImmediateFlip, - unsigned int TotImmediateFlipBytes, - enum source_format_class SourcePixelFormat, - unsigned int ImmediateFlipBytes, - double LineTime, - double Tno_bw, - double VRatio, - double PDEAndMetaPTEBytesFrame, - unsigned int MetaRowByte, - unsigned int PixelPTEBytesPerRow, - bool DCCEnable, - unsigned int dpte_row_height, - unsigned int meta_row_height, - double qual_row_bw, - double *DestinationLinesToRequestVMInImmediateFlip, - double *DestinationLinesToRequestRowInImmediateFlip, - double *final_flip_bw, - bool *ImmediateFlipSupportedForPipe) -{ - double min_row_time = 0.0; - - if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { - *DestinationLinesToRequestVMInImmediateFlip = 0.0; - *DestinationLinesToRequestRowInImmediateFlip = 0.0; - *final_flip_bw = qual_row_bw; - *ImmediateFlipSupportedForPipe = true; - } else { - double TimeForFetchingMetaPTEImmediateFlip; - double TimeForFetchingRowInVBlankImmediateFlip; - - if (VirtualMemoryEnable == true) { - mode_lib->vba.ImmediateFlipBW = BandwidthAvailableForImmediateFlip - * ImmediateFlipBytes / TotImmediateFlipBytes; - TimeForFetchingMetaPTEImmediateFlip = - dml_max( - Tno_bw - + PDEAndMetaPTEBytesFrame - / mode_lib->vba.ImmediateFlipBW, - dml_max( - UrgentExtraLatency - + UrgentLatency - * (MaxPageTableLevels - - 1), - LineTime / 4.0)); - } else { - TimeForFetchingMetaPTEImmediateFlip = 0; - } - - *DestinationLinesToRequestVMInImmediateFlip = dml_floor( - 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime + 0.125), - 1) / 4.0; - - if ((VirtualMemoryEnable == true || DCCEnable == true)) { - mode_lib->vba.ImmediateFlipBW = BandwidthAvailableForImmediateFlip - * ImmediateFlipBytes / TotImmediateFlipBytes; - TimeForFetchingRowInVBlankImmediateFlip = dml_max( - (MetaRowByte + PixelPTEBytesPerRow) - / mode_lib->vba.ImmediateFlipBW, - dml_max(UrgentLatency, LineTime / 4.0)); - } else { - TimeForFetchingRowInVBlankImmediateFlip = 0; - } - - *DestinationLinesToRequestRowInImmediateFlip = dml_floor( - 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime + 0.125), - 1) / 4.0; - - if (VirtualMemoryEnable == true) { - *final_flip_bw = - dml_max( - PDEAndMetaPTEBytesFrame - / (*DestinationLinesToRequestVMInImmediateFlip - * LineTime), - (MetaRowByte + PixelPTEBytesPerRow) - / (TimeForFetchingRowInVBlankImmediateFlip - * LineTime)); - } else if (MetaRowByte + PixelPTEBytesPerRow > 0) { - *final_flip_bw = (MetaRowByte + PixelPTEBytesPerRow) - / (TimeForFetchingRowInVBlankImmediateFlip * LineTime); - } else { - *final_flip_bw = 0; - } - - if (VirtualMemoryEnable && !DCCEnable) - min_row_time = dpte_row_height * LineTime / VRatio; - else if (!VirtualMemoryEnable && DCCEnable) - min_row_time = meta_row_height * LineTime / VRatio; - else - min_row_time = dml_min(dpte_row_height, meta_row_height) * LineTime - / VRatio; - - if (*DestinationLinesToRequestVMInImmediateFlip >= 8 - || *DestinationLinesToRequestRowInImmediateFlip >= 16 - || TimeForFetchingMetaPTEImmediateFlip - + 2 * TimeForFetchingRowInVBlankImmediateFlip - > min_row_time) - *ImmediateFlipSupportedForPipe = false; - else - *ImmediateFlipSupportedForPipe = true; - } -} - -static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct display_mode_lib *mode_lib) -{ - unsigned int k; - - //Progressive To dml_ml->vba.Interlace Unit Effect - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - mode_lib->vba.PixelClockBackEnd[k] = mode_lib->vba.PixelClock[k]; - if (mode_lib->vba.Interlace[k] == 1 - && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true) { - mode_lib->vba.PixelClock[k] = 2 * mode_lib->vba.PixelClock[k]; - } - } -} - -static unsigned int CursorBppEnumToBits(enum cursor_bpp ebpp) -{ - switch (ebpp) { - case dm_cur_2bit: - return 2; - case dm_cur_32bit: - return 32; - case dm_cur_64bit: - return 64; - default: - return 0; - } -} - -static unsigned int TruncToValidBPP( - double DecimalBPP, - bool DSCEnabled, - enum output_encoder_class Output, - enum output_format_class Format, - unsigned int DSCInputBitPerComponent) -{ - if (Output == dm_hdmi) { - if (Format == dm_420) { - if (DecimalBPP >= 18) - return 18; - else if (DecimalBPP >= 15) - return 15; - else if (DecimalBPP >= 12) - return 12; - else - return BPP_INVALID; - } else if (Format == dm_444) { - if (DecimalBPP >= 36) - return 36; - else if (DecimalBPP >= 30) - return 30; - else if (DecimalBPP >= 24) - return 24; - else - return BPP_INVALID; - } else { - if (DecimalBPP / 1.5 >= 24) - return 24; - else if (DecimalBPP / 1.5 >= 20) - return 20; - else if (DecimalBPP / 1.5 >= 16) - return 16; - else - return BPP_INVALID; - } - } else { - if (DSCEnabled) { - if (Format == dm_420) { - if (DecimalBPP < 6) - return BPP_INVALID; - else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1 / 16) - return 1.5 * DSCInputBitPerComponent - 1 / 16; - else - return dml_floor(16 * DecimalBPP, 1) / 16; - } else if (Format == dm_n422) { - if (DecimalBPP < 7) - return BPP_INVALID; - else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1 / 16) - return 2 * DSCInputBitPerComponent - 1 / 16; - else - return dml_floor(16 * DecimalBPP, 1) / 16; - } else { - if (DecimalBPP < 8) - return BPP_INVALID; - else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1 / 16) - return 3 * DSCInputBitPerComponent - 1 / 16; - else - return dml_floor(16 * DecimalBPP, 1) / 16; - } - } else if (Format == dm_420) { - if (DecimalBPP >= 18) - return 18; - else if (DecimalBPP >= 15) - return 15; - else if (DecimalBPP >= 12) - return 12; - else - return BPP_INVALID; - } else if (Format == dm_s422 || Format == dm_n422) { - if (DecimalBPP >= 24) - return 24; - else if (DecimalBPP >= 20) - return 20; - else if (DecimalBPP >= 16) - return 16; - else - return BPP_INVALID; - } else { - if (DecimalBPP >= 36) - return 36; - else if (DecimalBPP >= 30) - return 30; - else if (DecimalBPP >= 24) - return 24; - else - return BPP_INVALID; - } - } -} - -static void ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) -{ - int i; - unsigned int j, k; - /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ - - /*Scale Ratio, taps Support Check*/ - - mode_lib->vba.ScaleRatioAndTapsSupport = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.ScalerEnabled[k] == false - && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) - || mode_lib->vba.HRatio[k] != 1.0 - || mode_lib->vba.htaps[k] != 1.0 - || mode_lib->vba.VRatio[k] != 1.0 - || mode_lib->vba.vtaps[k] != 1.0)) { - mode_lib->vba.ScaleRatioAndTapsSupport = false; - } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 - || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 - || (mode_lib->vba.htaps[k] > 1.0 - && (mode_lib->vba.htaps[k] % 2) == 1) - || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio - || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio - || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] - || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] - || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 - && (mode_lib->vba.HRatio[k] / 2.0 - > mode_lib->vba.HTAPsChroma[k] - || mode_lib->vba.VRatio[k] / 2.0 - > mode_lib->vba.VTAPsChroma[k]))) { - mode_lib->vba.ScaleRatioAndTapsSupport = false; - } - } - /*Source Format, Pixel Format and Scan Support Check*/ - - mode_lib->vba.SourceFormatPixelAndScanSupport = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear - && mode_lib->vba.SourceScan[k] != dm_horz) - || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d - || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x - || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d - || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) - && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) - || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x - && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 - || mode_lib->vba.SourcePixelFormat[k] - == dm_420_8 - || mode_lib->vba.SourcePixelFormat[k] - == dm_420_10)) - || (((mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_gl - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) - && !((mode_lib->vba.SourcePixelFormat[k] - == dm_444_64 - || mode_lib->vba.SourcePixelFormat[k] - == dm_444_32) - && mode_lib->vba.SourceScan[k] - == dm_horz - && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp - == true - && mode_lib->vba.DCCEnable[k] - == false)) - || (mode_lib->vba.DCCEnable[k] == true - && (mode_lib->vba.SurfaceTiling[k] - == dm_sw_linear - || mode_lib->vba.SourcePixelFormat[k] - == dm_420_8 - || mode_lib->vba.SourcePixelFormat[k] - == dm_420_10)))) { - mode_lib->vba.SourceFormatPixelAndScanSupport = false; - } - } - /*Bandwidth Support Check*/ - - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.SourceScan[k] == dm_horz) { - mode_lib->vba.SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; - } else { - mode_lib->vba.SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; - } - if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { - mode_lib->vba.BytePerPixelInDETY[k] = 8.0; - mode_lib->vba.BytePerPixelInDETC[k] = 0.0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { - mode_lib->vba.BytePerPixelInDETY[k] = 4.0; - mode_lib->vba.BytePerPixelInDETC[k] = 0.0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 - || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { - mode_lib->vba.BytePerPixelInDETY[k] = 2.0; - mode_lib->vba.BytePerPixelInDETC[k] = 0.0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { - mode_lib->vba.BytePerPixelInDETY[k] = 1.0; - mode_lib->vba.BytePerPixelInDETC[k] = 0.0; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { - mode_lib->vba.BytePerPixelInDETY[k] = 1.0; - mode_lib->vba.BytePerPixelInDETC[k] = 2.0; - } else { - mode_lib->vba.BytePerPixelInDETY[k] = 4.0 / 3; - mode_lib->vba.BytePerPixelInDETC[k] = 8.0 / 3; - } - } - mode_lib->vba.TotalReadBandwidthConsumedGBytePerSecond = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.ReadBandwidth[k] = mode_lib->vba.SwathWidthYSingleDPP[k] - * (dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0) - * mode_lib->vba.VRatio[k] - + dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0) - / 2.0 * mode_lib->vba.VRatio[k] / 2) - / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); - if (mode_lib->vba.DCCEnable[k] == true) { - mode_lib->vba.ReadBandwidth[k] = mode_lib->vba.ReadBandwidth[k] - * (1 + 1 / 256); - } - if (mode_lib->vba.VirtualMemoryEnable == true - && mode_lib->vba.SourceScan[k] != dm_horz - && (mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_s - || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_s_x - || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d - || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x)) { - mode_lib->vba.ReadBandwidth[k] = mode_lib->vba.ReadBandwidth[k] - * (1 + 1 / 64); - } else if (mode_lib->vba.VirtualMemoryEnable == true - && mode_lib->vba.SourceScan[k] == dm_horz - && (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_32) - && (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_s - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_s_t - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_s_x - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x - || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x)) { - mode_lib->vba.ReadBandwidth[k] = mode_lib->vba.ReadBandwidth[k] - * (1 + 1 / 256); - } else if (mode_lib->vba.VirtualMemoryEnable == true) { - mode_lib->vba.ReadBandwidth[k] = mode_lib->vba.ReadBandwidth[k] - * (1 + 1 / 512); - } - mode_lib->vba.TotalReadBandwidthConsumedGBytePerSecond = - mode_lib->vba.TotalReadBandwidthConsumedGBytePerSecond - + mode_lib->vba.ReadBandwidth[k] / 1000.0; - } - mode_lib->vba.TotalWriteBandwidthConsumedGBytePerSecond = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.WritebackEnable[k] == true - && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { - mode_lib->vba.WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] - * mode_lib->vba.WritebackDestinationHeight[k] - / (mode_lib->vba.WritebackSourceHeight[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) * 4.0; - } else if (mode_lib->vba.WritebackEnable[k] == true - && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { - mode_lib->vba.WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] - * mode_lib->vba.WritebackDestinationHeight[k] - / (mode_lib->vba.WritebackSourceHeight[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) * 3.0; - } else if (mode_lib->vba.WritebackEnable[k] == true) { - mode_lib->vba.WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] - * mode_lib->vba.WritebackDestinationHeight[k] - / (mode_lib->vba.WritebackSourceHeight[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) * 1.5; - } else { - mode_lib->vba.WriteBandwidth[k] = 0.0; - } - mode_lib->vba.TotalWriteBandwidthConsumedGBytePerSecond = - mode_lib->vba.TotalWriteBandwidthConsumedGBytePerSecond - + mode_lib->vba.WriteBandwidth[k] / 1000.0; - } - mode_lib->vba.TotalBandwidthConsumedGBytePerSecond = - mode_lib->vba.TotalReadBandwidthConsumedGBytePerSecond - + mode_lib->vba.TotalWriteBandwidthConsumedGBytePerSecond; - mode_lib->vba.DCCEnabledInAnyPlane = false; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.DCCEnable[k] == true) { - mode_lib->vba.DCCEnabledInAnyPlane = true; - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.FabricAndDRAMBandwidthPerState[i] = dml_min( - mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels - * mode_lib->vba.DRAMChannelWidth, - mode_lib->vba.FabricClockPerState[i] - * mode_lib->vba.FabricDatapathToDCNDataReturn) - / 1000; - mode_lib->vba.ReturnBWToDCNPerState = dml_min( - mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], - mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000.0) - * mode_lib->vba.PercentOfIdealDRAMAndFabricBWReceivedAfterUrgLatency - / 100; - mode_lib->vba.ReturnBWPerState[i] = mode_lib->vba.ReturnBWToDCNPerState; - if (mode_lib->vba.DCCEnabledInAnyPlane == true - && mode_lib->vba.ReturnBWToDCNPerState - > mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.ReturnBusWidth - / 4.0) { - mode_lib->vba.ReturnBWPerState[i] = - dml_min( - mode_lib->vba.ReturnBWPerState[i], - mode_lib->vba.ReturnBWToDCNPerState * 4.0 - * (1.0 - - mode_lib->vba.UrgentLatency - / ((mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0 - / (mode_lib->vba.ReturnBWToDCNPerState - - mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.ReturnBusWidth - / 4.0) - + mode_lib->vba.UrgentLatency))); - } - mode_lib->vba.CriticalPoint = - 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.UrgentLatency - / (mode_lib->vba.ReturnBWToDCNPerState - * mode_lib->vba.UrgentLatency - + (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0); - if (mode_lib->vba.DCCEnabledInAnyPlane == true && mode_lib->vba.CriticalPoint > 1.0 - && mode_lib->vba.CriticalPoint < 4.0) { - mode_lib->vba.ReturnBWPerState[i] = - dml_min( - mode_lib->vba.ReturnBWPerState[i], - dml_pow( - 4.0 - * mode_lib->vba.ReturnBWToDCNPerState - * (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0 - * mode_lib->vba.ReturnBusWidth - * mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.UrgentLatency - / (mode_lib->vba.ReturnBWToDCNPerState - * mode_lib->vba.UrgentLatency - + (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0), - 2)); - } - mode_lib->vba.ReturnBWToDCNPerState = dml_min( - mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], - mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000.0); - if (mode_lib->vba.DCCEnabledInAnyPlane == true - && mode_lib->vba.ReturnBWToDCNPerState - > mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.ReturnBusWidth - / 4.0) { - mode_lib->vba.ReturnBWPerState[i] = - dml_min( - mode_lib->vba.ReturnBWPerState[i], - mode_lib->vba.ReturnBWToDCNPerState * 4.0 - * (1.0 - - mode_lib->vba.UrgentLatency - / ((mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0 - / (mode_lib->vba.ReturnBWToDCNPerState - - mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.ReturnBusWidth - / 4.0) - + mode_lib->vba.UrgentLatency))); - } - mode_lib->vba.CriticalPoint = - 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.UrgentLatency - / (mode_lib->vba.ReturnBWToDCNPerState - * mode_lib->vba.UrgentLatency - + (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0); - if (mode_lib->vba.DCCEnabledInAnyPlane == true && mode_lib->vba.CriticalPoint > 1.0 - && mode_lib->vba.CriticalPoint < 4.0) { - mode_lib->vba.ReturnBWPerState[i] = - dml_min( - mode_lib->vba.ReturnBWPerState[i], - dml_pow( - 4.0 - * mode_lib->vba.ReturnBWToDCNPerState - * (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0 - * mode_lib->vba.ReturnBusWidth - * mode_lib->vba.DCFCLKPerState[i] - * mode_lib->vba.UrgentLatency - / (mode_lib->vba.ReturnBWToDCNPerState - * mode_lib->vba.UrgentLatency - + (mode_lib->vba.ROBBufferSizeInKByte - - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0), - 2)); - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - if ((mode_lib->vba.TotalReadBandwidthConsumedGBytePerSecond * 1000.0 - <= mode_lib->vba.ReturnBWPerState[i]) - && (mode_lib->vba.TotalBandwidthConsumedGBytePerSecond * 1000.0 - <= mode_lib->vba.FabricAndDRAMBandwidthPerState[i] - * 1000.0 - * mode_lib->vba.PercentOfIdealDRAMAndFabricBWReceivedAfterUrgLatency - / 100.0)) { - mode_lib->vba.BandwidthSupport[i] = true; - } else { - mode_lib->vba.BandwidthSupport[i] = false; - } - } - /*Writeback Latency support check*/ - - mode_lib->vba.WritebackLatencySupport = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.WritebackEnable[k] == true) { - if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { - if (mode_lib->vba.WriteBandwidth[k] - > (mode_lib->vba.WritebackInterfaceLumaBufferSize - + mode_lib->vba.WritebackInterfaceChromaBufferSize) - / mode_lib->vba.WritebackLatency) { - mode_lib->vba.WritebackLatencySupport = false; - } - } else { - if (mode_lib->vba.WriteBandwidth[k] - > 1.5 - * dml_min( - mode_lib->vba.WritebackInterfaceLumaBufferSize, - 2.0 - * mode_lib->vba.WritebackInterfaceChromaBufferSize) - / mode_lib->vba.WritebackLatency) { - mode_lib->vba.WritebackLatencySupport = false; - } - } - } - } - /*Re-ordering Buffer Support Check*/ - - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i] = - (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) - / mode_lib->vba.DCFCLKPerState[i] - + mode_lib->vba.UrgentOutOfOrderReturnPerChannel - * mode_lib->vba.NumberOfChannels - / mode_lib->vba.ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) - * 1024.0 / mode_lib->vba.ReturnBWPerState[i] - > mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - mode_lib->vba.ROBSupport[i] = true; - } else { - mode_lib->vba.ROBSupport[i] = false; - } - } - /*Writeback Mode Support Check*/ - - mode_lib->vba.TotalNumberOfActiveWriteback = 0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.WritebackEnable[k] == true) { - mode_lib->vba.TotalNumberOfActiveWriteback = - mode_lib->vba.TotalNumberOfActiveWriteback + 1; - } - } - mode_lib->vba.WritebackModeSupport = true; - if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { - mode_lib->vba.WritebackModeSupport = false; - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.WritebackEnable[k] == true - && mode_lib->vba.Writeback10bpc420Supported != true - && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { - mode_lib->vba.WritebackModeSupport = false; - } - } - /*Writeback Scale Ratio and Taps Support Check*/ - - mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.WritebackEnable[k] == true) { - if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false - && (mode_lib->vba.WritebackHRatio[k] != 1.0 - || mode_lib->vba.WritebackVRatio[k] != 1.0)) { - mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; - } - if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio - || mode_lib->vba.WritebackVRatio[k] - > mode_lib->vba.WritebackMaxVSCLRatio - || mode_lib->vba.WritebackHRatio[k] - < mode_lib->vba.WritebackMinHSCLRatio - || mode_lib->vba.WritebackVRatio[k] - < mode_lib->vba.WritebackMinVSCLRatio - || mode_lib->vba.WritebackLumaHTaps[k] - > mode_lib->vba.WritebackMaxHSCLTaps - || mode_lib->vba.WritebackLumaVTaps[k] - > mode_lib->vba.WritebackMaxVSCLTaps - || mode_lib->vba.WritebackHRatio[k] - > mode_lib->vba.WritebackLumaHTaps[k] - || mode_lib->vba.WritebackVRatio[k] - > mode_lib->vba.WritebackLumaVTaps[k] - || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 - && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) - == 1)) - || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 - && (mode_lib->vba.WritebackChromaHTaps[k] - > mode_lib->vba.WritebackMaxHSCLTaps - || mode_lib->vba.WritebackChromaVTaps[k] - > mode_lib->vba.WritebackMaxVSCLTaps - || 2.0 - * mode_lib->vba.WritebackHRatio[k] - > mode_lib->vba.WritebackChromaHTaps[k] - || 2.0 - * mode_lib->vba.WritebackVRatio[k] - > mode_lib->vba.WritebackChromaVTaps[k] - || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 - && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { - mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; - } - if (mode_lib->vba.WritebackVRatio[k] < 1.0) { - mode_lib->vba.WritebackLumaVExtra = - dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); - } else { - mode_lib->vba.WritebackLumaVExtra = -1; - } - if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 - && mode_lib->vba.WritebackLumaVTaps[k] - > (mode_lib->vba.WritebackLineBufferLumaBufferSize - + mode_lib->vba.WritebackLineBufferChromaBufferSize) - / 3.0 - / mode_lib->vba.WritebackDestinationWidth[k] - - mode_lib->vba.WritebackLumaVExtra) - || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 - && mode_lib->vba.WritebackLumaVTaps[k] - > mode_lib->vba.WritebackLineBufferLumaBufferSize - / mode_lib->vba.WritebackDestinationWidth[k] - - mode_lib->vba.WritebackLumaVExtra) - || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 - && mode_lib->vba.WritebackLumaVTaps[k] - > mode_lib->vba.WritebackLineBufferLumaBufferSize - * 8.0 / 10.0 - / mode_lib->vba.WritebackDestinationWidth[k] - - mode_lib->vba.WritebackLumaVExtra)) { - mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; - } - if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { - mode_lib->vba.WritebackChromaVExtra = 0.0; - } else { - mode_lib->vba.WritebackChromaVExtra = -1; - } - if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 - && mode_lib->vba.WritebackChromaVTaps[k] - > mode_lib->vba.WritebackLineBufferChromaBufferSize - / mode_lib->vba.WritebackDestinationWidth[k] - - mode_lib->vba.WritebackChromaVExtra) - || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 - && mode_lib->vba.WritebackChromaVTaps[k] - > mode_lib->vba.WritebackLineBufferChromaBufferSize - * 8.0 / 10.0 - / mode_lib->vba.WritebackDestinationWidth[k] - - mode_lib->vba.WritebackChromaVExtra)) { - mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; - } - } - } - /*Maximum DISPCLK/DPPCLK Support check*/ - - mode_lib->vba.WritebackRequiredDISPCLK = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.WritebackEnable[k] == true) { - mode_lib->vba.WritebackRequiredDISPCLK = - dml_max( - mode_lib->vba.WritebackRequiredDISPCLK, - CalculateWriteBackDISPCLK( - mode_lib->vba.WritebackPixelFormat[k], - mode_lib->vba.PixelClock[k], - mode_lib->vba.WritebackHRatio[k], - mode_lib->vba.WritebackVRatio[k], - mode_lib->vba.WritebackLumaHTaps[k], - mode_lib->vba.WritebackLumaVTaps[k], - mode_lib->vba.WritebackChromaHTaps[k], - mode_lib->vba.WritebackChromaVTaps[k], - mode_lib->vba.WritebackDestinationWidth[k], - mode_lib->vba.HTotal[k], - mode_lib->vba.WritebackChromaLineBufferWidth)); - } - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.HRatio[k] > 1.0) { - mode_lib->vba.PSCL_FACTOR[k] = dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput - * mode_lib->vba.HRatio[k] - / dml_ceil( - mode_lib->vba.htaps[k] - / 6.0, - 1.0)); - } else { - mode_lib->vba.PSCL_FACTOR[k] = dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput); - } - if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { - mode_lib->vba.PSCL_FACTOR_CHROMA[k] = 0.0; - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] = - mode_lib->vba.PixelClock[k] - * dml_max3( - mode_lib->vba.vtaps[k] / 6.0 - * dml_min( - 1.0, - mode_lib->vba.HRatio[k]), - mode_lib->vba.HRatio[k] - * mode_lib->vba.VRatio[k] - / mode_lib->vba.PSCL_FACTOR[k], - 1.0); - if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) - && mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - < 2.0 * mode_lib->vba.PixelClock[k]) { - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] = 2.0 - * mode_lib->vba.PixelClock[k]; - } - } else { - if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { - mode_lib->vba.PSCL_FACTOR_CHROMA[k] = - dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput - * mode_lib->vba.HRatio[k] - / 2.0 - / dml_ceil( - mode_lib->vba.HTAPsChroma[k] - / 6.0, - 1.0)); - } else { - mode_lib->vba.PSCL_FACTOR_CHROMA[k] = dml_min( - mode_lib->vba.MaxDCHUBToPSCLThroughput, - mode_lib->vba.MaxPSCLToLBThroughput); - } - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] = - mode_lib->vba.PixelClock[k] - * dml_max5( - mode_lib->vba.vtaps[k] / 6.0 - * dml_min( - 1.0, - mode_lib->vba.HRatio[k]), - mode_lib->vba.HRatio[k] - * mode_lib->vba.VRatio[k] - / mode_lib->vba.PSCL_FACTOR[k], - mode_lib->vba.VTAPsChroma[k] - / 6.0 - * dml_min( - 1.0, - mode_lib->vba.HRatio[k] - / 2.0), - mode_lib->vba.HRatio[k] - * mode_lib->vba.VRatio[k] - / 4.0 - / mode_lib->vba.PSCL_FACTOR_CHROMA[k], - 1.0); - if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 - || mode_lib->vba.HTAPsChroma[k] > 6.0 - || mode_lib->vba.VTAPsChroma[k] > 6.0) - && mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - < 2.0 * mode_lib->vba.PixelClock[k]) { - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] = 2.0 - * mode_lib->vba.PixelClock[k]; - } - } - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - Calculate256BBlockSizes( - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.SurfaceTiling[k], - dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0), - dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0), - &mode_lib->vba.Read256BlockHeightY[k], - &mode_lib->vba.Read256BlockHeightC[k], - &mode_lib->vba.Read256BlockWidthY[k], - &mode_lib->vba.Read256BlockWidthC[k]); - if (mode_lib->vba.SourceScan[k] == dm_horz) { - mode_lib->vba.MaxSwathHeightY[k] = mode_lib->vba.Read256BlockHeightY[k]; - mode_lib->vba.MaxSwathHeightC[k] = mode_lib->vba.Read256BlockHeightC[k]; - } else { - mode_lib->vba.MaxSwathHeightY[k] = mode_lib->vba.Read256BlockWidthY[k]; - mode_lib->vba.MaxSwathHeightC[k] = mode_lib->vba.Read256BlockWidthC[k]; - } - if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 - || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 - || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 - || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { - if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear - || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 - && (mode_lib->vba.SurfaceTiling[k] - == dm_sw_4kb_s - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_4kb_s_x - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_64kb_s - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_64kb_s_t - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_64kb_s_x - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_var_s - || mode_lib->vba.SurfaceTiling[k] - == dm_sw_var_s_x) - && mode_lib->vba.SourceScan[k] == dm_horz)) { - mode_lib->vba.MinSwathHeightY[k] = mode_lib->vba.MaxSwathHeightY[k]; - } else { - mode_lib->vba.MinSwathHeightY[k] = mode_lib->vba.MaxSwathHeightY[k] - / 2.0; - } - mode_lib->vba.MinSwathHeightC[k] = mode_lib->vba.MaxSwathHeightC[k]; - } else { - if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { - mode_lib->vba.MinSwathHeightY[k] = mode_lib->vba.MaxSwathHeightY[k]; - mode_lib->vba.MinSwathHeightC[k] = mode_lib->vba.MaxSwathHeightC[k]; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 - && mode_lib->vba.SourceScan[k] == dm_horz) { - mode_lib->vba.MinSwathHeightY[k] = mode_lib->vba.MaxSwathHeightY[k] - / 2.0; - mode_lib->vba.MinSwathHeightC[k] = mode_lib->vba.MaxSwathHeightC[k]; - } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 - && mode_lib->vba.SourceScan[k] == dm_horz) { - mode_lib->vba.MinSwathHeightC[k] = mode_lib->vba.MaxSwathHeightC[k] - / 2.0; - mode_lib->vba.MinSwathHeightY[k] = mode_lib->vba.MaxSwathHeightY[k]; - } else { - mode_lib->vba.MinSwathHeightY[k] = mode_lib->vba.MaxSwathHeightY[k]; - mode_lib->vba.MinSwathHeightC[k] = mode_lib->vba.MaxSwathHeightC[k]; - } - } - if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { - mode_lib->vba.MaximumSwathWidthSupport = 8192.0; - } else { - mode_lib->vba.MaximumSwathWidthSupport = 5120.0; - } - mode_lib->vba.MaximumSwathWidthInDETBuffer = - dml_min( - mode_lib->vba.MaximumSwathWidthSupport, - mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0 - / (mode_lib->vba.BytePerPixelInDETY[k] - * mode_lib->vba.MinSwathHeightY[k] - + mode_lib->vba.BytePerPixelInDETC[k] - / 2.0 - * mode_lib->vba.MinSwathHeightC[k])); - if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { - mode_lib->vba.MaximumSwathWidthInLineBuffer = - mode_lib->vba.LineBufferSize - * dml_max(mode_lib->vba.HRatio[k], 1.0) - / mode_lib->vba.LBBitPerPixel[k] - / (mode_lib->vba.vtaps[k] - + dml_max( - dml_ceil( - mode_lib->vba.VRatio[k], - 1.0) - - 2, - 0.0)); - } else { - mode_lib->vba.MaximumSwathWidthInLineBuffer = - dml_min( - mode_lib->vba.LineBufferSize - * dml_max( - mode_lib->vba.HRatio[k], - 1.0) - / mode_lib->vba.LBBitPerPixel[k] - / (mode_lib->vba.vtaps[k] - + dml_max( - dml_ceil( - mode_lib->vba.VRatio[k], - 1.0) - - 2, - 0.0)), - 2.0 * mode_lib->vba.LineBufferSize - * dml_max( - mode_lib->vba.HRatio[k] - / 2.0, - 1.0) - / mode_lib->vba.LBBitPerPixel[k] - / (mode_lib->vba.VTAPsChroma[k] - + dml_max( - dml_ceil( - mode_lib->vba.VRatio[k] - / 2.0, - 1.0) - - 2, - 0.0))); - } - mode_lib->vba.MaximumSwathWidth[k] = dml_min( - mode_lib->vba.MaximumSwathWidthInDETBuffer, - mode_lib->vba.MaximumSwathWidthInLineBuffer); - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( - mode_lib->vba.MaxDispclk[i], - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( - mode_lib->vba.MaxDppclk[i], - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - mode_lib->vba.RequiredDISPCLK[i] = 0.0; - mode_lib->vba.DISPCLK_DPPCLK_Support[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = - mode_lib->vba.PixelClock[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - * (1.0 - + mode_lib->vba.DISPCLKRampingMargin - / 100.0); - if (mode_lib->vba.ODMCapability == true - && mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine - > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = true; - mode_lib->vba.PlaneRequiredDISPCLK = - mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine - / 2.0; - } else { - mode_lib->vba.ODMCombineEnablePerState[i][k] = false; - mode_lib->vba.PlaneRequiredDISPCLK = - mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; - } - if (mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity - && mode_lib->vba.SwathWidthYSingleDPP[k] - <= mode_lib->vba.MaximumSwathWidth[k] - && mode_lib->vba.ODMCombineEnablePerState[i][k] == false) { - mode_lib->vba.NoOfDPP[i][k] = 1; - mode_lib->vba.RequiredDPPCLK[i][k] = - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0); - } else { - mode_lib->vba.NoOfDPP[i][k] = 2; - mode_lib->vba.RequiredDPPCLK[i][k] = - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - / 2.0; - } - mode_lib->vba.RequiredDISPCLK[i] = dml_max( - mode_lib->vba.RequiredDISPCLK[i], - mode_lib->vba.PlaneRequiredDISPCLK); - if ((mode_lib->vba.MinDPPCLKUsingSingleDPP[k] / mode_lib->vba.NoOfDPP[i][k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) - || (mode_lib->vba.PlaneRequiredDISPCLK - > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { - mode_lib->vba.DISPCLK_DPPCLK_Support[i] = false; - } - } - mode_lib->vba.TotalNumberOfActiveDPP[i] = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.TotalNumberOfActiveDPP[i] = - mode_lib->vba.TotalNumberOfActiveDPP[i] - + mode_lib->vba.NoOfDPP[i][k]; - } - if ((mode_lib->vba.MaxDispclk[i] == mode_lib->vba.MaxDispclk[DC__VOLTAGE_STATES] - && mode_lib->vba.MaxDppclk[i] - == mode_lib->vba.MaxDppclk[DC__VOLTAGE_STATES]) - && (mode_lib->vba.TotalNumberOfActiveDPP[i] - > mode_lib->vba.MaxNumDPP - || mode_lib->vba.DISPCLK_DPPCLK_Support[i] == false)) { - mode_lib->vba.RequiredDISPCLK[i] = 0.0; - mode_lib->vba.DISPCLK_DPPCLK_Support[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = - mode_lib->vba.PixelClock[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0); - if (mode_lib->vba.ODMCapability == true - && mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine - > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = true; - mode_lib->vba.PlaneRequiredDISPCLK = - mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine - / 2.0; - } else { - mode_lib->vba.ODMCombineEnablePerState[i][k] = false; - mode_lib->vba.PlaneRequiredDISPCLK = - mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; - } - if (mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity - && mode_lib->vba.SwathWidthYSingleDPP[k] - <= mode_lib->vba.MaximumSwathWidth[k] - && mode_lib->vba.ODMCombineEnablePerState[i][k] - == false) { - mode_lib->vba.NoOfDPP[i][k] = 1; - mode_lib->vba.RequiredDPPCLK[i][k] = - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0); - } else { - mode_lib->vba.NoOfDPP[i][k] = 2; - mode_lib->vba.RequiredDPPCLK[i][k] = - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - / 2.0; - } - mode_lib->vba.RequiredDISPCLK[i] = dml_max( - mode_lib->vba.RequiredDISPCLK[i], - mode_lib->vba.PlaneRequiredDISPCLK); - if ((mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - / mode_lib->vba.NoOfDPP[i][k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) - || (mode_lib->vba.PlaneRequiredDISPCLK - > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { - mode_lib->vba.DISPCLK_DPPCLK_Support[i] = false; - } - } - mode_lib->vba.TotalNumberOfActiveDPP[i] = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.TotalNumberOfActiveDPP[i] = - mode_lib->vba.TotalNumberOfActiveDPP[i] - + mode_lib->vba.NoOfDPP[i][k]; - } - } - if (mode_lib->vba.TotalNumberOfActiveDPP[i] > mode_lib->vba.MaxNumDPP) { - mode_lib->vba.RequiredDISPCLK[i] = 0.0; - mode_lib->vba.DISPCLK_DPPCLK_Support[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = false; - if (mode_lib->vba.SwathWidthYSingleDPP[k] - <= mode_lib->vba.MaximumSwathWidth[k]) { - mode_lib->vba.NoOfDPP[i][k] = 1; - mode_lib->vba.RequiredDPPCLK[i][k] = - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0); - } else { - mode_lib->vba.NoOfDPP[i][k] = 2; - mode_lib->vba.RequiredDPPCLK[i][k] = - mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - / 2.0; - } - if (!(mode_lib->vba.MaxDispclk[i] - == mode_lib->vba.MaxDispclk[DC__VOLTAGE_STATES] - && mode_lib->vba.MaxDppclk[i] - == mode_lib->vba.MaxDppclk[DC__VOLTAGE_STATES])) { - mode_lib->vba.PlaneRequiredDISPCLK = - mode_lib->vba.PixelClock[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - * (1.0 - + mode_lib->vba.DISPCLKRampingMargin - / 100.0); - } else { - mode_lib->vba.PlaneRequiredDISPCLK = - mode_lib->vba.PixelClock[k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0); - } - mode_lib->vba.RequiredDISPCLK[i] = dml_max( - mode_lib->vba.RequiredDISPCLK[i], - mode_lib->vba.PlaneRequiredDISPCLK); - if ((mode_lib->vba.MinDPPCLKUsingSingleDPP[k] - / mode_lib->vba.NoOfDPP[i][k] - * (1.0 - + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) - || (mode_lib->vba.PlaneRequiredDISPCLK - > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { - mode_lib->vba.DISPCLK_DPPCLK_Support[i] = false; - } - } - mode_lib->vba.TotalNumberOfActiveDPP[i] = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.TotalNumberOfActiveDPP[i] = - mode_lib->vba.TotalNumberOfActiveDPP[i] - + mode_lib->vba.NoOfDPP[i][k]; - } - } - mode_lib->vba.RequiredDISPCLK[i] = dml_max( - mode_lib->vba.RequiredDISPCLK[i], - mode_lib->vba.WritebackRequiredDISPCLK); - if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity - < mode_lib->vba.WritebackRequiredDISPCLK) { - mode_lib->vba.DISPCLK_DPPCLK_Support[i] = false; - } - } - /*Viewport Size Check*/ - - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.ViewportSizeSupport[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.ODMCombineEnablePerState[i][k] == true) { - if (dml_min(mode_lib->vba.SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) - > mode_lib->vba.MaximumSwathWidth[k]) { - mode_lib->vba.ViewportSizeSupport[i] = false; - } - } else { - if (mode_lib->vba.SwathWidthYSingleDPP[k] / 2.0 - > mode_lib->vba.MaximumSwathWidth[k]) { - mode_lib->vba.ViewportSizeSupport[i] = false; - } - } - } - } - /*Total Available Pipes Support Check*/ - - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - if (mode_lib->vba.TotalNumberOfActiveDPP[i] <= mode_lib->vba.MaxNumDPP) { - mode_lib->vba.TotalAvailablePipesSupport[i] = true; - } else { - mode_lib->vba.TotalAvailablePipesSupport[i] = false; - } - } - /*Total Available OTG Support Check*/ - - mode_lib->vba.TotalNumberOfActiveOTG = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.BlendingAndTiming[k] == k) { - mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG - + 1.0; - } - } - if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { - mode_lib->vba.NumberOfOTGSupport = true; - } else { - mode_lib->vba.NumberOfOTGSupport = false; - } - /*Display IO and DSC Support Check*/ - - mode_lib->vba.NonsupportedDSCInputBPC = false; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 - || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 - || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { - mode_lib->vba.NonsupportedDSCInputBPC = true; - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.RequiresDSC[i][k] = 0; - mode_lib->vba.RequiresFEC[i][k] = 0; - if (mode_lib->vba.BlendingAndTiming[k] == k) { - if (mode_lib->vba.Output[k] == dm_hdmi) { - mode_lib->vba.RequiresDSC[i][k] = 0; - mode_lib->vba.RequiresFEC[i][k] = 0; - mode_lib->vba.OutputBppPerState[i][k] = - TruncToValidBPP(dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) - / mode_lib->vba.PixelClockBackEnd[k] * 24, - false, - mode_lib->vba.Output[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.DSCInputBitPerComponent[k]); - } else if (mode_lib->vba.Output[k] == dm_dp - || mode_lib->vba.Output[k] == dm_edp) { - if (mode_lib->vba.Output[k] == dm_edp) { - mode_lib->vba.EffectiveFECOverhead = 0.0; - } else { - mode_lib->vba.EffectiveFECOverhead = - mode_lib->vba.FECOverhead; - } - if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { - mode_lib->vba.Outbpp = - TruncToValidBPP((1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 - * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, - false, - mode_lib->vba.Output[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.DSCInputBitPerComponent[k]); - mode_lib->vba.OutbppDSC = - TruncToValidBPP((1.0 - mode_lib->vba.Downspreading / 100.0) - * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 - * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, - true, - mode_lib->vba.Output[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.DSCInputBitPerComponent[k]); - if (mode_lib->vba.DSCEnabled[k] == true) { - mode_lib->vba.RequiresDSC[i][k] = true; - if (mode_lib->vba.Output[k] == dm_dp) { - mode_lib->vba.RequiresFEC[i][k] = - true; - } else { - mode_lib->vba.RequiresFEC[i][k] = - false; - } - mode_lib->vba.Outbpp = - mode_lib->vba.OutbppDSC; - } else { - mode_lib->vba.RequiresDSC[i][k] = false; - mode_lib->vba.RequiresFEC[i][k] = false; - } - mode_lib->vba.OutputBppPerState[i][k] = - mode_lib->vba.Outbpp; - } - if (mode_lib->vba.Outbpp == BPP_INVALID) { - mode_lib->vba.Outbpp = - TruncToValidBPP((1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 - * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, - false, - mode_lib->vba.Output[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.DSCInputBitPerComponent[k]); - mode_lib->vba.OutbppDSC = - TruncToValidBPP((1.0 - mode_lib->vba.Downspreading / 100.0) - * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 - * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, - true, - mode_lib->vba.Output[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.DSCInputBitPerComponent[k]); - if (mode_lib->vba.DSCEnabled[k] == true) { - mode_lib->vba.RequiresDSC[i][k] = true; - if (mode_lib->vba.Output[k] == dm_dp) { - mode_lib->vba.RequiresFEC[i][k] = - true; - } else { - mode_lib->vba.RequiresFEC[i][k] = - false; - } - mode_lib->vba.Outbpp = - mode_lib->vba.OutbppDSC; - } else { - mode_lib->vba.RequiresDSC[i][k] = false; - mode_lib->vba.RequiresFEC[i][k] = false; - } - mode_lib->vba.OutputBppPerState[i][k] = - mode_lib->vba.Outbpp; - } - if (mode_lib->vba.Outbpp == BPP_INVALID - && mode_lib->vba.PHYCLKPerState[i] - >= 810.0) { - mode_lib->vba.Outbpp = - TruncToValidBPP((1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 - * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, - false, - mode_lib->vba.Output[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.DSCInputBitPerComponent[k]); - mode_lib->vba.OutbppDSC = - TruncToValidBPP((1.0 - mode_lib->vba.Downspreading / 100.0) - * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 - * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, - true, - mode_lib->vba.Output[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.DSCInputBitPerComponent[k]); - if (mode_lib->vba.DSCEnabled[k] == true - || mode_lib->vba.Outbpp == BPP_INVALID) { - mode_lib->vba.RequiresDSC[i][k] = true; - if (mode_lib->vba.Output[k] == dm_dp) { - mode_lib->vba.RequiresFEC[i][k] = - true; - } else { - mode_lib->vba.RequiresFEC[i][k] = - false; - } - mode_lib->vba.Outbpp = - mode_lib->vba.OutbppDSC; - } else { - mode_lib->vba.RequiresDSC[i][k] = false; - mode_lib->vba.RequiresFEC[i][k] = false; - } - mode_lib->vba.OutputBppPerState[i][k] = - mode_lib->vba.Outbpp; - } - } - } else { - mode_lib->vba.OutputBppPerState[i][k] = BPP_BLENDED_PIPE; - } - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.DIOSupport[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.OutputBppPerState[i][k] == BPP_INVALID - || (mode_lib->vba.OutputFormat[k] == dm_420 - && mode_lib->vba.ProgressiveToInterlaceUnitInOPP - == true)) { - mode_lib->vba.DIOSupport[i] = false; - } - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = false; - if (mode_lib->vba.BlendingAndTiming[k] == k) { - if ((mode_lib->vba.Output[k] == dm_dp - || mode_lib->vba.Output[k] == dm_edp)) { - if (mode_lib->vba.OutputFormat[k] == dm_420 - || mode_lib->vba.OutputFormat[k] - == dm_n422) { - mode_lib->vba.DSCFormatFactor = 2; - } else { - mode_lib->vba.DSCFormatFactor = 1; - } - if (mode_lib->vba.RequiresDSC[i][k] == true) { - if (mode_lib->vba.ODMCombineEnablePerState[i][k] - == true) { - if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 - / mode_lib->vba.DSCFormatFactor - > (1.0 - - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - * mode_lib->vba.MaxDSCCLK[i]) { - mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = - true; - } - } else { - if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 - / mode_lib->vba.DSCFormatFactor - > (1.0 - - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading - / 100.0) - * mode_lib->vba.MaxDSCCLK[i]) { - mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = - true; - } - } - } - } - } - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.NotEnoughDSCUnits[i] = false; - mode_lib->vba.TotalDSCUnitsRequired = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.RequiresDSC[i][k] == true) { - if (mode_lib->vba.ODMCombineEnablePerState[i][k] == true) { - mode_lib->vba.TotalDSCUnitsRequired = - mode_lib->vba.TotalDSCUnitsRequired + 2.0; - } else { - mode_lib->vba.TotalDSCUnitsRequired = - mode_lib->vba.TotalDSCUnitsRequired + 1.0; - } - } - } - if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { - mode_lib->vba.NotEnoughDSCUnits[i] = true; - } - } - /*DSC Delay per state*/ - - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.BlendingAndTiming[k] != k) { - mode_lib->vba.slices = 0; - } else if (mode_lib->vba.RequiresDSC[i][k] == 0 - || mode_lib->vba.RequiresDSC[i][k] == false) { - mode_lib->vba.slices = 0; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { - mode_lib->vba.slices = dml_ceil( - mode_lib->vba.PixelClockBackEnd[k] / 400.0, - 4.0); - } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { - mode_lib->vba.slices = 8.0; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { - mode_lib->vba.slices = 4.0; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { - mode_lib->vba.slices = 2.0; - } else { - mode_lib->vba.slices = 1.0; - } - if (mode_lib->vba.OutputBppPerState[i][k] == BPP_BLENDED_PIPE - || mode_lib->vba.OutputBppPerState[i][k] == BPP_INVALID) { - mode_lib->vba.bpp = 0.0; - } else { - mode_lib->vba.bpp = mode_lib->vba.OutputBppPerState[i][k]; - } - if (mode_lib->vba.RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (mode_lib->vba.ODMCombineEnablePerState[i][k] == false) { - mode_lib->vba.DSCDelayPerState[i][k] = - dscceComputeDelay( - mode_lib->vba.DSCInputBitPerComponent[k], - mode_lib->vba.bpp, - dml_ceil( - mode_lib->vba.HActive[k] - / mode_lib->vba.slices, - 1.0), - mode_lib->vba.slices, - mode_lib->vba.OutputFormat[k]) - + dscComputeDelay( - mode_lib->vba.OutputFormat[k]); - } else { - mode_lib->vba.DSCDelayPerState[i][k] = - 2.0 - * (dscceComputeDelay( - mode_lib->vba.DSCInputBitPerComponent[k], - mode_lib->vba.bpp, - dml_ceil( - mode_lib->vba.HActive[k] - / mode_lib->vba.slices, - 1.0), - mode_lib->vba.slices - / 2, - mode_lib->vba.OutputFormat[k]) - + dscComputeDelay( - mode_lib->vba.OutputFormat[k])); - } - mode_lib->vba.DSCDelayPerState[i][k] = - mode_lib->vba.DSCDelayPerState[i][k] - * mode_lib->vba.PixelClock[k] - / mode_lib->vba.PixelClockBackEnd[k]; - } else { - mode_lib->vba.DSCDelayPerState[i][k] = 0.0; - } - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { - if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.RequiresDSC[i][j] == true) { - mode_lib->vba.DSCDelayPerState[i][k] = - mode_lib->vba.DSCDelayPerState[i][j]; - } - } - } - } - /*Urgent Latency Support Check*/ - - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - if (mode_lib->vba.ODMCombineEnablePerState[i][k] == true) { - mode_lib->vba.SwathWidthYPerState[i][k] = - dml_min( - mode_lib->vba.SwathWidthYSingleDPP[k], - dml_round( - mode_lib->vba.HActive[k] - / 2.0 - * mode_lib->vba.HRatio[k])); - } else { - mode_lib->vba.SwathWidthYPerState[i][k] = - mode_lib->vba.SwathWidthYSingleDPP[k] - / mode_lib->vba.NoOfDPP[i][k]; - } - mode_lib->vba.SwathWidthGranularityY = 256.0 - / dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0) - / mode_lib->vba.MaxSwathHeightY[k]; - mode_lib->vba.RoundedUpMaxSwathSizeBytesY = (dml_ceil( - mode_lib->vba.SwathWidthYPerState[i][k] - 1.0, - mode_lib->vba.SwathWidthGranularityY) - + mode_lib->vba.SwathWidthGranularityY) - * mode_lib->vba.BytePerPixelInDETY[k] - * mode_lib->vba.MaxSwathHeightY[k]; - if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { - mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil( - mode_lib->vba.RoundedUpMaxSwathSizeBytesY, - 256.0) + 256; - } - if (mode_lib->vba.MaxSwathHeightC[k] > 0.0) { - mode_lib->vba.SwathWidthGranularityC = 256.0 - / dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0) - / mode_lib->vba.MaxSwathHeightC[k]; - mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil( - mode_lib->vba.SwathWidthYPerState[i][k] / 2.0 - 1.0, - mode_lib->vba.SwathWidthGranularityC) - + mode_lib->vba.SwathWidthGranularityC) - * mode_lib->vba.BytePerPixelInDETC[k] - * mode_lib->vba.MaxSwathHeightC[k]; - if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { - mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil( - mode_lib->vba.RoundedUpMaxSwathSizeBytesC, - 256.0) + 256; - } - } else { - mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0; - } - if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY - + mode_lib->vba.RoundedUpMaxSwathSizeBytesC - <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { - mode_lib->vba.SwathHeightYPerState[i][k] = - mode_lib->vba.MaxSwathHeightY[k]; - mode_lib->vba.SwathHeightCPerState[i][k] = - mode_lib->vba.MaxSwathHeightC[k]; - } else { - mode_lib->vba.SwathHeightYPerState[i][k] = - mode_lib->vba.MinSwathHeightY[k]; - mode_lib->vba.SwathHeightCPerState[i][k] = - mode_lib->vba.MinSwathHeightC[k]; - } - if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { - mode_lib->vba.LinesInDETLuma = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 / mode_lib->vba.BytePerPixelInDETY[k] - / mode_lib->vba.SwathWidthYPerState[i][k]; - mode_lib->vba.LinesInDETChroma = 0.0; - } else if (mode_lib->vba.SwathHeightYPerState[i][k] - <= mode_lib->vba.SwathHeightCPerState[i][k]) { - mode_lib->vba.LinesInDETLuma = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 / 2.0 / mode_lib->vba.BytePerPixelInDETY[k] - / mode_lib->vba.SwathWidthYPerState[i][k]; - mode_lib->vba.LinesInDETChroma = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 / 2.0 / mode_lib->vba.BytePerPixelInDETC[k] - / (mode_lib->vba.SwathWidthYPerState[i][k] / 2.0); - } else { - mode_lib->vba.LinesInDETLuma = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 * 2.0 / 3.0 - / mode_lib->vba.BytePerPixelInDETY[k] - / mode_lib->vba.SwathWidthYPerState[i][k]; - mode_lib->vba.LinesInDETChroma = mode_lib->vba.DETBufferSizeInKByte - * 1024.0 / 3.0 / mode_lib->vba.BytePerPixelInDETY[k] - / (mode_lib->vba.SwathWidthYPerState[i][k] / 2.0); - } - mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma = - dml_min( - mode_lib->vba.MaxLineBufferLines, - dml_floor( - mode_lib->vba.LineBufferSize - / mode_lib->vba.LBBitPerPixel[k] - / (mode_lib->vba.SwathWidthYPerState[i][k] - / dml_max( - mode_lib->vba.HRatio[k], - 1.0)), - 1.0)) - - (mode_lib->vba.vtaps[k] - 1.0); - mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma = - dml_min( - mode_lib->vba.MaxLineBufferLines, - dml_floor( - mode_lib->vba.LineBufferSize - / mode_lib->vba.LBBitPerPixel[k] - / (mode_lib->vba.SwathWidthYPerState[i][k] - / 2.0 - / dml_max( - mode_lib->vba.HRatio[k] - / 2.0, - 1.0)), - 1.0)) - - (mode_lib->vba.VTAPsChroma[k] - 1.0); - mode_lib->vba.EffectiveDETLBLinesLuma = - dml_floor( - mode_lib->vba.LinesInDETLuma - + dml_min( - mode_lib->vba.LinesInDETLuma - * mode_lib->vba.RequiredDISPCLK[i] - * mode_lib->vba.BytePerPixelInDETY[k] - * mode_lib->vba.PSCL_FACTOR[k] - / mode_lib->vba.ReturnBWPerState[i], - mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), - mode_lib->vba.SwathHeightYPerState[i][k]); - mode_lib->vba.EffectiveDETLBLinesChroma = - dml_floor( - mode_lib->vba.LinesInDETChroma - + dml_min( - mode_lib->vba.LinesInDETChroma - * mode_lib->vba.RequiredDISPCLK[i] - * mode_lib->vba.BytePerPixelInDETC[k] - * mode_lib->vba.PSCL_FACTOR_CHROMA[k] - / mode_lib->vba.ReturnBWPerState[i], - mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), - mode_lib->vba.SwathHeightCPerState[i][k]); - if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { - mode_lib->vba.UrgentLatencySupportUsPerState[i][k] = - mode_lib->vba.EffectiveDETLBLinesLuma - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - / mode_lib->vba.VRatio[k] - - mode_lib->vba.EffectiveDETLBLinesLuma - * mode_lib->vba.SwathWidthYPerState[i][k] - * dml_ceil( - mode_lib->vba.BytePerPixelInDETY[k], - 1.0) - / (mode_lib->vba.ReturnBWPerState[i] - / mode_lib->vba.NoOfDPP[i][k]); - } else { - mode_lib->vba.UrgentLatencySupportUsPerState[i][k] = - dml_min( - mode_lib->vba.EffectiveDETLBLinesLuma - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - / mode_lib->vba.VRatio[k] - - mode_lib->vba.EffectiveDETLBLinesLuma - * mode_lib->vba.SwathWidthYPerState[i][k] - * dml_ceil( - mode_lib->vba.BytePerPixelInDETY[k], - 1.0) - / (mode_lib->vba.ReturnBWPerState[i] - / mode_lib->vba.NoOfDPP[i][k]), - mode_lib->vba.EffectiveDETLBLinesChroma - * (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]) - / (mode_lib->vba.VRatio[k] - / 2.0) - - mode_lib->vba.EffectiveDETLBLinesChroma - * mode_lib->vba.SwathWidthYPerState[i][k] - / 2.0 - * dml_ceil( - mode_lib->vba.BytePerPixelInDETC[k], - 2.0) - / (mode_lib->vba.ReturnBWPerState[i] - / mode_lib->vba.NoOfDPP[i][k])); - } - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.UrgentLatencySupport[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.UrgentLatencySupportUsPerState[i][k] - < mode_lib->vba.UrgentLatency / 1.0) { - mode_lib->vba.UrgentLatencySupport[i] = false; - } - } - } - /*Prefetch Check*/ - - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.TotalNumberOfDCCActiveDPP[i] = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.DCCEnable[k] == true) { - mode_lib->vba.TotalNumberOfDCCActiveDPP[i] = - mode_lib->vba.TotalNumberOfDCCActiveDPP[i] - + mode_lib->vba.NoOfDPP[i][k]; - } - } - } - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = 8.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, - mode_lib->vba.PixelClock[k] / 16.0); - if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { - if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = - dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, - 1.1 - * dml_ceil( - mode_lib->vba.BytePerPixelInDETY[k], - 1.0) - / 64.0 - * mode_lib->vba.HRatio[k] - * mode_lib->vba.PixelClock[k] - / mode_lib->vba.NoOfDPP[i][k]); - } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = - dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, - 1.1 - * dml_ceil( - mode_lib->vba.BytePerPixelInDETY[k], - 1.0) - / 64.0 - * mode_lib->vba.PSCL_FACTOR[k] - * mode_lib->vba.RequiredDPPCLK[i][k]); - } - } else { - if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = - dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, - 1.1 - * dml_ceil( - mode_lib->vba.BytePerPixelInDETY[k], - 1.0) - / 32.0 - * mode_lib->vba.HRatio[k] - * mode_lib->vba.PixelClock[k] - / mode_lib->vba.NoOfDPP[i][k]); - } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = - dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, - 1.1 - * dml_ceil( - mode_lib->vba.BytePerPixelInDETY[k], - 1.0) - / 32.0 - * mode_lib->vba.PSCL_FACTOR[k] - * mode_lib->vba.RequiredDPPCLK[i][k]); - } - if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = - dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, - 1.1 - * dml_ceil( - mode_lib->vba.BytePerPixelInDETC[k], - 2.0) - / 32.0 - * mode_lib->vba.HRatio[k] - / 2.0 - * mode_lib->vba.PixelClock[k] - / mode_lib->vba.NoOfDPP[i][k]); - } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = - dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, - 1.1 - * dml_ceil( - mode_lib->vba.BytePerPixelInDETC[k], - 2.0) - / 32.0 - * mode_lib->vba.PSCL_FACTOR_CHROMA[k] - * mode_lib->vba.RequiredDPPCLK[i][k]); - } - } - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( - mode_lib, - mode_lib->vba.DCCEnable[k], - mode_lib->vba.Read256BlockHeightY[k], - mode_lib->vba.Read256BlockWidthY[k], - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.SurfaceTiling[k], - dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0), - mode_lib->vba.SourceScan[k], - mode_lib->vba.ViewportWidth[k], - mode_lib->vba.ViewportHeight[k], - mode_lib->vba.SwathWidthYPerState[i][k], - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.VMMPageSize, - mode_lib->vba.PTEBufferSizeInRequests, - mode_lib->vba.PDEProcessingBufIn64KBReqs, - mode_lib->vba.PitchY[k], - mode_lib->vba.DCCMetaPitchY[k], - &mode_lib->vba.MacroTileWidthY[k], - &mode_lib->vba.MetaRowBytesY, - &mode_lib->vba.DPTEBytesPerRowY, - &mode_lib->vba.PTEBufferSizeNotExceededY[i][k], - &mode_lib->vba.dpte_row_height[k], - &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( - mode_lib, - mode_lib->vba.VRatio[k], - mode_lib->vba.vtaps[k], - mode_lib->vba.Interlace[k], - mode_lib->vba.ProgressiveToInterlaceUnitInOPP, - mode_lib->vba.SwathHeightYPerState[i][k], - mode_lib->vba.ViewportYStartY[k], - &mode_lib->vba.PrefillY[k], - &mode_lib->vba.MaxNumSwY[k]); - if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { - mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( - mode_lib, - mode_lib->vba.DCCEnable[k], - mode_lib->vba.Read256BlockHeightY[k], - mode_lib->vba.Read256BlockWidthY[k], - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.SurfaceTiling[k], - dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0), - mode_lib->vba.SourceScan[k], - mode_lib->vba.ViewportWidth[k] / 2.0, - mode_lib->vba.ViewportHeight[k] / 2.0, - mode_lib->vba.SwathWidthYPerState[i][k] / 2.0, - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.VMMPageSize, - mode_lib->vba.PTEBufferSizeInRequests, - mode_lib->vba.PDEProcessingBufIn64KBReqs, - mode_lib->vba.PitchC[k], - 0.0, - &mode_lib->vba.MacroTileWidthC[k], - &mode_lib->vba.MetaRowBytesC, - &mode_lib->vba.DPTEBytesPerRowC, - &mode_lib->vba.PTEBufferSizeNotExceededC[i][k], - &mode_lib->vba.dpte_row_height_chroma[k], - &mode_lib->vba.meta_row_height_chroma[k]); - mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( - mode_lib, - mode_lib->vba.VRatio[k] / 2.0, - mode_lib->vba.VTAPsChroma[k], - mode_lib->vba.Interlace[k], - mode_lib->vba.ProgressiveToInterlaceUnitInOPP, - mode_lib->vba.SwathHeightCPerState[i][k], - mode_lib->vba.ViewportYStartC[k], - &mode_lib->vba.PrefillC[k], - &mode_lib->vba.MaxNumSwC[k]); - } else { - mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; - mode_lib->vba.MetaRowBytesC = 0.0; - mode_lib->vba.DPTEBytesPerRowC = 0.0; - mode_lib->vba.PrefetchLinesC[k] = 0.0; - mode_lib->vba.PTEBufferSizeNotExceededC[i][k] = true; - } - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrameY - + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - mode_lib->vba.MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY - + mode_lib->vba.MetaRowBytesC; - mode_lib->vba.DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY - + mode_lib->vba.DPTEBytesPerRowC; - } - mode_lib->vba.ExtraLatency = - mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i] - + (mode_lib->vba.TotalNumberOfActiveDPP[i] - * mode_lib->vba.PixelChunkSizeInKByte - + mode_lib->vba.TotalNumberOfDCCActiveDPP[i] - * mode_lib->vba.MetaChunkSize) - * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; - if (mode_lib->vba.VirtualMemoryEnable == true) { - mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency - + mode_lib->vba.TotalNumberOfActiveDPP[i] - * mode_lib->vba.PTEChunkSize * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; - } - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.BlendingAndTiming[k] == k) { - if (mode_lib->vba.WritebackEnable[k] == true) { - mode_lib->vba.WritebackDelay[i][k] = - mode_lib->vba.WritebackLatency - + CalculateWriteBackDelay( - mode_lib->vba.WritebackPixelFormat[k], - mode_lib->vba.WritebackHRatio[k], - mode_lib->vba.WritebackVRatio[k], - mode_lib->vba.WritebackLumaHTaps[k], - mode_lib->vba.WritebackLumaVTaps[k], - mode_lib->vba.WritebackChromaHTaps[k], - mode_lib->vba.WritebackChromaVTaps[k], - mode_lib->vba.WritebackDestinationWidth[k]) - / mode_lib->vba.RequiredDISPCLK[i]; - } else { - mode_lib->vba.WritebackDelay[i][k] = 0.0; - } - for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { - if (mode_lib->vba.BlendingAndTiming[j] == k - && mode_lib->vba.WritebackEnable[j] - == true) { - mode_lib->vba.WritebackDelay[i][k] = - dml_max( - mode_lib->vba.WritebackDelay[i][k], - mode_lib->vba.WritebackLatency - + CalculateWriteBackDelay( - mode_lib->vba.WritebackPixelFormat[j], - mode_lib->vba.WritebackHRatio[j], - mode_lib->vba.WritebackVRatio[j], - mode_lib->vba.WritebackLumaHTaps[j], - mode_lib->vba.WritebackLumaVTaps[j], - mode_lib->vba.WritebackChromaHTaps[j], - mode_lib->vba.WritebackChromaVTaps[j], - mode_lib->vba.WritebackDestinationWidth[j]) - / mode_lib->vba.RequiredDISPCLK[i]); - } - } - } - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { - if (mode_lib->vba.BlendingAndTiming[k] == j) { - mode_lib->vba.WritebackDelay[i][k] = - mode_lib->vba.WritebackDelay[i][j]; - } - } - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.MaximumVStartup[k] = - mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - - dml_max( - 1.0, - dml_ceil( - mode_lib->vba.WritebackDelay[i][k] - / (mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]), - 1.0)); - } - mode_lib->vba.TWait = CalculateTWait( - mode_lib->vba.PrefetchMode, - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.XFCEnabled[k] == true) { - mode_lib->vba.XFCRemoteSurfaceFlipDelay = - CalculateRemoteSurfaceFlipDelay( - mode_lib, - mode_lib->vba.VRatio[k], - mode_lib->vba.SwathWidthYPerState[i][k], - dml_ceil( - mode_lib->vba.BytePerPixelInDETY[k], - 1.0), - mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k], - mode_lib->vba.XFCTSlvVupdateOffset, - mode_lib->vba.XFCTSlvVupdateWidth, - mode_lib->vba.XFCTSlvVreadyOffset, - mode_lib->vba.XFCXBUFLatencyTolerance, - mode_lib->vba.XFCFillBWOverhead, - mode_lib->vba.XFCSlvChunkSize, - mode_lib->vba.XFCBusTransportTime, - mode_lib->vba.TimeCalc, - mode_lib->vba.TWait, - &mode_lib->vba.SrcActiveDrainRate, - &mode_lib->vba.TInitXFill, - &mode_lib->vba.TslvChk); - } else { - mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; - } - mode_lib->vba.IsErrorResult[i][k] = - CalculatePrefetchSchedule( - mode_lib, - mode_lib->vba.RequiredDPPCLK[i][k], - mode_lib->vba.RequiredDISPCLK[i], - mode_lib->vba.PixelClock[k], - mode_lib->vba.ProjectedDCFCLKDeepSleep, - mode_lib->vba.DSCDelayPerState[i][k], - mode_lib->vba.NoOfDPP[i][k], - mode_lib->vba.ScalerEnabled[k], - mode_lib->vba.NumberOfCursors[k], - mode_lib->vba.DPPCLKDelaySubtotal, - mode_lib->vba.DPPCLKDelaySCL, - mode_lib->vba.DPPCLKDelaySCLLBOnly, - mode_lib->vba.DPPCLKDelayCNVCFormater, - mode_lib->vba.DPPCLKDelayCNVCCursor, - mode_lib->vba.DISPCLKDelaySubtotal, - mode_lib->vba.SwathWidthYPerState[i][k] - / mode_lib->vba.HRatio[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.VTotal[k] - - mode_lib->vba.VActive[k], - mode_lib->vba.HTotal[k], - mode_lib->vba.MaxInterDCNTileRepeaters, - mode_lib->vba.MaximumVStartup[k], - mode_lib->vba.MaxPageTableLevels, - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.DynamicMetadataEnable[k], - mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], - mode_lib->vba.DynamicMetadataTransmittedBytes[k], - mode_lib->vba.DCCEnable[k], - mode_lib->vba.UrgentLatency, - mode_lib->vba.ExtraLatency, - mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.PrefetchLinesY[k], - mode_lib->vba.SwathWidthYPerState[i][k], - mode_lib->vba.BytePerPixelInDETY[k], - mode_lib->vba.PrefillY[k], - mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[k], - mode_lib->vba.BytePerPixelInDETC[k], - mode_lib->vba.PrefillC[k], - mode_lib->vba.MaxNumSwC[k], - mode_lib->vba.SwathHeightYPerState[i][k], - mode_lib->vba.SwathHeightCPerState[i][k], - mode_lib->vba.TWait, - mode_lib->vba.XFCEnabled[k], - mode_lib->vba.XFCRemoteSurfaceFlipDelay, - mode_lib->vba.Interlace[k], - mode_lib->vba.ProgressiveToInterlaceUnitInOPP, - mode_lib->vba.DSTXAfterScaler, - mode_lib->vba.DSTYAfterScaler, - &mode_lib->vba.LineTimesForPrefetch[k], - &mode_lib->vba.PrefetchBW[k], - &mode_lib->vba.LinesForMetaPTE[k], - &mode_lib->vba.LinesForMetaAndDPTERow[k], - &mode_lib->vba.VRatioPreY[i][k], - &mode_lib->vba.VRatioPreC[i][k], - &mode_lib->vba.RequiredPrefetchPixelDataBW[i][k], - &mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, - &mode_lib->vba.Tno_bw[k], - &mode_lib->vba.VUpdateOffsetPix[k], - &mode_lib->vba.VUpdateWidthPix[k], - &mode_lib->vba.VReadyOffsetPix[k]); - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] - * mode_lib->vba.CursorWidth[k][0] - * mode_lib->vba.CursorBPP[k][0] / 8.0 - / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) - * mode_lib->vba.VRatio[k]; - } - mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; - mode_lib->vba.prefetch_vm_bw_valid = true; - mode_lib->vba.prefetch_row_bw_valid = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] == 0.0) { - mode_lib->vba.prefetch_vm_bw[k] = 0.0; - } else if (mode_lib->vba.LinesForMetaPTE[k] > 0.0) { - mode_lib->vba.prefetch_vm_bw[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - / (mode_lib->vba.LinesForMetaPTE[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - } else { - mode_lib->vba.prefetch_vm_bw[k] = 0.0; - mode_lib->vba.prefetch_vm_bw_valid = false; - } - if (mode_lib->vba.MetaRowBytes[k] + mode_lib->vba.DPTEBytesPerRow[k] - == 0.0) { - mode_lib->vba.prefetch_row_bw[k] = 0.0; - } else if (mode_lib->vba.LinesForMetaAndDPTERow[k] > 0.0) { - mode_lib->vba.prefetch_row_bw[k] = (mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]) - / (mode_lib->vba.LinesForMetaAndDPTERow[k] - * mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k]); - } else { - mode_lib->vba.prefetch_row_bw[k] = 0.0; - mode_lib->vba.prefetch_row_bw_valid = false; - } - mode_lib->vba.MaximumReadBandwidthWithPrefetch = - mode_lib->vba.MaximumReadBandwidthWithPrefetch - + mode_lib->vba.cursor_bw[k] - + dml_max4( - mode_lib->vba.prefetch_vm_bw[k], - mode_lib->vba.prefetch_row_bw[k], - mode_lib->vba.ReadBandwidth[k], - mode_lib->vba.RequiredPrefetchPixelDataBW[i][k]); - } - mode_lib->vba.PrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithPrefetch - > mode_lib->vba.ReturnBWPerState[i] - || mode_lib->vba.prefetch_vm_bw_valid == false - || mode_lib->vba.prefetch_row_bw_valid == false) { - mode_lib->vba.PrefetchSupported[i] = false; - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.LineTimesForPrefetch[k] < 2.0 - || mode_lib->vba.LinesForMetaPTE[k] >= 8.0 - || mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16.0 - || mode_lib->vba.IsErrorResult[i][k] == true) { - mode_lib->vba.PrefetchSupported[i] = false; - } - } - mode_lib->vba.VRatioInPrefetchSupported[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.VRatioPreY[i][k] > 4.0 - || mode_lib->vba.VRatioPreC[i][k] > 4.0 - || mode_lib->vba.IsErrorResult[i][k] == true) { - mode_lib->vba.VRatioInPrefetchSupported[i] = false; - } - } - if (mode_lib->vba.PrefetchSupported[i] == true - && mode_lib->vba.VRatioInPrefetchSupported[i] == true) { - mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.ReturnBWPerState[i]; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.BandwidthAvailableForImmediateFlip - - mode_lib->vba.cursor_bw[k] - - dml_max( - mode_lib->vba.ReadBandwidth[k], - mode_lib->vba.PrefetchBW[k]); - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.ImmediateFlipBytes[k] = 0.0; - if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 - && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { - mode_lib->vba.ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - + mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]; - } - } - mode_lib->vba.TotImmediateFlipBytes = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 - && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { - mode_lib->vba.TotImmediateFlipBytes = - mode_lib->vba.TotImmediateFlipBytes - + mode_lib->vba.ImmediateFlipBytes[k]; - } - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - CalculateFlipSchedule( - mode_lib, - mode_lib->vba.ExtraLatency, - mode_lib->vba.UrgentLatency, - mode_lib->vba.MaxPageTableLevels, - mode_lib->vba.VirtualMemoryEnable, - mode_lib->vba.BandwidthAvailableForImmediateFlip, - mode_lib->vba.TotImmediateFlipBytes, - mode_lib->vba.SourcePixelFormat[k], - mode_lib->vba.ImmediateFlipBytes[k], - mode_lib->vba.HTotal[k] - / mode_lib->vba.PixelClock[k], - mode_lib->vba.VRatio[k], - mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.DCCEnable[k], - mode_lib->vba.dpte_row_height[k], - mode_lib->vba.meta_row_height[k], - mode_lib->vba.qual_row_bw[k], - &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], - &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], - &mode_lib->vba.final_flip_bw[k], - &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); - } - mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.total_dcn_read_bw_with_flip = - mode_lib->vba.total_dcn_read_bw_with_flip - + mode_lib->vba.cursor_bw[k] - + dml_max3( - mode_lib->vba.prefetch_vm_bw[k], - mode_lib->vba.prefetch_row_bw[k], - mode_lib->vba.final_flip_bw[k] - + dml_max( - mode_lib->vba.ReadBandwidth[k], - mode_lib->vba.RequiredPrefetchPixelDataBW[i][k])); - } - mode_lib->vba.ImmediateFlipSupportedForState[i] = true; - if (mode_lib->vba.total_dcn_read_bw_with_flip - > mode_lib->vba.ReturnBWPerState[i]) { - mode_lib->vba.ImmediateFlipSupportedForState[i] = false; - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { - mode_lib->vba.ImmediateFlipSupportedForState[i] = false; - } - } - } else { - mode_lib->vba.ImmediateFlipSupportedForState[i] = false; - } - } - /*PTE Buffer Size Check*/ - - for (i = 0; i <= DC__VOLTAGE_STATES; i++) { - mode_lib->vba.PTEBufferSizeNotExceeded[i] = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.PTEBufferSizeNotExceededY[i][k] == false - || mode_lib->vba.PTEBufferSizeNotExceededC[i][k] == false) { - mode_lib->vba.PTEBufferSizeNotExceeded[i] = false; - } - } - } - /*Cursor Support Check*/ - - mode_lib->vba.CursorSupport = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.CursorWidth[k][0] > 0.0) { - if (dml_floor( - dml_floor( - mode_lib->vba.CursorBufferSize - - mode_lib->vba.CursorChunkSize, - mode_lib->vba.CursorChunkSize) * 1024.0 - / (mode_lib->vba.CursorWidth[k][0] - * mode_lib->vba.CursorBPP[k][0] - / 8.0), - 1.0) - * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) - / mode_lib->vba.VRatio[k] < mode_lib->vba.UrgentLatency - || (mode_lib->vba.CursorBPP[k][0] == 64.0 - && mode_lib->vba.Cursor64BppSupport == false)) { - mode_lib->vba.CursorSupport = false; - } - } - } - /*Valid Pitch Check*/ - - mode_lib->vba.PitchSupport = true; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.AlignedYPitch[k] = dml_ceil( - dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), - mode_lib->vba.MacroTileWidthY[k]); - if (mode_lib->vba.AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { - mode_lib->vba.PitchSupport = false; - } - if (mode_lib->vba.DCCEnable[k] == true) { - mode_lib->vba.AlignedDCCMetaPitch[k] = dml_ceil( - dml_max( - mode_lib->vba.DCCMetaPitchY[k], - mode_lib->vba.ViewportWidth[k]), - 64.0 * mode_lib->vba.Read256BlockWidthY[k]); - } else { - mode_lib->vba.AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; - } - if (mode_lib->vba.AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { - mode_lib->vba.PitchSupport = false; - } - if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 - && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 - && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { - mode_lib->vba.AlignedCPitch[k] = dml_ceil( - dml_max( - mode_lib->vba.PitchC[k], - mode_lib->vba.ViewportWidth[k] / 2.0), - mode_lib->vba.MacroTileWidthC[k]); - } else { - mode_lib->vba.AlignedCPitch[k] = mode_lib->vba.PitchC[k]; - } - if (mode_lib->vba.AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { - mode_lib->vba.PitchSupport = false; - } - } - /*Mode Support, Voltage State and SOC Configuration*/ - - for (i = DC__VOLTAGE_STATES; i >= 0; i--) { - if (mode_lib->vba.ScaleRatioAndTapsSupport == true - && mode_lib->vba.SourceFormatPixelAndScanSupport == true - && mode_lib->vba.ViewportSizeSupport[i] == true - && mode_lib->vba.BandwidthSupport[i] == true - && mode_lib->vba.DIOSupport[i] == true - && mode_lib->vba.NotEnoughDSCUnits[i] == false - && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false - && mode_lib->vba.UrgentLatencySupport[i] == true - && mode_lib->vba.ROBSupport[i] == true - && mode_lib->vba.DISPCLK_DPPCLK_Support[i] == true - && mode_lib->vba.TotalAvailablePipesSupport[i] == true - && mode_lib->vba.NumberOfOTGSupport == true - && mode_lib->vba.WritebackModeSupport == true - && mode_lib->vba.WritebackLatencySupport == true - && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true - && mode_lib->vba.CursorSupport == true - && mode_lib->vba.PitchSupport == true - && mode_lib->vba.PrefetchSupported[i] == true - && mode_lib->vba.VRatioInPrefetchSupported[i] == true - && mode_lib->vba.PTEBufferSizeNotExceeded[i] == true - && mode_lib->vba.NonsupportedDSCInputBPC == false) { - mode_lib->vba.ModeSupport[i] = true; - } else { - mode_lib->vba.ModeSupport[i] = false; - } - } - for (i = DC__VOLTAGE_STATES; i >= 0; i--) { - if (i == DC__VOLTAGE_STATES || mode_lib->vba.ModeSupport[i] == true) { - mode_lib->vba.VoltageLevel = i; - } - } - mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.FabricAndDRAMBandwidth = - mode_lib->vba.FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ImmediateFlipSupport = - mode_lib->vba.ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel]; - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - mode_lib->vba.DPPPerPlane[k] = mode_lib->vba.NoOfDPP[mode_lib->vba.VoltageLevel][k]; - } - for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (mode_lib->vba.BlendingAndTiming[k] == k) { - mode_lib->vba.ODMCombineEnabled[k] = - mode_lib->vba.ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; - } else { - mode_lib->vba.ODMCombineEnabled[k] = 0; - } - mode_lib->vba.DSCEnabled[k] = - mode_lib->vba.RequiresDSC[mode_lib->vba.VoltageLevel][k]; - mode_lib->vba.OutputBpp[k] = - mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k]; - } -} diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h deleted file mode 100644 index 4112409cd974..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ /dev/null @@ -1,598 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DML2_DISPLAY_MODE_VBA_H__ -#define __DML2_DISPLAY_MODE_VBA_H__ - -#include "dml_common_defs.h" - -struct display_mode_lib; - -void set_prefetch_mode(struct display_mode_lib *mode_lib, - bool cstate_en, - bool pstate_en, - bool ignore_viewport_pos, - bool immediate_flip_support); - -#define dml_get_attr_decl(attr) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes) - -dml_get_attr_decl(clk_dcf_deepsleep); -dml_get_attr_decl(wm_urgent); -dml_get_attr_decl(wm_memory_trip); -dml_get_attr_decl(wm_writeback_urgent); -dml_get_attr_decl(wm_stutter_exit); -dml_get_attr_decl(wm_stutter_enter_exit); -dml_get_attr_decl(wm_dram_clock_change); -dml_get_attr_decl(wm_writeback_dram_clock_change); -dml_get_attr_decl(wm_xfc_underflow); -dml_get_attr_decl(stutter_efficiency_no_vblank); -dml_get_attr_decl(stutter_efficiency); -dml_get_attr_decl(urgent_latency); -dml_get_attr_decl(urgent_extra_latency); -dml_get_attr_decl(nonurgent_latency); -dml_get_attr_decl(dram_clock_change_latency); -dml_get_attr_decl(dispclk_calculated); -dml_get_attr_decl(total_data_read_bw); -dml_get_attr_decl(return_bw); -dml_get_attr_decl(tcalc); - -#define dml_get_pipe_attr_decl(attr) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe) - -dml_get_pipe_attr_decl(dsc_delay); -dml_get_pipe_attr_decl(dppclk_calculated); -dml_get_pipe_attr_decl(dscclk_calculated); -dml_get_pipe_attr_decl(min_ttu_vblank); -dml_get_pipe_attr_decl(vratio_prefetch_l); -dml_get_pipe_attr_decl(vratio_prefetch_c); -dml_get_pipe_attr_decl(dst_x_after_scaler); -dml_get_pipe_attr_decl(dst_y_after_scaler); -dml_get_pipe_attr_decl(dst_y_per_vm_vblank); -dml_get_pipe_attr_decl(dst_y_per_row_vblank); -dml_get_pipe_attr_decl(dst_y_prefetch); -dml_get_pipe_attr_decl(dst_y_per_vm_flip); -dml_get_pipe_attr_decl(dst_y_per_row_flip); -dml_get_pipe_attr_decl(xfc_transfer_delay); -dml_get_pipe_attr_decl(xfc_precharge_delay); -dml_get_pipe_attr_decl(xfc_remote_surface_flip_latency); -dml_get_pipe_attr_decl(xfc_prefetch_margin); - -unsigned int get_vstartup_calculated( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes, - unsigned int which_pipe); - -double get_total_immediate_flip_bytes( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes); -double get_total_immediate_flip_bw( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes); -double get_total_prefetch_bw( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes); - -unsigned int dml_get_voltage_level( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes); - -bool Calculate256BBlockSizes( - enum source_format_class SourcePixelFormat, - enum dm_swizzle_mode SurfaceTiling, - unsigned int BytePerPixelY, - unsigned int BytePerPixelC, - unsigned int *BlockHeight256BytesY, - unsigned int *BlockHeight256BytesC, - unsigned int *BlockWidth256BytesY, - unsigned int *BlockWidth256BytesC); - - -struct vba_vars_st { - ip_params_st ip; - soc_bounding_box_st soc; - - unsigned int MaximumMaxVStartupLines; - double cursor_bw[DC__NUM_DPP__MAX]; - double meta_row_bw[DC__NUM_DPP__MAX]; - double dpte_row_bw[DC__NUM_DPP__MAX]; - double qual_row_bw[DC__NUM_DPP__MAX]; - double WritebackDISPCLK; - double PSCL_THROUGHPUT_LUMA[DC__NUM_DPP__MAX]; - double PSCL_THROUGHPUT_CHROMA[DC__NUM_DPP__MAX]; - double DPPCLKUsingSingleDPPLuma; - double DPPCLKUsingSingleDPPChroma; - double DPPCLKUsingSingleDPP[DC__NUM_DPP__MAX]; - double DISPCLKWithRamping; - double DISPCLKWithoutRamping; - double GlobalDPPCLK; - double DISPCLKWithRampingRoundedToDFSGranularity; - double DISPCLKWithoutRampingRoundedToDFSGranularity; - double MaxDispclkRoundedToDFSGranularity; - bool DCCEnabledAnyPlane; - double ReturnBandwidthToDCN; - unsigned int SwathWidthY[DC__NUM_DPP__MAX]; - unsigned int SwathWidthSingleDPPY[DC__NUM_DPP__MAX]; - double BytePerPixelDETY[DC__NUM_DPP__MAX]; - double BytePerPixelDETC[DC__NUM_DPP__MAX]; - double ReadBandwidthPlaneLuma[DC__NUM_DPP__MAX]; - double ReadBandwidthPlaneChroma[DC__NUM_DPP__MAX]; - unsigned int TotalActiveDPP; - unsigned int TotalDCCActiveDPP; - double UrgentRoundTripAndOutOfOrderLatency; - double DisplayPipeLineDeliveryTimeLuma[DC__NUM_DPP__MAX]; // WM - double DisplayPipeLineDeliveryTimeChroma[DC__NUM_DPP__MAX]; // WM - double LinesInDETY[DC__NUM_DPP__MAX]; // WM - double LinesInDETC[DC__NUM_DPP__MAX]; // WM - unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; // WM - unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; // WM - double FullDETBufferingTimeY[DC__NUM_DPP__MAX]; // WM - double FullDETBufferingTimeC[DC__NUM_DPP__MAX]; // WM - double MinFullDETBufferingTime; - double FrameTimeForMinFullDETBufferingTime; - double AverageReadBandwidthGBytePerSecond; - double PartOfBurstThatFitsInROB; - double StutterBurstTime; - //unsigned int NextPrefetchMode; - double VBlankTime; - double SmallestVBlank; - double DCFCLKDeepSleepPerPlane; - double EffectiveDETPlusLBLinesLuma; - double EffectiveDETPlusLBLinesChroma; - double UrgentLatencySupportUsLuma; - double UrgentLatencySupportUsChroma; - double UrgentLatencySupportUs[DC__NUM_DPP__MAX]; - unsigned int DSCFormatFactor; - unsigned int BlockHeight256BytesY[DC__NUM_DPP__MAX]; - unsigned int BlockHeight256BytesC[DC__NUM_DPP__MAX]; - unsigned int BlockWidth256BytesY[DC__NUM_DPP__MAX]; - unsigned int BlockWidth256BytesC[DC__NUM_DPP__MAX]; - double VInitPreFillY[DC__NUM_DPP__MAX]; - double VInitPreFillC[DC__NUM_DPP__MAX]; - unsigned int MaxNumSwathY[DC__NUM_DPP__MAX]; - unsigned int MaxNumSwathC[DC__NUM_DPP__MAX]; - double PrefetchSourceLinesY[DC__NUM_DPP__MAX]; - double PrefetchSourceLinesC[DC__NUM_DPP__MAX]; - double PixelPTEBytesPerRow[DC__NUM_DPP__MAX]; - double MetaRowByte[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_chroma[DC__NUM_DPP__MAX]; - unsigned int meta_row_height[DC__NUM_DPP__MAX]; - unsigned int meta_row_height_chroma[DC__NUM_DPP__MAX]; - - unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; - unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; - unsigned int MaxVStartupLines[DC__NUM_DPP__MAX]; - double WritebackDelay[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool PrefetchModeSupported; - bool AllowDRAMClockChangeDuringVBlank[DC__NUM_DPP__MAX]; - bool AllowDRAMSelfRefreshDuringVBlank[DC__NUM_DPP__MAX]; - double RequiredPrefetchPixDataBW[DC__NUM_DPP__MAX]; - double XFCRemoteSurfaceFlipDelay; - double TInitXFill; - double TslvChk; - double SrcActiveDrainRate; - double Tno_bw[DC__NUM_DPP__MAX]; - bool ImmediateFlipSupported; - - double prefetch_vm_bw[DC__NUM_DPP__MAX]; - double prefetch_row_bw[DC__NUM_DPP__MAX]; - bool ImmediateFlipSupportedForPipe[DC__NUM_DPP__MAX]; - unsigned int VStartupLines; - double DisplayPipeLineDeliveryTimeLumaPrefetch[DC__NUM_DPP__MAX]; - double DisplayPipeLineDeliveryTimeChromaPrefetch[DC__NUM_DPP__MAX]; - unsigned int ActiveDPPs; - unsigned int LBLatencyHidingSourceLinesY; - unsigned int LBLatencyHidingSourceLinesC; - double ActiveDRAMClockChangeLatencyMargin[DC__NUM_DPP__MAX]; - double MinActiveDRAMClockChangeMargin; - double XFCSlaveVUpdateOffset[DC__NUM_DPP__MAX]; - double XFCSlaveVupdateWidth[DC__NUM_DPP__MAX]; - double XFCSlaveVReadyOffset[DC__NUM_DPP__MAX]; - double InitFillLevel; - double FinalFillMargin; - double FinalFillLevel; - double RemainingFillLevel; - double TFinalxFill; - - - // - // SOC Bounding Box Parameters - // - double SRExitTime; - double SREnterPlusExitTime; - double UrgentLatency; - double WritebackLatency; - double PercentOfIdealDRAMAndFabricBWReceivedAfterUrgLatency; - double NumberOfChannels; - double DRAMChannelWidth; - double FabricDatapathToDCNDataReturn; - double ReturnBusWidth; - double Downspreading; - double DISPCLKDPPCLKDSCCLKDownSpreading; - double DISPCLKDPPCLKVCOSpeed; - double RoundTripPingLatencyCycles; - double UrgentOutOfOrderReturnPerChannel; - unsigned int VMMPageSize; - double DRAMClockChangeLatency; - double XFCBusTransportTime; - double XFCXBUFLatencyTolerance; - - // - // IP Parameters - // - unsigned int ROBBufferSizeInKByte; - double DETBufferSizeInKByte; - unsigned int DPPOutputBufferPixels; - unsigned int OPPOutputBufferLines; - unsigned int PixelChunkSizeInKByte; - double ReturnBW; - bool VirtualMemoryEnable; - unsigned int MaxPageTableLevels; - unsigned int OverridePageTableLevels; - unsigned int PTEChunkSize; - unsigned int MetaChunkSize; - unsigned int WritebackChunkSize; - bool ODMCapability; - unsigned int NumberOfDSC; - unsigned int LineBufferSize; - unsigned int MaxLineBufferLines; - unsigned int WritebackInterfaceLumaBufferSize; - unsigned int WritebackInterfaceChromaBufferSize; - unsigned int WritebackChromaLineBufferWidth; - double MaxDCHUBToPSCLThroughput; - double MaxPSCLToLBThroughput; - unsigned int PTEBufferSizeInRequests; - double DISPCLKRampingMargin; - unsigned int MaxInterDCNTileRepeaters; - bool XFCSupported; - double XFCSlvChunkSize; - double XFCFillBWOverhead; - double XFCFillConstant; - double XFCTSlvVupdateOffset; - double XFCTSlvVupdateWidth; - double XFCTSlvVreadyOffset; - double DPPCLKDelaySubtotal; - double DPPCLKDelaySCL; - double DPPCLKDelaySCLLBOnly; - double DPPCLKDelayCNVCFormater; - double DPPCLKDelayCNVCCursor; - double DISPCLKDelaySubtotal; - bool ProgressiveToInterlaceUnitInOPP; - unsigned int PDEProcessingBufIn64KBReqs; - - // Pipe/Plane Parameters - int VoltageLevel; - double FabricAndDRAMBandwidth; - double FabricClock; - double DRAMSpeed; - double DISPCLK; - double SOCCLK; - double DCFCLK; - - unsigned int NumberOfActivePlanes; - unsigned int ViewportWidth[DC__NUM_DPP__MAX]; - unsigned int ViewportHeight[DC__NUM_DPP__MAX]; - unsigned int ViewportYStartY[DC__NUM_DPP__MAX]; - unsigned int ViewportYStartC[DC__NUM_DPP__MAX]; - unsigned int PitchY[DC__NUM_DPP__MAX]; - unsigned int PitchC[DC__NUM_DPP__MAX]; - double HRatio[DC__NUM_DPP__MAX]; - double VRatio[DC__NUM_DPP__MAX]; - unsigned int htaps[DC__NUM_DPP__MAX]; - unsigned int vtaps[DC__NUM_DPP__MAX]; - unsigned int HTAPsChroma[DC__NUM_DPP__MAX]; - unsigned int VTAPsChroma[DC__NUM_DPP__MAX]; - unsigned int HTotal[DC__NUM_DPP__MAX]; - unsigned int VTotal[DC__NUM_DPP__MAX]; - unsigned int DPPPerPlane[DC__NUM_DPP__MAX]; - double PixelClock[DC__NUM_DPP__MAX]; - double PixelClockBackEnd[DC__NUM_DPP__MAX]; - double DPPCLK[DC__NUM_DPP__MAX]; - bool DCCEnable[DC__NUM_DPP__MAX]; - unsigned int DCCMetaPitchY[DC__NUM_DPP__MAX]; - enum scan_direction_class SourceScan[DC__NUM_DPP__MAX]; - enum source_format_class SourcePixelFormat[DC__NUM_DPP__MAX]; - bool WritebackEnable[DC__NUM_DPP__MAX]; - double WritebackDestinationWidth[DC__NUM_DPP__MAX]; - double WritebackDestinationHeight[DC__NUM_DPP__MAX]; - double WritebackSourceHeight[DC__NUM_DPP__MAX]; - enum source_format_class WritebackPixelFormat[DC__NUM_DPP__MAX]; - unsigned int WritebackLumaHTaps[DC__NUM_DPP__MAX]; - unsigned int WritebackLumaVTaps[DC__NUM_DPP__MAX]; - unsigned int WritebackChromaHTaps[DC__NUM_DPP__MAX]; - unsigned int WritebackChromaVTaps[DC__NUM_DPP__MAX]; - double WritebackHRatio[DC__NUM_DPP__MAX]; - double WritebackVRatio[DC__NUM_DPP__MAX]; - unsigned int HActive[DC__NUM_DPP__MAX]; - unsigned int VActive[DC__NUM_DPP__MAX]; - bool Interlace[DC__NUM_DPP__MAX]; - enum dm_swizzle_mode SurfaceTiling[DC__NUM_DPP__MAX]; - unsigned int ScalerRecoutWidth[DC__NUM_DPP__MAX]; - bool DynamicMetadataEnable[DC__NUM_DPP__MAX]; - unsigned int DynamicMetadataLinesBeforeActiveRequired[DC__NUM_DPP__MAX]; - unsigned int DynamicMetadataTransmittedBytes[DC__NUM_DPP__MAX]; - double DCCRate[DC__NUM_DPP__MAX]; - bool ODMCombineEnabled[DC__NUM_DPP__MAX]; - double OutputBpp[DC__NUM_DPP__MAX]; - unsigned int NumberOfDSCSlices[DC__NUM_DPP__MAX]; - bool DSCEnabled[DC__NUM_DPP__MAX]; - unsigned int DSCDelay[DC__NUM_DPP__MAX]; - unsigned int DSCInputBitPerComponent[DC__NUM_DPP__MAX]; - enum output_format_class OutputFormat[DC__NUM_DPP__MAX]; - enum output_encoder_class Output[DC__NUM_DPP__MAX]; - unsigned int BlendingAndTiming[DC__NUM_DPP__MAX]; - bool SynchronizedVBlank; - unsigned int NumberOfCursors[DC__NUM_DPP__MAX]; - unsigned int CursorWidth[DC__NUM_DPP__MAX][DC__NUM_CURSOR__MAX]; - unsigned int CursorBPP[DC__NUM_DPP__MAX][DC__NUM_CURSOR__MAX]; - bool XFCEnabled[DC__NUM_DPP__MAX]; - bool ScalerEnabled[DC__NUM_DPP__MAX]; - - // Intermediates/Informational - bool ImmediateFlipSupport; - unsigned int SwathHeightY[DC__NUM_DPP__MAX]; - unsigned int SwathHeightC[DC__NUM_DPP__MAX]; - unsigned int DETBufferSizeY[DC__NUM_DPP__MAX]; - unsigned int DETBufferSizeC[DC__NUM_DPP__MAX]; - unsigned int LBBitPerPixel[DC__NUM_DPP__MAX]; - double LastPixelOfLineExtraWatermark; - double TotalDataReadBandwidth; - unsigned int TotalActiveWriteback; - unsigned int EffectiveLBLatencyHidingSourceLinesLuma; - unsigned int EffectiveLBLatencyHidingSourceLinesChroma; - double BandwidthAvailableForImmediateFlip; - unsigned int PrefetchMode; - bool IgnoreViewportPositioning; - double PrefetchBandwidth[DC__NUM_DPP__MAX]; - bool ErrorResult[DC__NUM_DPP__MAX]; - double PDEAndMetaPTEBytesFrame[DC__NUM_DPP__MAX]; - - // - // Calculated dml_ml->vba.Outputs - // - double DCFClkDeepSleep; - double UrgentWatermark; - double UrgentExtraLatency; - double MemoryTripWatermark; - double WritebackUrgentWatermark; - double StutterExitWatermark; - double StutterEnterPlusExitWatermark; - double DRAMClockChangeWatermark; - double WritebackDRAMClockChangeWatermark; - double StutterEfficiency; - double StutterEfficiencyNotIncludingVBlank; - double MinUrgentLatencySupportUs; - double NonUrgentLatencyTolerance; - double MinActiveDRAMClockChangeLatencySupported; - enum clock_change_support DRAMClockChangeSupport; - - // These are the clocks calcuated by the library but they are not actually - // used explicitly. They are fetched by tests and then possibly used. The - // ultimate values to use are the ones specified by the parameters to DML - double DISPCLK_calculated; - double DSCCLK_calculated[DC__NUM_DPP__MAX]; - double DPPCLK_calculated[DC__NUM_DPP__MAX]; - - unsigned int VStartup[DC__NUM_DPP__MAX]; - unsigned int VUpdateOffsetPix[DC__NUM_DPP__MAX]; - unsigned int VUpdateWidthPix[DC__NUM_DPP__MAX]; - unsigned int VReadyOffsetPix[DC__NUM_DPP__MAX]; - unsigned int VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; - - double ImmediateFlipBW; - unsigned int TotImmediateFlipBytes; - double TCalc; - double MinTTUVBlank[DC__NUM_DPP__MAX]; - double VRatioPrefetchY[DC__NUM_DPP__MAX]; - double VRatioPrefetchC[DC__NUM_DPP__MAX]; - double DSTXAfterScaler[DC__NUM_DPP__MAX]; - double DSTYAfterScaler[DC__NUM_DPP__MAX]; - - double DestinationLinesToRequestVMInVBlank[DC__NUM_DPP__MAX]; - double DestinationLinesToRequestRowInVBlank[DC__NUM_DPP__MAX]; - double DestinationLinesForPrefetch[DC__NUM_DPP__MAX]; - double DestinationLinesToRequestRowInImmediateFlip[DC__NUM_DPP__MAX]; - double DestinationLinesToRequestVMInImmediateFlip[DC__NUM_DPP__MAX]; - - double XFCTransferDelay[DC__NUM_DPP__MAX]; - double XFCPrechargeDelay[DC__NUM_DPP__MAX]; - double XFCRemoteSurfaceFlipLatency[DC__NUM_DPP__MAX]; - double XFCPrefetchMargin[DC__NUM_DPP__MAX]; - - display_e2e_pipe_params_st cache_pipes[DC__NUM_DPP__MAX]; - unsigned int cache_num_pipes; - unsigned int pipe_plane[DC__NUM_DPP__MAX]; - - /* vba mode support */ - /*inputs*/ - bool SupportGFX7CompatibleTilingIn32bppAnd64bpp; - double MaxHSCLRatio; - double MaxVSCLRatio; - unsigned int MaxNumWriteback; - bool WritebackLumaAndChromaScalingSupported; - bool Cursor64BppSupport; - double DCFCLKPerState[DC__VOLTAGE_STATES + 1]; - double FabricClockPerState[DC__VOLTAGE_STATES + 1]; - double SOCCLKPerState[DC__VOLTAGE_STATES + 1]; - double PHYCLKPerState[DC__VOLTAGE_STATES + 1]; - double MaxDppclk[DC__VOLTAGE_STATES + 1]; - double MaxDSCCLK[DC__VOLTAGE_STATES + 1]; - double DRAMSpeedPerState[DC__VOLTAGE_STATES + 1]; - double MaxDispclk[DC__VOLTAGE_STATES + 1]; - - /*outputs*/ - bool ScaleRatioAndTapsSupport; - bool SourceFormatPixelAndScanSupport; - unsigned int SwathWidthYSingleDPP[DC__NUM_DPP__MAX]; - double BytePerPixelInDETY[DC__NUM_DPP__MAX]; - double BytePerPixelInDETC[DC__NUM_DPP__MAX]; - double TotalReadBandwidthConsumedGBytePerSecond; - double ReadBandwidth[DC__NUM_DPP__MAX]; - double TotalWriteBandwidthConsumedGBytePerSecond; - double WriteBandwidth[DC__NUM_DPP__MAX]; - double TotalBandwidthConsumedGBytePerSecond; - bool DCCEnabledInAnyPlane; - bool WritebackLatencySupport; - bool WritebackModeSupport; - bool Writeback10bpc420Supported; - bool BandwidthSupport[DC__VOLTAGE_STATES + 1]; - unsigned int TotalNumberOfActiveWriteback; - double CriticalPoint; - double ReturnBWToDCNPerState; - double FabricAndDRAMBandwidthPerState[DC__VOLTAGE_STATES + 1]; - double ReturnBWPerState[DC__VOLTAGE_STATES + 1]; - double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES + 1]; - bool ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool PTEBufferSizeNotExceededY[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool PTEBufferSizeNotExceededC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool PrefetchSupported[DC__VOLTAGE_STATES + 1]; - bool VRatioInPrefetchSupported[DC__VOLTAGE_STATES + 1]; - bool DISPCLK_DPPCLK_Support[DC__VOLTAGE_STATES + 1]; - bool TotalAvailablePipesSupport[DC__VOLTAGE_STATES + 1]; - bool UrgentLatencySupport[DC__VOLTAGE_STATES + 1]; - bool ModeSupport[DC__VOLTAGE_STATES + 1]; - bool DIOSupport[DC__VOLTAGE_STATES + 1]; - bool NotEnoughDSCUnits[DC__VOLTAGE_STATES + 1]; - bool DSCCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; - bool ROBSupport[DC__VOLTAGE_STATES + 1]; - bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES + 1]; - bool RequiresDSC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool IsErrorResult[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1]; - bool prefetch_vm_bw_valid; - bool prefetch_row_bw_valid; - bool NumberOfOTGSupport; - bool NonsupportedDSCInputBPC; - bool WritebackScaleRatioAndTapsSupport; - bool CursorSupport; - bool PitchSupport; - - double WritebackLineBufferLumaBufferSize; - double WritebackLineBufferChromaBufferSize; - double WritebackMinHSCLRatio; - double WritebackMinVSCLRatio; - double WritebackMaxHSCLRatio; - double WritebackMaxVSCLRatio; - double WritebackMaxHSCLTaps; - double WritebackMaxVSCLTaps; - unsigned int MaxNumDPP; - unsigned int MaxNumOTG; - double CursorBufferSize; - double CursorChunkSize; - unsigned int Mode; - unsigned int NoOfDPP[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double OutputLinkDPLanes[DC__NUM_DPP__MAX]; - double SwathWidthYPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double SwathHeightYPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double SwathHeightCPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double UrgentLatencySupportUsPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double VRatioPreY[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double VRatioPreC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double RequiredPrefetchPixelDataBW[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double RequiredDPPCLK[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double RequiredDISPCLK[DC__VOLTAGE_STATES + 1]; - double TotalNumberOfActiveDPP[DC__VOLTAGE_STATES + 1]; - double TotalNumberOfDCCActiveDPP[DC__VOLTAGE_STATES + 1]; - double PrefetchBW[DC__NUM_DPP__MAX]; - double PDEAndMetaPTEBytesPerFrame[DC__NUM_DPP__MAX]; - double MetaRowBytes[DC__NUM_DPP__MAX]; - double DPTEBytesPerRow[DC__NUM_DPP__MAX]; - double PrefetchLinesY[DC__NUM_DPP__MAX]; - double PrefetchLinesC[DC__NUM_DPP__MAX]; - unsigned int MaxNumSwY[DC__NUM_DPP__MAX]; - unsigned int MaxNumSwC[DC__NUM_DPP__MAX]; - double PrefillY[DC__NUM_DPP__MAX]; - double PrefillC[DC__NUM_DPP__MAX]; - double LineTimesForPrefetch[DC__NUM_DPP__MAX]; - double LinesForMetaPTE[DC__NUM_DPP__MAX]; - double LinesForMetaAndDPTERow[DC__NUM_DPP__MAX]; - double MinDPPCLKUsingSingleDPP[DC__NUM_DPP__MAX]; - double RequiresFEC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - unsigned int OutputBppPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - double DSCDelayPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - unsigned int Read256BlockHeightY[DC__NUM_DPP__MAX]; - unsigned int Read256BlockWidthY[DC__NUM_DPP__MAX]; - unsigned int Read256BlockHeightC[DC__NUM_DPP__MAX]; - unsigned int Read256BlockWidthC[DC__NUM_DPP__MAX]; - unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; - double MaxSwathHeightY[DC__NUM_DPP__MAX]; - double MaxSwathHeightC[DC__NUM_DPP__MAX]; - double MinSwathHeightY[DC__NUM_DPP__MAX]; - double MinSwathHeightC[DC__NUM_DPP__MAX]; - double PSCL_FACTOR[DC__NUM_DPP__MAX]; - double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX]; - double MaximumVStartup[DC__NUM_DPP__MAX]; - double AlignedDCCMetaPitch[DC__NUM_DPP__MAX]; - double AlignedYPitch[DC__NUM_DPP__MAX]; - double AlignedCPitch[DC__NUM_DPP__MAX]; - double MaximumSwathWidth[DC__NUM_DPP__MAX]; - double final_flip_bw[DC__NUM_DPP__MAX]; - double ImmediateFlipSupportedForState[DC__VOLTAGE_STATES + 1]; - - double WritebackLumaVExtra; - double WritebackChromaVExtra; - double WritebackRequiredDISPCLK; - double MaximumSwathWidthSupport; - double MaximumSwathWidthInDETBuffer; - double MaximumSwathWidthInLineBuffer; - double MaxDispclkRoundedDownToDFSGranularity; - double MaxDppclkRoundedDownToDFSGranularity; - double PlaneRequiredDISPCLKWithoutODMCombine; - double PlaneRequiredDISPCLK; - double TotalNumberOfActiveOTG; - double FECOverhead; - double EffectiveFECOverhead; - unsigned int Outbpp; - unsigned int OutbppDSC; - double TotalDSCUnitsRequired; - double bpp; - unsigned int slices; - double SwathWidthGranularityY; - double RoundedUpMaxSwathSizeBytesY; - double SwathWidthGranularityC; - double RoundedUpMaxSwathSizeBytesC; - double LinesInDETLuma; - double LinesInDETChroma; - double EffectiveDETLBLinesLuma; - double EffectiveDETLBLinesChroma; - double ProjectedDCFCLKDeepSleep; - double PDEAndMetaPTEBytesPerFrameY; - double PDEAndMetaPTEBytesPerFrameC; - unsigned int MetaRowBytesY; - unsigned int MetaRowBytesC; - unsigned int DPTEBytesPerRowC; - unsigned int DPTEBytesPerRowY; - double ExtraLatency; - double TimeCalc; - double TWait; - double MaximumReadBandwidthWithPrefetch; - double total_dcn_read_bw_with_flip; -}; - -#endif /* _DML2_DISPLAY_MODE_VBA_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c deleted file mode 100644 index 325dd2b757d6..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c +++ /dev/null @@ -1,1772 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "display_mode_lib.h" -#include "display_mode_vba.h" -#include "display_rq_dlg_calc.h" - -/* - * NOTE: - * This file is gcc-parseable HW gospel, coming straight from HW engineers. - * - * It doesn't adhere to Linux kernel style and sometimes will do things in odd - * ways. Unless there is something clearly wrong with it the code should - * remain as-is as it provides us with a guarantee from HW that it is correct. - */ - -static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, - double *refcyc_per_req_delivery_pre_cur, - double *refcyc_per_req_delivery_cur, - double refclk_freq_in_mhz, - double ref_freq_to_pix_freq, - double hscale_pixel_rate_l, - double hscl_ratio, - double vratio_pre_l, - double vratio_l, - unsigned int cur_width, - enum cursor_bpp cur_bpp); - -#include "dml_inline_defs.h" - -static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) -{ - unsigned int ret_val = 0; - - if (source_format == dm_444_16) { - if (!is_chroma) - ret_val = 2; - } else if (source_format == dm_444_32) { - if (!is_chroma) - ret_val = 4; - } else if (source_format == dm_444_64) { - if (!is_chroma) - ret_val = 8; - } else if (source_format == dm_420_8) { - if (is_chroma) - ret_val = 2; - else - ret_val = 1; - } else if (source_format == dm_420_10) { - if (is_chroma) - ret_val = 4; - else - ret_val = 2; - } else if (source_format == dm_444_8) { - ret_val = 1; - } - return ret_val; -} - -static bool is_dual_plane(enum source_format_class source_format) -{ - bool ret_val = 0; - - if ((source_format == dm_420_8) || (source_format == dm_420_10)) - ret_val = 1; - - return ret_val; -} - -static double get_refcyc_per_delivery(struct display_mode_lib *mode_lib, - double refclk_freq_in_mhz, - double pclk_freq_in_mhz, - bool odm_combine, - unsigned int recout_width, - unsigned int hactive, - double vratio, - double hscale_pixel_rate, - unsigned int delivery_width, - unsigned int req_per_swath_ub) -{ - double refcyc_per_delivery = 0.0; - - if (vratio <= 1.0) { - if (odm_combine) - refcyc_per_delivery = (double) refclk_freq_in_mhz - * dml_min((double) recout_width, (double) hactive / 2.0) - / pclk_freq_in_mhz / (double) req_per_swath_ub; - else - refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width - / pclk_freq_in_mhz / (double) req_per_swath_ub; - } else { - refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width - / (double) hscale_pixel_rate / (double) req_per_swath_ub; - } - - dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); - dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); - dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); - dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); - dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); - dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); - - return refcyc_per_delivery; - -} - -static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) -{ - if (tile_size == dm_256k_tile) - return (256 * 1024); - else if (tile_size == dm_64k_tile) - return (64 * 1024); - else - return (4 * 1024); -} - -static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, - display_data_rq_regs_st *rq_regs, - const display_data_rq_sizing_params_st rq_sizing) -{ - dml_print("DML_DLG: %s: rq_sizing param\n", __func__); - print__data_rq_sizing_params_st(mode_lib, rq_sizing); - - rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; - - if (rq_sizing.min_chunk_bytes == 0) - rq_regs->min_chunk_size = 0; - else - rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; - - rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10; - if (rq_sizing.min_meta_chunk_bytes == 0) - rq_regs->min_meta_chunk_size = 0; - else - rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; - - rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; - rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6; -} - -static void extract_rq_regs(struct display_mode_lib *mode_lib, - display_rq_regs_st *rq_regs, - const display_rq_params_st rq_param) -{ - unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; - unsigned int detile_buf_plane1_addr = 0; - - extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l); - - rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height), - 1) - 3; - - if (rq_param.yuv420) { - extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c); - rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height), - 1) - 3; - } - - rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); - rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); - - // FIXME: take the max between luma, chroma chunk size? - // okay for now, as we are setting chunk_bytes to 8kb anyways - if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb - rq_regs->drq_expansion_mode = 0; - } else { - rq_regs->drq_expansion_mode = 2; - } - rq_regs->prq_expansion_mode = 1; - rq_regs->mrq_expansion_mode = 1; - rq_regs->crq_expansion_mode = 1; - - if (rq_param.yuv420) { - if ((double) rq_param.misc.rq_l.stored_swath_bytes - / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) { - detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma - } else { - detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), - 256, - 0) / 64.0; // 2/3 to chroma - } - } - rq_regs->plane1_base_address = detile_buf_plane1_addr; -} - -static void handle_det_buf_split(struct display_mode_lib *mode_lib, - display_rq_params_st *rq_param, - const display_pipe_source_params_st pipe_src_param) -{ - unsigned int total_swath_bytes = 0; - unsigned int swath_bytes_l = 0; - unsigned int swath_bytes_c = 0; - unsigned int full_swath_bytes_packed_l = 0; - unsigned int full_swath_bytes_packed_c = 0; - bool req128_l = 0; - bool req128_c = 0; - bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); - bool surf_vert = (pipe_src_param.source_scan == dm_vert); - unsigned int log2_swath_height_l = 0; - unsigned int log2_swath_height_c = 0; - unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; - - full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; - full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; - - if (rq_param->yuv420_10bpc) { - full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2 / 3, - 256, - 1) + 256; - full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2 / 3, - 256, - 1) + 256; - } - - if (rq_param->yuv420) { - total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; - - if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request - req128_l = 0; - req128_c = 0; - swath_bytes_l = full_swath_bytes_packed_l; - swath_bytes_c = full_swath_bytes_packed_c; - } else { //128b request (for luma only for yuv420 8bpc) - req128_l = 1; - req128_c = 0; - swath_bytes_l = full_swath_bytes_packed_l / 2; - swath_bytes_c = full_swath_bytes_packed_c; - } - // Note: assumption, the config that pass in will fit into - // the detiled buffer. - } else { - total_swath_bytes = 2 * full_swath_bytes_packed_l; - - if (total_swath_bytes <= detile_buf_size_in_bytes) - req128_l = 0; - else - req128_l = 1; - - swath_bytes_l = total_swath_bytes; - swath_bytes_c = 0; - } - rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; - rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; - - if (surf_linear) { - log2_swath_height_l = 0; - log2_swath_height_c = 0; - } else if (!surf_vert) { - log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l; - log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c; - } else { - log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l; - log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c; - } - rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; - rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; - - dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); - dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); - dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", - __func__, - full_swath_bytes_packed_l); - dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", - __func__, - full_swath_bytes_packed_c); -} - -static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, - display_data_rq_dlg_params_st *rq_dlg_param, - display_data_rq_misc_params_st *rq_misc_param, - display_data_rq_sizing_params_st *rq_sizing_param, - unsigned int vp_width, - unsigned int vp_height, - unsigned int data_pitch, - unsigned int meta_pitch, - unsigned int source_format, - unsigned int tiling, - unsigned int macro_tile_size, - unsigned int source_scan, - unsigned int is_chroma) -{ - bool surf_linear = (tiling == dm_sw_linear); - bool surf_vert = (source_scan == dm_vert); - - unsigned int bytes_per_element; - unsigned int bytes_per_element_y = get_bytes_per_element((enum source_format_class)(source_format), - false); - unsigned int bytes_per_element_c = get_bytes_per_element((enum source_format_class)(source_format), - true); - - unsigned int blk256_width = 0; - unsigned int blk256_height = 0; - - unsigned int blk256_width_y = 0; - unsigned int blk256_height_y = 0; - unsigned int blk256_width_c = 0; - unsigned int blk256_height_c = 0; - unsigned int log2_bytes_per_element; - unsigned int log2_blk256_width; - unsigned int log2_blk256_height; - unsigned int blk_bytes; - unsigned int log2_blk_bytes; - unsigned int log2_blk_height; - unsigned int log2_blk_width; - unsigned int log2_meta_req_bytes; - unsigned int log2_meta_req_height; - unsigned int log2_meta_req_width; - unsigned int meta_req_width; - unsigned int meta_req_height; - unsigned int log2_meta_row_height; - unsigned int meta_row_width_ub; - unsigned int log2_meta_chunk_bytes; - unsigned int log2_meta_chunk_height; - - //full sized meta chunk width in unit of data elements - unsigned int log2_meta_chunk_width; - unsigned int log2_min_meta_chunk_bytes; - unsigned int min_meta_chunk_width; - unsigned int meta_chunk_width; - unsigned int meta_chunk_per_row_int; - unsigned int meta_row_remainder; - unsigned int meta_chunk_threshold; - unsigned int meta_blk_bytes; - unsigned int meta_blk_height; - unsigned int meta_blk_width; - unsigned int meta_surface_bytes; - unsigned int vmpg_bytes; - unsigned int meta_pte_req_per_frame_ub; - unsigned int meta_pte_bytes_per_frame_ub; - const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); - const unsigned int dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs; - const unsigned int pde_proc_buffer_size_64k_reqs = - mode_lib->ip.pde_proc_buffer_size_64k_reqs; - - unsigned int log2_vmpg_height = 0; - unsigned int log2_vmpg_width = 0; - unsigned int log2_dpte_req_height_ptes = 0; - unsigned int log2_dpte_req_height = 0; - unsigned int log2_dpte_req_width = 0; - unsigned int log2_dpte_row_height_linear = 0; - unsigned int log2_dpte_row_height = 0; - unsigned int log2_dpte_group_width = 0; - unsigned int dpte_row_width_ub = 0; - unsigned int dpte_req_height = 0; - unsigned int dpte_req_width = 0; - unsigned int dpte_group_width = 0; - unsigned int log2_dpte_group_bytes = 0; - unsigned int log2_dpte_group_length = 0; - unsigned int pde_buf_entries; - bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); - - Calculate256BBlockSizes((enum source_format_class)(source_format), - (enum dm_swizzle_mode)(tiling), - bytes_per_element_y, - bytes_per_element_c, - &blk256_height_y, - &blk256_height_c, - &blk256_width_y, - &blk256_width_c); - - if (!is_chroma) { - blk256_width = blk256_width_y; - blk256_height = blk256_height_y; - bytes_per_element = bytes_per_element_y; - } else { - blk256_width = blk256_width_c; - blk256_height = blk256_height_c; - bytes_per_element = bytes_per_element_c; - } - - log2_bytes_per_element = dml_log2(bytes_per_element); - - dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); - dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); - dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); - dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); - - log2_blk256_width = dml_log2((double) blk256_width); - log2_blk256_height = dml_log2((double) blk256_height); - blk_bytes = surf_linear ? - 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); - log2_blk_bytes = dml_log2((double) blk_bytes); - log2_blk_height = 0; - log2_blk_width = 0; - - // remember log rule - // "+" in log is multiply - // "-" in log is divide - // "/2" is like square root - // blk is vertical biased - if (tiling != dm_sw_linear) - log2_blk_height = log2_blk256_height - + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); - else - log2_blk_height = 0; // blk height of 1 - - log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; - - if (!surf_vert) { - rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) - + blk256_width; - rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; - } else { - rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_height - 1, blk256_height, 1) - + blk256_height; - rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; - } - - if (!surf_vert) - rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height - * bytes_per_element; - else - rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width - * bytes_per_element; - - rq_misc_param->blk256_height = blk256_height; - rq_misc_param->blk256_width = blk256_width; - - // ------- - // meta - // ------- - log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element - - // each 64b meta request for dcn is 8x8 meta elements and - // a meta element covers one 256b block of the the data surface. - log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 - log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - - log2_meta_req_height; - meta_req_width = 1 << log2_meta_req_width; - meta_req_height = 1 << log2_meta_req_height; - log2_meta_row_height = 0; - meta_row_width_ub = 0; - - // the dimensions of a meta row are meta_row_width x meta_row_height in elements. - // calculate upper bound of the meta_row_width - if (!surf_vert) { - log2_meta_row_height = log2_meta_req_height; - meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) - + meta_req_width; - rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; - } else { - log2_meta_row_height = log2_meta_req_width; - meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) - + meta_req_height; - rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; - } - rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; - - rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; - - log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); - log2_meta_chunk_height = log2_meta_row_height; - - //full sized meta chunk width in unit of data elements - log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element - - log2_meta_chunk_height; - log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); - min_meta_chunk_width = 1 - << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element - - log2_meta_chunk_height); - meta_chunk_width = 1 << log2_meta_chunk_width; - meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); - meta_row_remainder = meta_row_width_ub % meta_chunk_width; - meta_chunk_threshold = 0; - meta_blk_bytes = 4096; - meta_blk_height = blk256_height * 64; - meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; - meta_surface_bytes = meta_pitch - * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) - * bytes_per_element / 256; - vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; - meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, - 8 * vmpg_bytes, - 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); - meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request - rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; - - dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); - dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); - dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); - dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", - __func__, - meta_pte_req_per_frame_ub); - dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", - __func__, - meta_pte_bytes_per_frame_ub); - - if (!surf_vert) - meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; - else - meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; - - if (meta_row_remainder <= meta_chunk_threshold) - rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; - else - rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; - - // ------ - // dpte - // ------ - if (surf_linear) { - log2_vmpg_height = 0; // one line high - } else { - log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; - } - log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; - - // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. - if (surf_linear) { //one 64B PTE request returns 8 PTEs - log2_dpte_req_height_ptes = 0; - log2_dpte_req_width = log2_vmpg_width + 3; - log2_dpte_req_height = 0; - } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size - //one 64B req gives 8x1 PTEs for 4KB tile - log2_dpte_req_height_ptes = 0; - log2_dpte_req_width = log2_blk_width + 3; - log2_dpte_req_height = log2_blk_height + 0; - } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB - //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB - log2_dpte_req_height_ptes = 4; - log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width - log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height - } else { //64KB page size and must 64KB tile block - //one 64B req gives 8x1 PTEs for 64KB tile - log2_dpte_req_height_ptes = 0; - log2_dpte_req_width = log2_blk_width + 3; - log2_dpte_req_height = log2_blk_height + 0; - } - - // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height - // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent - // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) - //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; - //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; - dpte_req_height = 1 << log2_dpte_req_height; - dpte_req_width = 1 << log2_dpte_req_width; - - // calculate pitch dpte row buffer can hold - // round the result down to a power of two. - pde_buf_entries = yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; - if (surf_linear) { - unsigned int dpte_row_height; - - log2_dpte_row_height_linear = dml_floor(dml_log2(dml_min(64 * 1024 * pde_buf_entries - / bytes_per_element, - dpte_buf_in_pte_reqs - * dpte_req_width) - / data_pitch), - 1); - - ASSERT(log2_dpte_row_height_linear >= 3); - - if (log2_dpte_row_height_linear > 7) - log2_dpte_row_height_linear = 7; - - log2_dpte_row_height = log2_dpte_row_height_linear; - // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. - // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. - dpte_row_height = 1 << log2_dpte_row_height; - dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, - dpte_req_width, - 1) + dpte_req_width; - rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; - } else { - // the upper bound of the dpte_row_width without dependency on viewport position follows. - // for tiled mode, row height is the same as req height and row store up to vp size upper bound - if (!surf_vert) { - log2_dpte_row_height = log2_dpte_req_height; - dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) - + dpte_req_width; - rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; - } else { - log2_dpte_row_height = - (log2_blk_width < log2_dpte_req_width) ? - log2_blk_width : log2_dpte_req_width; - dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) - + dpte_req_height; - rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; - } - } - if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB - rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request - else - rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request - - rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; - - // the dpte_group_bytes is reduced for the specific case of vertical - // access of a tile surface that has dpte request of 8x1 ptes. - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group - rq_sizing_param->dpte_group_bytes = 512; - else - //full size - rq_sizing_param->dpte_group_bytes = 2048; - - //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. - log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); - log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests - - // full sized data pte group width in elements - if (!surf_vert) - log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; - else - log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; - - //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B - if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB - log2_dpte_group_width = log2_dpte_group_width - 1; - - dpte_group_width = 1 << log2_dpte_group_width; - - // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, - // the upper bound for the dpte groups per row is as follows. - rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, - 1); -} - -static void get_surf_rq_param(struct display_mode_lib *mode_lib, - display_data_rq_sizing_params_st *rq_sizing_param, - display_data_rq_dlg_params_st *rq_dlg_param, - display_data_rq_misc_params_st *rq_misc_param, - const display_pipe_source_params_st pipe_src_param, - bool is_chroma) -{ - bool mode_422 = 0; - unsigned int vp_width = 0; - unsigned int vp_height = 0; - unsigned int data_pitch = 0; - unsigned int meta_pitch = 0; - unsigned int ppe = mode_422 ? 2 : 1; - - // FIXME check if ppe apply for both luma and chroma in 422 case - if (is_chroma) { - vp_width = pipe_src_param.viewport_width_c / ppe; - vp_height = pipe_src_param.viewport_height_c; - data_pitch = pipe_src_param.data_pitch_c; - meta_pitch = pipe_src_param.meta_pitch_c; - } else { - vp_width = pipe_src_param.viewport_width / ppe; - vp_height = pipe_src_param.viewport_height; - data_pitch = pipe_src_param.data_pitch; - meta_pitch = pipe_src_param.meta_pitch; - } - - rq_sizing_param->chunk_bytes = 8192; - - if (rq_sizing_param->chunk_bytes == 64 * 1024) - rq_sizing_param->min_chunk_bytes = 0; - else - rq_sizing_param->min_chunk_bytes = 1024; - - rq_sizing_param->meta_chunk_bytes = 2048; - rq_sizing_param->min_meta_chunk_bytes = 256; - - rq_sizing_param->mpte_group_bytes = 2048; - - get_meta_and_pte_attr(mode_lib, - rq_dlg_param, - rq_misc_param, - rq_sizing_param, - vp_width, - vp_height, - data_pitch, - meta_pitch, - pipe_src_param.source_format, - pipe_src_param.sw_mode, - pipe_src_param.macro_tile_size, - pipe_src_param.source_scan, - is_chroma); -} - -void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, - display_rq_params_st *rq_param, - const display_pipe_source_params_st pipe_src_param) -{ - // get param for luma surface - rq_param->yuv420 = pipe_src_param.source_format == dm_420_8 - || pipe_src_param.source_format == dm_420_10; - rq_param->yuv420_10bpc = pipe_src_param.source_format == dm_420_10; - - get_surf_rq_param(mode_lib, - &(rq_param->sizing.rq_l), - &(rq_param->dlg.rq_l), - &(rq_param->misc.rq_l), - pipe_src_param, - 0); - - if (is_dual_plane((enum source_format_class)(pipe_src_param.source_format))) { - // get param for chroma surface - get_surf_rq_param(mode_lib, - &(rq_param->sizing.rq_c), - &(rq_param->dlg.rq_c), - &(rq_param->misc.rq_c), - pipe_src_param, - 1); - } - - // calculate how to split the det buffer space between luma and chroma - handle_det_buf_split(mode_lib, rq_param, pipe_src_param); - print__rq_params_st(mode_lib, *rq_param); -} - -void dml_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, - display_rq_regs_st *rq_regs, - const display_pipe_source_params_st pipe_src_param) -{ - display_rq_params_st rq_param = {0}; - - memset(rq_regs, 0, sizeof(*rq_regs)); - dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_src_param); - extract_rq_regs(mode_lib, rq_regs, rq_param); - - print__rq_regs_st(mode_lib, *rq_regs); -} - -// Note: currently taken in as is. -// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. -void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *e2e_pipe_param, - const unsigned int num_pipes, - const unsigned int pipe_idx, - display_dlg_regs_st *disp_dlg_regs, - display_ttu_regs_st *disp_ttu_regs, - const display_rq_dlg_params_st rq_dlg_param, - const display_dlg_sys_params_st dlg_sys_param, - const bool cstate_en, - const bool pstate_en, - const bool vm_en, - const bool ignore_viewport_pos, - const bool immediate_flip_support) -{ - const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; - const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; - const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; - const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; - const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; - const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; - - // ------------------------- - // Section 1.15.2.1: OTG dependent Params - // ------------------------- - // Timing - unsigned int htotal = dst->htotal; -// unsigned int hblank_start = dst.hblank_start; // TODO: Remove - unsigned int hblank_end = dst->hblank_end; - unsigned int vblank_start = dst->vblank_start; - unsigned int vblank_end = dst->vblank_end; - unsigned int min_vblank = mode_lib->ip.min_vblank_lines; - - double dppclk_freq_in_mhz = clks->dppclk_mhz; - double dispclk_freq_in_mhz = clks->dispclk_mhz; - double refclk_freq_in_mhz = clks->refclk_mhz; - double pclk_freq_in_mhz = dst->pixel_rate_mhz; - bool interlaced = dst->interlaced; - - double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; - - double min_dcfclk_mhz; - double t_calc_us; - double min_ttu_vblank; - - double min_dst_y_ttu_vblank; - unsigned int dlg_vblank_start; - bool dual_plane; - bool mode_422; - unsigned int access_dir; - unsigned int vp_height_l; - unsigned int vp_width_l; - unsigned int vp_height_c; - unsigned int vp_width_c; - - // Scaling - unsigned int htaps_l; - unsigned int htaps_c; - double hratio_l; - double hratio_c; - double vratio_l; - double vratio_c; - bool scl_enable; - - double line_time_in_us; - // double vinit_l; - // double vinit_c; - // double vinit_bot_l; - // double vinit_bot_c; - - // unsigned int swath_height_l; - unsigned int swath_width_ub_l; - // unsigned int dpte_bytes_per_row_ub_l; - unsigned int dpte_groups_per_row_ub_l; - // unsigned int meta_pte_bytes_per_frame_ub_l; - // unsigned int meta_bytes_per_row_ub_l; - - // unsigned int swath_height_c; - unsigned int swath_width_ub_c; - // unsigned int dpte_bytes_per_row_ub_c; - unsigned int dpte_groups_per_row_ub_c; - - unsigned int meta_chunks_per_row_ub_l; - unsigned int meta_chunks_per_row_ub_c; - unsigned int vupdate_offset; - unsigned int vupdate_width; - unsigned int vready_offset; - - unsigned int dppclk_delay_subtotal; - unsigned int dispclk_delay_subtotal; - unsigned int pixel_rate_delay_subtotal; - - unsigned int vstartup_start; - unsigned int dst_x_after_scaler; - unsigned int dst_y_after_scaler; - double line_wait; - double dst_y_prefetch; - double dst_y_per_vm_vblank; - double dst_y_per_row_vblank; - double dst_y_per_vm_flip; - double dst_y_per_row_flip; - double min_dst_y_per_vm_vblank; - double min_dst_y_per_row_vblank; - double lsw; - double vratio_pre_l; - double vratio_pre_c; - unsigned int req_per_swath_ub_l; - unsigned int req_per_swath_ub_c; - unsigned int meta_row_height_l; - unsigned int meta_row_height_c; - unsigned int swath_width_pixels_ub_l; - unsigned int swath_width_pixels_ub_c; - unsigned int scaler_rec_in_width_l; - unsigned int scaler_rec_in_width_c; - unsigned int dpte_row_height_l; - unsigned int dpte_row_height_c; - double hscale_pixel_rate_l; - double hscale_pixel_rate_c; - double min_hratio_fact_l; - double min_hratio_fact_c; - double refcyc_per_line_delivery_pre_l; - double refcyc_per_line_delivery_pre_c; - double refcyc_per_line_delivery_l; - double refcyc_per_line_delivery_c; - - double refcyc_per_req_delivery_pre_l; - double refcyc_per_req_delivery_pre_c; - double refcyc_per_req_delivery_l; - double refcyc_per_req_delivery_c; - - unsigned int full_recout_width; - double xfc_transfer_delay; - double xfc_precharge_delay; - double xfc_remote_surface_flip_latency; - double xfc_dst_y_delta_drq_limit; - double xfc_prefetch_margin; - double refcyc_per_req_delivery_pre_cur0; - double refcyc_per_req_delivery_cur0; - double refcyc_per_req_delivery_pre_cur1; - double refcyc_per_req_delivery_cur1; - - memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); - memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); - - dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); - dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); - dml_print("DML_DLG: %s: vm_en = %d\n", __func__, vm_en); - dml_print("DML_DLG: %s: ignore_viewport_pos = %d\n", __func__, ignore_viewport_pos); - dml_print("DML_DLG: %s: immediate_flip_support = %d\n", __func__, immediate_flip_support); - - dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); - dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); - dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); - dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); - dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); - ASSERT(ref_freq_to_pix_freq < 4.0); - - disp_dlg_regs->ref_freq_to_pix_freq = - (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); - disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal - * dml_pow(2, 8)); - disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits - disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end - * (double) ref_freq_to_pix_freq); - ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); - - min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; - set_prefetch_mode(mode_lib, cstate_en, pstate_en, ignore_viewport_pos, immediate_flip_support); - t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); - min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - - min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; - dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; - - disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start - + min_dst_y_ttu_vblank) * dml_pow(2, 2)); - ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18)); - - dml_print("DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", - __func__, - min_dcfclk_mhz); - dml_print("DML_DLG: %s: min_ttu_vblank = %3.2f\n", - __func__, - min_ttu_vblank); - dml_print("DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", - __func__, - min_dst_y_ttu_vblank); - dml_print("DML_DLG: %s: t_calc_us = %3.2f\n", - __func__, - t_calc_us); - dml_print("DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", - __func__, - disp_dlg_regs->min_dst_y_next_start); - dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", - __func__, - ref_freq_to_pix_freq); - - // ------------------------- - // Section 1.15.2.2: Prefetch, Active and TTU - // ------------------------- - // Prefetch Calc - // Source -// dcc_en = src.dcc; - dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); - mode_422 = 0; // FIXME - access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed -// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); -// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); - vp_height_l = src->viewport_height; - vp_width_l = src->viewport_width; - vp_height_c = src->viewport_height_c; - vp_width_c = src->viewport_width_c; - - // Scaling - htaps_l = taps->htaps; - htaps_c = taps->htaps_c; - hratio_l = scl->hscl_ratio; - hratio_c = scl->hscl_ratio_c; - vratio_l = scl->vscl_ratio; - vratio_c = scl->vscl_ratio_c; - scl_enable = scl->scl_enable; - - line_time_in_us = (htotal / pclk_freq_in_mhz); -// vinit_l = scl.vinit; -// vinit_c = scl.vinit_c; -// vinit_bot_l = scl.vinit_bot; -// vinit_bot_c = scl.vinit_bot_c; - -// unsigned int swath_height_l = rq_dlg_param.rq_l.swath_height; - swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; -// unsigned int dpte_bytes_per_row_ub_l = rq_dlg_param.rq_l.dpte_bytes_per_row_ub; - dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; -// unsigned int meta_pte_bytes_per_frame_ub_l = rq_dlg_param.rq_l.meta_pte_bytes_per_frame_ub; -// unsigned int meta_bytes_per_row_ub_l = rq_dlg_param.rq_l.meta_bytes_per_row_ub; - -// unsigned int swath_height_c = rq_dlg_param.rq_c.swath_height; - swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; - // dpte_bytes_per_row_ub_c = rq_dlg_param.rq_c.dpte_bytes_per_row_ub; - dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; - - meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; - meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub; - vupdate_offset = dst->vupdate_offset; - vupdate_width = dst->vupdate_width; - vready_offset = dst->vready_offset; - - dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; - dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; - - if (scl_enable) - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; - else - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; - - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter - + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; - - if (dout->dsc_enable) { - double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - - dispclk_delay_subtotal += dsc_delay; - } - - pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz - + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; - - vstartup_start = dst->vstartup_start; - if (interlaced) { - if (vstartup_start / 2.0 - - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal - <= vblank_end / 2.0) - disp_dlg_regs->vready_after_vcount0 = 1; - else - disp_dlg_regs->vready_after_vcount0 = 0; - } else { - if (vstartup_start - - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal - <= vblank_end) - disp_dlg_regs->vready_after_vcount0 = 1; - else - disp_dlg_regs->vready_after_vcount0 = 0; - } - - // TODO: Where is this coming from? - if (interlaced) - vstartup_start = vstartup_start / 2; - - // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? - if (vstartup_start >= min_vblank) { - dml_print("WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", - __func__, - vblank_start, - vblank_end); - dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", - __func__, - vstartup_start, - min_vblank); - min_vblank = vstartup_start + 1; - dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", - __func__, - vstartup_start, - min_vblank); - } - - dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - - dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); - dml_print("DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", - __func__, - pixel_rate_delay_subtotal); - dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", - __func__, - dst_x_after_scaler); - dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", - __func__, - dst_y_after_scaler); - - // Lwait - line_wait = mode_lib->soc.urgent_latency_us; - if (cstate_en) - line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); - if (pstate_en) - line_wait = dml_max(mode_lib->soc.dram_clock_change_latency_us - + mode_lib->soc.urgent_latency_us, - line_wait); - line_wait = line_wait / line_time_in_us; - - dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); - - dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_idx); - dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_idx); - dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - - min_dst_y_per_vm_vblank = 8.0; - min_dst_y_per_row_vblank = 16.0; - - // magic! - if (htotal <= 75) { - min_vblank = 300; - min_dst_y_per_vm_vblank = 100.0; - min_dst_y_per_row_vblank = 100.0; - } - - dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); - dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); - - ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank); - ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank); - - ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); - lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); - - dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); - - vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - - dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); - dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); - - // Active - req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; - req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; - meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; - meta_row_height_c = rq_dlg_param.rq_c.meta_row_height; - swath_width_pixels_ub_l = 0; - swath_width_pixels_ub_c = 0; - scaler_rec_in_width_l = 0; - scaler_rec_in_width_c = 0; - dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; - dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; - - if (mode_422) { - swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element - swath_width_pixels_ub_c = swath_width_ub_c * 2; - } else { - swath_width_pixels_ub_l = swath_width_ub_l * 1; - swath_width_pixels_ub_c = swath_width_ub_c * 1; - } - - hscale_pixel_rate_l = 0.; - hscale_pixel_rate_c = 0.; - min_hratio_fact_l = 1.0; - min_hratio_fact_c = 1.0; - - if (htaps_l <= 1) - min_hratio_fact_l = 2.0; - else if (htaps_l <= 6) { - if ((hratio_l * 2.0) > 4.0) - min_hratio_fact_l = 4.0; - else - min_hratio_fact_l = hratio_l * 2.0; - } else { - if (hratio_l > 4.0) - min_hratio_fact_l = 4.0; - else - min_hratio_fact_l = hratio_l; - } - - hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; - - if (htaps_c <= 1) - min_hratio_fact_c = 2.0; - else if (htaps_c <= 6) { - if ((hratio_c * 2.0) > 4.0) - min_hratio_fact_c = 4.0; - else - min_hratio_fact_c = hratio_c * 2.0; - } else { - if (hratio_c > 4.0) - min_hratio_fact_c = 4.0; - else - min_hratio_fact_c = hratio_c; - } - - hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; - - refcyc_per_line_delivery_pre_l = 0.; - refcyc_per_line_delivery_pre_c = 0.; - refcyc_per_line_delivery_l = 0.; - refcyc_per_line_delivery_c = 0.; - - refcyc_per_req_delivery_pre_l = 0.; - refcyc_per_req_delivery_pre_c = 0.; - refcyc_per_req_delivery_l = 0.; - refcyc_per_req_delivery_c = 0.; - - full_recout_width = 0; - // In ODM - if (src->is_hsplit) { - // This "hack" is only allowed (and valid) for MPC combine. In ODM - // combine, you MUST specify the full_recout_width...according to Oswin - if (dst->full_recout_width == 0 && !dst->odm_combine) { - dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", - __func__); - full_recout_width = dst->recout_width * 2; // assume half split for dcn1 - } else - full_recout_width = dst->full_recout_width; - } else - full_recout_width = dst->recout_width; - - // mpc_combine and odm_combine are mutually exclusive - refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_pre_l, - hscale_pixel_rate_l, - swath_width_pixels_ub_l, - 1); // per line - - refcyc_per_line_delivery_l = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_l, - hscale_pixel_rate_l, - swath_width_pixels_ub_l, - 1); // per line - - dml_print("DML_DLG: %s: full_recout_width = %d\n", - __func__, - full_recout_width); - dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", - __func__, - hscale_pixel_rate_l); - dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", - __func__, - refcyc_per_line_delivery_pre_l); - dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", - __func__, - refcyc_per_line_delivery_l); - - if (dual_plane) { - refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_pre_c, - hscale_pixel_rate_c, - swath_width_pixels_ub_c, - 1); // per line - - refcyc_per_line_delivery_c = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_c, - hscale_pixel_rate_c, - swath_width_pixels_ub_c, - 1); // per line - - dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", - __func__, - refcyc_per_line_delivery_pre_c); - dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", - __func__, - refcyc_per_line_delivery_c); - } - - // TTU - Luma / Chroma - if (access_dir) { // vertical access - scaler_rec_in_width_l = vp_height_l; - scaler_rec_in_width_c = vp_height_c; - } else { - scaler_rec_in_width_l = vp_width_l; - scaler_rec_in_width_c = vp_width_c; - } - - refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_pre_l, - hscale_pixel_rate_l, - scaler_rec_in_width_l, - req_per_swath_ub_l); // per req - refcyc_per_req_delivery_l = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_l, - hscale_pixel_rate_l, - scaler_rec_in_width_l, - req_per_swath_ub_l); // per req - - dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", - __func__, - refcyc_per_req_delivery_pre_l); - dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", - __func__, - refcyc_per_req_delivery_l); - - ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); - ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); - - if (dual_plane) { - refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_pre_c, - hscale_pixel_rate_c, - scaler_rec_in_width_c, - req_per_swath_ub_c); // per req - refcyc_per_req_delivery_c = get_refcyc_per_delivery(mode_lib, - refclk_freq_in_mhz, - pclk_freq_in_mhz, - dst->odm_combine, - full_recout_width, - dst->hactive, - vratio_c, - hscale_pixel_rate_c, - scaler_rec_in_width_c, - req_per_swath_ub_c); // per req - - dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", - __func__, - refcyc_per_req_delivery_pre_c); - dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", - __func__, - refcyc_per_req_delivery_c); - - ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); - ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); - } - - // XFC - xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - xfc_precharge_delay = get_xfc_precharge_delay(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_idx); - xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_idx); - xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; - xfc_prefetch_margin = get_xfc_prefetch_margin(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_idx); - - // TTU - Cursor - refcyc_per_req_delivery_pre_cur0 = 0.0; - refcyc_per_req_delivery_cur0 = 0.0; - if (src->num_cursors > 0) { - calculate_ttu_cursor(mode_lib, - &refcyc_per_req_delivery_pre_cur0, - &refcyc_per_req_delivery_cur0, - refclk_freq_in_mhz, - ref_freq_to_pix_freq, - hscale_pixel_rate_l, - scl->hscl_ratio, - vratio_pre_l, - vratio_l, - src->cur0_src_width, - (enum cursor_bpp)(src->cur0_bpp)); - } - - refcyc_per_req_delivery_pre_cur1 = 0.0; - refcyc_per_req_delivery_cur1 = 0.0; - if (src->num_cursors > 1) { - calculate_ttu_cursor(mode_lib, - &refcyc_per_req_delivery_pre_cur1, - &refcyc_per_req_delivery_cur1, - refclk_freq_in_mhz, - ref_freq_to_pix_freq, - hscale_pixel_rate_l, - scl->hscl_ratio, - vratio_pre_l, - vratio_l, - src->cur1_src_width, - (enum cursor_bpp)(src->cur1_bpp)); - } - - // TTU - Misc - // all hard-coded - - // Assignment to register structures - disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line - disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk - ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13)); - disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); - - disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); - disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); - - disp_dlg_regs->refcyc_per_pte_group_vblank_l = - (unsigned int) (dst_y_per_row_vblank * (double) htotal - * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); - ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13)); - - if (dual_plane) { - disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank - * (double) htotal * ref_freq_to_pix_freq - / (double) dpte_groups_per_row_ub_c); - ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c - < (unsigned int) dml_pow(2, 13)); - } - - disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = - (unsigned int) (dst_y_per_row_vblank * (double) htotal - * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); - ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13)); - - disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = - disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now - - disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal - * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; - disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal - * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; - - if (dual_plane) { - disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip - * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; - disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip - * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; - } - - disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l - / (double) vratio_l * dml_pow(2, 2)); - ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17)); - - if (dual_plane) { - disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c - / (double) vratio_c * dml_pow(2, 2)); - if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { - dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", - __func__, - disp_dlg_regs->dst_y_per_pte_row_nom_c, - (unsigned int) dml_pow(2, 17) - 1); - } - } - - disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l - / (double) vratio_l * dml_pow(2, 2)); - ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17)); - - disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now - - disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l - / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq - / (double) dpte_groups_per_row_ub_l); - if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) - disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; - disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l - / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq - / (double) meta_chunks_per_row_ub_l); - if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) - disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; - - if (dual_plane) { - disp_dlg_regs->refcyc_per_pte_group_nom_c = - (unsigned int) ((double) dpte_row_height_c / (double) vratio_c - * (double) htotal * ref_freq_to_pix_freq - / (double) dpte_groups_per_row_ub_c); - if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) - disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; - - // TODO: Is this the right calculation? Does htotal need to be halved? - disp_dlg_regs->refcyc_per_meta_chunk_nom_c = - (unsigned int) ((double) meta_row_height_c / (double) vratio_c - * (double) htotal * ref_freq_to_pix_freq - / (double) meta_chunks_per_row_ub_c); - if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) - disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; - } - - disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, - 1); - disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, - 1); - ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13)); - ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13)); - - disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, - 1); - disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, - 1); - ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13)); - ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13)); - - disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; - disp_dlg_regs->dst_y_offset_cur0 = 0; - disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; - disp_dlg_regs->dst_y_offset_cur1 = 0; - - disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay; - disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay; - disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency; - disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil(xfc_prefetch_margin * refclk_freq_in_mhz, - 1); - - // slave has to have this value also set to off - if (src->xfc_enable && !src->xfc_slave) - disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1); - else - disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off - - disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l - * dml_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l - * dml_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c - * dml_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c - * dml_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = - (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 - * dml_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = - (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 - * dml_pow(2, 10)); - disp_ttu_regs->qos_level_low_wm = 0; - ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); - disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal - * ref_freq_to_pix_freq); - ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14)); - - disp_ttu_regs->qos_level_flip = 14; - disp_ttu_regs->qos_level_fixed_l = 8; - disp_ttu_regs->qos_level_fixed_c = 8; - disp_ttu_regs->qos_level_fixed_cur0 = 8; - disp_ttu_regs->qos_ramp_disable_l = 0; - disp_ttu_regs->qos_ramp_disable_c = 0; - disp_ttu_regs->qos_ramp_disable_cur0 = 0; - - disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; - ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); - - print__ttu_regs_st(mode_lib, *disp_ttu_regs); - print__dlg_regs_st(mode_lib, *disp_dlg_regs); -} - -void dml_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, - display_dlg_regs_st *dlg_regs, - display_ttu_regs_st *ttu_regs, - display_e2e_pipe_params_st *e2e_pipe_param, - const unsigned int num_pipes, - const unsigned int pipe_idx, - const bool cstate_en, - const bool pstate_en, - const bool vm_en, - const bool ignore_viewport_pos, - const bool immediate_flip_support) -{ - display_rq_params_st rq_param = {0}; - display_dlg_sys_params_st dlg_sys_param = {0}; - - // Get watermark and Tex. - dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, - e2e_pipe_param, - num_pipes); - dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, - e2e_pipe_param, - num_pipes); - dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, - e2e_pipe_param, - num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated - - print__dlg_sys_params_st(mode_lib, dlg_sys_param); - - // system parameter calculation done - - dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); - dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe.src); - dml_rq_dlg_get_dlg_params(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_idx, - dlg_regs, - ttu_regs, - rq_param.dlg, - dlg_sys_param, - cstate_en, - pstate_en, - vm_en, - ignore_viewport_pos, - immediate_flip_support); - dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); -} - -void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param) -{ - memset(arb_param, 0, sizeof(*arb_param)); - arb_param->max_req_outstanding = 256; - arb_param->min_req_outstanding = 68; - arb_param->sat_level_us = 60; -} - -void calculate_ttu_cursor(struct display_mode_lib *mode_lib, - double *refcyc_per_req_delivery_pre_cur, - double *refcyc_per_req_delivery_cur, - double refclk_freq_in_mhz, - double ref_freq_to_pix_freq, - double hscale_pixel_rate_l, - double hscl_ratio, - double vratio_pre_l, - double vratio_l, - unsigned int cur_width, - enum cursor_bpp cur_bpp) -{ - unsigned int cur_src_width = cur_width; - unsigned int cur_req_size = 0; - unsigned int cur_req_width = 0; - double cur_width_ub = 0.0; - double cur_req_per_width = 0.0; - double hactive_cur = 0.0; - - ASSERT(cur_src_width <= 256); - - *refcyc_per_req_delivery_pre_cur = 0.0; - *refcyc_per_req_delivery_cur = 0.0; - if (cur_src_width > 0) { - unsigned int cur_bit_per_pixel = 0; - - if (cur_bpp == dm_cur_2bit) { - cur_req_size = 64; // byte - cur_bit_per_pixel = 2; - } else { // 32bit - cur_bit_per_pixel = 32; - if (cur_src_width >= 1 && cur_src_width <= 16) - cur_req_size = 64; - else if (cur_src_width >= 17 && cur_src_width <= 31) - cur_req_size = 128; - else - cur_req_size = 256; - } - - cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); - cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) - * (double) cur_req_width; - cur_req_per_width = cur_width_ub / (double) cur_req_width; - hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor - - if (vratio_pre_l <= 1.0) { - *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq - / (double) cur_req_per_width; - } else { - *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz - * (double) cur_src_width / hscale_pixel_rate_l - / (double) cur_req_per_width; - } - - ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); - - if (vratio_l <= 1.0) { - *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq - / (double) cur_req_per_width; - } else { - *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz - * (double) cur_src_width / hscale_pixel_rate_l - / (double) cur_req_per_width; - } - - dml_print("DML_DLG: %s: cur_req_width = %d\n", - __func__, - cur_req_width); - dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", - __func__, - cur_width_ub); - dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", - __func__, - cur_req_per_width); - dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", - __func__, - hactive_cur); - dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", - __func__, - *refcyc_per_req_delivery_pre_cur); - dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", - __func__, - *refcyc_per_req_delivery_cur); - - ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); - } -} - -unsigned int dml_rq_dlg_get_calculated_vstartup(struct display_mode_lib *mode_lib, - display_e2e_pipe_params_st *e2e_pipe_param, - const unsigned int num_pipes, - const unsigned int pipe_idx) -{ - unsigned int vstartup_pipe[DC__NUM_PIPES__MAX]; - bool visited[DC__NUM_PIPES__MAX]; - unsigned int pipe_inst = 0; - unsigned int i, j, k; - - for (k = 0; k < num_pipes; ++k) - visited[k] = false; - - for (i = 0; i < num_pipes; i++) { - if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) { - unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp; - - for (j = i; j < num_pipes; j++) { - if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp - && e2e_pipe_param[j].pipe.src.is_hsplit - && !visited[j]) { - vstartup_pipe[j] = get_vstartup_calculated(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_inst); - visited[j] = true; - } - } - - pipe_inst++; - } - - if (!visited[i]) { - vstartup_pipe[i] = get_vstartup_calculated(mode_lib, - e2e_pipe_param, - num_pipes, - pipe_inst); - visited[i] = true; - pipe_inst++; - } - } - - return vstartup_pipe[pipe_idx]; - -} - -void dml_rq_dlg_get_row_heights(struct display_mode_lib *mode_lib, - unsigned int *o_dpte_row_height, - unsigned int *o_meta_row_height, - unsigned int vp_width, - unsigned int data_pitch, - int source_format, - int tiling, - int macro_tile_size, - int source_scan, - int is_chroma) -{ - display_data_rq_dlg_params_st rq_dlg_param; - display_data_rq_misc_params_st rq_misc_param; - display_data_rq_sizing_params_st rq_sizing_param; - - get_meta_and_pte_attr(mode_lib, - &rq_dlg_param, - &rq_misc_param, - &rq_sizing_param, - vp_width, - 0, // dummy - data_pitch, - 0, // dummy - source_format, - tiling, - macro_tile_size, - source_scan, - is_chroma); - - *o_dpte_row_height = rq_dlg_param.dpte_row_height; - *o_meta_row_height = rq_dlg_param.meta_row_height; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.h b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.h deleted file mode 100644 index efdd4c73d8f3..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DML2_DISPLAY_RQ_DLG_CALC_H__ -#define __DML2_DISPLAY_RQ_DLG_CALC_H__ - -#include "dml_common_defs.h" -#include "display_rq_dlg_helpers.h" - -struct display_mode_lib; - -// Function: dml_rq_dlg_get_rq_params -// Calculate requestor related parameters that register definition agnostic -// (i.e. this layer does try to separate real values from register definition) -// Input: -// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) -// Output: -// rq_param - values that can be used to setup RQ (e.g. swath_height, plane1_addr, etc.) -// -void dml_rq_dlg_get_rq_params( - struct display_mode_lib *mode_lib, - display_rq_params_st *rq_param, - const display_pipe_source_params_st pipe_src_param); - -// Function: dml_rq_dlg_get_rq_reg -// Main entry point for test to get the register values out of this DML class. -// This function calls <get_rq_param> and <extract_rq_regs> fucntions to calculate -// and then populate the rq_regs struct -// Input: -// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) -// Output: -// rq_regs - struct that holds all the RQ registers field value. -// See also: <display_rq_regs_st> -void dml_rq_dlg_get_rq_reg( - struct display_mode_lib *mode_lib, - display_rq_regs_st *rq_regs, - const display_pipe_source_params_st pipe_src_param); - -// Function: dml_rq_dlg_get_dlg_params -// Calculate deadline related parameters -// -void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *e2e_pipe_param, - const unsigned int num_pipes, - const unsigned int pipe_idx, - display_dlg_regs_st *disp_dlg_regs, - display_ttu_regs_st *disp_ttu_regs, - const display_rq_dlg_params_st rq_dlg_param, - const display_dlg_sys_params_st dlg_sys_param, - const bool cstate_en, - const bool pstate_en, - const bool vm_en, - const bool ignore_viewport_pos, - const bool immediate_flip_support); - -// Function: dml_rq_dlg_get_dlg_param_prefetch -// For flip_bw programming guide change, now dml needs to calculate the flip_bytes and prefetch_bw -// for ALL pipes and use this info to calculate the prefetch programming. -// Output: prefetch_param.prefetch_bw and flip_bytes -void dml_rq_dlg_get_dlg_params_prefetch( - struct display_mode_lib *mode_lib, - display_dlg_prefetch_param_st *prefetch_param, - display_rq_dlg_params_st rq_dlg_param, - display_dlg_sys_params_st dlg_sys_param, - display_e2e_pipe_params_st e2e_pipe_param, - const bool cstate_en, - const bool pstate_en, - const bool vm_en); - -// Function: dml_rq_dlg_get_dlg_reg -// Calculate and return DLG and TTU register struct given the system setting -// Output: -// dlg_regs - output DLG register struct -// ttu_regs - output DLG TTU register struct -// Input: -// e2e_pipe_param - "compacted" array of e2e pipe param struct -// num_pipes - num of active "pipe" or "route" -// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg -// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. -// Added for legacy or unrealistic timing tests. -void dml_rq_dlg_get_dlg_reg( - struct display_mode_lib *mode_lib, - display_dlg_regs_st *dlg_regs, - display_ttu_regs_st *ttu_regs, - display_e2e_pipe_params_st *e2e_pipe_param, - const unsigned int num_pipes, - const unsigned int pipe_idx, - const bool cstate_en, - const bool pstate_en, - const bool vm_en, - const bool ignore_viewport_pos, - const bool immediate_flip_support); - -// Function: dml_rq_dlg_get_calculated_vstartup -// Calculate and return vstartup -// Output: -// unsigned int vstartup -// Input: -// e2e_pipe_param - "compacted" array of e2e pipe param struct -// num_pipes - num of active "pipe" or "route" -// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg -// NOTE: this MUST be called after setting the prefetch mode! -unsigned int dml_rq_dlg_get_calculated_vstartup( - struct display_mode_lib *mode_lib, - display_e2e_pipe_params_st *e2e_pipe_param, - const unsigned int num_pipes, - const unsigned int pipe_idx); - -// Function: dml_rq_dlg_get_row_heights -// Calculate dpte and meta row heights -void dml_rq_dlg_get_row_heights( - struct display_mode_lib *mode_lib, - unsigned int *o_dpte_row_height, - unsigned int *o_meta_row_height, - unsigned int vp_width, - unsigned int data_pitch, - int source_format, - int tiling, - int macro_tile_size, - int source_scan, - int is_chroma); - -// Function: dml_rq_dlg_get_arb_params -void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h index 987d7671cd0f..304164986bd8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h @@ -27,10 +27,11 @@ #define __DISPLAY_RQ_DLG_CALC_H__ #include "dml_common_defs.h" -#include "display_rq_dlg_helpers.h" struct display_mode_lib; +#include "display_rq_dlg_helpers.h" + void dml1_extract_rq_regs( struct display_mode_lib *mode_lib, struct _vcs_dpi_display_rq_regs_st *rq_regs, diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_factory_dce120.c b/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_factory_dce120.c index 0c2314efb47e..ea3f888e5c65 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_factory_dce120.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_factory_dce120.c @@ -36,7 +36,8 @@ #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #define block HPD #define reg_num 0 diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_translate_dce120.c b/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_translate_dce120.c index a225b02cc779..39ef5c7dad97 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_translate_dce120.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_translate_dce120.c @@ -35,7 +35,8 @@ #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" /* begin ********************* * macros to expend register list macro defined in HW object header file */ diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_factory_dcn10.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_factory_dcn10.c index 5235f69f0602..32aa47a04a0d 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_factory_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_factory_dcn10.c @@ -36,7 +36,8 @@ #include "dcn/dcn_1_0_offset.h" #include "dcn/dcn_1_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #define block HPD #define reg_num 0 diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_translate_dcn10.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_translate_dcn10.c index 347864810d01..fecc8688048d 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_translate_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_translate_dcn10.c @@ -35,7 +35,8 @@ #include "dcn/dcn_1_0_offset.h" #include "dcn/dcn_1_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" /* begin ********************* * macros to expend register list macro defined in HW object header file */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c index fc7a7d4ebca5..0b1db48fef36 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c @@ -284,6 +284,14 @@ static bool read_command( msleep(engine->delay); } while (ctx.operation_succeeded && !ctx.transaction_complete); + if (request->payload.address_space == + I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { + dm_logger_write(engine->base.ctx->logger, LOG_I2C_AUX, "READ: addr:0x%x value:0x%x Result:%d", + request->payload.address, + request->payload.data[0], + ctx.operation_succeeded); + } + return ctx.operation_succeeded; } @@ -484,6 +492,14 @@ static bool write_command( msleep(engine->delay); } while (ctx.operation_succeeded && !ctx.transaction_complete); + if (request->payload.address_space == + I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { + dm_logger_write(engine->base.ctx->logger, LOG_I2C_AUX, "WRITE: addr:0x%x value:0x%x Result:%d", + request->payload.address, + request->payload.data[0], + ctx.operation_succeeded); + } + return ctx.operation_succeeded; } diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c index 81f9f3e34c10..5f47f6c007ac 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c @@ -441,10 +441,6 @@ static void construct( static void destruct( struct aux_engine_dce110 *engine) { - struct aux_engine_dce110 *aux110 = engine; -/*temp w/a, to do*/ - REG_UPDATE(AUX_ARB_CONTROL, AUX_DMCU_DONE_USING_AUX_REG, 1); - REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1); dal_aux_engine_destruct(&engine->base); } diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c index a401636bf3f8..0e7b18260027 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c @@ -38,7 +38,8 @@ #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" /* begin ********************* * macros to expend register list macro defined in HW object header file */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c b/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c index bed7cc3e77de..e44a8901f38b 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c @@ -38,7 +38,8 @@ #include "dcn/dcn_1_0_offset.h" #include "dcn/dcn_1_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" /* begin ********************* * macros to expend register list macro defined in HW object header file */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d6971054ec07..5509e13e7edf 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -119,6 +119,11 @@ struct resource_funcs { struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); + + enum dc_status (*remove_stream_from_ctx)( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *stream); }; struct audio_support{ @@ -148,6 +153,7 @@ struct resource_pool { unsigned int underlay_pipe_index; unsigned int stream_enc_count; unsigned int ref_clock_inKhz; + unsigned int timing_generator_count; /* * reserved clock source for DP @@ -188,6 +194,7 @@ struct plane_resource { struct input_pixel_processor *ipp; struct transform *xfm; struct dpp *dpp; + uint8_t mpcc_inst; }; struct pipe_ctx { diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index 616c73e2b0bd..2f783c650084 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -53,7 +53,7 @@ bool perform_link_training_with_retries( bool is_mst_supported(struct dc_link *link); -void detect_dp_sink_caps(struct dc_link *link); +bool detect_dp_sink_caps(struct dc_link *link); void detect_edp_sink_caps(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h index ce206355461b..de60f940030d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h @@ -32,13 +32,6 @@ enum dmcu_state { DMCU_RUNNING = 1 }; -struct dmcu_version { - unsigned int day; - unsigned int month; - unsigned int year; - unsigned int interface_version; -}; - struct dmcu { struct dc_context *ctx; const struct dmcu_funcs *funcs; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 25edbde6163e..78abc16ec4dc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -131,6 +131,11 @@ struct dpp_funcs { uint32_t width ); + void (*dpp_dppclk_control)( + struct dpp *dpp_base, + bool dppclk_div, + bool enable); + }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index b7c7e70022e4..9ced254e652c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -119,6 +119,9 @@ struct hubp_funcs { void (*hubp_disconnect)(struct hubp *hubp); + void (*hubp_clk_cntl)(struct hubp *hubp, bool enable); + void (*hubp_vtg_sel)(struct hubp *hubp, uint32_t otg_inst); + }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index e3f0b4056318..b22158190262 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -136,7 +136,7 @@ struct out_csc_color_matrix { enum opp_regamma { OPP_REGAMMA_BYPASS = 0, OPP_REGAMMA_SRGB, - OPP_REGAMMA_3_6, + OPP_REGAMMA_XVYCC, OPP_REGAMMA_USER }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index 0fd329deacd8..54d8a1386142 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -123,8 +123,7 @@ struct link_encoder_funcs { void (*enable_tmds_output)(struct link_encoder *enc, enum clock_source_id clock_source, enum dc_color_depth color_depth, - bool hdmi, - bool dual_link, + enum signal_type signal, uint32_t pixel_clock); void (*enable_dp_output)(struct link_encoder *enc, const struct dc_link_settings *link_settings, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h index ab8fb77f1ae5..d974d9e18612 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h @@ -297,6 +297,10 @@ struct opp_funcs { bool enable, const struct dc_crtc_timing *timing); + void (*opp_pipe_clock_control)( + struct output_pixel_processor *opp, + bool enable); + }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index ec312f1a3e55..3217b5bf6c7a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -92,6 +92,36 @@ struct crtc_stereo_flags { uint8_t DISABLE_STEREO_DP_SYNC : 1; }; +enum crc_selection { + /* Order must match values expected by hardware */ + UNION_WINDOW_A_B = 0, + UNION_WINDOW_A_NOT_B, + UNION_WINDOW_NOT_A_B, + UNION_WINDOW_NOT_A_NOT_B, + INTERSECT_WINDOW_A_B, + INTERSECT_WINDOW_A_NOT_B, + INTERSECT_WINDOW_NOT_A_B, + INTERSECT_WINDOW_NOT_A_NOT_B, +}; + +struct crc_params { + /* Regions used to calculate CRC*/ + uint16_t windowa_x_start; + uint16_t windowa_x_end; + uint16_t windowa_y_start; + uint16_t windowa_y_end; + + uint16_t windowb_x_start; + uint16_t windowb_x_end; + uint16_t windowb_y_start; + uint16_t windowb_y_end; + + enum crc_selection selection; + + bool continuous_mode; + bool enable; +}; + struct timing_generator { const struct timing_generator_funcs *funcs; struct dc_bios *bp; @@ -173,6 +203,21 @@ struct timing_generator_funcs { bool (*is_tg_enabled)(struct timing_generator *tg); bool (*is_optc_underflow_occurred)(struct timing_generator *tg); void (*clear_optc_underflow)(struct timing_generator *tg); + + /** + * Configure CRCs for the given timing generator. Return false if TG is + * not on. + */ + bool (*configure_crc)(struct timing_generator *tg, + const struct crc_params *params); + + /** + * Get CRCs for the given timing generator. Return false if CRCs are + * not enabled (via configure_crc). + */ + bool (*get_crc)(struct timing_generator *tg, + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); + }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 6f6c02b89f90..c5b3623bcbd9 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -30,7 +30,7 @@ #include "dc_hw_types.h" #include "fixed31_32.h" -#define CSC_TEMPERATURE_MATRIX_SIZE 9 +#define CSC_TEMPERATURE_MATRIX_SIZE 12 struct bit_depth_reduction_params; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 4c0aa56f7bae..e764cbad881b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -114,7 +114,7 @@ struct hw_sequencer_funcs { void (*power_down)(struct dc *dc); - void (*enable_accelerated_mode)(struct dc *dc); + void (*enable_accelerated_mode)(struct dc *dc, struct dc_state *context); void (*enable_timing_synchronization)( struct dc *dc, @@ -149,6 +149,7 @@ struct hw_sequencer_funcs { void (*unblank_stream)(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); + void (*blank_stream)(struct pipe_ctx *pipe_ctx); void (*pipe_control_lock)( struct dc *dc, struct pipe_ctx *pipe, @@ -198,6 +199,8 @@ struct hw_sequencer_funcs { bool enable); void (*edp_wait_for_hpd_ready)(struct dc_link *link, bool power_up); + void (*set_cursor_position)(struct pipe_ctx *pipe); + void (*set_cursor_attribute)(struct pipe_ctx *pipe); }; void color_space_to_black_color( diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h index f2b8c9a376d5..30be7bb4a01a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h @@ -51,6 +51,8 @@ void dp_enable_link_phy( const struct dc_link_settings *link_settings); void dp_receiver_power_ctrl(struct dc_link *link, bool on); +bool edp_receiver_ready_T9(struct dc_link *link); +bool edp_receiver_ready_T7(struct dc_link *link); void dp_disable_link_phy(struct dc_link *link, enum signal_type signal); diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c index 66d52580e29f..1ea7256ec89b 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c @@ -32,7 +32,8 @@ #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #include "ivsrcid/ivsrcid_vislands30.h" diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c index 7f7db66c48b0..e04ae49243f6 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c @@ -31,7 +31,8 @@ #include "dcn/dcn_1_0_offset.h" #include "dcn/dcn_1_0_sh_mask.h" -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" #include "irq_service_dcn10.h" diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c index 57a54a7b89e5..1c079ba37c30 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c @@ -42,8 +42,7 @@ static void virtual_link_encoder_enable_tmds_output( struct link_encoder *enc, enum clock_source_id clock_source, enum dc_color_depth color_depth, - bool hdmi, - bool dual_link, + enum signal_type signal, uint32_t pixel_clock) {} static void virtual_link_encoder_enable_dp_output( diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 7abe663ecc6e..9831cb5eaa7c 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -109,6 +109,14 @@ #define ASIC_REV_IS_STONEY(rev) \ ((rev >= STONEY_A0) && (rev < CZ_UNKNOWN)) +/* DCE12 */ + +#define AI_GREENLAND_P_A0 1 +#define AI_GREENLAND_P_A1 2 + +#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_UNKNOWN) +#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_UNKNOWN) + /* DCN1_0 */ #define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ #define RAVEN_A0 0x01 diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index 4badaedbaadd..0de258622c12 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -470,4 +470,7 @@ uint32_t dal_fixed31_32_clamp_u0d14( uint32_t dal_fixed31_32_clamp_u0d10( struct fixed31_32 arg); +int32_t dal_fixed31_32_s4d19( + struct fixed31_32 arg); + #endif diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h index 7a9b43f84a31..36bbad594267 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h @@ -419,11 +419,6 @@ struct bios_event_info { bool backlight_changed; }; -enum { - HDMI_PIXEL_CLOCK_IN_KHZ_297 = 297000, - TMDS_PIXEL_CLOCK_IN_KHZ_165 = 165000 -}; - /* * DFS-bypass flag */ diff --git a/drivers/gpu/drm/amd/display/include/signal_types.h b/drivers/gpu/drm/amd/display/include/signal_types.h index b5ebde642207..199c5db67cbc 100644 --- a/drivers/gpu/drm/amd/display/include/signal_types.h +++ b/drivers/gpu/drm/amd/display/include/signal_types.h @@ -26,6 +26,11 @@ #ifndef __DC_SIGNAL_TYPES_H__ #define __DC_SIGNAL_TYPES_H__ +/* Minimum pixel clock, in KHz. For TMDS signal is 25.00 MHz */ +#define TMDS_MIN_PIXEL_CLOCK 25000 +/* Maximum pixel clock, in KHz. For TMDS signal is 165.00 MHz */ +#define TMDS_MAX_PIXEL_CLOCK 165000 + enum signal_type { SIGNAL_TYPE_NONE = 0L, /* no signal */ SIGNAL_TYPE_DVI_SINGLE_LINK = (1 << 0), diff --git a/drivers/gpu/drm/amd/display/modules/color/Makefile b/drivers/gpu/drm/amd/display/modules/color/Makefile new file mode 100644 index 000000000000..65c33a76951a --- /dev/null +++ b/drivers/gpu/drm/amd/display/modules/color/Makefile @@ -0,0 +1,31 @@ +# +# Copyright 2018 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# +# Makefile for the color sub-module of DAL. +# + +MOD_COLOR = color_gamma.o + +AMD_DAL_MOD_COLOR = $(addprefix $(AMDDALPATH)/modules/color/,$(MOD_COLOR)) +#$(info ************ DAL COLOR MODULE MAKEFILE ************) + +AMD_DISPLAY_FILES += $(AMD_DAL_MOD_COLOR) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c new file mode 100644 index 000000000000..a5fd14a4016f --- /dev/null +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -0,0 +1,1403 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dc.h" +#include "opp.h" +#include "color_gamma.h" + + +#define NUM_PTS_IN_REGION 16 +#define NUM_REGIONS 32 +#define NUM_DEGAMMA_REGIONS 12 +#define MAX_HW_POINTS (NUM_PTS_IN_REGION*NUM_REGIONS) +#define MAX_HW_DEGAMMA_POINTS (NUM_PTS_IN_REGION*NUM_DEGAMMA_REGIONS) + +static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2]; +static struct hw_x_point degamma_coordinates_x[MAX_HW_DEGAMMA_POINTS + 2]; + +static struct fixed31_32 pq_table[MAX_HW_POINTS + 2]; +static struct fixed31_32 de_pq_table[MAX_HW_DEGAMMA_POINTS + 2]; + +static bool pq_initialized; /* = false; */ +static bool de_pq_initialized; /* = false; */ + +/* one-time setup of X points */ +void setup_x_points_distribution(void) +{ + struct fixed31_32 region_size = dal_fixed31_32_from_int(128); + int32_t segment; + uint32_t seg_offset; + uint32_t index; + struct fixed31_32 increment; + + coordinates_x[MAX_HW_POINTS].x = region_size; + coordinates_x[MAX_HW_POINTS + 1].x = region_size; + + for (segment = 6; segment > (6 - NUM_REGIONS); segment--) { + region_size = dal_fixed31_32_div_int(region_size, 2); + increment = dal_fixed31_32_div_int(region_size, + NUM_PTS_IN_REGION); + seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION; + coordinates_x[seg_offset].x = region_size; + + for (index = seg_offset + 1; + index < seg_offset + NUM_PTS_IN_REGION; + index++) { + coordinates_x[index].x = dal_fixed31_32_add + (coordinates_x[index-1].x, increment); + } + } + + region_size = dal_fixed31_32_from_int(1); + degamma_coordinates_x[MAX_HW_DEGAMMA_POINTS].x = region_size; + degamma_coordinates_x[MAX_HW_DEGAMMA_POINTS + 1].x = region_size; + + for (segment = -1; segment > -(NUM_DEGAMMA_REGIONS + 1); segment--) { + region_size = dal_fixed31_32_div_int(region_size, 2); + increment = dal_fixed31_32_div_int(region_size, + NUM_PTS_IN_REGION); + seg_offset = (segment + NUM_DEGAMMA_REGIONS) * NUM_PTS_IN_REGION; + degamma_coordinates_x[seg_offset].x = region_size; + + for (index = seg_offset + 1; + index < seg_offset + NUM_PTS_IN_REGION; + index++) { + degamma_coordinates_x[index].x = dal_fixed31_32_add + (degamma_coordinates_x[index-1].x, increment); + } + } + +} + +static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y) +{ + /* consts for PQ gamma formula. */ + const struct fixed31_32 m1 = + dal_fixed31_32_from_fraction(159301758, 1000000000); + const struct fixed31_32 m2 = + dal_fixed31_32_from_fraction(7884375, 100000); + const struct fixed31_32 c1 = + dal_fixed31_32_from_fraction(8359375, 10000000); + const struct fixed31_32 c2 = + dal_fixed31_32_from_fraction(188515625, 10000000); + const struct fixed31_32 c3 = + dal_fixed31_32_from_fraction(186875, 10000); + + struct fixed31_32 l_pow_m1; + struct fixed31_32 base; + + if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero)) + in_x = dal_fixed31_32_zero; + + l_pow_m1 = dal_fixed31_32_pow(in_x, m1); + base = dal_fixed31_32_div( + dal_fixed31_32_add(c1, + (dal_fixed31_32_mul(c2, l_pow_m1))), + dal_fixed31_32_add(dal_fixed31_32_one, + (dal_fixed31_32_mul(c3, l_pow_m1)))); + *out_y = dal_fixed31_32_pow(base, m2); +} + +static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y) +{ + /* consts for dePQ gamma formula. */ + const struct fixed31_32 m1 = + dal_fixed31_32_from_fraction(159301758, 1000000000); + const struct fixed31_32 m2 = + dal_fixed31_32_from_fraction(7884375, 100000); + const struct fixed31_32 c1 = + dal_fixed31_32_from_fraction(8359375, 10000000); + const struct fixed31_32 c2 = + dal_fixed31_32_from_fraction(188515625, 10000000); + const struct fixed31_32 c3 = + dal_fixed31_32_from_fraction(186875, 10000); + + struct fixed31_32 l_pow_m1; + struct fixed31_32 base, div; + + + if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero)) + in_x = dal_fixed31_32_zero; + + l_pow_m1 = dal_fixed31_32_pow(in_x, + dal_fixed31_32_div(dal_fixed31_32_one, m2)); + base = dal_fixed31_32_sub(l_pow_m1, c1); + + if (dal_fixed31_32_lt(base, dal_fixed31_32_zero)) + base = dal_fixed31_32_zero; + + div = dal_fixed31_32_sub(c2, dal_fixed31_32_mul(c3, l_pow_m1)); + + *out_y = dal_fixed31_32_pow(dal_fixed31_32_div(base, div), + dal_fixed31_32_div(dal_fixed31_32_one, m1)); + +} +/* one-time pre-compute PQ values - only for sdr_white_level 80 */ +void precompute_pq(void) +{ + int i; + struct fixed31_32 x; + const struct hw_x_point *coord_x = coordinates_x + 32; + struct fixed31_32 scaling_factor = + dal_fixed31_32_from_fraction(80, 10000); + + /* pow function has problems with arguments too small */ + for (i = 0; i < 32; i++) + pq_table[i] = dal_fixed31_32_zero; + + for (i = 32; i <= MAX_HW_POINTS; i++) { + x = dal_fixed31_32_mul(coord_x->x, scaling_factor); + compute_pq(x, &pq_table[i]); + ++coord_x; + } +} + +/* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */ +void precompute_de_pq(void) +{ + int i; + struct fixed31_32 y; + const struct hw_x_point *coord_x = degamma_coordinates_x; + struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125); + + + for (i = 0; i <= MAX_HW_DEGAMMA_POINTS; i++) { + compute_de_pq(coord_x->x, &y); + de_pq_table[i] = dal_fixed31_32_mul(y, scaling_factor); + ++coord_x; + } +} +struct dividers { + struct fixed31_32 divider1; + struct fixed31_32 divider2; + struct fixed31_32 divider3; +}; + +static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4) +{ + static const int32_t numerator01[] = { 31308, 180000}; + static const int32_t numerator02[] = { 12920, 4500}; + static const int32_t numerator03[] = { 55, 99}; + static const int32_t numerator04[] = { 55, 99}; + static const int32_t numerator05[] = { 2400, 2200}; + + uint32_t i = 0; + uint32_t index = is_2_4 == true ? 0:1; + + do { + coefficients->a0[i] = dal_fixed31_32_from_fraction( + numerator01[index], 10000000); + coefficients->a1[i] = dal_fixed31_32_from_fraction( + numerator02[index], 1000); + coefficients->a2[i] = dal_fixed31_32_from_fraction( + numerator03[index], 1000); + coefficients->a3[i] = dal_fixed31_32_from_fraction( + numerator04[index], 1000); + coefficients->user_gamma[i] = dal_fixed31_32_from_fraction( + numerator05[index], 1000); + + ++i; + } while (i != ARRAY_SIZE(coefficients->a0)); +} + +static struct fixed31_32 translate_from_linear_space( + struct fixed31_32 arg, + struct fixed31_32 a0, + struct fixed31_32 a1, + struct fixed31_32 a2, + struct fixed31_32 a3, + struct fixed31_32 gamma) +{ + const struct fixed31_32 one = dal_fixed31_32_from_int(1); + + if (dal_fixed31_32_lt(one, arg)) + return one; + + if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0))) + return dal_fixed31_32_sub( + a2, + dal_fixed31_32_mul( + dal_fixed31_32_add( + one, + a3), + dal_fixed31_32_pow( + dal_fixed31_32_neg(arg), + dal_fixed31_32_recip(gamma)))); + else if (dal_fixed31_32_le(a0, arg)) + return dal_fixed31_32_sub( + dal_fixed31_32_mul( + dal_fixed31_32_add( + one, + a3), + dal_fixed31_32_pow( + arg, + dal_fixed31_32_recip(gamma))), + a2); + else + return dal_fixed31_32_mul( + arg, + a1); +} + +static struct fixed31_32 translate_to_linear_space( + struct fixed31_32 arg, + struct fixed31_32 a0, + struct fixed31_32 a1, + struct fixed31_32 a2, + struct fixed31_32 a3, + struct fixed31_32 gamma) +{ + struct fixed31_32 linear; + + a0 = dal_fixed31_32_mul(a0, a1); + if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0))) + + linear = dal_fixed31_32_neg( + dal_fixed31_32_pow( + dal_fixed31_32_div( + dal_fixed31_32_sub(a2, arg), + dal_fixed31_32_add( + dal_fixed31_32_one, a3)), gamma)); + + else if (dal_fixed31_32_le(dal_fixed31_32_neg(a0), arg) && + dal_fixed31_32_le(arg, a0)) + linear = dal_fixed31_32_div(arg, a1); + else + linear = dal_fixed31_32_pow( + dal_fixed31_32_div( + dal_fixed31_32_add(a2, arg), + dal_fixed31_32_add( + dal_fixed31_32_one, a3)), gamma); + + return linear; +} + +static inline struct fixed31_32 translate_from_linear_space_ex( + struct fixed31_32 arg, + struct gamma_coefficients *coeff, + uint32_t color_index) +{ + return translate_from_linear_space( + arg, + coeff->a0[color_index], + coeff->a1[color_index], + coeff->a2[color_index], + coeff->a3[color_index], + coeff->user_gamma[color_index]); +} + + +static inline struct fixed31_32 translate_to_linear_space_ex( + struct fixed31_32 arg, + struct gamma_coefficients *coeff, + uint32_t color_index) +{ + return translate_to_linear_space( + arg, + coeff->a0[color_index], + coeff->a1[color_index], + coeff->a2[color_index], + coeff->a3[color_index], + coeff->user_gamma[color_index]); +} + + +static bool find_software_points( + const struct dc_gamma *ramp, + const struct gamma_pixel *axis_x, + struct fixed31_32 hw_point, + enum channel_name channel, + uint32_t *index_to_start, + uint32_t *index_left, + uint32_t *index_right, + enum hw_point_position *pos) +{ + const uint32_t max_number = ramp->num_entries + 3; + + struct fixed31_32 left, right; + + uint32_t i = *index_to_start; + + while (i < max_number) { + if (channel == CHANNEL_NAME_RED) { + left = axis_x[i].r; + + if (i < max_number - 1) + right = axis_x[i + 1].r; + else + right = axis_x[max_number - 1].r; + } else if (channel == CHANNEL_NAME_GREEN) { + left = axis_x[i].g; + + if (i < max_number - 1) + right = axis_x[i + 1].g; + else + right = axis_x[max_number - 1].g; + } else { + left = axis_x[i].b; + + if (i < max_number - 1) + right = axis_x[i + 1].b; + else + right = axis_x[max_number - 1].b; + } + + if (dal_fixed31_32_le(left, hw_point) && + dal_fixed31_32_le(hw_point, right)) { + *index_to_start = i; + *index_left = i; + + if (i < max_number - 1) + *index_right = i + 1; + else + *index_right = max_number - 1; + + *pos = HW_POINT_POSITION_MIDDLE; + + return true; + } else if ((i == *index_to_start) && + dal_fixed31_32_le(hw_point, left)) { + *index_to_start = i; + *index_left = i; + *index_right = i; + + *pos = HW_POINT_POSITION_LEFT; + + return true; + } else if ((i == max_number - 1) && + dal_fixed31_32_le(right, hw_point)) { + *index_to_start = i; + *index_left = i; + *index_right = i; + + *pos = HW_POINT_POSITION_RIGHT; + + return true; + } + + ++i; + } + + return false; +} + +static bool build_custom_gamma_mapping_coefficients_worker( + const struct dc_gamma *ramp, + struct pixel_gamma_point *coeff, + const struct hw_x_point *coordinates_x, + const struct gamma_pixel *axis_x, + enum channel_name channel, + uint32_t number_of_points) +{ + uint32_t i = 0; + + while (i <= number_of_points) { + struct fixed31_32 coord_x; + + uint32_t index_to_start = 0; + uint32_t index_left = 0; + uint32_t index_right = 0; + + enum hw_point_position hw_pos; + + struct gamma_point *point; + + struct fixed31_32 left_pos; + struct fixed31_32 right_pos; + + if (channel == CHANNEL_NAME_RED) + coord_x = coordinates_x[i].regamma_y_red; + else if (channel == CHANNEL_NAME_GREEN) + coord_x = coordinates_x[i].regamma_y_green; + else + coord_x = coordinates_x[i].regamma_y_blue; + + if (!find_software_points( + ramp, axis_x, coord_x, channel, + &index_to_start, &index_left, &index_right, &hw_pos)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (index_left >= ramp->num_entries + 3) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (index_right >= ramp->num_entries + 3) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (channel == CHANNEL_NAME_RED) { + point = &coeff[i].r; + + left_pos = axis_x[index_left].r; + right_pos = axis_x[index_right].r; + } else if (channel == CHANNEL_NAME_GREEN) { + point = &coeff[i].g; + + left_pos = axis_x[index_left].g; + right_pos = axis_x[index_right].g; + } else { + point = &coeff[i].b; + + left_pos = axis_x[index_left].b; + right_pos = axis_x[index_right].b; + } + + if (hw_pos == HW_POINT_POSITION_MIDDLE) + point->coeff = dal_fixed31_32_div( + dal_fixed31_32_sub( + coord_x, + left_pos), + dal_fixed31_32_sub( + right_pos, + left_pos)); + else if (hw_pos == HW_POINT_POSITION_LEFT) + point->coeff = dal_fixed31_32_zero; + else if (hw_pos == HW_POINT_POSITION_RIGHT) + point->coeff = dal_fixed31_32_from_int(2); + else { + BREAK_TO_DEBUGGER(); + return false; + } + + point->left_index = index_left; + point->right_index = index_right; + point->pos = hw_pos; + + ++i; + } + + return true; +} + +static struct fixed31_32 calculate_mapped_value( + struct pwl_float_data *rgb, + const struct pixel_gamma_point *coeff, + enum channel_name channel, + uint32_t max_index) +{ + const struct gamma_point *point; + + struct fixed31_32 result; + + if (channel == CHANNEL_NAME_RED) + point = &coeff->r; + else if (channel == CHANNEL_NAME_GREEN) + point = &coeff->g; + else + point = &coeff->b; + + if ((point->left_index < 0) || (point->left_index > max_index)) { + BREAK_TO_DEBUGGER(); + return dal_fixed31_32_zero; + } + + if ((point->right_index < 0) || (point->right_index > max_index)) { + BREAK_TO_DEBUGGER(); + return dal_fixed31_32_zero; + } + + if (point->pos == HW_POINT_POSITION_MIDDLE) + if (channel == CHANNEL_NAME_RED) + result = dal_fixed31_32_add( + dal_fixed31_32_mul( + point->coeff, + dal_fixed31_32_sub( + rgb[point->right_index].r, + rgb[point->left_index].r)), + rgb[point->left_index].r); + else if (channel == CHANNEL_NAME_GREEN) + result = dal_fixed31_32_add( + dal_fixed31_32_mul( + point->coeff, + dal_fixed31_32_sub( + rgb[point->right_index].g, + rgb[point->left_index].g)), + rgb[point->left_index].g); + else + result = dal_fixed31_32_add( + dal_fixed31_32_mul( + point->coeff, + dal_fixed31_32_sub( + rgb[point->right_index].b, + rgb[point->left_index].b)), + rgb[point->left_index].b); + else if (point->pos == HW_POINT_POSITION_LEFT) { + BREAK_TO_DEBUGGER(); + result = dal_fixed31_32_zero; + } else { + BREAK_TO_DEBUGGER(); + result = dal_fixed31_32_one; + } + + return result; +} + +static void build_pq(struct pwl_float_data_ex *rgb_regamma, + uint32_t hw_points_num, + const struct hw_x_point *coordinate_x, + uint32_t sdr_white_level) +{ + uint32_t i, start_index; + + struct pwl_float_data_ex *rgb = rgb_regamma; + const struct hw_x_point *coord_x = coordinate_x; + struct fixed31_32 x; + struct fixed31_32 output; + struct fixed31_32 scaling_factor = + dal_fixed31_32_from_fraction(sdr_white_level, 10000); + + if (!pq_initialized && sdr_white_level == 80) { + precompute_pq(); + pq_initialized = true; + } + + /* TODO: start index is from segment 2^-24, skipping first segment + * due to x values too small for power calculations + */ + start_index = 32; + rgb += start_index; + coord_x += start_index; + + for (i = start_index; i <= hw_points_num; i++) { + /* Multiply 0.008 as regamma is 0-1 and FP16 input is 0-125. + * FP 1.0 = 80nits + */ + if (sdr_white_level == 80) { + output = pq_table[i]; + } else { + x = dal_fixed31_32_mul(coord_x->x, scaling_factor); + compute_pq(x, &output); + } + + /* should really not happen? */ + if (dal_fixed31_32_lt(output, dal_fixed31_32_zero)) + output = dal_fixed31_32_zero; + else if (dal_fixed31_32_lt(dal_fixed31_32_one, output)) + output = dal_fixed31_32_one; + + rgb->r = output; + rgb->g = output; + rgb->b = output; + + ++coord_x; + ++rgb; + } +} + +static void build_de_pq(struct pwl_float_data_ex *de_pq, + uint32_t hw_points_num, + const struct hw_x_point *coordinate_x) +{ + uint32_t i; + struct fixed31_32 output; + + struct pwl_float_data_ex *rgb = de_pq; + const struct hw_x_point *coord_x = degamma_coordinates_x; + struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125); + + if (!de_pq_initialized) { + precompute_de_pq(); + de_pq_initialized = true; + } + + + for (i = 0; i <= hw_points_num; i++) { + output = de_pq_table[i]; + /* should really not happen? */ + if (dal_fixed31_32_lt(output, dal_fixed31_32_zero)) + output = dal_fixed31_32_zero; + else if (dal_fixed31_32_lt(scaling_factor, output)) + output = scaling_factor; + + rgb->r = output; + rgb->g = output; + rgb->b = output; + + ++coord_x; + ++rgb; + } +} + +static void build_regamma(struct pwl_float_data_ex *rgb_regamma, + uint32_t hw_points_num, + const struct hw_x_point *coordinate_x, bool is_2_4) +{ + uint32_t i; + + struct gamma_coefficients coeff; + struct pwl_float_data_ex *rgb = rgb_regamma; + const struct hw_x_point *coord_x = coordinate_x; + + build_coefficients(&coeff, is_2_4); + + i = 0; + + while (i != hw_points_num + 1) { + /*TODO use y vs r,g,b*/ + rgb->r = translate_from_linear_space_ex( + coord_x->x, &coeff, 0); + rgb->g = rgb->r; + rgb->b = rgb->r; + ++coord_x; + ++rgb; + ++i; + } +} + +static void build_degamma(struct pwl_float_data_ex *curve, + uint32_t hw_points_num, + const struct hw_x_point *coordinate_x, bool is_2_4) +{ + uint32_t i; + + struct gamma_coefficients coeff; + struct pwl_float_data_ex *rgb = curve; + const struct hw_x_point *coord_x = degamma_coordinates_x; + + build_coefficients(&coeff, is_2_4); + + i = 0; + + while (i != hw_points_num + 1) { + /*TODO use y vs r,g,b*/ + rgb->r = translate_to_linear_space_ex( + coord_x->x, &coeff, 0); + rgb->g = rgb->r; + rgb->b = rgb->r; + ++coord_x; + ++rgb; + ++i; + } +} + +static bool scale_gamma(struct pwl_float_data *pwl_rgb, + const struct dc_gamma *ramp, + struct dividers dividers) +{ + const struct fixed31_32 max_driver = dal_fixed31_32_from_int(0xFFFF); + const struct fixed31_32 max_os = dal_fixed31_32_from_int(0xFF00); + struct fixed31_32 scaler = max_os; + uint32_t i; + struct pwl_float_data *rgb = pwl_rgb; + struct pwl_float_data *rgb_last = rgb + ramp->num_entries - 1; + + i = 0; + + do { + if (dal_fixed31_32_lt(max_os, ramp->entries.red[i]) || + dal_fixed31_32_lt(max_os, ramp->entries.green[i]) || + dal_fixed31_32_lt(max_os, ramp->entries.blue[i])) { + scaler = max_driver; + break; + } + ++i; + } while (i != ramp->num_entries); + + i = 0; + + do { + rgb->r = dal_fixed31_32_div( + ramp->entries.red[i], scaler); + rgb->g = dal_fixed31_32_div( + ramp->entries.green[i], scaler); + rgb->b = dal_fixed31_32_div( + ramp->entries.blue[i], scaler); + + ++rgb; + ++i; + } while (i != ramp->num_entries); + + rgb->r = dal_fixed31_32_mul(rgb_last->r, + dividers.divider1); + rgb->g = dal_fixed31_32_mul(rgb_last->g, + dividers.divider1); + rgb->b = dal_fixed31_32_mul(rgb_last->b, + dividers.divider1); + + ++rgb; + + rgb->r = dal_fixed31_32_mul(rgb_last->r, + dividers.divider2); + rgb->g = dal_fixed31_32_mul(rgb_last->g, + dividers.divider2); + rgb->b = dal_fixed31_32_mul(rgb_last->b, + dividers.divider2); + + ++rgb; + + rgb->r = dal_fixed31_32_mul(rgb_last->r, + dividers.divider3); + rgb->g = dal_fixed31_32_mul(rgb_last->g, + dividers.divider3); + rgb->b = dal_fixed31_32_mul(rgb_last->b, + dividers.divider3); + + return true; +} + +static bool scale_gamma_dx(struct pwl_float_data *pwl_rgb, + const struct dc_gamma *ramp, + struct dividers dividers) +{ + uint32_t i; + struct fixed31_32 min = dal_fixed31_32_zero; + struct fixed31_32 max = dal_fixed31_32_one; + + struct fixed31_32 delta = dal_fixed31_32_zero; + struct fixed31_32 offset = dal_fixed31_32_zero; + + for (i = 0 ; i < ramp->num_entries; i++) { + if (dal_fixed31_32_lt(ramp->entries.red[i], min)) + min = ramp->entries.red[i]; + + if (dal_fixed31_32_lt(ramp->entries.green[i], min)) + min = ramp->entries.green[i]; + + if (dal_fixed31_32_lt(ramp->entries.blue[i], min)) + min = ramp->entries.blue[i]; + + if (dal_fixed31_32_lt(max, ramp->entries.red[i])) + max = ramp->entries.red[i]; + + if (dal_fixed31_32_lt(max, ramp->entries.green[i])) + max = ramp->entries.green[i]; + + if (dal_fixed31_32_lt(max, ramp->entries.blue[i])) + max = ramp->entries.blue[i]; + } + + if (dal_fixed31_32_lt(min, dal_fixed31_32_zero)) + delta = dal_fixed31_32_neg(min); + + offset = dal_fixed31_32_add(min, max); + + for (i = 0 ; i < ramp->num_entries; i++) { + pwl_rgb[i].r = dal_fixed31_32_div( + dal_fixed31_32_add( + ramp->entries.red[i], delta), offset); + pwl_rgb[i].g = dal_fixed31_32_div( + dal_fixed31_32_add( + ramp->entries.green[i], delta), offset); + pwl_rgb[i].b = dal_fixed31_32_div( + dal_fixed31_32_add( + ramp->entries.blue[i], delta), offset); + + } + + pwl_rgb[i].r = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r); + pwl_rgb[i].g = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g); + pwl_rgb[i].b = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b); + ++i; + pwl_rgb[i].r = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r); + pwl_rgb[i].g = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g); + pwl_rgb[i].b = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b); + + return true; +} + +/* + * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here + * Input is evenly distributed in the output color space as specified in + * SetTimings + * + * Interpolation details: + * 1D LUT has 4096 values which give curve correction in 0-1 float range + * for evenly spaced points in 0-1 range. lut1D[index] gives correction + * for index/4095. + * First we find index for which: + * index/4095 < regamma_y < (index+1)/4095 => + * index < 4095*regamma_y < index + 1 + * norm_y = 4095*regamma_y, and index is just truncating to nearest integer + * lut1 = lut1D[index], lut2 = lut1D[index+1] + * + *adjustedY is then linearly interpolating regamma Y between lut1 and lut2 + */ +static void apply_lut_1d( + const struct dc_gamma *ramp, + uint32_t num_hw_points, + struct dc_transfer_func_distributed_points *tf_pts) +{ + int i = 0; + int color = 0; + struct fixed31_32 *regamma_y; + struct fixed31_32 norm_y; + struct fixed31_32 lut1; + struct fixed31_32 lut2; + const int max_lut_index = 4095; + const struct fixed31_32 max_lut_index_f = + dal_fixed31_32_from_int_nonconst(max_lut_index); + int32_t index = 0, index_next = 0; + struct fixed31_32 index_f; + struct fixed31_32 delta_lut; + struct fixed31_32 delta_index; + + if (ramp->type != GAMMA_CS_TFM_1D) + return; // this is not expected + + for (i = 0; i < num_hw_points; i++) { + for (color = 0; color < 3; color++) { + if (color == 0) + regamma_y = &tf_pts->red[i]; + else if (color == 1) + regamma_y = &tf_pts->green[i]; + else + regamma_y = &tf_pts->blue[i]; + + norm_y = dal_fixed31_32_mul(max_lut_index_f, + *regamma_y); + index = dal_fixed31_32_floor(norm_y); + index_f = dal_fixed31_32_from_int_nonconst(index); + + if (index < 0 || index > max_lut_index) + continue; + + index_next = (index == max_lut_index) ? index : index+1; + + if (color == 0) { + lut1 = ramp->entries.red[index]; + lut2 = ramp->entries.red[index_next]; + } else if (color == 1) { + lut1 = ramp->entries.green[index]; + lut2 = ramp->entries.green[index_next]; + } else { + lut1 = ramp->entries.blue[index]; + lut2 = ramp->entries.blue[index_next]; + } + + // we have everything now, so interpolate + delta_lut = dal_fixed31_32_sub(lut2, lut1); + delta_index = dal_fixed31_32_sub(norm_y, index_f); + + *regamma_y = dal_fixed31_32_add(lut1, + dal_fixed31_32_mul(delta_index, delta_lut)); + } + } +} + +static void build_evenly_distributed_points( + struct gamma_pixel *points, + uint32_t numberof_points, + struct dividers dividers) +{ + struct gamma_pixel *p = points; + struct gamma_pixel *p_last = p + numberof_points - 1; + + uint32_t i = 0; + + do { + struct fixed31_32 value = dal_fixed31_32_from_fraction(i, + numberof_points - 1); + + p->r = value; + p->g = value; + p->b = value; + + ++p; + ++i; + } while (i != numberof_points); + + p->r = dal_fixed31_32_div(p_last->r, dividers.divider1); + p->g = dal_fixed31_32_div(p_last->g, dividers.divider1); + p->b = dal_fixed31_32_div(p_last->b, dividers.divider1); + + ++p; + + p->r = dal_fixed31_32_div(p_last->r, dividers.divider2); + p->g = dal_fixed31_32_div(p_last->g, dividers.divider2); + p->b = dal_fixed31_32_div(p_last->b, dividers.divider2); + + ++p; + + p->r = dal_fixed31_32_div(p_last->r, dividers.divider3); + p->g = dal_fixed31_32_div(p_last->g, dividers.divider3); + p->b = dal_fixed31_32_div(p_last->b, dividers.divider3); +} + +static inline void copy_rgb_regamma_to_coordinates_x( + struct hw_x_point *coordinates_x, + uint32_t hw_points_num, + const struct pwl_float_data_ex *rgb_ex) +{ + struct hw_x_point *coords = coordinates_x; + uint32_t i = 0; + const struct pwl_float_data_ex *rgb_regamma = rgb_ex; + + while (i <= hw_points_num) { + coords->regamma_y_red = rgb_regamma->r; + coords->regamma_y_green = rgb_regamma->g; + coords->regamma_y_blue = rgb_regamma->b; + + ++coords; + ++rgb_regamma; + ++i; + } +} + +static bool calculate_interpolated_hardware_curve( + const struct dc_gamma *ramp, + struct pixel_gamma_point *coeff128, + struct pwl_float_data *rgb_user, + const struct hw_x_point *coordinates_x, + const struct gamma_pixel *axis_x, + uint32_t number_of_points, + struct dc_transfer_func_distributed_points *tf_pts) +{ + + const struct pixel_gamma_point *coeff = coeff128; + uint32_t max_entries = 3 - 1; + + uint32_t i = 0; + + for (i = 0; i < 3; i++) { + if (!build_custom_gamma_mapping_coefficients_worker( + ramp, coeff128, coordinates_x, axis_x, i, + number_of_points)) + return false; + } + + i = 0; + max_entries += ramp->num_entries; + + /* TODO: float point case */ + + while (i <= number_of_points) { + tf_pts->red[i] = calculate_mapped_value( + rgb_user, coeff, CHANNEL_NAME_RED, max_entries); + tf_pts->green[i] = calculate_mapped_value( + rgb_user, coeff, CHANNEL_NAME_GREEN, max_entries); + tf_pts->blue[i] = calculate_mapped_value( + rgb_user, coeff, CHANNEL_NAME_BLUE, max_entries); + + ++coeff; + ++i; + } + + return true; +} + +static void build_new_custom_resulted_curve( + uint32_t hw_points_num, + struct dc_transfer_func_distributed_points *tf_pts) +{ + uint32_t i; + + i = 0; + + while (i != hw_points_num + 1) { + tf_pts->red[i] = dal_fixed31_32_clamp( + tf_pts->red[i], dal_fixed31_32_zero, + dal_fixed31_32_one); + tf_pts->green[i] = dal_fixed31_32_clamp( + tf_pts->green[i], dal_fixed31_32_zero, + dal_fixed31_32_one); + tf_pts->blue[i] = dal_fixed31_32_clamp( + tf_pts->blue[i], dal_fixed31_32_zero, + dal_fixed31_32_one); + + ++i; + } +} + +static bool map_regamma_hw_to_x_user( + const struct dc_gamma *ramp, + struct pixel_gamma_point *coeff128, + struct pwl_float_data *rgb_user, + struct hw_x_point *coords_x, + const struct gamma_pixel *axis_x, + const struct pwl_float_data_ex *rgb_regamma, + uint32_t hw_points_num, + struct dc_transfer_func_distributed_points *tf_pts, + bool mapUserRamp) +{ + /* setup to spare calculated ideal regamma values */ + + int i = 0; + struct hw_x_point *coords = coords_x; + const struct pwl_float_data_ex *regamma = rgb_regamma; + + if (mapUserRamp) { + copy_rgb_regamma_to_coordinates_x(coords, + hw_points_num, + rgb_regamma); + + calculate_interpolated_hardware_curve( + ramp, coeff128, rgb_user, coords, axis_x, + hw_points_num, tf_pts); + } else { + /* just copy current rgb_regamma into tf_pts */ + while (i <= hw_points_num) { + tf_pts->red[i] = regamma->r; + tf_pts->green[i] = regamma->g; + tf_pts->blue[i] = regamma->b; + + ++regamma; + ++i; + } + } + + build_new_custom_resulted_curve(hw_points_num, tf_pts); + + return true; +} + +#define _EXTRA_POINTS 3 + +bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, + const struct dc_gamma *ramp, bool mapUserRamp) +{ + struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; + struct dividers dividers; + + struct pwl_float_data *rgb_user = NULL; + struct pwl_float_data_ex *rgb_regamma = NULL; + struct gamma_pixel *axix_x = NULL; + struct pixel_gamma_point *coeff = NULL; + enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; + bool ret = false; + + if (output_tf->type == TF_TYPE_BYPASS) + return false; + + /* we can use hardcoded curve for plain SRGB TF */ + if (output_tf->type == TF_TYPE_PREDEFINED && + output_tf->tf == TRANSFER_FUNCTION_SRGB && + (!mapUserRamp && ramp->type == GAMMA_RGB_256)) + return true; + + output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + + rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_user) + goto rgb_user_alloc_fail; + rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_regamma) + goto rgb_regamma_alloc_fail; + axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + 3), + GFP_KERNEL); + if (!axix_x) + goto axix_x_alloc_fail; + coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); + if (!coeff) + goto coeff_alloc_fail; + + dividers.divider1 = dal_fixed31_32_from_fraction(3, 2); + dividers.divider2 = dal_fixed31_32_from_int(2); + dividers.divider3 = dal_fixed31_32_from_fraction(5, 2); + + tf = output_tf->tf; + + build_evenly_distributed_points( + axix_x, + ramp->num_entries, + dividers); + + if (ramp->type == GAMMA_RGB_256 && mapUserRamp) + scale_gamma(rgb_user, ramp, dividers); + else if (ramp->type == GAMMA_RGB_FLOAT_1024) + scale_gamma_dx(rgb_user, ramp, dividers); + + if (tf == TRANSFER_FUNCTION_PQ) { + tf_pts->end_exponent = 7; + tf_pts->x_point_at_y1_red = 125; + tf_pts->x_point_at_y1_green = 125; + tf_pts->x_point_at_y1_blue = 125; + + build_pq(rgb_regamma, + MAX_HW_POINTS, + coordinates_x, + output_tf->sdr_ref_white_level); + } else { + tf_pts->end_exponent = 0; + tf_pts->x_point_at_y1_red = 1; + tf_pts->x_point_at_y1_green = 1; + tf_pts->x_point_at_y1_blue = 1; + + build_regamma(rgb_regamma, + MAX_HW_POINTS, + coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false); + } + + map_regamma_hw_to_x_user(ramp, coeff, rgb_user, + coordinates_x, axix_x, rgb_regamma, + MAX_HW_POINTS, tf_pts, + (mapUserRamp || ramp->type != GAMMA_RGB_256) && + ramp->type != GAMMA_CS_TFM_1D); + + if (ramp->type == GAMMA_CS_TFM_1D) + apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts); + + ret = true; + + kfree(coeff); +coeff_alloc_fail: + kfree(axix_x); +axix_x_alloc_fail: + kfree(rgb_regamma); +rgb_regamma_alloc_fail: + kfree(rgb_user); +rgb_user_alloc_fail: + return ret; +} + + +/*TODO fix me should be 2*/ +#define _EXTRA_POINTS 3 + +bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, + const struct dc_gamma *ramp, bool mapUserRamp) +{ + struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts; + struct dividers dividers; + + struct pwl_float_data *rgb_user = NULL; + struct pwl_float_data_ex *curve = NULL; + struct gamma_pixel *axix_x = NULL; + struct pixel_gamma_point *coeff = NULL; + enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; + bool ret = false; + + if (input_tf->type == TF_TYPE_BYPASS) + return false; + + /* we can use hardcoded curve for plain SRGB TF */ + if (input_tf->type == TF_TYPE_PREDEFINED && + input_tf->tf == TRANSFER_FUNCTION_SRGB && + (!mapUserRamp && ramp->type == GAMMA_RGB_256)) + return true; + + input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + + rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_user) + goto rgb_user_alloc_fail; + curve = kzalloc(sizeof(*curve) * (MAX_HW_DEGAMMA_POINTS + _EXTRA_POINTS), + GFP_KERNEL); + if (!curve) + goto curve_alloc_fail; + axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); + if (!axix_x) + goto axix_x_alloc_fail; + coeff = kzalloc(sizeof(*coeff) * (MAX_HW_DEGAMMA_POINTS + _EXTRA_POINTS), GFP_KERNEL); + if (!coeff) + goto coeff_alloc_fail; + + dividers.divider1 = dal_fixed31_32_from_fraction(3, 2); + dividers.divider2 = dal_fixed31_32_from_int(2); + dividers.divider3 = dal_fixed31_32_from_fraction(5, 2); + + tf = input_tf->tf; + + build_evenly_distributed_points( + axix_x, + ramp->num_entries, + dividers); + + if (ramp->type == GAMMA_RGB_256 && mapUserRamp) + scale_gamma(rgb_user, ramp, dividers); + else if (ramp->type == GAMMA_RGB_FLOAT_1024) + scale_gamma_dx(rgb_user, ramp, dividers); + + if (tf == TRANSFER_FUNCTION_PQ) + build_de_pq(curve, + MAX_HW_DEGAMMA_POINTS, + degamma_coordinates_x); + else + build_degamma(curve, + MAX_HW_DEGAMMA_POINTS, + degamma_coordinates_x, + tf == TRANSFER_FUNCTION_SRGB ? true:false); + + tf_pts->end_exponent = 0; + tf_pts->x_point_at_y1_red = 1; + tf_pts->x_point_at_y1_green = 1; + tf_pts->x_point_at_y1_blue = 1; + + map_regamma_hw_to_x_user(ramp, coeff, rgb_user, + degamma_coordinates_x, axix_x, curve, + MAX_HW_DEGAMMA_POINTS, tf_pts, + mapUserRamp); + + ret = true; + + kfree(coeff); +coeff_alloc_fail: + kfree(axix_x); +axix_x_alloc_fail: + kfree(curve); +curve_alloc_fail: + kfree(rgb_user); +rgb_user_alloc_fail: + + return ret; + +} + + +bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, + struct dc_transfer_func_distributed_points *points) +{ + uint32_t i; + bool ret = false; + struct pwl_float_data_ex *rgb_regamma = NULL; + + if (trans == TRANSFER_FUNCTION_UNITY) { + points->end_exponent = 0; + points->x_point_at_y1_red = 1; + points->x_point_at_y1_green = 1; + points->x_point_at_y1_blue = 1; + + for (i = 0; i < MAX_HW_POINTS ; i++) { + points->red[i] = coordinates_x[i].x; + points->green[i] = coordinates_x[i].x; + points->blue[i] = coordinates_x[i].x; + } + ret = true; + } else if (trans == TRANSFER_FUNCTION_PQ) { + rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + + _EXTRA_POINTS), GFP_KERNEL); + if (!rgb_regamma) + goto rgb_regamma_alloc_fail; + points->end_exponent = 7; + points->x_point_at_y1_red = 125; + points->x_point_at_y1_green = 125; + points->x_point_at_y1_blue = 125; + + + build_pq(rgb_regamma, + MAX_HW_POINTS, + coordinates_x, + 80); + for (i = 0; i < MAX_HW_POINTS ; i++) { + points->red[i] = rgb_regamma[i].r; + points->green[i] = rgb_regamma[i].g; + points->blue[i] = rgb_regamma[i].b; + } + ret = true; + + kfree(rgb_regamma); + } else if (trans == TRANSFER_FUNCTION_SRGB || + trans == TRANSFER_FUNCTION_BT709) { + rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + + _EXTRA_POINTS), GFP_KERNEL); + if (!rgb_regamma) + goto rgb_regamma_alloc_fail; + points->end_exponent = 0; + points->x_point_at_y1_red = 1; + points->x_point_at_y1_green = 1; + points->x_point_at_y1_blue = 1; + + build_regamma(rgb_regamma, + MAX_HW_POINTS, + coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); + for (i = 0; i < MAX_HW_POINTS ; i++) { + points->red[i] = rgb_regamma[i].r; + points->green[i] = rgb_regamma[i].g; + points->blue[i] = rgb_regamma[i].b; + } + ret = true; + + kfree(rgb_regamma); + } +rgb_regamma_alloc_fail: + return ret; +} + + +bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, + struct dc_transfer_func_distributed_points *points) +{ + uint32_t i; + bool ret = false; + struct pwl_float_data_ex *rgb_degamma = NULL; + + if (trans == TRANSFER_FUNCTION_UNITY) { + + for (i = 0; i < MAX_HW_DEGAMMA_POINTS ; i++) { + points->red[i] = degamma_coordinates_x[i].x; + points->green[i] = degamma_coordinates_x[i].x; + points->blue[i] = degamma_coordinates_x[i].x; + } + ret = true; + } else if (trans == TRANSFER_FUNCTION_PQ) { + rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_DEGAMMA_POINTS + + _EXTRA_POINTS), GFP_KERNEL); + if (!rgb_degamma) + goto rgb_degamma_alloc_fail; + + + build_de_pq(rgb_degamma, + MAX_HW_DEGAMMA_POINTS, + degamma_coordinates_x); + for (i = 0; i < MAX_HW_DEGAMMA_POINTS ; i++) { + points->red[i] = rgb_degamma[i].r; + points->green[i] = rgb_degamma[i].g; + points->blue[i] = rgb_degamma[i].b; + } + ret = true; + + kfree(rgb_degamma); + } else if (trans == TRANSFER_FUNCTION_SRGB || + trans == TRANSFER_FUNCTION_BT709) { + rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_DEGAMMA_POINTS + + _EXTRA_POINTS), GFP_KERNEL); + if (!rgb_degamma) + goto rgb_degamma_alloc_fail; + + build_degamma(rgb_degamma, + MAX_HW_DEGAMMA_POINTS, + degamma_coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); + for (i = 0; i < MAX_HW_DEGAMMA_POINTS ; i++) { + points->red[i] = rgb_degamma[i].r; + points->green[i] = rgb_degamma[i].g; + points->blue[i] = rgb_degamma[i].b; + } + ret = true; + + kfree(rgb_degamma); + } + points->end_exponent = 0; + points->x_point_at_y1_red = 1; + points->x_point_at_y1_green = 1; + points->x_point_at_y1_blue = 1; + +rgb_degamma_alloc_fail: + return ret; +} + + diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h new file mode 100644 index 000000000000..b7f9bc27d101 --- /dev/null +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -0,0 +1,53 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef COLOR_MOD_COLOR_GAMMA_H_ +#define COLOR_MOD_COLOR_GAMMA_H_ + +struct dc_transfer_func; +struct dc_gamma; +struct dc_transfer_func_distributed_points; +struct dc_rgb_fixed; +enum dc_transfer_func_predefined; + +void setup_x_points_distribution(void); +void precompute_pq(void); +void precompute_de_pq(void); + +bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, + const struct dc_gamma *ramp, bool mapUserRamp); + +bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf, + const struct dc_gamma *ramp, bool mapUserRamp); + +bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, + struct dc_transfer_func_distributed_points *points); + +bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, + struct dc_transfer_func_distributed_points *points); + + + +#endif /* COLOR_MOD_COLOR_GAMMA_H_ */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h index b28d4b64c05d..e2a2f114bd8e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h @@ -9364,17 +9364,31 @@ #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_TMZ__SHIFT 0x0 #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_DCC_EN__SHIFT 0x1 #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_DCC_IND_64B_BLK__SHIFT 0x2 +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_TMZ_C__SHIFT 0x4 #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_DCC_IND_64B_BLK_C__SHIFT 0x5 +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_TMZ__SHIFT 0x8 #define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_DCC_EN__SHIFT 0x9 #define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_DCC_IND_64B_BLK__SHIFT 0xa +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_TMZ_C__SHIFT 0xc #define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_DCC_IND_64B_BLK_C__SHIFT 0xd +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_META_SURFACE_TMZ__SHIFT 0x10 +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_META_SURFACE_TMZ_C__SHIFT 0x14 +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_META_SURFACE_TMZ__SHIFT 0x18 +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_META_SURFACE_TMZ_C__SHIFT 0x1c #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_TMZ_MASK 0x00000001L #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_DCC_EN_MASK 0x00000002L #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_DCC_IND_64B_BLK_MASK 0x00000004L +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_TMZ_C_MASK 0x00000010L #define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_SURFACE_DCC_IND_64B_BLK_C_MASK 0x00000020L +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_TMZ_MASK 0x00000100L #define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_DCC_EN_MASK 0x00000200L #define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_DCC_IND_64B_BLK_MASK 0x00000400L +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_TMZ_C_MASK 0x00001000L #define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_SURFACE_DCC_IND_64B_BLK_C_MASK 0x00002000L +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_META_SURFACE_TMZ_MASK 0x00010000L +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__PRIMARY_META_SURFACE_TMZ_C_MASK 0x00100000L +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_META_SURFACE_TMZ_MASK 0x01000000L +#define HUBPREQ0_DCSURF_SURFACE_CONTROL__SECONDARY_META_SURFACE_TMZ_C_MASK 0x10000000L //HUBPREQ0_DCSURF_FLIP_CONTROL #define HUBPREQ0_DCSURF_FLIP_CONTROL__SURFACE_UPDATE_LOCK__SHIFT 0x0 #define HUBPREQ0_DCSURF_FLIP_CONTROL__SURFACE_FLIP_TYPE__SHIFT 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h index b89347ed1a40..f35aba72e640 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h @@ -1246,5 +1246,6 @@ #define ixGC_CAC_OVRD_CU 0xe7 #define ixCURRENT_PG_STATUS 0xc020029c #define ixCURRENT_PG_STATUS_APU 0xd020029c +#define ixPWR_SVI2_STATUS 0xC0200294 #endif /* SMU_7_1_3_D_H */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h index 654c1093d362..481ee6560aa9 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h @@ -6078,6 +6078,8 @@ #define GC_CAC_OVRD_CU__OVRRD_VALUE__SHIFT 0x10 #define CURRENT_PG_STATUS__VCE_PG_STATUS_MASK 0x00000002 #define CURRENT_PG_STATUS__UVD_PG_STATUS_MASK 0x00000004 - - +#define PWR_SVI2_STATUS__PLANE1_VID_MASK 0x000000ff +#define PWR_SVI2_STATUS__PLANE1_VID__SHIFT 0x00000000 +#define PWR_SVI2_STATUS__PLANE2_VID_MASK 0x0000ff00 +#define PWR_SVI2_STATUS__PLANE2_VID__SHIFT 0x00000008 #endif /* SMU_7_1_3_SH_MASK_H */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_offset.h index c1006fe58daa..efd2704d0f8f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_offset.h @@ -172,4 +172,7 @@ #define mmROM_SW_DATA_64 0x006d #define mmROM_SW_DATA_64_BASE_IDX 0 +#define mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX 0 +#define mmSMUSVI0_PLANE0_CURRENTVID 0x0013 + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_sh_mask.h index a0be5c9bfc10..2487ab9621e9 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_sh_mask.h @@ -254,5 +254,8 @@ //ROM_SW_DATA_64 #define ROM_SW_DATA_64__ROM_SW_DATA__SHIFT 0x0 #define ROM_SW_DATA_64__ROM_SW_DATA_MASK 0xFFFFFFFFL +/* SMUSVI0_PLANE0_CURRENTVID */ +#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT 0x18 +#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK 0xFF000000L #endif diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index 675988d56392..f5c73970ab88 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -427,6 +427,9 @@ struct amd_pp_init; typedef void* (*cgs_register_pp_handle)(struct cgs_device *cgs_device, int (*call_back_func)(struct amd_pp_init *, void **)); +typedef int (*cgs_set_temperature_range)(struct cgs_device *cgs_device, + int min_temperature, + int max_temperature); struct cgs_ops { /* memory management calls (similar to KFD interface) */ cgs_alloc_gpu_mem_t alloc_gpu_mem; @@ -464,6 +467,7 @@ struct cgs_ops { cgs_enter_safe_mode enter_safe_mode; cgs_lock_grbm_idx lock_grbm_idx; cgs_register_pp_handle register_pp_handle; + cgs_set_temperature_range set_temperature_range; }; struct cgs_os_ops; /* To be define in OS-specific CGS header */ @@ -545,4 +549,7 @@ struct cgs_device #define cgs_register_pp_handle(cgs_device, call_back_func) \ CGS_CALL(register_pp_handle, cgs_device, call_back_func) +#define cgs_set_temperature_range(dev, min_temp, max_temp) \ + CGS_CALL(set_temperature_range, dev, min_temp, max_temp) + #endif /* _CGS_COMMON_H */ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index ed27626dff14..22c2fa30731f 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -107,6 +107,8 @@ enum pp_clock_type { PP_SCLK, PP_MCLK, PP_PCIE, + OD_SCLK, + OD_MCLK, }; enum amd_pp_sensors { @@ -122,6 +124,8 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_VCE_POWER, AMDGPU_PP_SENSOR_UVD_POWER, AMDGPU_PP_SENSOR_GPU_POWER, + AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, + AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, }; enum amd_pp_task { @@ -140,7 +144,15 @@ struct amd_pp_init { uint32_t feature_mask; }; - +enum PP_SMC_POWER_PROFILE { + PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x0, + PP_SMC_POWER_PROFILE_POWERSAVING = 0x1, + PP_SMC_POWER_PROFILE_VIDEO = 0x2, + PP_SMC_POWER_PROFILE_VR = 0x3, + PP_SMC_POWER_PROFILE_COMPUTE = 0x4, + PP_SMC_POWER_PROFILE_CUSTOM = 0x5, + PP_SMC_POWER_PROFILE_AUTO = 0x6, +}; enum { PP_GROUP_UNKNOWN = 0, @@ -149,6 +161,13 @@ enum { PP_GROUP_MAX }; +enum PP_OD_DPM_TABLE_COMMAND { + PP_OD_EDIT_SCLK_VDDC_TABLE, + PP_OD_EDIT_MCLK_VDDC_TABLE, + PP_OD_RESTORE_DEFAULT_TABLE, + PP_OD_COMMIT_DPM_TABLE +}; + struct pp_states_info { uint32_t nums; uint32_t states[16]; @@ -222,7 +241,6 @@ struct amd_pm_funcs { void *rps, bool *equal); /* export for sysfs */ - int (*get_temperature)(void *handle); void (*set_fan_control_mode)(void *handle, u32 mode); u32 (*get_fan_control_mode)(void *handle); int (*set_fan_speed_percent)(void *handle, u32 speed); @@ -256,7 +274,7 @@ struct amd_pm_funcs { void (*powergate_vce)(void *handle, bool gate); struct amd_vce_state *(*get_vce_clock_state)(void *handle, u32 idx); int (*dispatch_tasks)(void *handle, enum amd_pp_task task_id, - void *input, void *output); + enum amd_pm_state_type *user_state); int (*load_firmware)(void *handle); int (*wait_for_fw_loading_complete)(void *handle); int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id); @@ -265,6 +283,8 @@ struct amd_pm_funcs { uint32_t mc_addr_low, uint32_t mc_addr_hi, uint32_t size); + int (*set_power_limit)(void *handle, uint32_t n); + int (*get_power_limit)(void *handle, uint32_t *limit, bool default_limit); /* export to DC */ u32 (*get_sclk)(void *handle, bool low); u32 (*get_mclk)(void *handle, bool low); @@ -289,6 +309,10 @@ struct amd_pm_funcs { struct pp_display_clock_request *clock); int (*get_display_mode_validation_clocks)(void *handle, struct amd_pp_simple_clock_info *clocks); + int (*get_power_profile_mode)(void *handle, char *buf); + int (*set_power_profile_mode)(void *handle, long *input, uint32_t size); + int (*odn_edit_dpm_table)(void *handle, uint32_t type, long *input, uint32_t size); + int (*set_mmhub_powergating_by_smu)(void *handle); }; #endif diff --git a/drivers/gpu/drm/amd/include/soc15_hw_ip.h b/drivers/gpu/drm/amd/include/soc15_hw_ip.h new file mode 100644 index 000000000000..f17e30cb4eae --- /dev/null +++ b/drivers/gpu/drm/amd/include/soc15_hw_ip.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _soc15_hw_ip_HEADER +#define _soc15_hw_ip_HEADER + +// HW ID +#define MP1_HWID 1 +#define MP2_HWID 2 +#define THM_HWID 3 +#define SMUIO_HWID 4 +#define FUSE_HWID 5 +#define CLKA_HWID 6 +#define PWR_HWID 10 +#define GC_HWID 11 +#define UVD_HWID 12 +#define VCN_HWID UVD_HWID +#define AUDIO_AZ_HWID 13 +#define ACP_HWID 14 +#define DCI_HWID 15 +#define DMU_HWID 271 +#define DCO_HWID 16 +#define DIO_HWID 272 +#define XDMA_HWID 17 +#define DCEAZ_HWID 18 +#define DAZ_HWID 274 +#define SDPMUX_HWID 19 +#define NTB_HWID 20 +#define IOHC_HWID 24 +#define L2IMU_HWID 28 +#define VCE_HWID 32 +#define MMHUB_HWID 34 +#define ATHUB_HWID 35 +#define DBGU_NBIO_HWID 36 +#define DFX_HWID 37 +#define DBGU0_HWID 38 +#define DBGU1_HWID 39 +#define OSSSYS_HWID 40 +#define HDP_HWID 41 +#define SDMA0_HWID 42 +#define SDMA1_HWID 43 +#define ISP_HWID 44 +#define DBGU_IO_HWID 45 +#define DF_HWID 46 +#define CLKB_HWID 47 +#define FCH_HWID 48 +#define DFX_DAP_HWID 49 +#define L1IMU_PCIE_HWID 50 +#define L1IMU_NBIF_HWID 51 +#define L1IMU_IOAGR_HWID 52 +#define L1IMU3_HWID 53 +#define L1IMU4_HWID 54 +#define L1IMU5_HWID 55 +#define L1IMU6_HWID 56 +#define L1IMU7_HWID 57 +#define L1IMU8_HWID 58 +#define L1IMU9_HWID 59 +#define L1IMU10_HWID 60 +#define L1IMU11_HWID 61 +#define L1IMU12_HWID 62 +#define L1IMU13_HWID 63 +#define L1IMU14_HWID 64 +#define L1IMU15_HWID 65 +#define WAFLC_HWID 66 +#define FCH_USB_PD_HWID 67 +#define PCIE_HWID 70 +#define PCS_HWID 80 +#define DDCL_HWID 89 +#define SST_HWID 90 +#define IOAGR_HWID 100 +#define NBIF_HWID 108 +#define IOAPIC_HWID 124 +#define SYSTEMHUB_HWID 128 +#define NTBCCP_HWID 144 +#define UMC_HWID 150 +#define SATA_HWID 168 +#define USB_HWID 170 +#define CCXSEC_HWID 176 +#define XGBE_HWID 216 +#define MP0_HWID 254 +#endif diff --git a/drivers/gpu/drm/amd/include/soc15ip.h b/drivers/gpu/drm/amd/include/vega10_ip_offset.h index 1767db69df7a..4c78dba5cf25 100644 --- a/drivers/gpu/drm/amd/include/soc15ip.h +++ b/drivers/gpu/drm/amd/include/vega10_ip_offset.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 Advanced Micro Devices, Inc. + * Copyright (C) 2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,88 +18,12 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _soc15ip_new_HEADER -#define _soc15ip_new_HEADER - -// HW ID -#define MP1_HWID 1 -#define MP2_HWID 2 -#define THM_HWID 3 -#define SMUIO_HWID 4 -#define FUSE_HWID 5 -#define CLKA_HWID 6 -#define PWR_HWID 10 -#define GC_HWID 11 -#define UVD_HWID 12 -#define VCN_HWID UVD_HWID -#define AUDIO_AZ_HWID 13 -#define ACP_HWID 14 -#define DCI_HWID 15 -#define DMU_HWID 271 -#define DCO_HWID 16 -#define DIO_HWID 272 -#define XDMA_HWID 17 -#define DCEAZ_HWID 18 -#define DAZ_HWID 274 -#define SDPMUX_HWID 19 -#define NTB_HWID 20 -#define IOHC_HWID 24 -#define L2IMU_HWID 28 -#define VCE_HWID 32 -#define MMHUB_HWID 34 -#define ATHUB_HWID 35 -#define DBGU_NBIO_HWID 36 -#define DFX_HWID 37 -#define DBGU0_HWID 38 -#define DBGU1_HWID 39 -#define OSSSYS_HWID 40 -#define HDP_HWID 41 -#define SDMA0_HWID 42 -#define SDMA1_HWID 43 -#define ISP_HWID 44 -#define DBGU_IO_HWID 45 -#define DF_HWID 46 -#define CLKB_HWID 47 -#define FCH_HWID 48 -#define DFX_DAP_HWID 49 -#define L1IMU_PCIE_HWID 50 -#define L1IMU_NBIF_HWID 51 -#define L1IMU_IOAGR_HWID 52 -#define L1IMU3_HWID 53 -#define L1IMU4_HWID 54 -#define L1IMU5_HWID 55 -#define L1IMU6_HWID 56 -#define L1IMU7_HWID 57 -#define L1IMU8_HWID 58 -#define L1IMU9_HWID 59 -#define L1IMU10_HWID 60 -#define L1IMU11_HWID 61 -#define L1IMU12_HWID 62 -#define L1IMU13_HWID 63 -#define L1IMU14_HWID 64 -#define L1IMU15_HWID 65 -#define WAFLC_HWID 66 -#define FCH_USB_PD_HWID 67 -#define PCIE_HWID 70 -#define PCS_HWID 80 -#define DDCL_HWID 89 -#define SST_HWID 90 -#define IOAGR_HWID 100 -#define NBIF_HWID 108 -#define IOAPIC_HWID 124 -#define SYSTEMHUB_HWID 128 -#define NTBCCP_HWID 144 -#define UMC_HWID 150 -#define SATA_HWID 168 -#define USB_HWID 170 -#define CCXSEC_HWID 176 -#define XGBE_HWID 216 -#define MP0_HWID 254 +#ifndef _vega10_ip_offset_HEADER +#define _vega10_ip_offset_HEADER #define MAX_INSTANCE 5 #define MAX_SEGMENT 5 - struct IP_BASE_INSTANCE { unsigned int segment[MAX_SEGMENT]; @@ -1337,7 +1261,5 @@ static const struct IP_BASE FUSE_BASE = { { { { 0x00017400, 0, 0, 0, 0 } }, #define FUSE_BASE__INST4_SEG2 0 #define FUSE_BASE__INST4_SEG3 0 #define FUSE_BASE__INST4_SEG4 0 - - #endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 4c3223a4d62b..376ed2dd52c7 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -33,7 +33,7 @@ #define PP_DPM_DISABLED 0xCCCC static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id, - void *input, void *output); + enum amd_pm_state_type *user_state); static inline int pp_check(struct pp_instance *handle) { @@ -162,7 +162,7 @@ static int pp_hw_init(void *handle) if(hwmgr->smumgr_funcs->start_smu(pp_handle->hwmgr)) { pr_err("smc start failed\n"); hwmgr->smumgr_funcs->smu_fini(pp_handle->hwmgr); - return -EINVAL;; + return -EINVAL; } if (ret == PP_DPM_DISABLED) goto exit; @@ -198,7 +198,7 @@ static int pp_late_init(void *handle) ret = pp_check(pp_handle); if (ret == 0) pp_dpm_dispatch_tasks(pp_handle, - AMD_PP_TASK_COMPLETE_INIT, NULL, NULL); + AMD_PP_TASK_COMPLETE_INIT, NULL); return 0; } @@ -392,7 +392,7 @@ static int pp_dpm_force_performance_level(void *handle, mutex_lock(&pp_handle->pp_lock); pp_dpm_en_umd_pstate(hwmgr, &level); hwmgr->request_dpm_level = level; - hwmgr_handle_task(pp_handle, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); + hwmgr_handle_task(pp_handle, AMD_PP_TASK_READJUST_POWER_STATE, NULL); mutex_unlock(&pp_handle->pp_lock); return 0; @@ -511,7 +511,7 @@ static void pp_dpm_powergate_uvd(void *handle, bool gate) } static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id, - void *input, void *output) + enum amd_pm_state_type *user_state) { int ret = 0; struct pp_instance *pp_handle = (struct pp_instance *)handle; @@ -522,7 +522,7 @@ static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id, return ret; mutex_lock(&pp_handle->pp_lock); - ret = hwmgr_handle_task(pp_handle, task_id, input, output); + ret = hwmgr_handle_task(pp_handle, task_id, user_state); mutex_unlock(&pp_handle->pp_lock); return ret; @@ -687,29 +687,6 @@ static int pp_dpm_get_fan_speed_rpm(void *handle, uint32_t *rpm) return ret; } -static int pp_dpm_get_temperature(void *handle) -{ - struct pp_hwmgr *hwmgr; - struct pp_instance *pp_handle = (struct pp_instance *)handle; - int ret = 0; - - ret = pp_check(pp_handle); - - if (ret) - return ret; - - hwmgr = pp_handle->hwmgr; - - if (hwmgr->hwmgr_func->get_temperature == NULL) { - pr_info("%s was not implemented.\n", __func__); - return 0; - } - mutex_lock(&pp_handle->pp_lock); - ret = hwmgr->hwmgr_func->get_temperature(hwmgr); - mutex_unlock(&pp_handle->pp_lock); - return ret; -} - static int pp_dpm_get_pp_num_states(void *handle, struct pp_states_info *data) { @@ -799,7 +776,7 @@ static int amd_powerplay_reset(void *handle) if (ret) return ret; - return hwmgr_handle_task(instance, AMD_PP_TASK_COMPLETE_INIT, NULL, NULL); + return hwmgr_handle_task(instance, AMD_PP_TASK_COMPLETE_INIT, NULL); } static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size) @@ -862,7 +839,10 @@ static int pp_dpm_force_clock_level(void *handle, return 0; } mutex_lock(&pp_handle->pp_lock); - hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask); + if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) + ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask); + else + ret = -EINVAL; mutex_unlock(&pp_handle->pp_lock); return ret; } @@ -992,22 +972,27 @@ static int pp_dpm_read_sensor(void *handle, int idx, int ret = 0; ret = pp_check(pp_handle); - if (ret) return ret; + if (value == NULL) + return -EINVAL; + hwmgr = pp_handle->hwmgr; - if (hwmgr->hwmgr_func->read_sensor == NULL) { - pr_info("%s was not implemented.\n", __func__); + switch (idx) { + case AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK: + *((uint32_t *)value) = hwmgr->pstate_sclk; return 0; + case AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK: + *((uint32_t *)value) = hwmgr->pstate_mclk; + return 0; + default: + mutex_lock(&pp_handle->pp_lock); + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, idx, value, size); + mutex_unlock(&pp_handle->pp_lock); + return ret; } - - mutex_lock(&pp_handle->pp_lock); - ret = hwmgr->hwmgr_func->read_sensor(hwmgr, idx, value, size); - mutex_unlock(&pp_handle->pp_lock); - - return ret; } static struct amd_vce_state* @@ -1081,6 +1066,64 @@ static int pp_dpm_get_power_profile_state(void *handle, return 0; } +static int pp_get_power_profile_mode(void *handle, char *buf) +{ + struct pp_hwmgr *hwmgr; + struct pp_instance *pp_handle = (struct pp_instance *)handle; + + if (!buf || pp_check(pp_handle)) + return -EINVAL; + + hwmgr = pp_handle->hwmgr; + + if (hwmgr->hwmgr_func->get_power_profile_mode == NULL) { + pr_info("%s was not implemented.\n", __func__); + return snprintf(buf, PAGE_SIZE, "\n"); + } + + return hwmgr->hwmgr_func->get_power_profile_mode(hwmgr, buf); +} + +static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size) +{ + struct pp_hwmgr *hwmgr; + struct pp_instance *pp_handle = (struct pp_instance *)handle; + int ret = -EINVAL; + + if (pp_check(pp_handle)) + return -EINVAL; + + hwmgr = pp_handle->hwmgr; + + if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) { + pr_info("%s was not implemented.\n", __func__); + return -EINVAL; + } + mutex_lock(&pp_handle->pp_lock); + if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) + ret = hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, input, size); + mutex_unlock(&pp_handle->pp_lock); + return ret; +} + +static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint32_t size) +{ + struct pp_hwmgr *hwmgr; + struct pp_instance *pp_handle = (struct pp_instance *)handle; + + if (pp_check(pp_handle)) + return -EINVAL; + + hwmgr = pp_handle->hwmgr; + + if (hwmgr->hwmgr_func->odn_edit_dpm_table == NULL) { + pr_info("%s was not implemented.\n", __func__); + return -EINVAL; + } + + return hwmgr->hwmgr_func->odn_edit_dpm_table(hwmgr, type, input, size); +} + static int pp_dpm_set_power_profile_state(void *handle, struct amd_pp_profile *request) { @@ -1194,6 +1237,65 @@ static int pp_dpm_notify_smu_memory_info(void *handle, return ret; } +static int pp_set_power_limit(void *handle, uint32_t limit) +{ + struct pp_hwmgr *hwmgr; + struct pp_instance *pp_handle = (struct pp_instance *)handle; + int ret = 0; + + ret = pp_check(pp_handle); + + if (ret) + return ret; + + hwmgr = pp_handle->hwmgr; + + if (hwmgr->hwmgr_func->set_power_limit == NULL) { + pr_info("%s was not implemented.\n", __func__); + return -EINVAL; + } + + if (limit == 0) + limit = hwmgr->default_power_limit; + + if (limit > hwmgr->default_power_limit) + return -EINVAL; + + mutex_lock(&pp_handle->pp_lock); + hwmgr->hwmgr_func->set_power_limit(hwmgr, limit); + hwmgr->power_limit = limit; + mutex_unlock(&pp_handle->pp_lock); + return ret; +} + +static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit) +{ + struct pp_hwmgr *hwmgr; + struct pp_instance *pp_handle = (struct pp_instance *)handle; + int ret = 0; + + ret = pp_check(pp_handle); + + if (ret) + return ret; + + if (limit == NULL) + return -EINVAL; + + hwmgr = pp_handle->hwmgr; + + mutex_lock(&pp_handle->pp_lock); + + if (default_limit) + *limit = hwmgr->default_power_limit; + else + *limit = hwmgr->power_limit; + + mutex_unlock(&pp_handle->pp_lock); + + return ret; +} + static int pp_display_configuration_change(void *handle, const struct amd_pp_display_configuration *display_config) { @@ -1432,8 +1534,28 @@ static int pp_get_display_mode_validation_clocks(void *handle, return ret; } +static int pp_set_mmhub_powergating_by_smu(void *handle) +{ + struct pp_hwmgr *hwmgr; + struct pp_instance *pp_handle = (struct pp_instance *)handle; + int ret = 0; + + ret = pp_check(pp_handle); + + if (ret) + return ret; + + hwmgr = pp_handle->hwmgr; + + if (hwmgr->hwmgr_func->set_mmhub_powergating_by_smu == NULL) { + pr_info("%s was not implemented.\n", __func__); + return 0; + } + + return hwmgr->hwmgr_func->set_mmhub_powergating_by_smu(hwmgr); +} + const struct amd_pm_funcs pp_dpm_funcs = { - .get_temperature = pp_dpm_get_temperature, .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, .force_performance_level = pp_dpm_force_performance_level, @@ -1464,6 +1586,11 @@ const struct amd_pm_funcs pp_dpm_funcs = { .switch_power_profile = pp_dpm_switch_power_profile, .set_clockgating_by_smu = pp_set_clockgating_by_smu, .notify_smu_memory_info = pp_dpm_notify_smu_memory_info, + .get_power_profile_mode = pp_get_power_profile_mode, + .set_power_profile_mode = pp_set_power_profile_mode, + .odn_edit_dpm_table = pp_odn_edit_dpm_table, + .set_power_limit = pp_set_power_limit, + .get_power_limit = pp_get_power_limit, /* export to DC */ .get_sclk = pp_dpm_get_sclk, .get_mclk = pp_dpm_get_mclk, @@ -1476,4 +1603,5 @@ const struct amd_pm_funcs pp_dpm_funcs = { .set_watermarks_for_clocks_ranges = pp_set_watermarks_for_clocks_ranges, .display_clock_voltage_request = pp_display_clock_voltage_request, .get_display_mode_validation_clocks = pp_get_display_mode_validation_clocks, + .set_mmhub_powergating_by_smu = pp_set_mmhub_powergating_by_smu, }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c index 44de0874629f..416abebb8b86 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c @@ -166,10 +166,10 @@ void cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) cz_dpm_powerup_uvd(hwmgr); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cz_dpm_update_uvd_dpm(hwmgr, false); } @@ -197,11 +197,11 @@ void cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) cgs_set_clockgating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cgs_set_powergating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cz_dpm_update_vce_dpm(hwmgr); cz_enable_disable_vce_dpm(hwmgr, true); } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index b314d09d41af..5a7b99f45d36 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c @@ -38,6 +38,7 @@ #include "cz_hwmgr.h" #include "power_state.h" #include "cz_clockpowergating.h" +#include "pp_thermal.h" #define ixSMUSVI_NB_CURRENTVID 0xD8230044 #define CURRENT_NB_VID_MASK 0xff000000 @@ -172,16 +173,12 @@ static uint32_t cz_get_max_sclk_level(struct pp_hwmgr *hwmgr) static int cz_initialize_dpm_defaults(struct pp_hwmgr *hwmgr) { struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); - uint32_t i; struct cgs_system_info sys_info = {0}; int result; cz_hwmgr->gfx_ramp_step = 256*25/100; cz_hwmgr->gfx_ramp_delay = 1; /* by default, we delay 1us */ - for (i = 0; i < CZ_MAX_HARDWARE_POWERLEVELS; i++) - cz_hwmgr->activity_target[i] = CZ_AT_DFLT; - cz_hwmgr->mgcg_cgtt_local0 = 0x00000000; cz_hwmgr->mgcg_cgtt_local1 = 0x00000000; cz_hwmgr->clock_slow_down_freq = 25000; @@ -1188,6 +1185,8 @@ static int cz_phm_unforce_dpm_levels(struct pp_hwmgr *hwmgr) cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk; cz_hwmgr->sclk_dpm.hard_min_clk = table->entries[0].clk; + hwmgr->pstate_sclk = table->entries[0].clk; + hwmgr->pstate_mclk = 0; level = cz_get_max_sclk_level(hwmgr) - 1; @@ -1559,9 +1558,6 @@ static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr, static int cz_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask) { - if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) - return -EINVAL; - switch (type) { case PP_SCLK: smum_send_msg_to_smc_with_parameter(hwmgr, @@ -1581,6 +1577,7 @@ static int cz_force_clock_level(struct pp_hwmgr *hwmgr, static int cz_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { + struct cz_hwmgr *data = (struct cz_hwmgr *)(hwmgr->backend); struct phm_clock_voltage_dependency_table *sclk_table = hwmgr->dyn_state.vddc_dependency_on_sclk; int i, now, size = 0; @@ -1598,6 +1595,18 @@ static int cz_print_clock_levels(struct pp_hwmgr *hwmgr, i, sclk_table->entries[i].clk / 100, (i == now) ? "*" : ""); break; + case PP_MCLK: + now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + ixTARGET_AND_CURRENT_PROFILE_INDEX), + TARGET_AND_CURRENT_PROFILE_INDEX, + CURR_MCLK_INDEX); + + for (i = CZ_NUM_NBPMEMORYCLOCK; i > 0; i--) + size += sprintf(buf + size, "%d: %uMhz %s\n", + CZ_NUM_NBPMEMORYCLOCK-i, data->sys_info.nbp_memory_clock[i-1] / 100, + (CZ_NUM_NBPMEMORYCLOCK-i == now) ? "*" : ""); + break; default: break; } @@ -1858,6 +1867,19 @@ static int cz_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, return 0; } +static int cz_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, + struct PP_TemperatureRange *thermal_data) +{ + struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); + + memcpy(thermal_data, &SMU7ThermalPolicy[0], sizeof(struct PP_TemperatureRange)); + + thermal_data->max = (cz_hwmgr->thermal_auto_throttling_treshold + + cz_hwmgr->sys_info.htc_hyst_lmt) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + + return 0; +} static const struct pp_hwmgr_func cz_hwmgr_funcs = { .backend_init = cz_hwmgr_backend_init, @@ -1882,7 +1904,6 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = { .get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks, .get_clock_by_type = cz_get_clock_by_type, .get_max_high_clocks = cz_get_max_high_clocks, - .get_temperature = cz_thermal_get_temperature, .read_sensor = cz_read_sensor, .power_off_asic = cz_power_off_asic, .asic_setup = cz_setup_asic_task, @@ -1890,6 +1911,7 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = { .power_state_set = cz_set_power_state_tasks, .dynamic_state_management_disable = cz_disable_dpm_tasks, .notify_cac_buffer_info = cz_notify_cac_buffer_info, + .get_thermal_temperature_range = cz_get_thermal_temperature_range, }; int cz_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.h index 508b422d6159..468c739a4299 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.h @@ -30,7 +30,6 @@ #define CZ_NUM_NBPSTATES 4 #define CZ_NUM_NBPMEMORYCLOCK 2 #define MAX_DISPLAY_CLOCK_LEVEL 8 -#define CZ_AT_DFLT 30 #define CZ_MAX_HARDWARE_POWERLEVELS 8 #define PPCZ_VOTINGRIGHTSCLIENTS_DFLT0 0x3FFFC102 #define CZ_MIN_DEEP_SLEEP_SCLK 800 @@ -185,7 +184,6 @@ struct cc6_settings { }; struct cz_hwmgr { - uint32_t activity_target[CZ_MAX_HARDWARE_POWERLEVELS]; uint32_t dpm_interval; uint32_t voltage_drop_threshold; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 2b0c53fe4c8d..fdd2c05d25d5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -223,26 +223,25 @@ int phm_register_thermal_interrupt(struct pp_hwmgr *hwmgr, const void *info) * Initializes the thermal controller subsystem. * * @param pHwMgr the address of the powerplay hardware manager. -* @param pTemperatureRange the address of the structure holding the temperature range. * @exception PP_Result_Failed if any of the paramters is NULL, otherwise the return value from the dispatcher. */ -int phm_start_thermal_controller(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *temperature_range) +int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) { - struct PP_TemperatureRange range; - - if (temperature_range == NULL) { - range.max = TEMP_RANGE_MAX; - range.min = TEMP_RANGE_MIN; - } else { - range.max = temperature_range->max; - range.min = temperature_range->min; - } + int ret = 0; + struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX}; + + if (hwmgr->hwmgr_func->get_thermal_temperature_range) + hwmgr->hwmgr_func->get_thermal_temperature_range( + hwmgr, &range); + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ThermalController) && hwmgr->hwmgr_func->start_thermal_controller != NULL) - return hwmgr->hwmgr_func->start_thermal_controller(hwmgr, &range); + ret = hwmgr->hwmgr_func->start_thermal_controller(hwmgr, &range); - return 0; + cgs_set_temperature_range(hwmgr->device, range.min, range.max); + + return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 0229f774f7a9..33eabc18211d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -60,6 +60,11 @@ uint8_t convert_to_vid(uint16_t vddc) return (uint8_t) ((6200 - (vddc * VOLTAGE_SCALE)) / 25); } +uint16_t convert_to_vddc(uint8_t vid) +{ + return (uint16_t) ((6200 - (vid * 25)) / VOLTAGE_SCALE); +} + static int phm_get_pci_bus_devfn(struct pp_hwmgr *hwmgr, struct cgs_system_info *sys_info) { @@ -162,9 +167,11 @@ int hwmgr_early_init(struct pp_instance *handle) hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK | PP_ENABLE_GFX_CG_THRU_SMU); hwmgr->pp_table_version = PP_TABLE_V0; + hwmgr->od_enabled = false; smu7_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_CZ: + hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &cz_smu_funcs; cz_init_function_pointers(hwmgr); break; @@ -176,6 +183,7 @@ int hwmgr_early_init(struct pp_instance *handle) hwmgr->feature_mask &= ~ (PP_VBI_TIME_SUPPORT_MASK | PP_ENABLE_GFX_CG_THRU_SMU); hwmgr->pp_table_version = PP_TABLE_V0; + hwmgr->od_enabled = false; break; case CHIP_TONGA: hwmgr->smumgr_funcs = &tonga_smu_funcs; @@ -213,6 +221,7 @@ int hwmgr_early_init(struct pp_instance *handle) case AMDGPU_FAMILY_RV: switch (hwmgr->chip_id) { case CHIP_RAVEN: + hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &rv_smu_funcs; rv_init_function_pointers(hwmgr); break; @@ -261,7 +270,7 @@ int hwmgr_hw_init(struct pp_instance *handle) ret = phm_enable_dynamic_state_management(hwmgr); if (ret) goto err2; - ret = phm_start_thermal_controller(hwmgr, NULL); + ret = phm_start_thermal_controller(hwmgr); ret |= psm_set_performance_states(hwmgr); if (ret) goto err2; @@ -341,7 +350,7 @@ int hwmgr_hw_resume(struct pp_instance *handle) ret = phm_enable_dynamic_state_management(hwmgr); if (ret) return ret; - ret = phm_start_thermal_controller(hwmgr, NULL); + ret = phm_start_thermal_controller(hwmgr); if (ret) return ret; @@ -369,7 +378,7 @@ static enum PP_StateUILabel power_state_convert(enum amd_pm_state_type state) } int hwmgr_handle_task(struct pp_instance *handle, enum amd_pp_task task_id, - void *input, void *output) + enum amd_pm_state_type *user_state) { int ret = 0; struct pp_hwmgr *hwmgr; @@ -391,17 +400,15 @@ int hwmgr_handle_task(struct pp_instance *handle, enum amd_pp_task task_id, break; case AMD_PP_TASK_ENABLE_USER_STATE: { - enum amd_pm_state_type ps; enum PP_StateUILabel requested_ui_label; struct pp_power_state *requested_ps = NULL; - if (input == NULL) { + if (user_state == NULL) { ret = -EINVAL; break; } - ps = *(unsigned long *)input; - requested_ui_label = power_state_convert(ps); + requested_ui_label = power_state_convert(*user_state); ret = psm_set_user_performance_state(hwmgr, requested_ui_label, &requested_ps); if (ret) return ret; @@ -932,6 +939,9 @@ int hwmgr_set_user_specify_caps(struct pp_hwmgr *hwmgr) PHM_PlatformCaps_CAC); } + if (hwmgr->feature_mask & PP_OVERDRIVE_MASK) + hwmgr->od_enabled = true; + return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c index b49d65c3e984..c9eecce5683f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c @@ -836,10 +836,10 @@ static int init_over_drive_limits( hwmgr->platform_descriptor.maxOverdriveVDDC = 0; hwmgr->platform_descriptor.overdriveVDDCStep = 0; - if (hwmgr->platform_descriptor.overdriveLimit.engineClock > 0 \ - && hwmgr->platform_descriptor.overdriveLimit.memoryClock > 0) { - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ACOverdriveSupport); + if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0 \ + || hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) { + hwmgr->od_enabled = false; + pr_debug("OverDrive feature not support by VBIOS\n"); } return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c index c3e7e34535e8..36ca7c419c90 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c @@ -1074,12 +1074,11 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, powerplay_table, (const ATOM_FIRMWARE_INFO_V2_1 *)fw_info); - if (hwmgr->platform_descriptor.overdriveLimit.engineClock > 0 - && hwmgr->platform_descriptor.overdriveLimit.memoryClock > 0 - && !phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_OverdriveDisabledByPowerBudget)) - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ACOverdriveSupport); + if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0 + && hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) { + hwmgr->od_enabled = false; + pr_debug("OverDrive feature not support by VBIOS\n"); + } return result; } @@ -1697,9 +1696,6 @@ static int pp_tables_uninitialize(struct pp_hwmgr *hwmgr) kfree(hwmgr->dyn_state.vdd_gfx_dependency_on_sclk); hwmgr->dyn_state.vdd_gfx_dependency_on_sclk = NULL; - kfree(hwmgr->dyn_state.vq_budgeting_table); - hwmgr->dyn_state.vq_budgeting_table = NULL; - return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c index 569073e3a5a1..8ddfb78f28cc 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c @@ -451,6 +451,9 @@ static int rv_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->platform_descriptor.minimumClocksReductionPercentage = 50; + hwmgr->pstate_sclk = RAVEN_UMD_PSTATE_GFXCLK; + hwmgr->pstate_mclk = RAVEN_UMD_PSTATE_FCLK; + return result; } @@ -1023,6 +1026,11 @@ static int rv_read_sensor(struct pp_hwmgr *hwmgr, int idx, return ret; } +static int rv_set_mmhub_powergating_by_smu(struct pp_hwmgr *hwmgr) +{ + return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerGateMmHub); +} + static const struct pp_hwmgr_func rv_hwmgr_funcs = { .backend_init = rv_hwmgr_backend_init, .backend_fini = rv_hwmgr_backend_fini, @@ -1056,6 +1064,7 @@ static const struct pp_hwmgr_func rv_hwmgr_funcs = { .asic_setup = rv_setup_asic_task, .power_state_set = rv_set_power_state_tasks, .dynamic_state_management_disable = rv_disable_dpm_tasks, + .set_mmhub_powergating_by_smu = rv_set_mmhub_powergating_by_smu, }; int rv_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index 69a0678ace98..402aa9cb1f78 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -162,7 +162,7 @@ void smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) AMD_CG_STATE_UNGATE); cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); smu7_update_uvd_dpm(hwmgr, false); } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_dyn_defaults.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_dyn_defaults.h index f967613191cf..3477d4dfff70 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_dyn_defaults.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_dyn_defaults.h @@ -50,6 +50,6 @@ #define SMU7_CGULVCONTROL_DFLT 0x00007450 #define SMU7_TARGETACTIVITY_DFLT 50 #define SMU7_MCLK_TARGETACTIVITY_DFLT 10 - +#define SMU7_SCLK_TARGETACTIVITY_DFLT 30 #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 41e42beff213..0202841ae639 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -48,6 +48,7 @@ #include "smu7_thermal.h" #include "smu7_clockpowergating.h" #include "processpptables.h" +#include "pp_thermal.h" #define MC_CG_ARB_FREQ_F0 0x0a #define MC_CG_ARB_FREQ_F1 0x0b @@ -90,7 +91,6 @@ enum DPM_EVENT_SRC { DPM_EVENT_SRC_DIGITAL_OR_EXTERNAL = 4 }; -static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable); static const unsigned long PhwVIslands_Magic = (unsigned long)(PHM_VIslands_Magic); static int smu7_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask); @@ -792,6 +792,76 @@ static int smu7_setup_dpm_tables_v1(struct pp_hwmgr *hwmgr) return 0; } +static int smu7_get_voltage_dependency_table( + const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table) +{ + uint8_t i = 0; + PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count), + "Voltage Lookup Table empty", + return -EINVAL); + + dep_table->count = allowed_dep_table->count; + for (i=0; i<dep_table->count; i++) { + dep_table->entries[i].clk = allowed_dep_table->entries[i].clk; + dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd; + dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset; + dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc; + dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx; + dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci; + dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd; + dep_table->entries[i].phases = allowed_dep_table->entries[i].phases; + dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable; + dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset; + } + + return 0; +} + +static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct smu7_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i; + + struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table; + + if (table_info == NULL) + return -EINVAL; + + dep_sclk_table = table_info->vdd_dep_on_sclk; + dep_mclk_table = table_info->vdd_dep_on_mclk; + + odn_table->odn_core_clock_dpm_levels.num_of_pl = + data->golden_dpm_table.sclk_table.count; + for (i=0; i<data->golden_dpm_table.sclk_table.count; i++) { + odn_table->odn_core_clock_dpm_levels.entries[i].clock = + data->golden_dpm_table.sclk_table.dpm_levels[i].value; + odn_table->odn_core_clock_dpm_levels.entries[i].enabled = true; + odn_table->odn_core_clock_dpm_levels.entries[i].vddc = dep_sclk_table->entries[i].vddc; + } + + smu7_get_voltage_dependency_table(dep_sclk_table, + (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk)); + + odn_table->odn_memory_clock_dpm_levels.num_of_pl = + data->golden_dpm_table.mclk_table.count; + for (i=0; i<data->golden_dpm_table.sclk_table.count; i++) { + odn_table->odn_memory_clock_dpm_levels.entries[i].clock = + data->golden_dpm_table.mclk_table.dpm_levels[i].value; + odn_table->odn_memory_clock_dpm_levels.entries[i].enabled = true; + odn_table->odn_memory_clock_dpm_levels.entries[i].vddc = dep_mclk_table->entries[i].vddc; + } + + smu7_get_voltage_dependency_table(dep_mclk_table, + (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk)); + + return 0; +} + static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -808,6 +878,11 @@ static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) /* save a copy of the default DPM table */ memcpy(&(data->golden_dpm_table), &(data->dpm_table), sizeof(struct smu7_dpm_table)); + + /* initialize ODN table */ + if (hwmgr->od_enabled) + smu7_odn_initial_default_setting(hwmgr); + return 0; } @@ -1275,6 +1350,58 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr) return 0; } +static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable) +{ + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); + + if (smu_data == NULL) + return -EINVAL; + + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + return 0; + + if (enable) { + if (!PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) { + PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc( + hwmgr, PPSMC_MSG_EnableAvfs), + "Failed to enable AVFS!", + return -EINVAL); + } + } else if (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) { + PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc( + hwmgr, PPSMC_MSG_DisableAvfs), + "Failed to disable AVFS!", + return -EINVAL); + } + + return 0; +} + +static int smu7_update_avfs(struct pp_hwmgr *hwmgr) +{ + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (smu_data == NULL) + return -EINVAL; + + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + return 0; + + if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + smu7_avfs_control(hwmgr, false); + } else if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK) { + smu7_avfs_control(hwmgr, false); + smu7_avfs_control(hwmgr, true); + } else { + smu7_avfs_control(hwmgr, true); + } + + return 0; +} + int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr) { int tmp_result, result = 0; @@ -1357,7 +1484,6 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->dll_default_on = false; data->mclk_dpm0_activity_target = 0xa; - data->mclk_activity_target = SMU7_MCLK_TARGETACTIVITY_DFLT; data->vddc_vddgfx_delta = 300; data->static_screen_threshold = SMU7_STATICSCREENTHRESHOLD_DFLT; data->static_screen_threshold_unit = SMU7_STATICSCREENTHRESHOLDUNIT_DFLT; @@ -1381,6 +1507,14 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->enable_pkg_pwr_tracking_feature = true; data->force_pcie_gen = PP_PCIEGenInvalid; data->ulv_supported = hwmgr->feature_mask & PP_ULV_MASK ? true : false; + data->current_profile_setting.bupdate_sclk = 1; + data->current_profile_setting.sclk_up_hyst = 0; + data->current_profile_setting.sclk_down_hyst = 100; + data->current_profile_setting.sclk_activity = SMU7_SCLK_TARGETACTIVITY_DFLT; + data->current_profile_setting.bupdate_sclk = 1; + data->current_profile_setting.mclk_up_hyst = 0; + data->current_profile_setting.mclk_down_hyst = 100; + data->current_profile_setting.mclk_activity = SMU7_MCLK_TARGETACTIVITY_DFLT; if (hwmgr->chip_id == CHIP_POLARIS12 || hwmgr->is_kicker) { uint8_t tmp1, tmp2; @@ -2266,14 +2400,18 @@ static int smu7_set_private_data_based_on_pptable_v0(struct pp_hwmgr *hwmgr) struct phm_clock_voltage_dependency_table *allowed_mclk_vddci_table = hwmgr->dyn_state.vddci_dependency_on_mclk; PP_ASSERT_WITH_CODE(allowed_sclk_vddc_table != NULL, - "VDDC dependency on SCLK table is missing. This table is mandatory\n", return -EINVAL); + "VDDC dependency on SCLK table is missing. This table is mandatory", + return -EINVAL); PP_ASSERT_WITH_CODE(allowed_sclk_vddc_table->count >= 1, - "VDDC dependency on SCLK table has to have is missing. This table is mandatory\n", return -EINVAL); + "VDDC dependency on SCLK table has to have is missing. This table is mandatory", + return -EINVAL); PP_ASSERT_WITH_CODE(allowed_mclk_vddc_table != NULL, - "VDDC dependency on MCLK table is missing. This table is mandatory\n", return -EINVAL); + "VDDC dependency on MCLK table is missing. This table is mandatory", + return -EINVAL); PP_ASSERT_WITH_CODE(allowed_mclk_vddc_table->count >= 1, - "VDD dependency on MCLK table has to have is missing. This table is mandatory\n", return -EINVAL); + "VDD dependency on MCLK table has to have is missing. This table is mandatory", + return -EINVAL); data->min_vddc_in_pptable = (uint16_t)allowed_sclk_vddc_table->entries[0].v; data->max_vddc_in_pptable = (uint16_t)allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; @@ -2574,8 +2712,10 @@ static int smu7_get_profiling_clk(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_le break; } } - if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) + if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { *sclk_mask = 0; + tmp_sclk = hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].clk; + } if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) *sclk_mask = hwmgr->dyn_state.vddc_dependency_on_sclk->count-1; @@ -2590,8 +2730,10 @@ static int smu7_get_profiling_clk(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_le break; } } - if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) + if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { *sclk_mask = 0; + tmp_sclk = table_info->vdd_dep_on_sclk->entries[0].clk; + } if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) *sclk_mask = table_info->vdd_dep_on_sclk->count - 1; @@ -2603,6 +2745,9 @@ static int smu7_get_profiling_clk(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_le *mclk_mask = golden_dpm_table->mclk_table.count - 1; *pcie_mask = data->dpm_table.pcie_speed_table.count - 1; + hwmgr->pstate_sclk = tmp_sclk; + hwmgr->pstate_mclk = tmp_mclk; + return 0; } @@ -2614,6 +2759,9 @@ static int smu7_force_dpm_level(struct pp_hwmgr *hwmgr, uint32_t mclk_mask = 0; uint32_t pcie_mask = 0; + if (hwmgr->pstate_sclk == 0) + smu7_get_profiling_clk(hwmgr, level, &sclk_mask, &mclk_mask, &pcie_mask); + switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: ret = smu7_force_dpm_highest(hwmgr); @@ -2756,10 +2904,12 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); - disable_mclk_switching = ((1 < info.display_count) || - disable_mclk_switching_for_frame_lock || - smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) || - (mode_info.refresh_rate > 120)); + if (info.display_count == 0) + disable_mclk_switching = false; + else + disable_mclk_switching = ((1 < info.display_count) || + disable_mclk_switching_for_frame_lock || + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; @@ -3312,7 +3462,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, void *value, int *size) { uint32_t sclk, mclk, activity_percent; - uint32_t offset; + uint32_t offset, val_vid; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); /* size must be at least 4 bytes for all sensors */ @@ -3360,6 +3510,16 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, return -EINVAL; *size = sizeof(struct pp_gpu_power); return smu7_get_gpu_power(hwmgr, (struct pp_gpu_power *)value); + case AMDGPU_PP_SENSOR_VDDGFX: + if ((data->vr_config & 0xff) == 0x2) + val_vid = PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, PWR_SVI2_STATUS, PLANE2_VID); + else + val_vid = PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, PWR_SVI2_STATUS, PLANE1_VID); + + *((uint32_t *)value) = (uint32_t)convert_to_vddc(val_vid); + return 0; default: return -EINVAL; } @@ -3382,8 +3542,6 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons uint32_t i; struct cgs_display_info info = {0}; - data->need_update_smu7_dpm_table = 0; - for (i = 0; i < sclk_table->count; i++) { if (sclk == sclk_table->dpm_levels[i].value) break; @@ -3525,108 +3683,27 @@ static int smu7_populate_and_upload_sclk_mclk_dpm_levels( struct pp_hwmgr *hwmgr, const void *input) { int result = 0; - const struct phm_set_power_state_input *states = - (const struct phm_set_power_state_input *)input; - const struct smu7_power_state *smu7_ps = - cast_const_phw_smu7_power_state(states->pnew_state); struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t sclk = smu7_ps->performance_levels - [smu7_ps->performance_level_count - 1].engine_clock; - uint32_t mclk = smu7_ps->performance_levels - [smu7_ps->performance_level_count - 1].memory_clock; struct smu7_dpm_table *dpm_table = &data->dpm_table; - - struct smu7_dpm_table *golden_dpm_table = &data->golden_dpm_table; - uint32_t dpm_count, clock_percent; - uint32_t i; + uint32_t count; + struct smu7_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_odn_clock_levels *odn_sclk_table = &(odn_table->odn_core_clock_dpm_levels); + struct phm_odn_clock_levels *odn_mclk_table = &(odn_table->odn_memory_clock_dpm_levels); if (0 == data->need_update_smu7_dpm_table) return 0; - if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK) { - dpm_table->sclk_table.dpm_levels - [dpm_table->sclk_table.count - 1].value = sclk; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinACSupport) || - phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinDCSupport)) { - /* Need to do calculation based on the golden DPM table - * as the Heatmap GPU Clock axis is also based on the default values - */ - PP_ASSERT_WITH_CODE( - (golden_dpm_table->sclk_table.dpm_levels - [golden_dpm_table->sclk_table.count - 1].value != 0), - "Divide by 0!", - return -EINVAL); - dpm_count = dpm_table->sclk_table.count < 2 ? 0 : dpm_table->sclk_table.count - 2; - - for (i = dpm_count; i > 1; i--) { - if (sclk > golden_dpm_table->sclk_table.dpm_levels[golden_dpm_table->sclk_table.count-1].value) { - clock_percent = - ((sclk - - golden_dpm_table->sclk_table.dpm_levels[golden_dpm_table->sclk_table.count-1].value - ) * 100) - / golden_dpm_table->sclk_table.dpm_levels[golden_dpm_table->sclk_table.count-1].value; - - dpm_table->sclk_table.dpm_levels[i].value = - golden_dpm_table->sclk_table.dpm_levels[i].value + - (golden_dpm_table->sclk_table.dpm_levels[i].value * - clock_percent)/100; - - } else if (golden_dpm_table->sclk_table.dpm_levels[dpm_table->sclk_table.count-1].value > sclk) { - clock_percent = - ((golden_dpm_table->sclk_table.dpm_levels[golden_dpm_table->sclk_table.count - 1].value - - sclk) * 100) - / golden_dpm_table->sclk_table.dpm_levels[golden_dpm_table->sclk_table.count-1].value; - - dpm_table->sclk_table.dpm_levels[i].value = - golden_dpm_table->sclk_table.dpm_levels[i].value - - (golden_dpm_table->sclk_table.dpm_levels[i].value * - clock_percent) / 100; - } else - dpm_table->sclk_table.dpm_levels[i].value = - golden_dpm_table->sclk_table.dpm_levels[i].value; - } + if (hwmgr->od_enabled && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK) { + for (count = 0; count < dpm_table->sclk_table.count; count++) { + dpm_table->sclk_table.dpm_levels[count].enabled = odn_sclk_table->entries[count].enabled; + dpm_table->sclk_table.dpm_levels[count].value = odn_sclk_table->entries[count].clock; } } - if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) { - dpm_table->mclk_table.dpm_levels - [dpm_table->mclk_table.count - 1].value = mclk; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinACSupport) || - phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinDCSupport)) { - - PP_ASSERT_WITH_CODE( - (golden_dpm_table->mclk_table.dpm_levels - [golden_dpm_table->mclk_table.count-1].value != 0), - "Divide by 0!", - return -EINVAL); - dpm_count = dpm_table->mclk_table.count < 2 ? 0 : dpm_table->mclk_table.count - 2; - for (i = dpm_count; i > 1; i--) { - if (golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count-1].value < mclk) { - clock_percent = ((mclk - - golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count-1].value) * 100) - / golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count-1].value; - - dpm_table->mclk_table.dpm_levels[i].value = - golden_dpm_table->mclk_table.dpm_levels[i].value + - (golden_dpm_table->mclk_table.dpm_levels[i].value * - clock_percent) / 100; - - } else if (golden_dpm_table->mclk_table.dpm_levels[dpm_table->mclk_table.count-1].value > mclk) { - clock_percent = ( - (golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count-1].value - mclk) - * 100) - / golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count-1].value; - - dpm_table->mclk_table.dpm_levels[i].value = - golden_dpm_table->mclk_table.dpm_levels[i].value - - (golden_dpm_table->mclk_table.dpm_levels[i].value * - clock_percent) / 100; - } else - dpm_table->mclk_table.dpm_levels[i].value = - golden_dpm_table->mclk_table.dpm_levels[i].value; - } + if (hwmgr->od_enabled && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) { + for (count = 0; count < dpm_table->mclk_table.count; count++) { + dpm_table->mclk_table.dpm_levels[count].enabled = odn_mclk_table->entries[count].enabled; + dpm_table->mclk_table.dpm_levels[count].value = odn_mclk_table->entries[count].clock; } } @@ -3748,7 +3825,7 @@ static int smu7_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) return -EINVAL); } - data->need_update_smu7_dpm_table = 0; + data->need_update_smu7_dpm_table &= DPMTABLE_OD_UPDATE_VDDC; return 0; } @@ -3825,6 +3902,11 @@ static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) "Failed to populate and upload SCLK MCLK DPM levels!", result = tmp_result); + tmp_result = smu7_update_avfs(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to update avfs voltages!", + result = tmp_result); + tmp_result = smu7_generate_dpm_level_enable_mask(hwmgr, input); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to generate DPM level enabled mask!", @@ -4016,6 +4098,7 @@ static int smu7_check_states_equal(struct pp_hwmgr *hwmgr, const struct smu7_power_state *psa; const struct smu7_power_state *psb; int i; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); if (pstate1 == NULL || pstate2 == NULL || equal == NULL) return -EINVAL; @@ -4040,6 +4123,10 @@ static int smu7_check_states_equal(struct pp_hwmgr *hwmgr, *equal = ((psa->uvd_clks.vclk == psb->uvd_clks.vclk) && (psa->uvd_clks.dclk == psb->uvd_clks.dclk)); *equal &= ((psa->vce_clks.evclk == psb->vce_clks.evclk) && (psa->vce_clks.ecclk == psb->vce_clks.ecclk)); *equal &= (psa->sclk_threshold == psb->sclk_threshold); + /* For OD call, set value based on flag */ + *equal &= !(data->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | + DPMTABLE_OD_UPDATE_MCLK | + DPMTABLE_OD_UPDATE_VDDC)); return 0; } @@ -4211,9 +4298,7 @@ static int smu7_force_clock_level(struct pp_hwmgr *hwmgr, { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - if (hwmgr->request_dpm_level & (AMD_DPM_FORCED_LEVEL_AUTO | - AMD_DPM_FORCED_LEVEL_LOW | - AMD_DPM_FORCED_LEVEL_HIGH)) + if (mask == 0) return -EINVAL; switch (type) { @@ -4232,15 +4317,15 @@ static int smu7_force_clock_level(struct pp_hwmgr *hwmgr, case PP_PCIE: { uint32_t tmp = mask & data->dpm_level_enable_mask.pcie_dpm_enable_mask; - uint32_t level = 0; - while (tmp >>= 1) - level++; - - if (!data->pcie_dpm_key_disabled) - smum_send_msg_to_smc_with_parameter(hwmgr, + if (!data->pcie_dpm_key_disabled) { + if (fls(tmp) != ffs(tmp)) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PCIeDPM_UnForceLevel); + else + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_PCIeDPM_ForceLevel, - level); + fls(tmp) - 1); + } break; } default: @@ -4257,6 +4342,9 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu7_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); struct smu7_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); struct smu7_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table); + struct smu7_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_odn_clock_levels *odn_sclk_table = &(odn_table->odn_core_clock_dpm_levels); + struct phm_odn_clock_levels *odn_mclk_table = &(odn_table->odn_memory_clock_dpm_levels); int i, now, size = 0; uint32_t clock, pcie_speed; @@ -4309,6 +4397,24 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, (pcie_table->dpm_levels[i].value == 2) ? "8.0GT/s, x16" : "", (i == now) ? "*" : ""); break; + case OD_SCLK: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s: \n", "OD_SCLK"); + for (i = 0; i < odn_sclk_table->num_of_pl; i++) + size += sprintf(buf + size, "%d: %10uMhz %10u mV\n", + i, odn_sclk_table->entries[i].clock / 100, + odn_sclk_table->entries[i].vddc); + } + break; + case OD_MCLK: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s: \n", "OD_MCLK"); + for (i = 0; i < odn_mclk_table->num_of_pl; i++) + size += sprintf(buf + size, "%d: %10uMhz %10u mV\n", + i, odn_mclk_table->entries[i].clock / 100, + odn_mclk_table->entries[i].vddc); + } + break; default: break; } @@ -4583,33 +4689,6 @@ static int smu7_set_power_profile_state(struct pp_hwmgr *hwmgr, return result; } -static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable) -{ - struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - - if (smu_data == NULL) - return -EINVAL; - - if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) - return 0; - - if (enable) { - if (!PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc( - hwmgr, PPSMC_MSG_EnableAvfs), - "Failed to enable AVFS!", - return -EINVAL); - } else if (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc( - hwmgr, PPSMC_MSG_DisableAvfs), - "Failed to disable AVFS!", - return -EINVAL); - - return 0; -} - static int smu7_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, uint32_t virtual_addr_low, uint32_t virtual_addr_hi, @@ -4670,6 +4749,192 @@ static int smu7_get_max_high_clocks(struct pp_hwmgr *hwmgr, return 0; } +static int smu7_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, + struct PP_TemperatureRange *thermal_data) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)hwmgr->pptable; + + memcpy(thermal_data, &SMU7ThermalPolicy[0], sizeof(struct PP_TemperatureRange)); + + if (hwmgr->pp_table_version == PP_TABLE_V1) + thermal_data->max = table_info->cac_dtp_table->usSoftwareShutdownTemp * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + else if (hwmgr->pp_table_version == PP_TABLE_V0) + thermal_data->max = data->thermal_temp_setting.temperature_shutdown * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + + return 0; +} + +static bool smu7_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + uint32_t clk, + uint32_t voltage) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t min_vddc; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table; + + if (table_info == NULL) + return -EINVAL; + + dep_sclk_table = table_info->vdd_dep_on_sclk; + min_vddc = dep_sclk_table->entries[0].vddc; + + if (voltage < min_vddc || voltage > 2000) { + pr_info("OD voltage is out of range [%d - 2000] mV\n", min_vddc); + return false; + } + + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { + if (data->vbios_boot_state.sclk_bootup_value > clk || + hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) { + pr_info("OD engine clock is out of range [%d - %d] MHz\n", + data->vbios_boot_state.sclk_bootup_value, + hwmgr->platform_descriptor.overdriveLimit.engineClock / 100); + return false; + } + } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { + if (data->vbios_boot_state.mclk_bootup_value > clk || + hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) { + pr_info("OD memory clock is out of range [%d - %d] MHz\n", + data->vbios_boot_state.mclk_bootup_value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock / 100); + return false; + } + } else { + return false; + } + + return true; +} + +static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct smu7_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i; + + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table; + struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table; + + if (table_info == NULL) + return; + + for (i=0; i<data->dpm_table.sclk_table.count; i++) { + if (odn_table->odn_core_clock_dpm_levels.entries[i].clock != + data->dpm_table.sclk_table.dpm_levels[i].value) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + break; + } + } + + for (i=0; i<data->dpm_table.sclk_table.count; i++) { + if (odn_table->odn_memory_clock_dpm_levels.entries[i].clock != + data->dpm_table.mclk_table.dpm_levels[i].value) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + break; + } + } + + dep_table = table_info->vdd_dep_on_mclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk); + + for (i=0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + break; + } + } + if (i == dep_table->count) + data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + + dep_table = table_info->vdd_dep_on_sclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk); + for (i=0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + break; + } + } + if (i == dep_table->count) + data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; +} + +static int smu7_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size) +{ + uint32_t i; + struct phm_odn_clock_levels *podn_dpm_table_in_backend = NULL; + struct smu7_odn_clock_voltage_dependency_table *podn_vdd_dep_in_backend = NULL; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + uint32_t input_clk; + uint32_t input_vol; + uint32_t input_level; + + PP_ASSERT_WITH_CODE(input, "NULL user input for clock and voltage", + return -EINVAL); + + if (!hwmgr->od_enabled) { + pr_info("OverDrive feature not enabled\n"); + return -EINVAL; + } + + if (PP_OD_EDIT_SCLK_VDDC_TABLE == type) { + podn_dpm_table_in_backend = &data->odn_dpm_table.odn_core_clock_dpm_levels; + podn_vdd_dep_in_backend = &data->odn_dpm_table.vdd_dependency_on_sclk; + PP_ASSERT_WITH_CODE((podn_dpm_table_in_backend && podn_vdd_dep_in_backend), + "Failed to get ODN SCLK and Voltage tables", + return -EINVAL); + } else if (PP_OD_EDIT_MCLK_VDDC_TABLE == type) { + podn_dpm_table_in_backend = &data->odn_dpm_table.odn_memory_clock_dpm_levels; + podn_vdd_dep_in_backend = &data->odn_dpm_table.vdd_dependency_on_mclk; + + PP_ASSERT_WITH_CODE((podn_dpm_table_in_backend && podn_vdd_dep_in_backend), + "Failed to get ODN MCLK and Voltage tables", + return -EINVAL); + } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) { + smu7_odn_initial_default_setting(hwmgr); + return 0; + } else if (PP_OD_COMMIT_DPM_TABLE == type) { + smu7_check_dpm_table_updated(hwmgr); + return 0; + } else { + return -EINVAL; + } + + for (i = 0; i < size; i += 3) { + if (i + 3 > size || input[i] >= podn_dpm_table_in_backend->num_of_pl) { + pr_info("invalid clock voltage input \n"); + return 0; + } + input_level = input[i]; + input_clk = input[i+1] * 100; + input_vol = input[i+2]; + + if (smu7_check_clk_voltage_valid(hwmgr, type, input_clk, input_vol)) { + podn_dpm_table_in_backend->entries[input_level].clock = input_clk; + podn_vdd_dep_in_backend->entries[input_level].clk = input_clk; + podn_dpm_table_in_backend->entries[input_level].vddc = input_vol; + podn_vdd_dep_in_backend->entries[input_level].vddc = input_vol; + } else { + return -EINVAL; + } + } + + return 0; +} + + static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .backend_init = &smu7_hwmgr_backend_init, .backend_fini = &smu7_hwmgr_backend_fini, @@ -4693,7 +4958,6 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .display_config_changed = smu7_display_configuration_changed_task, .set_max_fan_pwm_output = smu7_set_max_fan_pwm_output, .set_max_fan_rpm_output = smu7_set_max_fan_rpm_output, - .get_temperature = smu7_thermal_get_temperature, .stop_thermal_controller = smu7_thermal_stop_thermal_controller, .get_fan_speed_info = smu7_fan_ctrl_get_fan_speed_info, .get_fan_speed_percent = smu7_fan_ctrl_get_fan_speed_percent, @@ -4723,6 +4987,9 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .start_thermal_controller = smu7_start_thermal_controller, .notify_cac_buffer_info = smu7_notify_cac_buffer_info, .get_max_high_clocks = smu7_get_max_high_clocks, + .get_thermal_temperature_range = smu7_get_thermal_temperature_range, + .odn_edit_dpm_table = smu7_odn_edit_dpm_table, + .set_power_limit = smu7_set_power_limit, }; uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock, @@ -4754,4 +5021,3 @@ int smu7_init_function_pointers(struct pp_hwmgr *hwmgr) return ret; } - diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h index e021154aedbd..3bcfc61cd5a2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h @@ -34,11 +34,6 @@ #define SMU7_VOLTAGE_CONTROL_BY_SVID2 0x2 #define SMU7_VOLTAGE_CONTROL_MERGED 0x3 -#define DPMTABLE_OD_UPDATE_SCLK 0x00000001 -#define DPMTABLE_OD_UPDATE_MCLK 0x00000002 -#define DPMTABLE_UPDATE_SCLK 0x00000004 -#define DPMTABLE_UPDATE_MCLK 0x00000008 - enum gpu_pt_config_reg_type { GPU_CONFIGREG_MMR = 0, GPU_CONFIGREG_SMC_IND, @@ -178,9 +173,34 @@ struct smu7_pcie_perf_range { uint16_t min; }; +struct smu7_odn_clock_voltage_dependency_table { + uint32_t count; + phm_ppt_v1_clock_voltage_dependency_record entries[MAX_REGULAR_DPM_NUMBER]; +}; + +struct smu7_odn_dpm_table { + struct phm_odn_clock_levels odn_core_clock_dpm_levels; + struct phm_odn_clock_levels odn_memory_clock_dpm_levels; + struct smu7_odn_clock_voltage_dependency_table vdd_dependency_on_sclk; + struct smu7_odn_clock_voltage_dependency_table vdd_dependency_on_mclk; + uint32_t odn_mclk_min_limit; +}; + +struct profile_mode_setting { + uint8_t bupdate_sclk; + uint8_t sclk_up_hyst; + uint8_t sclk_down_hyst; + uint16_t sclk_activity; + uint8_t bupdate_mclk; + uint8_t mclk_up_hyst; + uint8_t mclk_down_hyst; + uint16_t mclk_activity; +}; + struct smu7_hwmgr { struct smu7_dpm_table dpm_table; struct smu7_dpm_table golden_dpm_table; + struct smu7_odn_dpm_table odn_dpm_table; uint32_t voting_rights_clients[8]; uint32_t static_screen_threshold_unit; @@ -280,7 +300,6 @@ struct smu7_hwmgr { struct smu7_pcie_perf_range pcie_lane_power_saving; bool use_pcie_performance_levels; bool use_pcie_power_saving_levels; - uint32_t mclk_activity_target; uint32_t mclk_dpm0_activity_target; uint32_t low_sclk_interrupt_threshold; uint32_t last_mclk_dpm_enable_mask; @@ -305,6 +324,9 @@ struct smu7_hwmgr { uint32_t frame_time_x2; uint16_t mem_latency_high; uint16_t mem_latency_low; + uint32_t vr_config; + struct profile_mode_setting custom_profile_setting; + struct profile_mode_setting current_profile_setting; }; /* To convert to Q8.8 format for firmware */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c index 85ca16abb626..a93829dfd730 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c @@ -857,6 +857,8 @@ int smu7_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + n = (n & 0xff) << 8; + if (data->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) return smum_send_msg_to_smc_with_parameter(hwmgr, @@ -903,12 +905,12 @@ int smu7_enable_power_containment(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == smc_result), "Failed to enable PkgPwrTracking in SMC.", result = -1;); if (0 == smc_result) { - uint32_t default_limit = - (uint32_t)(cac_table->usMaximumPowerDeliveryLimit * 256); + hwmgr->default_power_limit = hwmgr->power_limit = + cac_table->usMaximumPowerDeliveryLimit; data->power_containment_features |= POWERCONTAINMENT_FEATURE_PkgPwrLimit; - if (smu7_set_power_limit(hwmgr, default_limit)) + if (smu7_set_power_limit(hwmgr, hwmgr->power_limit)) pr_err("Failed to set Default Power Limit in SMC!"); } } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c index d7aa643cdb51..f6573ed0357d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c @@ -310,9 +310,9 @@ int smu7_thermal_get_temperature(struct pp_hwmgr *hwmgr) static int smu7_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, uint32_t low_temp, uint32_t high_temp) { - uint32_t low = SMU7_THERMAL_MINIMUM_ALERT_TEMP * + int low = SMU7_THERMAL_MINIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - uint32_t high = SMU7_THERMAL_MAXIMUM_ALERT_TEMP * + int high = SMU7_THERMAL_MAXIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; if (low < low_temp) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 2d55dabc77d4..1d442a498bf6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -49,6 +49,10 @@ #include "cgs_linux.h" #include "ppinterrupt.h" #include "pp_overdriver.h" +#include "pp_thermal.h" + +#include "smuio/smuio_9_0_offset.h" +#include "smuio/smuio_9_0_sh_mask.h" #define VOLTAGE_SCALE 4 #define VOLTAGE_VID_OFFSET_SCALE1 625 @@ -756,6 +760,9 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->backend = data; + hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO; + hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO; + vega10_set_default_registry_data(hwmgr); data->disable_dpm_mask = 0xff; @@ -1380,14 +1387,12 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) || PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) { - data->odn_dpm_table.odn_core_clock_dpm_levels. - number_of_performance_levels = data->dpm_table.gfx_table.count; + data->odn_dpm_table.odn_core_clock_dpm_levels.num_of_pl = + data->dpm_table.gfx_table.count; for (i = 0; i < data->dpm_table.gfx_table.count; i++) { - data->odn_dpm_table.odn_core_clock_dpm_levels. - performance_level_entries[i].clock = + data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].clock = data->dpm_table.gfx_table.dpm_levels[i].value; - data->odn_dpm_table.odn_core_clock_dpm_levels. - performance_level_entries[i].enabled = true; + data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].enabled = true; } data->odn_dpm_table.vdd_dependency_on_sclk.count = @@ -1403,14 +1408,12 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) dep_gfx_table->entries[i].cks_voffset; } - data->odn_dpm_table.odn_memory_clock_dpm_levels. - number_of_performance_levels = data->dpm_table.mem_table.count; + data->odn_dpm_table.odn_memory_clock_dpm_levels.num_of_pl = + data->dpm_table.mem_table.count; for (i = 0; i < data->dpm_table.mem_table.count; i++) { - data->odn_dpm_table.odn_memory_clock_dpm_levels. - performance_level_entries[i].clock = + data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].clock = data->dpm_table.mem_table.dpm_levels[i].value; - data->odn_dpm_table.odn_memory_clock_dpm_levels. - performance_level_entries[i].enabled = true; + data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].enabled = true; } data->odn_dpm_table.vdd_dependency_on_mclk.count = dep_mclk_table->count; @@ -3162,16 +3165,19 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, minimum_clocks.memoryClock = stable_pstate_mclk; } - disable_mclk_switching_for_frame_lock = phm_cap_enabled( - hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); - disable_mclk_switching_for_vr = PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR); + disable_mclk_switching_for_frame_lock = + PP_CAP(PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); + disable_mclk_switching_for_vr = + PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR); force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh); - disable_mclk_switching = (info.display_count > 1) || - disable_mclk_switching_for_frame_lock || - disable_mclk_switching_for_vr || - force_mclk_high; + if (info.display_count == 0) + disable_mclk_switching = false; + else + disable_mclk_switching = (info.display_count > 1) || + disable_mclk_switching_for_frame_lock || + disable_mclk_switching_for_vr || + force_mclk_high; sclk = vega10_ps->performance_levels[0].gfx_clock; mclk = vega10_ps->performance_levels[0].mem_clock; @@ -3348,11 +3354,9 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels( dpm_count < dpm_table->gfx_table.count; dpm_count++) { dpm_table->gfx_table.dpm_levels[dpm_count].enabled = - data->odn_dpm_table.odn_core_clock_dpm_levels. - performance_level_entries[dpm_count].enabled; + data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].enabled; dpm_table->gfx_table.dpm_levels[dpm_count].value = - data->odn_dpm_table.odn_core_clock_dpm_levels. - performance_level_entries[dpm_count].clock; + data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].clock; } } @@ -3362,11 +3366,9 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels( dpm_count < dpm_table->mem_table.count; dpm_count++) { dpm_table->mem_table.dpm_levels[dpm_count].enabled = - data->odn_dpm_table.odn_memory_clock_dpm_levels. - performance_level_entries[dpm_count].enabled; + data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].enabled; dpm_table->mem_table.dpm_levels[dpm_count].value = - data->odn_dpm_table.odn_memory_clock_dpm_levels. - performance_level_entries[dpm_count].clock; + data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].clock; } } @@ -3398,8 +3400,7 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels( dpm_table-> gfx_table.dpm_levels[dpm_table->gfx_table.count - 1]. value = sclk; - if (PP_CAP(PHM_PlatformCaps_OD6PlusinACSupport) || - PP_CAP(PHM_PlatformCaps_OD6PlusinDCSupport)) { + if (hwmgr->od_enabled) { /* Need to do calculation based on the golden DPM table * as the Heatmap GPU Clock axis is also based on * the default values @@ -3453,9 +3454,7 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels( mem_table.dpm_levels[dpm_table->mem_table.count - 1]. value = mclk; - if (PP_CAP(PHM_PlatformCaps_OD6PlusinACSupport) || - PP_CAP(PHM_PlatformCaps_OD6PlusinDCSupport)) { - + if (hwmgr->od_enabled) { PP_ASSERT_WITH_CODE( golden_dpm_table->mem_table.dpm_levels [golden_dpm_table->mem_table.count - 1].value, @@ -3894,7 +3893,9 @@ static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr, return -EINVAL); vega10_read_arg_from_smc(hwmgr, &value); + /* power value is an integer */ + memset(query, 0, sizeof *query); query->average_gpu_power = value << 8; return 0; @@ -3907,6 +3908,7 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); struct vega10_dpm_table *dpm_table = &data->dpm_table; int ret = 0; + uint32_t reg, val_vid; switch (idx) { case AMDGPU_PP_SENSOR_GFX_SCLK: @@ -3953,10 +3955,20 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, ret = vega10_get_gpu_power(hwmgr, (struct pp_gpu_power *)value); } break; + case AMDGPU_PP_SENSOR_VDDGFX: + reg = soc15_get_register_offset(SMUIO_HWID, 0, + mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX, + mmSMUSVI0_PLANE0_CURRENTVID); + val_vid = (cgs_read_register(hwmgr->device, reg) & + SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK) >> + SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT; + *((uint32_t *)value) = (uint32_t)convert_to_vddc((uint8_t)val_vid); + return 0; default: ret = -EINVAL; break; } + return ret; } @@ -4169,6 +4181,8 @@ static int vega10_get_profiling_clk_mask(struct pp_hwmgr *hwmgr, enum amd_dpm_fo *sclk_mask = VEGA10_UMD_PSTATE_GFXCLK_LEVEL; *soc_mask = VEGA10_UMD_PSTATE_SOCCLK_LEVEL; *mclk_mask = VEGA10_UMD_PSTATE_MCLK_LEVEL; + hwmgr->pstate_sclk = table_info->vdd_dep_on_sclk->entries[VEGA10_UMD_PSTATE_GFXCLK_LEVEL].clk; + hwmgr->pstate_mclk = table_info->vdd_dep_on_mclk->entries[VEGA10_UMD_PSTATE_MCLK_LEVEL].clk; } if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { @@ -4210,6 +4224,9 @@ static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, uint32_t mclk_mask = 0; uint32_t soc_mask = 0; + if (hwmgr->pstate_sclk == 0) + vega10_get_profiling_clk_mask(hwmgr, level, &sclk_mask, &mclk_mask, &soc_mask); + switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: ret = vega10_force_dpm_highest(hwmgr); @@ -4219,6 +4236,11 @@ static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, break; case AMD_DPM_FORCED_LEVEL_AUTO: ret = vega10_unforce_dpm_levels(hwmgr); + if (hwmgr->default_power_profile_mode != hwmgr->power_profile_mode) { + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, + 1 << hwmgr->default_power_profile_mode); + hwmgr->power_profile_mode = hwmgr->default_power_profile_mode; + } break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: @@ -4242,6 +4264,7 @@ static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, else if (level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) vega10_set_fan_control_mode(hwmgr, AMD_FAN_CTRL_AUTO); } + return ret; } @@ -4488,26 +4511,11 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask) { struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); - int i; - - if (hwmgr->request_dpm_level & (AMD_DPM_FORCED_LEVEL_AUTO | - AMD_DPM_FORCED_LEVEL_LOW | - AMD_DPM_FORCED_LEVEL_HIGH)) - return -EINVAL; switch (type) { case PP_SCLK: - for (i = 0; i < 32; i++) { - if (mask & (1 << i)) - break; - } - data->smc_state_table.gfx_boot_level = i; - - for (i = 31; i >= 0; i--) { - if (mask & (1 << i)) - break; - } - data->smc_state_table.gfx_max_level = i; + data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0; + data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0; PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), "Failed to upload boot level to lowest!", @@ -4519,17 +4527,8 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, break; case PP_MCLK: - for (i = 0; i < 32; i++) { - if (mask & (1 << i)) - break; - } - data->smc_state_table.mem_boot_level = i; - - for (i = 31; i >= 0; i--) { - if (mask & (1 << i)) - break; - } - data->smc_state_table.mem_max_level = i; + data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0; + data->smc_state_table.mem_max_level = mask ? (fls(mask) - 1) : 0; PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), "Failed to upload boot level to lowest!", @@ -4988,6 +4987,20 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, return 0; } +static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, + struct PP_TemperatureRange *thermal_data) +{ + struct phm_ppt_v2_information *table_info = + (struct phm_ppt_v2_information *)hwmgr->pptable; + + memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); + + thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + + return 0; +} + static int vega10_register_thermal_interrupt(struct pp_hwmgr *hwmgr, const void *info) { @@ -5020,6 +5033,77 @@ static int vega10_register_thermal_interrupt(struct pp_hwmgr *hwmgr, return 0; } +static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) +{ + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); + uint32_t i, size = 0; + static const uint8_t profile_mode_setting[5][4] = {{70, 60, 1, 3,}, + {90, 60, 0, 0,}, + {70, 60, 0, 0,}, + {70, 90, 0, 0,}, + {30, 60, 0, 6,}, + }; + static const char *profile_name[6] = {"3D_FULL_SCREEN", + "POWER_SAVING", + "VIDEO", + "VR", + "COMPUTE", + "CUSTOM"}; + static const char *title[6] = {"NUM", + "MODE_NAME", + "BUSY_SET_POINT", + "FPS", + "USE_RLC_BUSY", + "MIN_ACTIVE_LEVEL"}; + + if (!buf) + return -EINVAL; + + size += sprintf(buf + size, "%s %16s %s %s %s %s\n",title[0], + title[1], title[2], title[3], title[4], title[5]); + + for (i = 0; i < PP_SMC_POWER_PROFILE_CUSTOM; i++) + size += sprintf(buf + size, "%3d %14s%s: %14d %3d %10d %14d\n", + i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + profile_mode_setting[i][0], profile_mode_setting[i][1], + profile_mode_setting[i][2], profile_mode_setting[i][3]); + size += sprintf(buf + size, "%3d %14s%s: %14d %3d %10d %14d\n", i, + profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + data->custom_profile_mode[0], data->custom_profile_mode[1], + data->custom_profile_mode[2], data->custom_profile_mode[3]); + return size; +} + +static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size) +{ + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); + uint8_t busy_set_point; + uint8_t FPS; + uint8_t use_rlc_busy; + uint8_t min_active_level; + + hwmgr->power_profile_mode = input[size]; + + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, + 1<<hwmgr->power_profile_mode); + + if (hwmgr->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (size == 0 || size > 4) + return -EINVAL; + + data->custom_profile_mode[0] = busy_set_point = input[0]; + data->custom_profile_mode[1] = FPS = input[1]; + data->custom_profile_mode[2] = use_rlc_busy = input[2]; + data->custom_profile_mode[3] = min_active_level = input[3]; + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetCustomGfxDpmParameters, + busy_set_point | FPS<<8 | + use_rlc_busy << 16 | min_active_level<<24); + } + + return 0; +} + static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .backend_init = vega10_hwmgr_backend_init, .backend_fini = vega10_hwmgr_backend_fini, @@ -5038,7 +5122,6 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .notify_smc_display_config_after_ps_adjustment = vega10_notify_smc_display_config_after_ps_adjustment, .force_dpm_level = vega10_dpm_force_dpm_level, - .get_temperature = vega10_thermal_get_temperature, .stop_thermal_controller = vega10_thermal_stop_thermal_controller, .get_fan_speed_info = vega10_fan_ctrl_get_fan_speed_info, .get_fan_speed_percent = vega10_fan_ctrl_get_fan_speed_percent, @@ -5074,8 +5157,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .set_mclk_od = vega10_set_mclk_od, .avfs_control = vega10_avfs_enable, .notify_cac_buffer_info = vega10_notify_cac_buffer_info, + .get_thermal_temperature_range = vega10_get_thermal_temperature_range, .register_internal_thermal_interrupt = vega10_register_thermal_interrupt, .start_thermal_controller = vega10_start_thermal_controller, + .get_power_profile_mode = vega10_get_power_profile_mode, + .set_power_profile_mode = vega10_set_power_profile_mode, + .set_power_limit = vega10_set_power_limit, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index e8507ff8dbb3..ab3e8798bee8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -189,12 +189,6 @@ struct vega10_vbios_boot_state { uint32_t dcef_clock; }; -#define DPMTABLE_OD_UPDATE_SCLK 0x00000001 -#define DPMTABLE_OD_UPDATE_MCLK 0x00000002 -#define DPMTABLE_UPDATE_SCLK 0x00000004 -#define DPMTABLE_UPDATE_MCLK 0x00000008 -#define DPMTABLE_OD_UPDATE_VDDC 0x00000010 - struct vega10_smc_state_table { uint32_t soc_boot_level; uint32_t gfx_boot_level; @@ -389,6 +383,7 @@ struct vega10_hwmgr { uint32_t config_telemetry; uint32_t acg_loop_state; uint32_t mem_channels; + uint8_t custom_profile_mode[4]; }; #define VEGA10_DPM2_NEAR_TDP_DEC 10 diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index 598a194737a9..981c9e5431da 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -1357,10 +1357,11 @@ int vega10_enable_power_containment(struct pp_hwmgr *hwmgr) struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); struct phm_tdp_table *tdp_table = table_info->tdp_table; - uint32_t default_pwr_limit = - (uint32_t)(tdp_table->usMaximumPowerDeliveryLimit); int result = 0; + hwmgr->default_power_limit = hwmgr->power_limit = + (uint32_t)(tdp_table->usMaximumPowerDeliveryLimit); + if (PP_CAP(PHM_PlatformCaps_PowerContainment)) { if (data->smu_features[GNLD_PPT].supported) PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, @@ -1374,7 +1375,7 @@ int vega10_enable_power_containment(struct pp_hwmgr *hwmgr) "Attempt to enable PPT feature Failed!", data->smu_features[GNLD_TDC].supported = false); - result = vega10_set_power_limit(hwmgr, default_pwr_limit); + result = vega10_set_power_limit(hwmgr, hwmgr->power_limit); PP_ASSERT_WITH_CODE(!result, "Failed to set Default Power Limit in SMC!", return result); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index f14c7611fad3..6d44cf043618 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -267,10 +267,10 @@ static int init_over_drive_limits( hwmgr->platform_descriptor.maxOverdriveVDDC = 0; hwmgr->platform_descriptor.overdriveVDDCStep = 0; - if (hwmgr->platform_descriptor.overdriveLimit.engineClock > 0 && - hwmgr->platform_descriptor.overdriveLimit.memoryClock > 0) { - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ACOverdriveSupport); + if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0 || + hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) { + hwmgr->od_enabled = false; + pr_debug("OverDrive feature not support by VBIOS\n"); } return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index dc3761bcb9b6..749116329c36 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -386,9 +386,9 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr) static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range) { - uint32_t low = VEGA10_THERMAL_MINIMUM_ALERT_TEMP * + int low = VEGA10_THERMAL_MINIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - uint32_t high = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP * + int high = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; uint32_t val, reg; diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 5716b937a6ad..6f528e662a6f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -358,6 +358,17 @@ struct phm_clocks { uint32_t clock[MAX_NUM_CLOCKS]; }; +#define DPMTABLE_OD_UPDATE_SCLK 0x00000001 +#define DPMTABLE_OD_UPDATE_MCLK 0x00000002 +#define DPMTABLE_UPDATE_SCLK 0x00000004 +#define DPMTABLE_UPDATE_MCLK 0x00000008 +#define DPMTABLE_OD_UPDATE_VDDC 0x00000010 + +/* To determine if sclk and mclk are in overdrive state */ +#define SCLK_OVERDRIVE_ENABLED 0x00000001 +#define MCLK_OVERDRIVE_ENABLED 0x00000002 +#define VDDC_OVERDRIVE_ENABLED 0x00000010 + struct phm_odn_performance_level { uint32_t clock; uint32_t vddc; @@ -368,9 +379,9 @@ struct phm_odn_clock_levels { uint32_t size; uint32_t options; uint32_t flags; - uint32_t number_of_performance_levels; - /* variable-sized array, specify by ulNumberOfPerformanceLevels. */ - struct phm_odn_performance_level performance_level_entries[8]; + uint32_t num_of_pl; + /* variable-sized array, specify by num_of_pl. */ + struct phm_odn_performance_level entries[8]; }; extern int phm_disable_clock_power_gatings(struct pp_hwmgr *hwmgr); @@ -393,7 +404,7 @@ extern int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_leve extern int phm_display_configuration_changed(struct pp_hwmgr *hwmgr); extern int phm_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr); extern int phm_register_thermal_interrupt(struct pp_hwmgr *hwmgr, const void *info); -extern int phm_start_thermal_controller(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *temperature_range); +extern int phm_start_thermal_controller(struct pp_hwmgr *hwmgr); extern int phm_stop_thermal_controller(struct pp_hwmgr *hwmgr); extern bool phm_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 565fe0832f41..2a59ee8f4acb 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -42,6 +42,7 @@ struct pp_atomctrl_voltage_table; #define VOLTAGE_SCALE 4 uint8_t convert_to_vid(uint16_t vddc); +uint16_t convert_to_vddc(uint8_t vid); enum DISPLAY_GAP { DISPLAY_GAP_VBLANK_OR_WM = 0, /* Wait for vblank or MCHG watermark. */ @@ -83,6 +84,7 @@ enum PP_FEATURE_MASK { PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800, PP_SOCCLK_DPM_MASK = 0x1000, PP_DCEFCLK_DPM_MASK = 0x2000, + PP_OVERDRIVE_MASK = 0x4000, }; enum PHM_BackEnd_Magic { @@ -277,7 +279,6 @@ struct pp_hwmgr_func { const uint32_t *msg_id); int (*set_max_fan_rpm_output)(struct pp_hwmgr *hwmgr, uint16_t us_max_fan_pwm); int (*set_max_fan_pwm_output)(struct pp_hwmgr *hwmgr, uint16_t us_max_fan_pwm); - int (*get_temperature)(struct pp_hwmgr *hwmgr); int (*stop_thermal_controller)(struct pp_hwmgr *hwmgr); int (*get_fan_speed_info)(struct pp_hwmgr *hwmgr, struct phm_fan_speed_info *fan_speed_info); void (*set_fan_control_mode)(struct pp_hwmgr *hwmgr, uint32_t mode); @@ -339,6 +340,15 @@ struct pp_hwmgr_func { uint32_t mc_addr_low, uint32_t mc_addr_hi, uint32_t size); + int (*get_thermal_temperature_range)(struct pp_hwmgr *hwmgr, + struct PP_TemperatureRange *range); + int (*get_power_profile_mode)(struct pp_hwmgr *hwmgr, char *buf); + int (*set_power_profile_mode)(struct pp_hwmgr *hwmgr, long *input, uint32_t size); + int (*odn_edit_dpm_table)(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size); + int (*set_power_limit)(struct pp_hwmgr *hwmgr, uint32_t n); + int (*set_mmhub_powergating_by_smu)(struct pp_hwmgr *hwmgr); }; struct pp_table_func { @@ -608,7 +618,6 @@ struct phm_dynamic_state_info { struct phm_ppm_table *ppm_parameter_table; struct phm_cac_tdp_table *cac_dtp_table; struct phm_clock_voltage_dependency_table *vdd_gfx_dependency_on_sclk; - struct phm_vq_budgeting_table *vq_budgeting_table; }; struct pp_fan_info { @@ -747,6 +756,13 @@ struct pp_hwmgr { struct amd_pp_profile default_compute_power_profile; enum amd_pp_profile_type current_power_profile; bool en_umd_pstate; + uint32_t power_profile_mode; + uint32_t default_power_profile_mode; + uint32_t pstate_sclk; + uint32_t pstate_mclk; + bool od_enabled; + uint32_t power_limit; + uint32_t default_power_limit; }; struct cgs_irq_src_funcs { @@ -761,7 +777,7 @@ extern int hwmgr_hw_suspend(struct pp_instance *handle); extern int hwmgr_hw_resume(struct pp_instance *handle); extern int hwmgr_handle_task(struct pp_instance *handle, enum amd_pp_task task_id, - void *input, void *output); + enum amd_pm_state_type *user_state); extern int phm_wait_on_register(struct pp_hwmgr *hwmgr, uint32_t index, uint32_t value, uint32_t mask); diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h deleted file mode 100644 index b8f4b73c322e..000000000000 --- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef POLARIS10_PP_SMC_H -#define POLARIS10_PP_SMC_H - - -#pragma pack(push, 1) - -#define PPSMC_MSG_SetGBDroopSettings ((uint16_t) 0x305) - -#define PPSMC_SWSTATE_FLAG_DC 0x01 -#define PPSMC_SWSTATE_FLAG_UVD 0x02 -#define PPSMC_SWSTATE_FLAG_VCE 0x04 - -#define PPSMC_THERMAL_PROTECT_TYPE_INTERNAL 0x00 -#define PPSMC_THERMAL_PROTECT_TYPE_EXTERNAL 0x01 -#define PPSMC_THERMAL_PROTECT_TYPE_NONE 0xff - -#define PPSMC_SYSTEMFLAG_GPIO_DC 0x01 -#define PPSMC_SYSTEMFLAG_STEPVDDC 0x02 -#define PPSMC_SYSTEMFLAG_GDDR5 0x04 - -#define PPSMC_SYSTEMFLAG_DISABLE_BABYSTEP 0x08 - -#define PPSMC_SYSTEMFLAG_REGULATOR_HOT 0x10 -#define PPSMC_SYSTEMFLAG_REGULATOR_HOT_ANALOG 0x20 - -#define PPSMC_EXTRAFLAGS_AC2DC_ACTION_MASK 0x07 -#define PPSMC_EXTRAFLAGS_AC2DC_DONT_WAIT_FOR_VBLANK 0x08 - -#define PPSMC_EXTRAFLAGS_AC2DC_ACTION_GOTODPMLOWSTATE 0x00 -#define PPSMC_EXTRAFLAGS_AC2DC_ACTION_GOTOINITIALSTATE 0x01 - - -#define PPSMC_DPM2FLAGS_TDPCLMP 0x01 -#define PPSMC_DPM2FLAGS_PWRSHFT 0x02 -#define PPSMC_DPM2FLAGS_OCP 0x04 - - -#define PPSMC_DISPLAY_WATERMARK_LOW 0 -#define PPSMC_DISPLAY_WATERMARK_HIGH 1 - - -#define PPSMC_STATEFLAG_AUTO_PULSE_SKIP 0x01 -#define PPSMC_STATEFLAG_POWERBOOST 0x02 -#define PPSMC_STATEFLAG_PSKIP_ON_TDP_FAULT 0x04 -#define PPSMC_STATEFLAG_POWERSHIFT 0x08 -#define PPSMC_STATEFLAG_SLOW_READ_MARGIN 0x10 -#define PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE 0x20 -#define PPSMC_STATEFLAG_DEEPSLEEP_BYPASS 0x40 - - -#define FDO_MODE_HARDWARE 0 -#define FDO_MODE_PIECE_WISE_LINEAR 1 - -enum FAN_CONTROL { - FAN_CONTROL_FUZZY, - FAN_CONTROL_TABLE -}; - - -#define PPSMC_Result_OK ((uint16_t)0x01) -#define PPSMC_Result_NoMore ((uint16_t)0x02) - -#define PPSMC_Result_NotNow ((uint16_t)0x03) -#define PPSMC_Result_Failed ((uint16_t)0xFF) -#define PPSMC_Result_UnknownCmd ((uint16_t)0xFE) -#define PPSMC_Result_UnknownVT ((uint16_t)0xFD) - -typedef uint16_t PPSMC_Result; - -#define PPSMC_isERROR(x) ((uint16_t)0x80 & (x)) - - -#define PPSMC_MSG_Halt ((uint16_t)0x10) -#define PPSMC_MSG_Resume ((uint16_t)0x11) -#define PPSMC_MSG_EnableDPMLevel ((uint16_t)0x12) -#define PPSMC_MSG_ZeroLevelsDisabled ((uint16_t)0x13) -#define PPSMC_MSG_OneLevelsDisabled ((uint16_t)0x14) -#define PPSMC_MSG_TwoLevelsDisabled ((uint16_t)0x15) -#define PPSMC_MSG_EnableThermalInterrupt ((uint16_t)0x16) -#define PPSMC_MSG_RunningOnAC ((uint16_t)0x17) -#define PPSMC_MSG_LevelUp ((uint16_t)0x18) -#define PPSMC_MSG_LevelDown ((uint16_t)0x19) -#define PPSMC_MSG_ResetDPMCounters ((uint16_t)0x1a) -#define PPSMC_MSG_SwitchToSwState ((uint16_t)0x20) -#define PPSMC_MSG_SwitchToSwStateLast ((uint16_t)0x3f) -#define PPSMC_MSG_SwitchToInitialState ((uint16_t)0x40) -#define PPSMC_MSG_NoForcedLevel ((uint16_t)0x41) -#define PPSMC_MSG_ForceHigh ((uint16_t)0x42) -#define PPSMC_MSG_ForceMediumOrHigh ((uint16_t)0x43) -#define PPSMC_MSG_SwitchToMinimumPower ((uint16_t)0x51) -#define PPSMC_MSG_ResumeFromMinimumPower ((uint16_t)0x52) -#define PPSMC_MSG_EnableCac ((uint16_t)0x53) -#define PPSMC_MSG_DisableCac ((uint16_t)0x54) -#define PPSMC_DPMStateHistoryStart ((uint16_t)0x55) -#define PPSMC_DPMStateHistoryStop ((uint16_t)0x56) -#define PPSMC_CACHistoryStart ((uint16_t)0x57) -#define PPSMC_CACHistoryStop ((uint16_t)0x58) -#define PPSMC_TDPClampingActive ((uint16_t)0x59) -#define PPSMC_TDPClampingInactive ((uint16_t)0x5A) -#define PPSMC_StartFanControl ((uint16_t)0x5B) -#define PPSMC_StopFanControl ((uint16_t)0x5C) -#define PPSMC_NoDisplay ((uint16_t)0x5D) -#define PPSMC_HasDisplay ((uint16_t)0x5E) -#define PPSMC_MSG_UVDPowerOFF ((uint16_t)0x60) -#define PPSMC_MSG_UVDPowerON ((uint16_t)0x61) -#define PPSMC_MSG_EnableULV ((uint16_t)0x62) -#define PPSMC_MSG_DisableULV ((uint16_t)0x63) -#define PPSMC_MSG_EnterULV ((uint16_t)0x64) -#define PPSMC_MSG_ExitULV ((uint16_t)0x65) -#define PPSMC_PowerShiftActive ((uint16_t)0x6A) -#define PPSMC_PowerShiftInactive ((uint16_t)0x6B) -#define PPSMC_OCPActive ((uint16_t)0x6C) -#define PPSMC_OCPInactive ((uint16_t)0x6D) -#define PPSMC_CACLongTermAvgEnable ((uint16_t)0x6E) -#define PPSMC_CACLongTermAvgDisable ((uint16_t)0x6F) -#define PPSMC_MSG_InferredStateSweep_Start ((uint16_t)0x70) -#define PPSMC_MSG_InferredStateSweep_Stop ((uint16_t)0x71) -#define PPSMC_MSG_SwitchToLowestInfState ((uint16_t)0x72) -#define PPSMC_MSG_SwitchToNonInfState ((uint16_t)0x73) -#define PPSMC_MSG_AllStateSweep_Start ((uint16_t)0x74) -#define PPSMC_MSG_AllStateSweep_Stop ((uint16_t)0x75) -#define PPSMC_MSG_SwitchNextLowerInfState ((uint16_t)0x76) -#define PPSMC_MSG_SwitchNextHigherInfState ((uint16_t)0x77) -#define PPSMC_MSG_MclkRetrainingTest ((uint16_t)0x78) -#define PPSMC_MSG_ForceTDPClamping ((uint16_t)0x79) -#define PPSMC_MSG_CollectCAC_PowerCorreln ((uint16_t)0x7A) -#define PPSMC_MSG_CollectCAC_WeightCalib ((uint16_t)0x7B) -#define PPSMC_MSG_CollectCAC_SQonly ((uint16_t)0x7C) -#define PPSMC_MSG_CollectCAC_TemperaturePwr ((uint16_t)0x7D) - -#define PPSMC_MSG_ExtremitiesTest_Start ((uint16_t)0x7E) -#define PPSMC_MSG_ExtremitiesTest_Stop ((uint16_t)0x7F) -#define PPSMC_FlushDataCache ((uint16_t)0x80) -#define PPSMC_FlushInstrCache ((uint16_t)0x81) - -#define PPSMC_MSG_SetEnabledLevels ((uint16_t)0x82) -#define PPSMC_MSG_SetForcedLevels ((uint16_t)0x83) - -#define PPSMC_MSG_ResetToDefaults ((uint16_t)0x84) - -#define PPSMC_MSG_SetForcedLevelsAndJump ((uint16_t)0x85) -#define PPSMC_MSG_SetCACHistoryMode ((uint16_t)0x86) -#define PPSMC_MSG_EnableDTE ((uint16_t)0x87) -#define PPSMC_MSG_DisableDTE ((uint16_t)0x88) - -#define PPSMC_MSG_SmcSpaceSetAddress ((uint16_t)0x89) -#define PPSM_MSG_SmcSpaceWriteDWordInc ((uint16_t)0x8A) -#define PPSM_MSG_SmcSpaceWriteWordInc ((uint16_t)0x8B) -#define PPSM_MSG_SmcSpaceWriteByteInc ((uint16_t)0x8C) - -#define PPSMC_MSG_BREAK ((uint16_t)0xF8) - -#define PPSMC_MSG_Test ((uint16_t) 0x100) -#define PPSMC_MSG_DPM_Voltage_Pwrmgt ((uint16_t) 0x101) -#define PPSMC_MSG_DPM_Config ((uint16_t) 0x102) -#define PPSMC_MSG_PM_Controller_Start ((uint16_t) 0x103) -#define PPSMC_MSG_DPM_ForceState ((uint16_t) 0x104) -#define PPSMC_MSG_PG_PowerDownSIMD ((uint16_t) 0x105) -#define PPSMC_MSG_PG_PowerUpSIMD ((uint16_t) 0x106) -#define PPSMC_MSG_PM_Controller_Stop ((uint16_t) 0x107) -#define PPSMC_MSG_PG_SIMD_Config ((uint16_t) 0x108) -#define PPSMC_MSG_Voltage_Cntl_Enable ((uint16_t) 0x109) -#define PPSMC_MSG_Thermal_Cntl_Enable ((uint16_t) 0x10a) -#define PPSMC_MSG_Reset_Service ((uint16_t) 0x10b) -#define PPSMC_MSG_VCEPowerOFF ((uint16_t) 0x10e) -#define PPSMC_MSG_VCEPowerON ((uint16_t) 0x10f) -#define PPSMC_MSG_DPM_Disable_VCE_HS ((uint16_t) 0x110) -#define PPSMC_MSG_DPM_Enable_VCE_HS ((uint16_t) 0x111) -#define PPSMC_MSG_DPM_N_LevelsDisabled ((uint16_t) 0x112) -#define PPSMC_MSG_DCEPowerOFF ((uint16_t) 0x113) -#define PPSMC_MSG_DCEPowerON ((uint16_t) 0x114) -#define PPSMC_MSG_PCIE_DDIPowerDown ((uint16_t) 0x117) -#define PPSMC_MSG_PCIE_DDIPowerUp ((uint16_t) 0x118) -#define PPSMC_MSG_PCIE_CascadePLLPowerDown ((uint16_t) 0x119) -#define PPSMC_MSG_PCIE_CascadePLLPowerUp ((uint16_t) 0x11a) -#define PPSMC_MSG_SYSPLLPowerOff ((uint16_t) 0x11b) -#define PPSMC_MSG_SYSPLLPowerOn ((uint16_t) 0x11c) -#define PPSMC_MSG_DCE_RemoveVoltageAdjustment ((uint16_t) 0x11d) -#define PPSMC_MSG_DCE_AllowVoltageAdjustment ((uint16_t) 0x11e) -#define PPSMC_MSG_DISPLAYPHYStatusNotify ((uint16_t) 0x11f) -#define PPSMC_MSG_EnableBAPM ((uint16_t) 0x120) -#define PPSMC_MSG_DisableBAPM ((uint16_t) 0x121) -#define PPSMC_MSG_Spmi_Enable ((uint16_t) 0x122) -#define PPSMC_MSG_Spmi_Timer ((uint16_t) 0x123) -#define PPSMC_MSG_LCLK_DPM_Config ((uint16_t) 0x124) -#define PPSMC_MSG_VddNB_Request ((uint16_t) 0x125) -#define PPSMC_MSG_PCIE_DDIPhyPowerDown ((uint32_t) 0x126) -#define PPSMC_MSG_PCIE_DDIPhyPowerUp ((uint32_t) 0x127) -#define PPSMC_MSG_MCLKDPM_Config ((uint16_t) 0x128) - -#define PPSMC_MSG_UVDDPM_Config ((uint16_t) 0x129) -#define PPSMC_MSG_VCEDPM_Config ((uint16_t) 0x12A) -#define PPSMC_MSG_ACPDPM_Config ((uint16_t) 0x12B) -#define PPSMC_MSG_SAMUDPM_Config ((uint16_t) 0x12C) -#define PPSMC_MSG_UVDDPM_SetEnabledMask ((uint16_t) 0x12D) -#define PPSMC_MSG_VCEDPM_SetEnabledMask ((uint16_t) 0x12E) -#define PPSMC_MSG_ACPDPM_SetEnabledMask ((uint16_t) 0x12F) -#define PPSMC_MSG_SAMUDPM_SetEnabledMask ((uint16_t) 0x130) -#define PPSMC_MSG_MCLKDPM_ForceState ((uint16_t) 0x131) -#define PPSMC_MSG_MCLKDPM_NoForcedLevel ((uint16_t) 0x132) -#define PPSMC_MSG_Thermal_Cntl_Disable ((uint16_t) 0x133) -#define PPSMC_MSG_SetTDPLimit ((uint16_t) 0x134) -#define PPSMC_MSG_Voltage_Cntl_Disable ((uint16_t) 0x135) -#define PPSMC_MSG_PCIeDPM_Enable ((uint16_t) 0x136) -#define PPSMC_MSG_ACPPowerOFF ((uint16_t) 0x137) -#define PPSMC_MSG_ACPPowerON ((uint16_t) 0x138) -#define PPSMC_MSG_SAMPowerOFF ((uint16_t) 0x139) -#define PPSMC_MSG_SAMPowerON ((uint16_t) 0x13a) -#define PPSMC_MSG_SDMAPowerOFF ((uint16_t) 0x13b) -#define PPSMC_MSG_SDMAPowerON ((uint16_t) 0x13c) -#define PPSMC_MSG_PCIeDPM_Disable ((uint16_t) 0x13d) -#define PPSMC_MSG_IOMMUPowerOFF ((uint16_t) 0x13e) -#define PPSMC_MSG_IOMMUPowerON ((uint16_t) 0x13f) -#define PPSMC_MSG_NBDPM_Enable ((uint16_t) 0x140) -#define PPSMC_MSG_NBDPM_Disable ((uint16_t) 0x141) -#define PPSMC_MSG_NBDPM_ForceNominal ((uint16_t) 0x142) -#define PPSMC_MSG_NBDPM_ForcePerformance ((uint16_t) 0x143) -#define PPSMC_MSG_NBDPM_UnForce ((uint16_t) 0x144) -#define PPSMC_MSG_SCLKDPM_SetEnabledMask ((uint16_t) 0x145) -#define PPSMC_MSG_MCLKDPM_SetEnabledMask ((uint16_t) 0x146) -#define PPSMC_MSG_PCIeDPM_ForceLevel ((uint16_t) 0x147) -#define PPSMC_MSG_PCIeDPM_UnForceLevel ((uint16_t) 0x148) -#define PPSMC_MSG_EnableACDCGPIOInterrupt ((uint16_t) 0x149) -#define PPSMC_MSG_EnableVRHotGPIOInterrupt ((uint16_t) 0x14a) -#define PPSMC_MSG_SwitchToAC ((uint16_t) 0x14b) -#define PPSMC_MSG_XDMAPowerOFF ((uint16_t) 0x14c) -#define PPSMC_MSG_XDMAPowerON ((uint16_t) 0x14d) - -#define PPSMC_MSG_DPM_Enable ((uint16_t) 0x14e) -#define PPSMC_MSG_DPM_Disable ((uint16_t) 0x14f) -#define PPSMC_MSG_MCLKDPM_Enable ((uint16_t) 0x150) -#define PPSMC_MSG_MCLKDPM_Disable ((uint16_t) 0x151) -#define PPSMC_MSG_LCLKDPM_Enable ((uint16_t) 0x152) -#define PPSMC_MSG_LCLKDPM_Disable ((uint16_t) 0x153) -#define PPSMC_MSG_UVDDPM_Enable ((uint16_t) 0x154) -#define PPSMC_MSG_UVDDPM_Disable ((uint16_t) 0x155) -#define PPSMC_MSG_SAMUDPM_Enable ((uint16_t) 0x156) -#define PPSMC_MSG_SAMUDPM_Disable ((uint16_t) 0x157) -#define PPSMC_MSG_ACPDPM_Enable ((uint16_t) 0x158) -#define PPSMC_MSG_ACPDPM_Disable ((uint16_t) 0x159) -#define PPSMC_MSG_VCEDPM_Enable ((uint16_t) 0x15a) -#define PPSMC_MSG_VCEDPM_Disable ((uint16_t) 0x15b) -#define PPSMC_MSG_LCLKDPM_SetEnabledMask ((uint16_t) 0x15c) -#define PPSMC_MSG_DPM_FPS_Mode ((uint16_t) 0x15d) -#define PPSMC_MSG_DPM_Activity_Mode ((uint16_t) 0x15e) -#define PPSMC_MSG_VddC_Request ((uint16_t) 0x15f) -#define PPSMC_MSG_MCLKDPM_GetEnabledMask ((uint16_t) 0x160) -#define PPSMC_MSG_LCLKDPM_GetEnabledMask ((uint16_t) 0x161) -#define PPSMC_MSG_SCLKDPM_GetEnabledMask ((uint16_t) 0x162) -#define PPSMC_MSG_UVDDPM_GetEnabledMask ((uint16_t) 0x163) -#define PPSMC_MSG_SAMUDPM_GetEnabledMask ((uint16_t) 0x164) -#define PPSMC_MSG_ACPDPM_GetEnabledMask ((uint16_t) 0x165) -#define PPSMC_MSG_VCEDPM_GetEnabledMask ((uint16_t) 0x166) -#define PPSMC_MSG_PCIeDPM_SetEnabledMask ((uint16_t) 0x167) -#define PPSMC_MSG_PCIeDPM_GetEnabledMask ((uint16_t) 0x168) -#define PPSMC_MSG_TDCLimitEnable ((uint16_t) 0x169) -#define PPSMC_MSG_TDCLimitDisable ((uint16_t) 0x16a) -#define PPSMC_MSG_DPM_AutoRotate_Mode ((uint16_t) 0x16b) -#define PPSMC_MSG_DISPCLK_FROM_FCH ((uint16_t) 0x16c) -#define PPSMC_MSG_DISPCLK_FROM_DFS ((uint16_t) 0x16d) -#define PPSMC_MSG_DPREFCLK_FROM_FCH ((uint16_t) 0x16e) -#define PPSMC_MSG_DPREFCLK_FROM_DFS ((uint16_t) 0x16f) -#define PPSMC_MSG_PmStatusLogStart ((uint16_t) 0x170) -#define PPSMC_MSG_PmStatusLogSample ((uint16_t) 0x171) -#define PPSMC_MSG_SCLK_AutoDPM_ON ((uint16_t) 0x172) -#define PPSMC_MSG_MCLK_AutoDPM_ON ((uint16_t) 0x173) -#define PPSMC_MSG_LCLK_AutoDPM_ON ((uint16_t) 0x174) -#define PPSMC_MSG_UVD_AutoDPM_ON ((uint16_t) 0x175) -#define PPSMC_MSG_SAMU_AutoDPM_ON ((uint16_t) 0x176) -#define PPSMC_MSG_ACP_AutoDPM_ON ((uint16_t) 0x177) -#define PPSMC_MSG_VCE_AutoDPM_ON ((uint16_t) 0x178) -#define PPSMC_MSG_PCIe_AutoDPM_ON ((uint16_t) 0x179) -#define PPSMC_MSG_MASTER_AutoDPM_ON ((uint16_t) 0x17a) -#define PPSMC_MSG_MASTER_AutoDPM_OFF ((uint16_t) 0x17b) -#define PPSMC_MSG_DYNAMICDISPPHYPOWER ((uint16_t) 0x17c) -#define PPSMC_MSG_CAC_COLLECTION_ON ((uint16_t) 0x17d) -#define PPSMC_MSG_CAC_COLLECTION_OFF ((uint16_t) 0x17e) -#define PPSMC_MSG_CAC_CORRELATION_ON ((uint16_t) 0x17f) -#define PPSMC_MSG_CAC_CORRELATION_OFF ((uint16_t) 0x180) -#define PPSMC_MSG_PM_STATUS_TO_DRAM_ON ((uint16_t) 0x181) -#define PPSMC_MSG_PM_STATUS_TO_DRAM_OFF ((uint16_t) 0x182) -#define PPSMC_MSG_ALLOW_LOWSCLK_INTERRUPT ((uint16_t) 0x184) -#define PPSMC_MSG_PkgPwrLimitEnable ((uint16_t) 0x185) -#define PPSMC_MSG_PkgPwrLimitDisable ((uint16_t) 0x186) -#define PPSMC_MSG_PkgPwrSetLimit ((uint16_t) 0x187) -#define PPSMC_MSG_OverDriveSetTargetTdp ((uint16_t) 0x188) -#define PPSMC_MSG_SCLKDPM_FreezeLevel ((uint16_t) 0x189) -#define PPSMC_MSG_SCLKDPM_UnfreezeLevel ((uint16_t) 0x18A) -#define PPSMC_MSG_MCLKDPM_FreezeLevel ((uint16_t) 0x18B) -#define PPSMC_MSG_MCLKDPM_UnfreezeLevel ((uint16_t) 0x18C) -#define PPSMC_MSG_START_DRAM_LOGGING ((uint16_t) 0x18D) -#define PPSMC_MSG_STOP_DRAM_LOGGING ((uint16_t) 0x18E) -#define PPSMC_MSG_MASTER_DeepSleep_ON ((uint16_t) 0x18F) -#define PPSMC_MSG_MASTER_DeepSleep_OFF ((uint16_t) 0x190) -#define PPSMC_MSG_Remove_DC_Clamp ((uint16_t) 0x191) -#define PPSMC_MSG_DisableACDCGPIOInterrupt ((uint16_t) 0x192) -#define PPSMC_MSG_OverrideVoltageControl_SetVddc ((uint16_t) 0x193) -#define PPSMC_MSG_OverrideVoltageControl_SetVddci ((uint16_t) 0x194) -#define PPSMC_MSG_SetVidOffset_1 ((uint16_t) 0x195) -#define PPSMC_MSG_SetVidOffset_2 ((uint16_t) 0x207) -#define PPSMC_MSG_GetVidOffset_1 ((uint16_t) 0x196) -#define PPSMC_MSG_GetVidOffset_2 ((uint16_t) 0x208) -#define PPSMC_MSG_THERMAL_OVERDRIVE_Enable ((uint16_t) 0x197) -#define PPSMC_MSG_THERMAL_OVERDRIVE_Disable ((uint16_t) 0x198) -#define PPSMC_MSG_SetTjMax ((uint16_t) 0x199) -#define PPSMC_MSG_SetFanPwmMax ((uint16_t) 0x19A) -#define PPSMC_MSG_WaitForMclkSwitchFinish ((uint16_t) 0x19B) -#define PPSMC_MSG_ENABLE_THERMAL_DPM ((uint16_t) 0x19C) -#define PPSMC_MSG_DISABLE_THERMAL_DPM ((uint16_t) 0x19D) - -#define PPSMC_MSG_API_GetSclkFrequency ((uint16_t) 0x200) -#define PPSMC_MSG_API_GetMclkFrequency ((uint16_t) 0x201) -#define PPSMC_MSG_API_GetSclkBusy ((uint16_t) 0x202) -#define PPSMC_MSG_API_GetMclkBusy ((uint16_t) 0x203) -#define PPSMC_MSG_API_GetAsicPower ((uint16_t) 0x204) -#define PPSMC_MSG_SetFanRpmMax ((uint16_t) 0x205) -#define PPSMC_MSG_SetFanSclkTarget ((uint16_t) 0x206) -#define PPSMC_MSG_SetFanMinPwm ((uint16_t) 0x209) -#define PPSMC_MSG_SetFanTemperatureTarget ((uint16_t) 0x20A) - -#define PPSMC_MSG_BACO_StartMonitor ((uint16_t) 0x240) -#define PPSMC_MSG_BACO_Cancel ((uint16_t) 0x241) -#define PPSMC_MSG_EnableVddGfx ((uint16_t) 0x242) -#define PPSMC_MSG_DisableVddGfx ((uint16_t) 0x243) -#define PPSMC_MSG_UcodeAddressLow ((uint16_t) 0x244) -#define PPSMC_MSG_UcodeAddressHigh ((uint16_t) 0x245) -#define PPSMC_MSG_UcodeLoadStatus ((uint16_t) 0x246) - -#define PPSMC_MSG_DRV_DRAM_ADDR_HI ((uint16_t) 0x250) -#define PPSMC_MSG_DRV_DRAM_ADDR_LO ((uint16_t) 0x251) -#define PPSMC_MSG_SMU_DRAM_ADDR_HI ((uint16_t) 0x252) -#define PPSMC_MSG_SMU_DRAM_ADDR_LO ((uint16_t) 0x253) -#define PPSMC_MSG_LoadUcodes ((uint16_t) 0x254) -#define PPSMC_MSG_PowerStateNotify ((uint16_t) 0x255) -#define PPSMC_MSG_COND_EXEC_DRAM_ADDR_HI ((uint16_t) 0x256) -#define PPSMC_MSG_COND_EXEC_DRAM_ADDR_LO ((uint16_t) 0x257) -#define PPSMC_MSG_VBIOS_DRAM_ADDR_HI ((uint16_t) 0x258) -#define PPSMC_MSG_VBIOS_DRAM_ADDR_LO ((uint16_t) 0x259) -#define PPSMC_MSG_LoadVBios ((uint16_t) 0x25A) -#define PPSMC_MSG_GetUcodeVersion ((uint16_t) 0x25B) -#define DMCUSMC_MSG_PSREntry ((uint16_t) 0x25C) -#define DMCUSMC_MSG_PSRExit ((uint16_t) 0x25D) -#define PPSMC_MSG_EnableClockGatingFeature ((uint16_t) 0x260) -#define PPSMC_MSG_DisableClockGatingFeature ((uint16_t) 0x261) -#define PPSMC_MSG_IsDeviceRunning ((uint16_t) 0x262) -#define PPSMC_MSG_LoadMetaData ((uint16_t) 0x263) -#define PPSMC_MSG_TMON_AutoCaliberate_Enable ((uint16_t) 0x264) -#define PPSMC_MSG_TMON_AutoCaliberate_Disable ((uint16_t) 0x265) -#define PPSMC_MSG_GetTelemetry1Slope ((uint16_t) 0x266) -#define PPSMC_MSG_GetTelemetry1Offset ((uint16_t) 0x267) -#define PPSMC_MSG_GetTelemetry2Slope ((uint16_t) 0x268) -#define PPSMC_MSG_GetTelemetry2Offset ((uint16_t) 0x269) -#define PPSMC_MSG_EnableAvfs ((uint16_t) 0x26A) -#define PPSMC_MSG_DisableAvfs ((uint16_t) 0x26B) - -#define PPSMC_MSG_PerformBtc ((uint16_t) 0x26C) -#define PPSMC_MSG_VftTableIsValid ((uint16_t) 0x275) -#define PPSMC_MSG_UseNewGPIOScheme ((uint16_t) 0x277) -#define PPSMC_MSG_GetEnabledPsm ((uint16_t) 0x400) -#define PPSMC_MSG_AgmStartPsm ((uint16_t) 0x401) -#define PPSMC_MSG_AgmReadPsm ((uint16_t) 0x402) -#define PPSMC_MSG_AgmResetPsm ((uint16_t) 0x403) -#define PPSMC_MSG_ReadVftCell ((uint16_t) 0x404) - -#define PPSMC_MSG_GFX_CU_PG_ENABLE ((uint16_t) 0x280) -#define PPSMC_MSG_GFX_CU_PG_DISABLE ((uint16_t) 0x281) -#define PPSMC_MSG_GetCurrPkgPwr ((uint16_t) 0x282) - -#define PPSMC_MSG_SetGpuPllDfsForSclk ((uint16_t) 0x300) -#define PPSMC_MSG_Didt_Block_Function ((uint16_t) 0x301) - -#define PPSMC_MSG_SetVBITimeout ((uint16_t) 0x306) - -#define PPSMC_MSG_SecureSRBMWrite ((uint16_t) 0x600) -#define PPSMC_MSG_SecureSRBMRead ((uint16_t) 0x601) -#define PPSMC_MSG_SetAddress ((uint16_t) 0x800) -#define PPSMC_MSG_GetData ((uint16_t) 0x801) -#define PPSMC_MSG_SetData ((uint16_t) 0x802) - -typedef uint16_t PPSMC_Msg; - -#define PPSMC_EVENT_STATUS_THERMAL 0x00000001 -#define PPSMC_EVENT_STATUS_REGULATORHOT 0x00000002 -#define PPSMC_EVENT_STATUS_DC 0x00000004 - -#pragma pack(pop) - -#endif - diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h index 827860fffe78..a99b5cbb113e 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h @@ -122,8 +122,8 @@ struct PP_StateSoftwareAlgorithmBlock { * Type to hold a temperature range. */ struct PP_TemperatureRange { - uint32_t min; - uint32_t max; + int min; + int max; }; struct PP_StateValidationBlock { diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_feature.h b/drivers/gpu/drm/amd/powerplay/inc/pp_feature.h deleted file mode 100644 index 0faf6a25c18b..000000000000 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_feature.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef _PP_FEATURE_H_ -#define _PP_FEATURE_H_ - -/** - * PowerPlay feature ids. - */ -enum pp_feature { - PP_Feature_PowerPlay = 0, - PP_Feature_User2DPerformance, - PP_Feature_User3DPerformance, - PP_Feature_VariBright, - PP_Feature_VariBrightOnPowerXpress, - PP_Feature_ReducedRefreshRate, - PP_Feature_GFXClockGating, - PP_Feature_OverdriveTest, - PP_Feature_OverDrive, - PP_Feature_PowerBudgetWaiver, - PP_Feature_PowerControl, - PP_Feature_PowerControl_2, - PP_Feature_MultiUVDState, - PP_Feature_Force3DClock, - PP_Feature_BACO, - PP_Feature_PowerDown, - PP_Feature_DynamicUVDState, - PP_Feature_VCEDPM, - PP_Feature_PPM, - PP_Feature_ACP_POWERGATING, - PP_Feature_FFC, - PP_Feature_FPS, - PP_Feature_ViPG, - PP_Feature_Max -}; - -/** - * Struct for PowerPlay feature info. - */ -struct pp_feature_info { - bool supported; /* feature supported by PowerPlay */ - bool enabled; /* feature enabled in PowerPlay */ - bool enabled_default; /* default enable status of the feature */ - uint32_t version; /* feature version */ -}; - -#endif /* _PP_FEATURE_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h b/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h index b7ab69e4c254..214f370c5efd 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h @@ -23,7 +23,8 @@ #ifndef PP_SOC15_H #define PP_SOC15_H -#include "soc15ip.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" inline static uint32_t soc15_get_register_offset( uint32_t hw_id, @@ -43,7 +44,8 @@ inline static uint32_t soc15_get_register_offset( reg = DF_BASE.instance[inst].segment[segment] + offset; else if (hw_id == GC_HWID) reg = GC_BASE.instance[inst].segment[segment] + offset; - + else if (hw_id == SMUIO_HWID) + reg = SMUIO_BASE.instance[inst].segment[segment] + offset; return reg; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h new file mode 100644 index 000000000000..201d2b6329ab --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h @@ -0,0 +1,40 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef PP_THERMAL_H +#define PP_THERMAL_H + +#include "power_state.h" + +static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = +{ + {-273150, 99000}, + { 120000, 120000}, +}; + +static const struct PP_TemperatureRange SMU7ThermalPolicy[] = +{ + {-273150, 99000}, + { 120000, 120000}, +}; + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h index f15f4df9d0a9..426bff2aad2b 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h @@ -80,7 +80,8 @@ #define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x32 #define PPSMC_MSG_SetSoftMaxFclkByFreq 0x33 #define PPSMC_MSG_SetSoftMaxVcn 0x34 -#define PPSMC_Message_Count 0x35 +#define PPSMC_MSG_PowerGateMmHub 0x35 +#define PPSMC_Message_Count 0x36 typedef uint16_t PPSMC_Result; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu7.h b/drivers/gpu/drm/amd/powerplay/inc/smu7.h index 75a380a15292..e14072d45918 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu7.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu7.h @@ -82,6 +82,25 @@ #define SCRATCH_B_CURR_SAMU_INDEX_MASK (0x7<<SCRATCH_B_CURR_SAMU_INDEX_SHIFT) +/* Voltage Regulator Configuration */ +/* VR Config info is contained in dpmTable */ + +#define VRCONF_VDDC_MASK 0x000000FF +#define VRCONF_VDDC_SHIFT 0 +#define VRCONF_VDDGFX_MASK 0x0000FF00 +#define VRCONF_VDDGFX_SHIFT 8 +#define VRCONF_VDDCI_MASK 0x00FF0000 +#define VRCONF_VDDCI_SHIFT 16 +#define VRCONF_MVDD_MASK 0xFF000000 +#define VRCONF_MVDD_SHIFT 24 + +#define VR_MERGED_WITH_VDDC 0 +#define VR_SVI2_PLANE_1 1 +#define VR_SVI2_PLANE_2 2 +#define VR_SMIO_PATTERN_1 3 +#define VR_SMIO_PATTERN_2 4 +#define VR_STATIC_VOLTAGE 5 + struct SMU7_PIDController { uint32_t Ki; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu7_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu7_discrete.h index 0b0b404ff091..ee876745dd12 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu7_discrete.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu7_discrete.h @@ -316,7 +316,8 @@ struct SMU7_Discrete_DpmTable uint8_t AcpLevelCount; uint8_t SamuLevelCount; uint8_t MasterDeepSleepControl; - uint32_t Reserved[5]; + uint32_t VRConfig; + uint32_t Reserved[4]; // uint32_t SamuDefaultLevel; SMU7_Discrete_GraphicsLevel GraphicsLevel [SMU7_MAX_LEVELS_GRAPHICS]; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 0b4a55660de4..6cdaed06da0b 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -411,8 +411,7 @@ static uint8_t ci_get_sleep_divider_id_from_clock(uint32_t clock, } static int ci_populate_single_graphic_level(struct pp_hwmgr *hwmgr, - uint32_t clock, uint16_t sclk_al_threshold, - struct SMU7_Discrete_GraphicsLevel *level) + uint32_t clock, struct SMU7_Discrete_GraphicsLevel *level) { int result; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -438,14 +437,14 @@ static int ci_populate_single_graphic_level(struct pp_hwmgr *hwmgr, clock, &level->MinVddcPhases); - level->ActivityLevel = sclk_al_threshold; + level->ActivityLevel = data->current_profile_setting.sclk_activity; level->CcPwrDynRm = 0; level->CcPwrDynRm1 = 0; level->EnabledForActivity = 0; /* this level can be used for throttling.*/ level->EnabledForThrottle = 1; - level->UpH = 0; - level->DownH = 0; + level->UpH = data->current_profile_setting.sclk_up_hyst; + level->DownH = data->current_profile_setting.sclk_down_hyst; level->VoltageDownH = 0; level->PowerThrottle = 0; @@ -492,7 +491,6 @@ static int ci_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) for (i = 0; i < dpm_table->sclk_table.count; i++) { result = ci_populate_single_graphic_level(hwmgr, dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], &levels[i]); if (result) return result; @@ -860,10 +858,13 @@ static int ci_populate_smc_vddc_table(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE(0 == result, "do not populate SMC VDDC voltage table", return -EINVAL); /* GPIO voltage control */ - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->voltage_control) - table->VddcLevel[count].Smio |= data->vddc_voltage_table.entries[count].smio_low; - else + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->voltage_control) { + table->VddcLevel[count].Smio = (uint8_t) count; + table->Smio[count] |= data->vddc_voltage_table.entries[count].smio_low; + table->SmioMaskVddcVid |= data->vddc_voltage_table.entries[count].smio_low; + } else { table->VddcLevel[count].Smio = 0; + } } CONVERT_FROM_HOST_TO_SMC_UL(table->VddcLevelCount); @@ -885,10 +886,13 @@ static int ci_populate_smc_vdd_ci_table(struct pp_hwmgr *hwmgr, &(data->vddci_voltage_table.entries[count]), &(table->VddciLevel[count])); PP_ASSERT_WITH_CODE(result == 0, "do not populate SMC VDDCI voltage table", return -EINVAL); - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) - table->VddciLevel[count].Smio |= data->vddci_voltage_table.entries[count].smio_low; - else - table->VddciLevel[count].Smio |= 0; + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + table->VddciLevel[count].Smio = (uint8_t) count; + table->Smio[count] |= data->vddci_voltage_table.entries[count].smio_low; + table->SmioMaskVddciVid |= data->vddci_voltage_table.entries[count].smio_low; + } else { + table->VddciLevel[count].Smio = 0; + } } CONVERT_FROM_HOST_TO_SMC_UL(table->VddciLevelCount); @@ -910,10 +914,13 @@ static int ci_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, &(data->mvdd_voltage_table.entries[count]), &table->MvddLevel[count]); PP_ASSERT_WITH_CODE(result == 0, "do not populate SMC mvdd voltage table", return -EINVAL); - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) - table->MvddLevel[count].Smio |= data->mvdd_voltage_table.entries[count].smio_low; - else - table->MvddLevel[count].Smio |= 0; + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + table->MvddLevel[count].Smio = (uint8_t) count; + table->Smio[count] |= data->mvdd_voltage_table.entries[count].smio_low; + table->SmioMaskMvddVid |= data->mvdd_voltage_table.entries[count].smio_low; + } else { + table->MvddLevel[count].Smio = 0; + } } CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); @@ -1217,12 +1224,12 @@ static int ci_populate_single_memory_level( memory_level->EnabledForThrottle = 1; memory_level->EnabledForActivity = 1; - memory_level->UpH = 0; - memory_level->DownH = 100; + memory_level->UpH = data->current_profile_setting.mclk_up_hyst; + memory_level->DownH = data->current_profile_setting.mclk_down_hyst; memory_level->VoltageDownH = 0; /* Indicates maximum activity level for this performance level.*/ - memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + memory_level->ActivityLevel = data->current_profile_setting.mclk_activity; memory_level->StutterEnable = 0; memory_level->StrobeEnable = 0; memory_level->EdcReadEnable = 0; @@ -1506,7 +1513,7 @@ static int ci_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->MemoryACPILevel.DownH = 100; table->MemoryACPILevel.VoltageDownH = 0; /* Indicates maximum activity level for this performance level.*/ - table->MemoryACPILevel.ActivityLevel = PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + table->MemoryACPILevel.ActivityLevel = PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity); table->MemoryACPILevel.StutterEnable = 0; table->MemoryACPILevel.StrobeEnable = 0; @@ -1941,6 +1948,37 @@ static int ci_start_smc(struct pp_hwmgr *hwmgr) return 0; } +static int ci_populate_vr_config(struct pp_hwmgr *hwmgr, SMU7_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint16_t config; + + config = VR_SVI2_PLANE_1; + table->VRConfig |= (config<<VRCONF_VDDGFX_SHIFT); + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + config = VR_SVI2_PLANE_2; + table->VRConfig |= config; + } else { + pr_info("VDDCshould be on SVI2 controller!"); + } + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { + config = VR_SVI2_PLANE_2; + table->VRConfig |= (config<<VRCONF_VDDCI_SHIFT); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + config = VR_SMIO_PATTERN_1; + table->VRConfig |= (config<<VRCONF_VDDCI_SHIFT); + } + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + config = VR_SMIO_PATTERN_2; + table->VRConfig |= (config<<VRCONF_MVDD_SHIFT); + } + + return 0; +} + static int ci_init_smc_table(struct pp_hwmgr *hwmgr) { int result; @@ -2064,6 +2102,11 @@ static int ci_init_smc_table(struct pp_hwmgr *hwmgr) table->PCIeBootLinkLevel = (uint8_t)data->dpm_table.pcie_speed_table.count; table->PCIeGenInterval = 1; + result = ci_populate_vr_config(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate VRConfig setting!", return result); + data->vr_config = table->VRConfig; + ci_populate_smc_svi2_config(hwmgr, table); for (i = 0; i < SMU7_MAX_ENTRIES_SMIO; i++) @@ -2084,6 +2127,7 @@ static int ci_init_smc_table(struct pp_hwmgr *hwmgr) table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); + CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcVid); CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcPhase); CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddciVid); @@ -2756,7 +2800,6 @@ static int ci_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, static int ci_smu_init(struct pp_hwmgr *hwmgr) { - int i; struct ci_smumgr *ci_priv = NULL; ci_priv = kzalloc(sizeof(struct ci_smumgr), GFP_KERNEL); @@ -2764,9 +2807,6 @@ static int ci_smu_init(struct pp_hwmgr *hwmgr) if (ci_priv == NULL) return -ENOMEM; - for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) - ci_priv->activity_target[i] = 30; - hwmgr->smu_backend = ci_priv; return 0; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.h index 8189cfa17c46..a8282705c569 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.h @@ -70,8 +70,6 @@ struct ci_smumgr { const struct ci_pt_defaults *power_tune_defaults; SMU7_Discrete_MCRegisters mc_regs; struct ci_mc_reg_table mc_reg_table; - uint32_t activity_target[SMU7_MAX_LEVELS_GRAPHICS]; - }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 085d81c8b332..9d5ccdbc391d 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -368,7 +368,6 @@ static bool fiji_is_hw_avfs_present(struct pp_hwmgr *hwmgr) static int fiji_smu_init(struct pp_hwmgr *hwmgr) { - int i; struct fiji_smumgr *fiji_priv = NULL; fiji_priv = kzalloc(sizeof(struct fiji_smumgr), GFP_KERNEL); @@ -381,9 +380,6 @@ static int fiji_smu_init(struct pp_hwmgr *hwmgr) if (smu7_init(hwmgr)) return -EINVAL; - for (i = 0; i < SMU73_MAX_LEVELS_GRAPHICS; i++) - fiji_priv->activity_target[i] = 30; - return 0; } @@ -972,8 +968,7 @@ static int fiji_calculate_sclk_params(struct pp_hwmgr *hwmgr, } static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr, - uint32_t clock, uint16_t sclk_al_threshold, - struct SMU73_Discrete_GraphicsLevel *level) + uint32_t clock, struct SMU73_Discrete_GraphicsLevel *level) { int result; /* PP_Clocks minClocks; */ @@ -981,12 +976,18 @@ static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr, struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); + phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; result = fiji_calculate_sclk_params(hwmgr, clock, level); + if (hwmgr->od_enabled) + vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_sclk; + else + vdd_dep_table = table_info->vdd_dep_on_sclk; + /* populate graphics levels */ result = fiji_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, clock, + vdd_dep_table, clock, (uint32_t *)(&level->MinVoltage), &mvdd); PP_ASSERT_WITH_CODE((0 == result), "can not find VDDC voltage value for " @@ -994,13 +995,13 @@ static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr, return result); level->SclkFrequency = clock; - level->ActivityLevel = sclk_al_threshold; + level->ActivityLevel = data->current_profile_setting.sclk_activity; level->CcPwrDynRm = 0; level->CcPwrDynRm1 = 0; level->EnabledForActivity = 0; level->EnabledForThrottle = 1; - level->UpHyst = 10; - level->DownHyst = 0; + level->UpHyst = data->current_profile_setting.sclk_up_hyst; + level->DownHyst = data->current_profile_setting.sclk_down_hyst; level->VoltageDownHyst = 0; level->PowerThrottle = 0; @@ -1057,7 +1058,6 @@ static int fiji_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) for (i = 0; i < dpm_table->sclk_table.count; i++) { result = fiji_populate_single_graphic_level(hwmgr, dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], &levels[i]); if (result) return result; @@ -1202,10 +1202,16 @@ static int fiji_populate_single_memory_level(struct pp_hwmgr *hwmgr, (struct phm_ppt_v1_information *)(hwmgr->pptable); int result = 0; uint32_t mclk_stutter_mode_threshold = 60000; + phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; - if (table_info->vdd_dep_on_mclk) { + if (hwmgr->od_enabled) + vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_sclk; + else + vdd_dep_table = table_info->vdd_dep_on_mclk; + + if (vdd_dep_table) { result = fiji_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, clock, + vdd_dep_table, clock, (uint32_t *)(&mem_level->MinVoltage), &mem_level->MinMvdd); PP_ASSERT_WITH_CODE((0 == result), "can not find MinVddc voltage value from memory " @@ -1214,10 +1220,10 @@ static int fiji_populate_single_memory_level(struct pp_hwmgr *hwmgr, mem_level->EnabledForThrottle = 1; mem_level->EnabledForActivity = 0; - mem_level->UpHyst = 0; - mem_level->DownHyst = 100; + mem_level->UpHyst = data->current_profile_setting.mclk_up_hyst; + mem_level->DownHyst = data->current_profile_setting.mclk_down_hyst; mem_level->VoltageDownHyst = 0; - mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + mem_level->ActivityLevel = data->current_profile_setting.mclk_activity; mem_level->StutterEnable = false; mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; @@ -1435,7 +1441,7 @@ static int fiji_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->MemoryACPILevel.DownHyst = 100; table->MemoryACPILevel.VoltageDownHyst = 0; table->MemoryACPILevel.ActivityLevel = - PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity); table->MemoryACPILevel.StutterEnable = false; CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); @@ -1799,7 +1805,7 @@ static int fiji_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher); PP_ASSERT_WITH_CODE(false, - "Stretch Amount in PPTable not supported\n", + "Stretch Amount in PPTable not supported", return -EINVAL); } @@ -2141,7 +2147,7 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr) result = fiji_populate_vr_config(hwmgr, table); PP_ASSERT_WITH_CODE(0 == result, "Failed to populate VRConfig setting!", return result); - + data->vr_config = table->VRConfig; table->ThermGpio = 17; table->SclkStepSize = 0x4000; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h index 279647772578..6d3746268ccf 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h @@ -43,8 +43,6 @@ struct fiji_smumgr { struct SMU73_Discrete_Ulv ulv_setting; struct SMU73_Discrete_PmFuses power_tune_table; const struct fiji_pt_defaults *power_tune_defaults; - uint32_t activity_target[SMU73_MAX_LEVELS_GRAPHICS]; - }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index 125312691f75..11aeb150a97f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -262,7 +262,6 @@ static int iceland_start_smu(struct pp_hwmgr *hwmgr) static int iceland_smu_init(struct pp_hwmgr *hwmgr) { - int i; struct iceland_smumgr *iceland_priv = NULL; iceland_priv = kzalloc(sizeof(struct iceland_smumgr), GFP_KERNEL); @@ -275,9 +274,6 @@ static int iceland_smu_init(struct pp_hwmgr *hwmgr) if (smu7_init(hwmgr)) return -EINVAL; - for (i = 0; i < SMU71_MAX_LEVELS_GRAPHICS; i++) - iceland_priv->activity_target[i] = 30; - return 0; } @@ -546,7 +542,7 @@ static int iceland_get_std_voltage_value_sidd(struct pp_hwmgr *hwmgr, /* SCLK/VDDC Dependency Table has to exist. */ PP_ASSERT_WITH_CODE(NULL != hwmgr->dyn_state.vddc_dependency_on_sclk, - "The SCLK/VDDC Dependency Table does not exist.\n", + "The SCLK/VDDC Dependency Table does not exist.", return -EINVAL); if (NULL == hwmgr->dyn_state.cac_leakage_table) { @@ -898,7 +894,6 @@ static int iceland_populate_phase_value_based_on_sclk(struct pp_hwmgr *hwmgr, static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr, uint32_t engine_clock, - uint16_t sclk_activity_level_threshold, SMU71_Discrete_GraphicsLevel *graphic_level) { int result; @@ -924,7 +919,7 @@ static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr, &graphic_level->MinVddcPhases); /* Indicates maximum activity level for this performance level. 50% for now*/ - graphic_level->ActivityLevel = sclk_activity_level_threshold; + graphic_level->ActivityLevel = data->current_profile_setting.sclk_activity; graphic_level->CcPwrDynRm = 0; graphic_level->CcPwrDynRm1 = 0; @@ -932,8 +927,8 @@ static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr, graphic_level->EnabledForActivity = 0; /* this level can be used for throttling.*/ graphic_level->EnabledForThrottle = 1; - graphic_level->UpHyst = 0; - graphic_level->DownHyst = 100; + graphic_level->UpHyst = data->current_profile_setting.sclk_up_hyst; + graphic_level->DownHyst = data->current_profile_setting.sclk_down_hyst; graphic_level->VoltageDownHyst = 0; graphic_level->PowerThrottle = 0; @@ -989,7 +984,6 @@ static int iceland_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) for (i = 0; i < dpm_table->sclk_table.count; i++) { result = iceland_populate_single_graphic_level(hwmgr, dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], &(smu_data->smc_state_table.GraphicsLevel[i])); if (result != 0) return result; @@ -1275,12 +1269,12 @@ static int iceland_populate_single_memory_level( memory_level->EnabledForThrottle = 1; memory_level->EnabledForActivity = 0; - memory_level->UpHyst = 0; - memory_level->DownHyst = 100; + memory_level->UpHyst = data->current_profile_setting.mclk_up_hyst; + memory_level->DownHyst = data->current_profile_setting.mclk_down_hyst; memory_level->VoltageDownHyst = 0; /* Indicates maximum activity level for this performance level.*/ - memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + memory_level->ActivityLevel = data->current_profile_setting.mclk_activity; memory_level->StutterEnable = 0; memory_level->StrobeEnable = 0; memory_level->EdcReadEnable = 0; @@ -1561,7 +1555,7 @@ static int iceland_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->MemoryACPILevel.DownHyst = 100; table->MemoryACPILevel.VoltageDownHyst = 0; /* Indicates maximum activity level for this performance level.*/ - table->MemoryACPILevel.ActivityLevel = PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + table->MemoryACPILevel.ActivityLevel = PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity); table->MemoryACPILevel.StutterEnable = 0; table->MemoryACPILevel.StrobeEnable = 0; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.h index 802472530d34..f32c506779c9 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.h @@ -65,7 +65,6 @@ struct iceland_smumgr { const struct iceland_pt_defaults *power_tune_defaults; SMU71_Discrete_MCRegisters mc_regs; struct iceland_mc_reg_table mc_reg_table; - uint32_t activity_target[SMU71_MAX_LEVELS_GRAPHICS]; }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index cdb47657b567..bfb2c85d3c60 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -366,7 +366,6 @@ static bool polaris10_is_hw_avfs_present(struct pp_hwmgr *hwmgr) static int polaris10_smu_init(struct pp_hwmgr *hwmgr) { struct polaris10_smumgr *smu_data; - int i; smu_data = kzalloc(sizeof(struct polaris10_smumgr), GFP_KERNEL); if (smu_data == NULL) @@ -377,9 +376,6 @@ static int polaris10_smu_init(struct pp_hwmgr *hwmgr) if (smu7_init(hwmgr)) return -EINVAL; - for (i = 0; i < SMU74_MAX_LEVELS_GRAPHICS; i++) - smu_data->activity_target[i] = PPPOLARIS10_TARGETACTIVITY_DFLT; - return 0; } @@ -938,8 +934,7 @@ static int polaris10_calculate_sclk_params(struct pp_hwmgr *hwmgr, } static int polaris10_populate_single_graphic_level(struct pp_hwmgr *hwmgr, - uint32_t clock, uint16_t sclk_al_threshold, - struct SMU74_Discrete_GraphicsLevel *level) + uint32_t clock, struct SMU74_Discrete_GraphicsLevel *level) { int result; /* PP_Clocks minClocks; */ @@ -948,26 +943,32 @@ static int polaris10_populate_single_graphic_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); SMU_SclkSetting curr_sclk_setting = { 0 }; + phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; result = polaris10_calculate_sclk_params(hwmgr, clock, &curr_sclk_setting); + if (hwmgr->od_enabled) + vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_sclk; + else + vdd_dep_table = table_info->vdd_dep_on_sclk; + /* populate graphics levels */ result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, clock, + vdd_dep_table, clock, &level->MinVoltage, &mvdd); PP_ASSERT_WITH_CODE((0 == result), "can not find VDDC voltage value for " "VDDC engine clock dependency table", return result); - level->ActivityLevel = sclk_al_threshold; + level->ActivityLevel = data->current_profile_setting.sclk_activity; level->CcPwrDynRm = 0; level->CcPwrDynRm1 = 0; level->EnabledForActivity = 0; level->EnabledForThrottle = 1; - level->UpHyst = 10; - level->DownHyst = 0; + level->UpHyst = data->current_profile_setting.sclk_up_hyst; + level->DownHyst = data->current_profile_setting.sclk_down_hyst; level->VoltageDownHyst = 0; level->PowerThrottle = 0; data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr; @@ -1031,7 +1032,6 @@ static int polaris10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) result = polaris10_populate_single_graphic_level(hwmgr, dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], &(smu_data->smc_state_table.GraphicsLevel[i])); if (result) return result; @@ -1107,12 +1107,18 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, int result = 0; struct cgs_display_info info = {0, 0, NULL}; uint32_t mclk_stutter_mode_threshold = 40000; + phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; cgs_get_active_displays_info(hwmgr->device, &info); - if (table_info->vdd_dep_on_mclk) { + if (hwmgr->od_enabled) + vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_sclk; + else + vdd_dep_table = table_info->vdd_dep_on_mclk; + + if (vdd_dep_table) { result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, clock, + vdd_dep_table, clock, &mem_level->MinVoltage, &mem_level->MinMvdd); PP_ASSERT_WITH_CODE((0 == result), "can not find MinVddc voltage value from memory " @@ -1122,10 +1128,10 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, mem_level->MclkFrequency = clock; mem_level->EnabledForThrottle = 1; mem_level->EnabledForActivity = 0; - mem_level->UpHyst = 0; - mem_level->DownHyst = 100; + mem_level->UpHyst = data->current_profile_setting.mclk_up_hyst; + mem_level->DownHyst = data->current_profile_setting.mclk_down_hyst; mem_level->VoltageDownHyst = 0; - mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + mem_level->ActivityLevel = data->current_profile_setting.mclk_activity; mem_level->StutterEnable = false; mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; @@ -1306,7 +1312,7 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->MemoryACPILevel.DownHyst = 100; table->MemoryACPILevel.VoltageDownHyst = 0; table->MemoryACPILevel.ActivityLevel = - PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity); CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage); @@ -1652,7 +1658,7 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher); PP_ASSERT_WITH_CODE(false, - "Stretch Amount in PPTable not supported\n", + "Stretch Amount in PPTable not supported", return -EINVAL); } @@ -1991,7 +1997,7 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr) result = polaris10_populate_vr_config(hwmgr, table); PP_ASSERT_WITH_CODE(0 == result, "Failed to populate VRConfig setting!", return result); - + hw_data->vr_config = table->VRConfig; table->ThermGpio = 17; table->SclkStepSize = 0x4000; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h index 5e19c24b0561..1ec425df9eda 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h @@ -59,7 +59,6 @@ struct polaris10_smumgr { struct SMU74_Discrete_PmFuses power_tune_table; struct polaris10_range_table range_table[NUM_SCLK_RANGE]; const struct polaris10_pt_defaults *power_tune_defaults; - uint32_t activity_target[SMU74_MAX_LEVELS_GRAPHICS]; uint32_t bif_sclk_table[SMU74_MAX_LEVELS_LINK]; }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 79e5c05571bc..97404a578542 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -222,7 +222,6 @@ static int tonga_start_smu(struct pp_hwmgr *hwmgr) static int tonga_smu_init(struct pp_hwmgr *hwmgr) { struct tonga_smumgr *tonga_priv = NULL; - int i; tonga_priv = kzalloc(sizeof(struct tonga_smumgr), GFP_KERNEL); if (tonga_priv == NULL) @@ -233,9 +232,6 @@ static int tonga_smu_init(struct pp_hwmgr *hwmgr) if (smu7_init(hwmgr)) return -EINVAL; - for (i = 0; i < SMU72_MAX_LEVELS_GRAPHICS; i++) - tonga_priv->activity_target[i] = 30; - return 0; } @@ -416,7 +412,7 @@ static int tonga_populate_cac_tables(struct pp_hwmgr *hwmgr, convert_to_vid(vddc_lookup_table->entries[index].us_cac_high); } - if ((data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2)) { + if (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) { /* We are populating vddgfx CAC data to BapmVddgfx table in split mode */ for (count = 0; count < vddgfx_level_count; count++) { index = phm_get_voltage_index(vddgfx_lookup_table, @@ -612,7 +608,6 @@ static int tonga_calculate_sclk_params(struct pp_hwmgr *hwmgr, static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, uint32_t engine_clock, - uint16_t sclk_activity_level_threshold, SMU72_Discrete_GraphicsLevel *graphic_level) { int result; @@ -620,12 +615,18 @@ static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); + phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; result = tonga_calculate_sclk_params(hwmgr, engine_clock, graphic_level); + if (hwmgr->od_enabled) + vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_sclk; + else + vdd_dep_table = pptable_info->vdd_dep_on_sclk; + /* populate graphics levels*/ result = tonga_get_dependency_volt_by_clk(hwmgr, - pptable_info->vdd_dep_on_sclk, engine_clock, + vdd_dep_table, engine_clock, &graphic_level->MinVoltage, &mvdd); PP_ASSERT_WITH_CODE((!result), "can not find VDDC voltage value for VDDC " @@ -634,7 +635,7 @@ static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, /* SCLK frequency in units of 10KHz*/ graphic_level->SclkFrequency = engine_clock; /* Indicates maximum activity level for this performance level. 50% for now*/ - graphic_level->ActivityLevel = sclk_activity_level_threshold; + graphic_level->ActivityLevel = data->current_profile_setting.sclk_activity; graphic_level->CcPwrDynRm = 0; graphic_level->CcPwrDynRm1 = 0; @@ -642,8 +643,8 @@ static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, graphic_level->EnabledForActivity = 0; /* this level can be used for throttling.*/ graphic_level->EnabledForThrottle = 1; - graphic_level->UpHyst = 0; - graphic_level->DownHyst = 0; + graphic_level->UpHyst = data->current_profile_setting.sclk_up_hyst; + graphic_level->DownHyst = data->current_profile_setting.sclk_down_hyst; graphic_level->VoltageDownHyst = 0; graphic_level->PowerThrottle = 0; @@ -702,7 +703,6 @@ static int tonga_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) for (i = 0; i < dpm_table->sclk_table.count; i++) { result = tonga_populate_single_graphic_level(hwmgr, dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], &(smu_data->smc_state_table.GraphicsLevel[i])); if (result != 0) return result; @@ -966,10 +966,16 @@ static int tonga_populate_single_memory_level( uint32_t mclk_stutter_mode_threshold = 30000; uint32_t mclk_edc_enable_threshold = 40000; uint32_t mclk_strobe_mode_threshold = 40000; + phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; - if (NULL != pptable_info->vdd_dep_on_mclk) { + if (hwmgr->od_enabled) + vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_sclk; + else + vdd_dep_table = pptable_info->vdd_dep_on_mclk; + + if (NULL != vdd_dep_table) { result = tonga_get_dependency_volt_by_clk(hwmgr, - pptable_info->vdd_dep_on_mclk, + vdd_dep_table, memory_clock, &memory_level->MinVoltage, &mvdd); PP_ASSERT_WITH_CODE( @@ -986,12 +992,12 @@ static int tonga_populate_single_memory_level( memory_level->EnabledForThrottle = 1; memory_level->EnabledForActivity = 0; - memory_level->UpHyst = 0; - memory_level->DownHyst = 100; + memory_level->UpHyst = data->current_profile_setting.mclk_up_hyst; + memory_level->DownHyst = data->current_profile_setting.mclk_down_hyst; memory_level->VoltageDownHyst = 0; /* Indicates maximum activity level for this performance level.*/ - memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + memory_level->ActivityLevel = data->current_profile_setting.mclk_activity; memory_level->StutterEnable = 0; memory_level->StrobeEnable = 0; memory_level->EdcReadEnable = 0; @@ -1281,7 +1287,7 @@ static int tonga_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->MemoryACPILevel.VoltageDownHyst = 0; /* Indicates maximum activity level for this performance level.*/ table->MemoryACPILevel.ActivityLevel = - PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity); table->MemoryACPILevel.StutterEnable = 0; table->MemoryACPILevel.StrobeEnable = 0; @@ -1699,7 +1705,7 @@ static int tonga_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher); PP_ASSERT_WITH_CODE(false, - "Stretch Amount in PPTable not supported\n", + "Stretch Amount in PPTable not supported", return -EINVAL); } @@ -2434,7 +2440,7 @@ static int tonga_init_smc_table(struct pp_hwmgr *hwmgr) result = tonga_populate_vr_config(hwmgr, table); PP_ASSERT_WITH_CODE(!result, "Failed to populate VRConfig setting !", return result); - + data->vr_config = table->VRConfig; table->ThermGpio = 17; table->SclkStepSize = 0x4000; @@ -2501,7 +2507,6 @@ static int tonga_init_smc_table(struct pp_hwmgr *hwmgr) for (i = 0; i < SMU72_MAX_ENTRIES_SMIO; i++) table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); - CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h index 5d70a00348e2..d664fedd3d85 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h @@ -69,9 +69,6 @@ struct tonga_smumgr { const struct tonga_pt_defaults *power_tune_defaults; SMU72_Discrete_MCRegisters mc_regs; struct tonga_mc_reg_table mc_reg_table; - - uint32_t activity_target[SMU72_MAX_LEVELS_GRAPHICS]; - }; #endif diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index 238e6eb842ea..cd8a3ee16649 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -718,7 +718,7 @@ radeon_dp_mst_check_status(struct radeon_connector *radeon_connector) DP_SINK_COUNT_ESI, esi, 8); go_again: if (dret == 8) { - DRM_DEBUG_KMS("got esi %02x %02x %02x\n", esi[0], esi[1], esi[2]); + DRM_DEBUG_KMS("got esi %3ph\n", esi); ret = drm_dp_mst_hpd_irq(&radeon_connector->mst_mgr, esi, &handled); if (handled) { @@ -733,7 +733,7 @@ go_again: dret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_SINK_COUNT_ESI, esi, 8); if (dret == 8) { - DRM_DEBUG_KMS("got esi2 %02x %02x %02x\n", esi[0], esi[1], esi[2]); + DRM_DEBUG_KMS("got esi2 %3ph\n", esi); goto go_again; } } else diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 15404af9d740..c38fea37a67d 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -204,11 +204,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size); - if (unlikely(r)) { - kfree(bo); - return r; - } + drm_gem_private_object_init(rdev->ddev, &bo->gem_base, size); bo->rdev = rdev; bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 326ad068c15a..4b6542538ff9 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -47,7 +47,6 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev); static bool radeon_pm_debug_check_in_vbl(struct radeon_device *rdev, bool finish); static void radeon_pm_update_profile(struct radeon_device *rdev); static void radeon_pm_set_clocks(struct radeon_device *rdev); -static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev); int radeon_pm_get_type_index(struct radeon_device *rdev, enum radeon_pm_state_type ps_type, @@ -80,8 +79,6 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev) radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power); } mutex_unlock(&rdev->pm.mutex); - /* allow new DPM state to be picked */ - radeon_pm_compute_clocks_dpm(rdev); } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) { if (rdev->pm.profile == PM_PROFILE_AUTO) { mutex_lock(&rdev->pm.mutex); @@ -885,8 +882,7 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev, dpm_state = POWER_STATE_TYPE_INTERNAL_3DPERF; /* balanced states don't exist at the moment */ if (dpm_state == POWER_STATE_TYPE_BALANCED) - dpm_state = rdev->pm.dpm.ac_power ? - POWER_STATE_TYPE_PERFORMANCE : POWER_STATE_TYPE_BATTERY; + dpm_state = POWER_STATE_TYPE_PERFORMANCE; restart_search: /* Pick the best power state based on current conditions */ diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c1e3862a48a4..c50620aadbd0 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -728,9 +728,6 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm, struct radeon_device *rdev; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); - if (ttm->state != tt_unpopulated) - return 0; - if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 2c18996d59c5..0d95888ccc3e 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -461,7 +461,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo { struct drm_sched_job *s_job; struct drm_sched_entity *entity, *tmp; - int i;; + int i; spin_lock(&sched->job_list_lock); list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 2fef09a56d16..d90b1cf10b27 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -49,6 +49,12 @@ static struct attribute ttm_bo_count = { .mode = S_IRUGO }; +/* default destructor */ +static void ttm_bo_default_destroy(struct ttm_buffer_object *bo) +{ + kfree(bo); +} + static inline int ttm_mem_type_from_place(const struct ttm_place *place, uint32_t *mem_type) { @@ -147,11 +153,7 @@ static void ttm_bo_release_list(struct kref *list_kref) dma_fence_put(bo->moving); reservation_object_fini(&bo->ttm_resv); mutex_destroy(&bo->wu_mutex); - if (bo->destroy) - bo->destroy(bo); - else { - kfree(bo); - } + bo->destroy(bo); ttm_mem_global_free(bdev->glob->mem_glob, acc_size); } @@ -163,7 +165,6 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) reservation_object_assert_held(bo->resv); if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) { - BUG_ON(!list_empty(&bo->lru)); man = &bdev->man[bo->mem.mem_type]; @@ -235,6 +236,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) if (bdev->need_dma32) page_flags |= TTM_PAGE_FLAG_DMA32; + if (bdev->no_retry) + page_flags |= TTM_PAGE_FLAG_NO_RETRY; + switch (bo->type) { case ttm_bo_type_device: if (zero_alloc) @@ -611,10 +615,9 @@ static void ttm_bo_delayed_workqueue(struct work_struct *work) struct ttm_bo_device *bdev = container_of(work, struct ttm_bo_device, wq.work); - if (!ttm_bo_delayed_delete(bdev, false)) { + if (!ttm_bo_delayed_delete(bdev, false)) schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 1 : HZ / 100); - } } static void ttm_bo_release(struct kref *kref) @@ -1176,7 +1179,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, ttm_mem_global_free(mem_glob, acc_size); return -EINVAL; } - bo->destroy = destroy; + bo->destroy = destroy ? destroy : ttm_bo_default_destroy; kref_init(&bo->kref); kref_init(&bo->list_kref); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 153de1bf0232..38da6903cae9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -375,8 +375,8 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, /* * TTM might be null for moves within the same region. */ - if (ttm && ttm->state == tt_unpopulated) { - ret = ttm->bdev->driver->ttm_tt_populate(ttm, ctx); + if (ttm) { + ret = ttm_tt_populate(ttm, ctx); if (ret) goto out1; } @@ -402,8 +402,9 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, PAGE_KERNEL); ret = ttm_copy_io_ttm_page(ttm, old_iomap, page, prot); - } else + } else { ret = ttm_copy_io_page(new_iomap, old_iomap, page); + } if (ret) goto out1; } @@ -556,11 +557,9 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, BUG_ON(!ttm); - if (ttm->state == tt_unpopulated) { - ret = ttm->bdev->driver->ttm_tt_populate(ttm, &ctx); - if (ret) - return ret; - } + ret = ttm_tt_populate(ttm, &ctx); + if (ret) + return ret; if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) { /* diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 60fcef1593dd..610d6714042a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -118,7 +118,6 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) int ret; int i; unsigned long address = vmf->address; - int retval = VM_FAULT_NOPAGE; struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; struct vm_area_struct cvma; @@ -158,7 +157,7 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) * (if at all) by redirecting mmap to the exporter. */ if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { - retval = VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; goto out_unlock; } @@ -169,10 +168,10 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) break; case -EBUSY: case -ERESTARTSYS: - retval = VM_FAULT_NOPAGE; + ret = VM_FAULT_NOPAGE; goto out_unlock; default: - retval = VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; goto out_unlock; } } @@ -183,12 +182,10 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) */ ret = ttm_bo_vm_fault_idle(bo, vmf); if (unlikely(ret != 0)) { - retval = ret; - - if (retval == VM_FAULT_RETRY && + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { /* The BO has already been unreserved. */ - return retval; + return ret; } goto out_unlock; @@ -196,12 +193,12 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) ret = ttm_mem_io_lock(man, true); if (unlikely(ret != 0)) { - retval = VM_FAULT_NOPAGE; + ret = VM_FAULT_NOPAGE; goto out_unlock; } ret = ttm_mem_io_reserve_vm(bo); if (unlikely(ret != 0)) { - retval = VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; goto out_io_unlock; } @@ -211,7 +208,7 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) drm_vma_node_start(&bo->vma_node); if (unlikely(page_offset >= bo->num_pages)) { - retval = VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; goto out_io_unlock; } @@ -237,8 +234,8 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) cvma.vm_page_prot); /* Allocate all page at once, most common usage */ - if (ttm->bdev->driver->ttm_tt_populate(ttm, &ctx)) { - retval = VM_FAULT_OOM; + if (ttm_tt_populate(ttm, &ctx)) { + ret = VM_FAULT_OOM; goto out_io_unlock; } } @@ -255,7 +252,7 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) } else { page = ttm->pages[page_offset]; if (unlikely(!page && i == 0)) { - retval = VM_FAULT_OOM; + ret = VM_FAULT_OOM; goto out_io_unlock; } else if (unlikely(!page)) { break; @@ -280,7 +277,7 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) if (unlikely((ret == -EBUSY) || (ret != 0 && i > 0))) break; else if (unlikely(ret != 0)) { - retval = + ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; goto out_io_unlock; } @@ -289,11 +286,12 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) if (unlikely(++page_offset >= page_last)) break; } + ret = VM_FAULT_NOPAGE; out_io_unlock: ttm_mem_io_unlock(man); out_unlock: ttm_bo_unreserve(bo); - return retval; + return ret; } static void ttm_bo_vm_open(struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 373ced0b2fc2..fa44f7b15285 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -139,12 +139,14 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, */ ttm_eu_backoff_reservation_reverse(list, entry); - if (ret == -EDEADLK && intr) { - ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, - ticket); - } else if (ret == -EDEADLK) { - ww_mutex_lock_slow(&bo->resv->lock, ticket); - ret = 0; + if (ret == -EDEADLK) { + if (intr) { + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + ticket); + } else { + ww_mutex_lock_slow(&bo->resv->lock, ticket); + ret = 0; + } } if (!ret && entry->shared) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 2b12c55a3bff..5edcd896cd53 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -741,6 +741,9 @@ out: if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC) gfp_flags |= __GFP_ZERO; + if (ttm_flags & TTM_PAGE_FLAG_NO_RETRY) + gfp_flags |= __GFP_RETRY_MAYFAIL; + /* ttm_alloc_new_pages doesn't reference pool so we can run * multiple requests in parallel. **/ @@ -893,6 +896,9 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags, if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) gfp_flags |= __GFP_ZERO; + if (flags & TTM_PAGE_FLAG_NO_RETRY) + gfp_flags |= __GFP_RETRY_MAYFAIL; + if (flags & TTM_PAGE_FLAG_DMA32) gfp_flags |= GFP_DMA32; else @@ -1063,6 +1069,28 @@ void ttm_page_alloc_fini(void) _manager = NULL; } +static void +ttm_pool_unpopulate_helper(struct ttm_tt *ttm, unsigned mem_count_update) +{ + unsigned i; + + if (mem_count_update == 0) + goto put_pages; + + for (i = 0; i < mem_count_update; ++i) { + if (!ttm->pages[i]) + continue; + + ttm_mem_global_free_page(ttm->glob->mem_glob, ttm->pages[i], + PAGE_SIZE); + } + +put_pages: + ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags, + ttm->caching_state); + ttm->state = tt_unpopulated; +} + int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; @@ -1075,8 +1103,7 @@ int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags, ttm->caching_state); if (unlikely(ret != 0)) { - ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags, - ttm->caching_state); + ttm_pool_unpopulate_helper(ttm, 0); return ret; } @@ -1084,8 +1111,7 @@ int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i], PAGE_SIZE, ctx); if (unlikely(ret != 0)) { - ttm_put_pages(ttm->pages, ttm->num_pages, - ttm->page_flags, ttm->caching_state); + ttm_pool_unpopulate_helper(ttm, i); return -ENOMEM; } } @@ -1105,18 +1131,7 @@ EXPORT_SYMBOL(ttm_pool_populate); void ttm_pool_unpopulate(struct ttm_tt *ttm) { - unsigned i; - - for (i = 0; i < ttm->num_pages; ++i) { - if (!ttm->pages[i]) - continue; - - ttm_mem_global_free_page(ttm->glob->mem_glob, ttm->pages[i], - PAGE_SIZE); - } - ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags, - ttm->caching_state); - ttm->state = tt_unpopulated; + ttm_pool_unpopulate_helper(ttm, ttm->num_pages); } EXPORT_SYMBOL(ttm_pool_unpopulate); diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index a88051552ace..b122f6eee94c 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -210,6 +210,7 @@ static ssize_t ttm_pool_store(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct ttm_pool_manager, kobj); int chars; unsigned val; + chars = sscanf(buffer, "%u", &val); if (chars == 0) return size; @@ -217,11 +218,11 @@ static ssize_t ttm_pool_store(struct kobject *kobj, struct attribute *attr, /* Convert kb to number of pages */ val = val / (PAGE_SIZE >> 10); - if (attr == &ttm_page_pool_max) + if (attr == &ttm_page_pool_max) { m->options.max_size = val; - else if (attr == &ttm_page_pool_small) + } else if (attr == &ttm_page_pool_small) { m->options.small = val; - else if (attr == &ttm_page_pool_alloc_size) { + } else if (attr == &ttm_page_pool_alloc_size) { if (val > NUM_PAGES_TO_ALLOC*8) { pr_err("Setting allocation size to %lu is not allowed. Recommended size is %lu\n", NUM_PAGES_TO_ALLOC*(PAGE_SIZE >> 7), @@ -389,14 +390,12 @@ static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page) { struct page *page = d_page->p; unsigned i, num_pages; - int ret; /* Don't set WB on WB page pool. */ if (!(pool->type & IS_CACHED)) { num_pages = pool->size / PAGE_SIZE; for (i = 0; i < num_pages; ++i, ++page) { - ret = set_pages_array_wb(&page, 1); - if (ret) { + if (set_pages_array_wb(&page, 1)) { pr_err("%s: Failed to set %d pages to wb!\n", pool->dev_name, 1); } @@ -681,10 +680,10 @@ err_mem: static struct dma_pool *ttm_dma_find_pool(struct device *dev, enum pool_type type) { - struct dma_pool *pool, *tmp, *found = NULL; + struct dma_pool *pool, *tmp; if (type == IS_UNDEFINED) - return found; + return NULL; /* NB: We iterate on the 'struct dev' which has no spinlock, but * it does have a kref which we have taken. The kref is taken during @@ -697,13 +696,10 @@ static struct dma_pool *ttm_dma_find_pool(struct device *dev, * thing is at that point of time there are no pages associated with the * driver so this function will not be called. */ - list_for_each_entry_safe(pool, tmp, &dev->dma_pools, pools) { - if (pool->type != type) - continue; - found = pool; - break; - } - return found; + list_for_each_entry_safe(pool, tmp, &dev->dma_pools, pools) + if (pool->type == type) + return pool; + return NULL; } /* @@ -765,10 +761,9 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool, return -ENOMEM; } - if (count > 1) { + if (count > 1) pr_debug("%s: (%s:%d) Getting %d pages\n", pool->dev_name, pool->name, current->pid, count); - } for (i = 0, cpages = 0; i < count; ++i) { dma_p = __ttm_dma_alloc_page(pool); @@ -920,6 +915,9 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge) gfp_flags &= ~__GFP_COMP; } + if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY) + gfp_flags |= __GFP_RETRY_MAYFAIL; + return gfp_flags; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 5a046a3c543a..39c44e301c72 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -50,19 +50,25 @@ /** * Allocates storage for pointers to the pages that back the ttm. */ -static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm) +static int ttm_tt_alloc_page_directory(struct ttm_tt *ttm) { ttm->pages = kvmalloc_array(ttm->num_pages, sizeof(void*), GFP_KERNEL | __GFP_ZERO); + if (!ttm->pages) + return -ENOMEM; + return 0; } -static void ttm_dma_tt_alloc_page_directory(struct ttm_dma_tt *ttm) +static int ttm_dma_tt_alloc_page_directory(struct ttm_dma_tt *ttm) { ttm->ttm.pages = kvmalloc_array(ttm->ttm.num_pages, sizeof(*ttm->ttm.pages) + sizeof(*ttm->dma_address), GFP_KERNEL | __GFP_ZERO); + if (!ttm->ttm.pages) + return -ENOMEM; ttm->dma_address = (void *) (ttm->ttm.pages + ttm->ttm.num_pages); + return 0; } #ifdef CONFIG_X86 @@ -197,8 +203,7 @@ int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, ttm->state = tt_unpopulated; ttm->swap_storage = NULL; - ttm_tt_alloc_page_directory(ttm); - if (!ttm->pages) { + if (ttm_tt_alloc_page_directory(ttm)) { ttm_tt_destroy(ttm); pr_err("Failed allocating page table\n"); return -ENOMEM; @@ -230,8 +235,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev, ttm->swap_storage = NULL; INIT_LIST_HEAD(&ttm_dma->pages_list); - ttm_dma_tt_alloc_page_directory(ttm_dma); - if (!ttm->pages) { + if (ttm_dma_tt_alloc_page_directory(ttm_dma)) { ttm_tt_destroy(ttm); pr_err("Failed allocating page table\n"); return -ENOMEM; @@ -272,7 +276,7 @@ int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem, if (ttm->state == tt_bound) return 0; - ret = ttm->bdev->driver->ttm_tt_populate(ttm, ctx); + ret = ttm_tt_populate(ttm, ctx); if (ret) return ret; @@ -301,7 +305,11 @@ int ttm_tt_swapin(struct ttm_tt *ttm) swap_space = swap_storage->f_mapping; for (i = 0; i < ttm->num_pages; ++i) { - from_page = shmem_read_mapping_page(swap_space, i); + gfp_t gfp_mask = mapping_gfp_mask(swap_space); + + gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? __GFP_RETRY_MAYFAIL : 0); + from_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask); + if (IS_ERR(from_page)) { ret = PTR_ERR(from_page); goto out_err; @@ -344,16 +352,22 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage) pr_err("Failed allocating swap storage\n"); return PTR_ERR(swap_storage); } - } else + } else { swap_storage = persistent_swap_storage; + } swap_space = swap_storage->f_mapping; for (i = 0; i < ttm->num_pages; ++i) { + gfp_t gfp_mask = mapping_gfp_mask(swap_space); + + gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? __GFP_RETRY_MAYFAIL : 0); + from_page = ttm->pages[i]; if (unlikely(from_page == NULL)) continue; - to_page = shmem_read_mapping_page(swap_space, i); + + to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask); if (IS_ERR(to_page)) { ret = PTR_ERR(to_page); goto out_err; @@ -378,6 +392,14 @@ out_err: return ret; } +int ttm_tt_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) +{ + if (ttm->state != tt_unpopulated) + return 0; + + return ttm->bdev->driver->ttm_tt_populate(ttm, ctx); +} + static void ttm_tt_clear_mapping(struct ttm_tt *ttm) { pgoff_t i; |