120 files changed, 2332 insertions, 613 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 14f68c028126..5ffb07b02810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -234,14 +234,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s }) /* GPUVM API */ -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid, - void **vm, void **process_info, - struct dma_fence **ef); int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, void **vm, void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e93850f2f3b1..7d4118c8128a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -445,22 +445,19 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= AMDGPU_VM_MTYPE_RW; + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; if (adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= AMDGPU_VM_MTYPE_NC; + mapping_flags |= AMDGPU_VM_MTYPE_UC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { snoop = true; - if (adev->gmc.xgmi.connected_to_cpu) - /* system memory uses NC on A+A */ - mapping_flags |= AMDGPU_VM_MTYPE_NC; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + mapping_flags |= coherent ? 
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: @@ -1037,41 +1034,6 @@ create_evict_fence_fail: return ret; } -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid, - void **vm, void **process_info, - struct dma_fence **ef) -{ - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *new_vm; - int ret; - - new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); - if (!new_vm) - return -ENOMEM; - - /* Initialize AMDGPU part of the VM */ - ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid); - if (ret) { - pr_err("Failed init vm ret %d\n", ret); - goto amdgpu_vm_init_fail; - } - - /* Initialize KFD part of the VM and process info */ - ret = init_kfd_vm(new_vm, process_info, ef); - if (ret) - goto init_kfd_vm_fail; - - *vm = (void *) new_vm; - - return 0; - -init_kfd_vm_fail: - amdgpu_vm_fini(adev, new_vm); -amdgpu_vm_init_fail: - kfree(new_vm); - return ret; -} - int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, void **vm, void **process_info, @@ -1138,21 +1100,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, } } -void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) -{ - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - - if (WARN_ON(!kgd || !vm)) - return; - - pr_debug("Destroying process vm %p\n", vm); - - /* Release the VM context */ - amdgpu_vm_fini(adev, avm); - kfree(vm); -} - void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 9a2f811450ed..2e622c1675d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -908,6 +908,19 @@ int amdgpu_display_gem_fb_verify_and_init( &amdgpu_fb_funcs); if (ret) goto err; + /* Verify that the modifier is supported. */ + if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0])) { + struct drm_format_name_buf format_name; + drm_dbg_kms(dev, + "unsupported pixel format %s / modifier 0x%llx\n", + drm_get_format_name(mode_cmd->pixel_format, + &format_name), + mode_cmd->modifier[0]); + + ret = -EINVAL; + goto err; + } ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e0c4f7c7f1b9..baa980a477d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -291,8 +291,8 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: - r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, attach->dev, - dir, &sgt); + r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, 0, + bo->tbo.base.size, attach->dev, dir, &sgt); if (r) return ERR_PTR(r); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index d8f131ed10cb..922938931e1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -185,7 +185,7 @@ uint amdgpu_ras_mask = 0xffffffff; int amdgpu_bad_page_threshold = -1; struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { .timeout_fatal_disable = false, - .period = 0x23, /* default to max. 
timeout = 1 << 0x23 cycles */ + .period = 0x0, /* default to 0x0 (timeout disabled) */ }; /** @@ -553,7 +553,7 @@ module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_di * DOC: timeout_period (uint) * Modify the watchdog timeout max_cycles as (1 << period) */ -MODULE_PARM_DESC(timeout_period, "watchdog timeout period (1 to 0x23(default), timeout maxCycles = (1 << period)"); +MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0 = timeout disabled, 1 ~ 0x23 = timeout maxcycles = (1 << period))"); module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 4c5c19820d37..4f10c4529840 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -205,7 +205,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, struct drm_gem_object *gobj = NULL; struct amdgpu_bo *abo = NULL; int ret; - unsigned long tmp; memset(&mode_cmd, 0, sizeof(mode_cmd)); mode_cmd.width = sizes->surface_width; @@ -246,8 +245,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info->fbops = &amdgpufb_ops; - tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start; - info->fix.smem_start = adev->gmc.aper_base + tmp; + info->fix.smem_start = amdgpu_gmc_vram_cpu_pa(adev, abo); info->fix.smem_len = amdgpu_bo_size(abo); info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 4d32233cde92..c39ed9eb0987 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -487,6 +487,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: if (amdgpu_tmz == 0) { adev->gmc.tmz_enabled = false; dev_info(adev->dev, @@ -497,7 +498,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) "Trusted Memory Zone (TMZ) feature enabled\n"); } break; - case CHIP_RENOIR: case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: @@ -661,8 +661,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) u64 vram_addr = adev->vm_manager.vram_base_offset - adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; u64 vram_end = vram_addr + vram_size; - u64 gart_ptb_gpu_pa = amdgpu_bo_gpu_offset(adev->gart.bo) + - adev->vm_manager.vram_base_offset - adev->gmc.vram_start; + u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo); flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE; flags |= AMDGPU_PTE_WRITEABLE; @@ -685,3 +684,39 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) /* Requires gart_ptb_gpu_pa to be 4K aligned */ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags); } + +/** + * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC + * address + * + * @adev: amdgpu_device pointer + * @mc_addr: MC address of buffer + */ +uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr) +{ + return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; +} + +/** + * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from + * GPU's view + * + * @adev: amdgpu_device pointer + * @bo: amdgpu buffer object + */ +uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo)); } + +/** + * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's
physical address + * from CPU's view + * + * @adev: amdgpu_device pointer + * @bo: amdgpu buffer object + */ +uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index cbb7735c6988..9d11c02a3938 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -218,15 +218,6 @@ struct amdgpu_gmc { */ u64 fb_start; u64 fb_end; - /* In the case of use GART table for vmid0 FB access, [fb_start, fb_end] - * will be squeezed to GART aperture. But we have a PSP FW issue to fix - * for now. To temporarily workaround the PSP FW issue, added below two - * variables to remember the original fb_start/end to re-enable FB - * aperture to workaround the PSP FW issue. Will delete it after we - * get a proper PSP FW fix. - */ - u64 fb_start_original; - u64 fb_end_original; unsigned vram_width; u64 real_vram_size; int vram_mtrr; @@ -341,4 +332,7 @@ amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev); void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev); +uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); +uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); +uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 148a3b481b12..a2fe2dac32c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -76,6 +76,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, } ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo); + /* flush the cache before committing the IB */ + ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC; if (!vm) ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 19c0a3655228..82e9ecf84352 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -519,8 +519,10 @@ static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry, pmu_entry->pmu.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), GFP_KERNEL); - if (!pmu_entry->pmu.attr_groups) + if (!pmu_entry->pmu.attr_groups) { + ret = -ENOMEM; goto err_attr_group; + } snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", pmu_entry->pmu_file_prefix, adev_to_drm(pmu_entry->adev)->primary->index); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9e769cf6095b..a09483beb968 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -328,8 +328,12 @@ psp_cmd_submit_buf(struct psp_context *psp, static void psp_prep_tmr_cmd_buf(struct psp_context *psp, struct psp_gfx_cmd_resp *cmd, - uint64_t tmr_mc, uint32_t size) + uint64_t tmr_mc, struct amdgpu_bo *tmr_bo) { + struct amdgpu_device *adev = psp->adev; + uint32_t size = amdgpu_bo_size(tmr_bo); + uint64_t tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo); + if (amdgpu_sriov_vf(psp->adev)) cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; else @@ -337,6 +341,9 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp, cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi =
upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; + cmd->cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1; + cmd->cmd.cmd_setup_tmr.system_phy_addr_lo = lower_32_bits(tmr_pa); + cmd->cmd.cmd_setup_tmr.system_phy_addr_hi = upper_32_bits(tmr_pa); } static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd, @@ -407,16 +414,6 @@ static int psp_tmr_init(struct psp_context *psp) AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, pptr); - /* workaround the tmr_mc_addr: - * PSP requires an address in FB aperture. Right now driver produce - * tmr_mc_addr in the GART aperture. Convert it back to FB aperture - * for PSP. Will revert it after we get a fix from PSP FW. - */ - if (psp->adev->asic_type == CHIP_ALDEBARAN) { - psp->tmr_mc_addr -= psp->adev->gmc.fb_start; - psp->tmr_mc_addr += psp->adev->gmc.fb_start_original; - } - return ret; } @@ -466,8 +463,7 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, - amdgpu_bo_size(psp->tmr_bo)); + psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); @@ -561,7 +557,7 @@ static int psp_boot_config_set(struct amdgpu_device *adev) struct psp_context *psp = &adev->psp; struct psp_gfx_cmd_resp *cmd = psp->cmd; - if (adev->asic_type != CHIP_SIENNA_CICHLID) + if (adev->asic_type != CHIP_SIENNA_CICHLID || amdgpu_sriov_vf(adev)) return 0; memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 0541196ae1ed..b0d2fc9454ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -114,7 +114,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre if (amdgpu_ras_check_bad_page(adev, address)) { dev_warn(adev->dev, - "RAS WARN: 0x%llx has been marked as bad page!\n", + "RAS WARN: 0x%llx has already been marked as bad page!\n", address); return 0; } @@ -221,18 +221,17 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, op = 1; else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) op = 2; - else if (sscanf(str, "retire_page") == 0) + else if (strstr(str, "retire_page") != NULL) op = 3; else if (str[0] && str[1] && str[2] && str[3]) /* ascii string, but commands are not matched. */ return -EINVAL; if (op != -1) { - if (op == 3) { - if (sscanf(str, "%*s %llu", &address) != 1) - if (sscanf(str, "%*s 0x%llx", &address) != 1) - return -EINVAL; + if (sscanf(str, "%*s 0x%llx", &address) != 1 && + sscanf(str, "%*s %llu", &address) != 1) + return -EINVAL; data->op = op; data->inject.address = address; @@ -255,11 +254,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, data->op = op; if (op == 2) { - if (sscanf(str, "%*s %*s %*s %u %llu %llu", - &sub_block, &address, &value) != 3) - if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", - &sub_block, &address, &value) != 3) - return -EINVAL; + if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", + &sub_block, &address, &value) != 3 && + sscanf(str, "%*s %*s %*s %u %llu %llu", + &sub_block, &address, &value) != 3) + return -EINVAL; data->head.sub_block_index = sub_block; data->inject.address = address; data->inject.value = value; @@ -278,7 +277,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, /** * DOC: AMDGPU RAS debugfs control interface * - * It accepts struct ras_debug_if who has two members. 
+ * The control interface accepts struct ras_debug_if which has two members. * * First member: ras_debug_if::head or ras_debug_if::inject. * * @@ -303,32 +302,33 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * * How to use the interface? * - * Programs + * In a program * - * Copy the struct ras_debug_if in your codes and initialize it. - * Write the struct to the control node. + * Copy the struct ras_debug_if in your code and initialize it. + * Write the struct to the control interface. * - * Shells + * From shell * * .. code-block:: bash * - * echo op block [error [sub_block address value]] > .../ras/ras_ctrl + * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl + * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl + * echo "inject <block> <error> <sub-block> <address> <value>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl - * Parameters: + * Where N is the card you want to affect. * - * op: disable, enable, inject - * disable: only block is needed - * enable: block and error are needed - * inject: error, address, value are needed - * block: umc, sdma, gfx, ......... + * "disable" requires only the block. + * "enable" requires the block and error type. + * "inject" requires the block, error type, address, and value. + * The block is one of: umc, sdma, gfx, etc. * see ras_block_string[] for details - * error: ue, ce - * ue: multi_uncorrectable - * ce: single_correctable - * sub_block: - * sub block index, pass 0 if there is no sub block + * The error type is one of: ue, ce, where + * ue is multi-uncorrectable + * ce is single-correctable + * The sub-block is the sub-block index, pass 0 if there is no sub-block. + * The address and value are hexadecimal numbers, leading 0x is optional. * - * here are some examples for bash commands: + * For instance, * * .. code-block:: bash * @@ -336,17 +336,17 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl * - * How to check the result? + * How to check the result of the operation? * - * For disable/enable, please check ras features at + * To check disable/enable, see the "ras" features at * /sys/class/drm/card[0/1/2...]/device/ras/features * - * For inject, please check corresponding err count at - * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count + * To check inject, see the corresponding error count at + * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count * * .. note:: * Operations are only allowed on blocks which are supported. - * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask * to see which blocks support RAS on a particular asic. 
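 *
 * Editor's note: a minimal user-space sketch of the shell path above, for
 * scripting convenience. It writes the same text command the echo examples
 * use to the same ras_ctrl node (card 0 assumed, error handling trimmed);
 * this is an illustration, not part of the original interface documentation:
 *
 * .. code-block:: c
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		/* same command grammar as the bash examples above */
 *		const char cmd[] = "inject umc ce 0 0 0";
 *		int fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		if (write(fd, cmd, strlen(cmd)) < 0) {
 *			close(fd);
 *			return 1;
 *		}
 *		close(fd);
 *		return 0;
 *	}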
* */ @@ -367,11 +367,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * if (ret) return -EINVAL; - if (data.op == 3) - { + if (data.op == 3) { ret = amdgpu_reserve_page_direct(adev, data.inject.address); - - if (ret) + if (!ret) return size; else return ret; @@ -503,6 +501,12 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; + + if (obj->adev->asic_type == CHIP_ALDEBARAN) { + if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) + DRM_WARN("Failed to reset error counter and error status"); + } + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, "ce", info.ce_count); } @@ -1269,6 +1273,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device * &amdgpu_ras_debugfs_ctrl_ops); debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev, &amdgpu_ras_debugfs_eeprom_ops); + debugfs_create_u32("bad_page_cnt_threshold", 0444, dir, + &con->bad_page_cnt_threshold); /* * After one uncorrectable error happens, usually GPU recovery will diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index dec0db8b0b13..9e38475e0f8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -112,6 +112,7 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man); u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo); int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, struct ttm_resource *mem, + u64 offset, u64 size, struct device *dev, enum dma_data_direction dir, struct sg_table **sgt); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 592a2dd16493..bce105e2973e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -25,6 +25,7 @@ #include <linux/dma-mapping.h> #include "amdgpu.h" #include "amdgpu_vm.h" +#include "amdgpu_res_cursor.h" #include "amdgpu_atomfirmware.h" #include "atom.h" @@ -565,6 +566,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, * * @adev: amdgpu device pointer * @mem: TTM memory object + * @offset: byte offset from the base of VRAM BO + * @length: number of bytes to export in sg_table * @dev: the other device * @dir: dma direction * @sgt: resulting sg table * @@ -573,39 +576,47 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, */ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, struct ttm_resource *mem, + u64 offset, u64 length, struct device *dev, enum dma_data_direction dir, struct sg_table **sgt) { - struct drm_mm_node *node; + struct amdgpu_res_cursor cursor; struct scatterlist *sg; int num_entries = 0; - unsigned int pages; int i, r; *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL); if (!*sgt) return -ENOMEM; - for (pages = mem->num_pages, node = mem->mm_node; - pages; pages -= node->size, ++node) - ++num_entries; + /* Determine the number of DRM_MM nodes to export */ + amdgpu_res_first(mem, offset, length, &cursor); + while (cursor.remaining) { + num_entries++; + amdgpu_res_next(&cursor, cursor.size); + } r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL); if (r) goto error_free; + /* Initialize scatterlist nodes of sg_table */ for_each_sgtable_sg((*sgt), sg, i) sg->length = 0; - node = mem->mm_node; + /* + * Walk down DRM_MM nodes to populate scatterlist nodes + * @note: Use the iterator API to get the first DRM_MM node + * and the number of bytes from it. 
Access the following + * DRM_MM node(s) if more of the buffer needs to be exported */ + amdgpu_res_first(mem, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { - phys_addr_t phys = (node->start << PAGE_SHIFT) + - adev->gmc.aper_base; - size_t size = node->size << PAGE_SHIFT; + phys_addr_t phys = cursor.start + adev->gmc.aper_base; + size_t size = cursor.size; dma_addr_t addr; - ++node; addr = dma_map_resource(dev, phys, size, dir, DMA_ATTR_SKIP_CPU_SYNC); r = dma_mapping_error(dev, addr); @@ -615,7 +626,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg_set_page(sg, NULL, size, 0); sg_dma_address(sg) = addr; sg_dma_len(sg) = size; + + amdgpu_res_next(&cursor, cursor.size); } + return 0; error_unmap: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 06811a1f4625..a078a38c2cee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1587,6 +1587,9 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, err = 0; adev->gfx.mec2_fw = NULL; } + } else { + adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; + adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index 830080ff90d8..b4789dfc2bb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -994,7 +994,7 @@ static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, return ret; } -static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs = +static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) @@ -1007,15 +1007,21 @@ static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_rdrsp_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_rdrsp_status_regs.instance; + for (i = 0; i < gfx_v9_4_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_ea_err_status_regs.instance; j++) { gfx_v9_4_select_se_sh(adev, i, 0, j); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( - gfx_v9_4_rdrsp_status_regs)); - if (reg_value) + gfx_v9_4_ea_err_status_regs)); + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { + /* SDP read/write error/parity error in FUE_IS_FATAL mode + * can cause system fatal error in arcturus. 
Harvest the error + * status before GPU reset */ dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n", j, reg_value); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 9ca76a3ac38c..a30c7c10cd9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = { REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) }, }; -static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs = +static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, @@ -997,8 +997,9 @@ static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev, blk->clear); /* print the edc count */ - gfx_v9_4_2_log_utc_edc_count(adev, blk, j, sec_cnt, - ded_cnt); + if (sec_cnt || ded_cnt) + gfx_v9_4_2_log_utc_edc_count(adev, blk, j, sec_cnt, + ded_cnt); } } @@ -1040,11 +1041,11 @@ static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev) uint32_t i, j; mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; + for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance; j++) { gfx_v9_4_2_select_se_sh(adev, i, 0, j); - WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10); + WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10); } } gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -1089,17 +1090,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; + for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance; j++) { gfx_v9_4_2_select_se_sh(adev, i, 0, j); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( - gfx_v9_4_2_rdrsp_status_regs)); - if (reg_value) + gfx_v9_4_2_ea_err_status_regs)); + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n", j, reg_value); + } /* clear after read */ - WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10); + WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10); } } @@ -1112,19 +1116,19 @@ static void gfx_v9_4_2_query_utc_err_status(struct amdgpu_device *adev) uint32_t data; data = RREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS); - if (!data) { + if (data) { dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data); WREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS, 0x3); } data = RREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS); - if (!data) { + if (data) { dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data); WREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS, 0x3); } data = RREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS); - if (!data) { + if (data) { dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data); WREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS, 0x3); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d189507dcef0..1e4678cb98f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -120,8 +120,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, @@ -141,21 +140,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * FB aperture and AGP aperture. Disable them. */ if (adev->gmc.pdb0_bo) { - if (adev->asic_type == CHIP_ALDEBARAN) { - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, adev->gmc.fb_end_original >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, adev->gmc.fb_start_original >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFF); - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->gmc.fb_start_original >> 18); - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, adev->gmc.fb_end_original >> 18); - } else { - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 0); - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFF); - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); - } + WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 0); + WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFF); + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 2aecc6a243e8..14c1c1a297dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -165,8 +165,7 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start - + adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 410fd3a1a388..41807817de7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -164,8 +164,7 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. 
*/ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start - + adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 2bfd620576f2..498b28a35f5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -568,8 +568,7 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) - *addr = adev->vm_manager.vram_base_offset + *addr - - adev->gmc.vram_start; + *addr = amdgpu_gmc_vram_mc2pa(adev, *addr); BUG_ON(*addr & 0xFFFF00000000003FULL); if (!adev->gmc.translate_further) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c82d82da2c73..455bb91060d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -574,7 +574,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if (entry->vmid_src == AMDGPU_GFXHUB_0) + if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + (adev->asic_type < CHIP_ALDEBARAN)) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); @@ -802,7 +803,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * be cleared to avoid a false ACK due to the new fast * GRBM interface. */ - if (vmhub == AMDGPU_GFXHUB_0) + if ((vmhub == AMDGPU_GFXHUB_0) && + (adev->asic_type < CHIP_ALDEBARAN)) RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); @@ -1048,8 +1050,7 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) - *addr = adev->vm_manager.vram_base_offset + *addr - - adev->gmc.vram_start; + *addr = amdgpu_gmc_vram_mc2pa(adev, *addr); BUG_ON(*addr & 0xFFFF00000000003FULL); if (!adev->gmc.translate_further) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index aa9be5612c89..a99953833820 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -114,8 +114,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) return; /* Set default page address. 
*/ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 7977a7879b32..0103a5ab28e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -47,8 +47,6 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev) adev->gmc.fb_start = base; adev->gmc.fb_end = top; - adev->gmc.fb_start_original = base; - adev->gmc.fb_end_original = top; return base; } @@ -126,17 +124,16 @@ static void mmhub_v1_7_init_system_aperture_regs(struct amdgpu_device *adev) if (adev->gmc.pdb0_bo) { WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, 0xFFFFFF); WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, adev->gmc.fb_end_original >> 24); - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, adev->gmc.fb_start_original >> 24); - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->gmc.fb_start_original >> 18); - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, adev->gmc.fb_end_original >> 18); + WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, 0); + WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); + WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); + WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); } if (amdgpu_sriov_vf(adev)) return; /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, @@ -1287,7 +1284,7 @@ static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev) } } -static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = { +static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = { { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 }, { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 }, { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 }, @@ -1304,12 +1301,15 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev) if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) return; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) { + for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) { reg_value = - RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs[i])); - if (reg_value) + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i])); + if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n", i, reg_value); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index da7edd1ed6b2..ac76081b91d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -210,8 +210,7 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) } /* Set default page 
address. */ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1141c37432f0..a9899335d0b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -162,8 +162,7 @@ static void mmhub_v2_3_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 0cffa820ea6e..47c8dd9d1c78 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -136,8 +136,7 @@ static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); WREG32_SOC15_OFFSET( MMHUB, 0, mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, @@ -1646,9 +1645,15 @@ static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_err_status_regs); i++) { reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_err_status_regs[i])); - if (reg_value) + if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { + /* SDP read/write error/parity error in FUE_IS_FATAL mode + * can cause system fatal error in arcturus. 
Harvest the error + * status before GPU reset */ dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n", i, reg_value); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 83ea063388fd..0d2d629e2d6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -31,6 +31,28 @@ #include "vega10_enum.h" #include <uapi/linux/kfd_ioctl.h> +#define smnPCIE_LC_CNTL 0x11140280 +#define smnPCIE_LC_CNTL3 0x111402d4 +#define smnPCIE_LC_CNTL6 0x111402ec +#define smnPCIE_LC_CNTL7 0x111402f0 +#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c +#define NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK 0x00001000L +#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK 0x0000FFFFL +#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK 0xFFFF0000L +#define smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL 0x10123530 +#define smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2 0x1014008c +#define smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP 0x10140324 +#define smnPSWUSP0_PCIE_LC_CNTL2 0x111402c4 +#define smnRCC_BIF_STRAP2 0x10123488 +#define smnRCC_BIF_STRAP3 0x1012348c +#define smnRCC_BIF_STRAP5 0x10123494 +#define BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK 0x0400L +#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK 0x0000FFFFL +#define RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK 0x00004000L +#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT 0x0 +#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT 0x10 +#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT 0x0 + static void nbio_v6_1_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -256,6 +278,111 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) WREG32_PCIE(smnPCIE_CI_CNTL, data); } +static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, 0x75EB); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP2); + data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP2, data); + + def = data = RREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + if (def != data) + WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} + +static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL7, data); + + def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); + data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK; + if (def != data) + WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= 
~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP3, data); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP5); + data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP5, data); + + def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + + def = data = RREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL6); + data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK | + PCIE_LC_CNTL6__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + + nbio_v6_1_program_ltr(adev); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP3, data); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP5); + data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP5, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); +} + const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, @@ -274,4 +401,5 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .ih_control = nbio_v6_1_ih_control, .init_registers = nbio_v6_1_init_registers, .remap_hdp_registers = nbio_v6_1_remap_hdp_registers, + .program_aspm = nbio_v6_1_program_aspm, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index af44aad78171..cef929746739 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -31,7 +31,26 @@ #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> +#define smnPCIE_LC_CNTL 0x11140280 +#define smnPCIE_LC_CNTL3 0x111402d4 +#define smnPCIE_LC_CNTL6 0x111402ec +#define smnPCIE_LC_CNTL7 0x111402f0 #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +#define smnRCC_BIF_STRAP3 0x1012348c +#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK 0x0000FFFFL +#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK 0xFFFF0000L +#define smnRCC_BIF_STRAP5 0x10123494 +#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK 0x0000FFFFL +#define smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2 0x1014008c +#define BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK 0x0400L +#define smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP 0x10140324 +#define smnPSWUSP0_PCIE_LC_CNTL2 0x111402c4 +#define smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL 0x10123538 +#define smnRCC_BIF_STRAP2 0x10123488 +#define RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK 0x00004000L 
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT 0x0 +#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT 0x10 +#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT 0x0 /* * These are nbio v7_4_1 registers mask. Temporarily define these here since @@ -567,6 +586,111 @@ const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = { .ras_fini = amdgpu_nbio_ras_fini, }; +static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, 0x75EB); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP2); + data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP2, data); + + def = data = RREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + if (def != data) + WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} + +static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL7, data); + + def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); + data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK; + if (def != data) + WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP3, data); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP5); + data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP5, data); + + def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + + def = data = RREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL6); + data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK | + PCIE_LC_CNTL6__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + + nbio_v7_4_program_ltr(adev); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_PCIE(smnRCC_BIF_STRAP3, data); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP5); + data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) 
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL, data); + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); +} + const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, @@ -587,4 +711,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .ih_control = nbio_v7_4_ih_control, .init_registers = nbio_v7_4_init_registers, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, + .program_aspm = nbio_v7_4_program_aspm, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 46d4bbabce75..d54af7f8801b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -601,8 +601,7 @@ static void nv_program_aspm(struct amdgpu_device *adev) if (amdgpu_aspm != 1) return; - if ((adev->asic_type >= CHIP_SIENNA_CICHLID) && - !(adev->flags & AMD_IS_APU) && + if (!(adev->flags & AMD_IS_APU) && (adev->nbio.funcs->program_aspm)) adev->nbio.funcs->program_aspm(adev); @@ -934,12 +933,7 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, if (adev->gfx.funcs->update_perfmon_mgcg) adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); - /* - * The ASPM function is not fully enabled and verified on - * Navi yet. Temporarily skip this until ASPM enabled. - */ - if ((adev->asic_type >= CHIP_SIENNA_CICHLID) && - !(adev->flags & AMD_IS_APU) && + if (!(adev->flags & AMD_IS_APU) && (adev->nbio.funcs->enable_aspm)) adev->nbio.funcs->enable_aspm(adev, !enter); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index dd4d65f7e0f0..96064c343163 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -185,10 +185,19 @@ struct psp_gfx_cmd_setup_tmr uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */ uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ + union { + struct { + uint32_t sriov_enabled:1; /* whether the device runs under SR-IOV*/ + uint32_t virt_phy_addr:1; /* driver passes both virtual and physical address to PSP*/ + uint32_t reserved:30; + } bitfield; + uint32_t tmr_flags; + }; + uint32_t system_phy_addr_lo; /* bits [31:0] of system physical address of TMR buffer (must be 4 KB aligned) */ + uint32_t system_phy_addr_hi; /* bits [63:32] of system physical address of TMR buffer */ }; - /* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. 
*/ enum psp_gfx_fw_type { GFX_FW_TYPE_NONE = 0, /* */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c index 6fcb95c89999..bf95007f0843 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c @@ -160,6 +160,7 @@ static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = { }; static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev, + uint32_t reg_offset, uint32_t value, uint32_t instance, uint32_t *sec_count) @@ -169,6 +170,9 @@ static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev, /* double bits error (multiple bits) error detection is not supported */ for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) { + if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset) + continue; + /* the SDMA_EDC_COUNTER register in each sdma instance * shares the same sed shift_mask * */ @@ -197,13 +201,30 @@ static int sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, reg_value = RREG32(reg_offset); /* double bit error is not supported */ if (reg_value) - sdma_v4_4_get_ras_error_count(adev, reg_value, instance, &sec_count); - /* err_data->ce_count should be initialized to 0 - * before calling into this function */ - err_data->ce_count += sec_count; - /* double bit error is not supported - * set ue count to 0 */ - err_data->ue_count = 0; + sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value, + instance, &sec_count); + + reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2); + reg_value = RREG32(reg_offset); + /* double bit error is not supported */ + if (reg_value) + sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value, + instance, &sec_count); + + /* + * err_data->ue_count should be initialized to 0 + * before calling into this function + * + * SDMA RAS supports single bit uncorrectable error detection. + * So, increment uncorrectable error count. + */ + err_data->ue_count += sec_count; + + /* + * SDMA RAS does not support correctable errors. + * Set ce count to 0. + */ + err_data->ce_count = 0; return 0; }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 93f826a7d3f0..b1ad9e52b234 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -370,6 +370,33 @@ static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring, } /** + * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse + * + * @ring: amdgpu ring pointer + * + * flush the IB by graphics cache rinse. 
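 *
 * Editor's note: a simplified, illustrative sketch (not the literal amdgpu
 * submission code) of how this hook is reached. The amdgpu_ib.c hunk earlier
 * in this diff sets AMDGPU_IB_FLAG_EMIT_MEM_SYNC on new IBs, and IB
 * submission conceptually dispatches to the ring's hook along these lines:
 *
 * .. code-block:: c
 *
 *	/* emit the cache rinse ahead of the IB packets */
 *	if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) &&
 *	    ring->funcs->emit_mem_sync)
 *		ring->funcs->emit_mem_sync(ring);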
+ */ +static void sdma_v5_2_ring_emit_mem_sync(struct amdgpu_ring *ring) +{ + uint32_t gcr_cntl = + SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV | + SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV | + SDMA_GCR_GLI_INV(1); + + /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) | + SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) | + SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) | + SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0)); +} + +/** * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring * * @ring: amdgpu ring pointer @@ -1663,6 +1690,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { 10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */ .emit_ib = sdma_v5_2_ring_emit_ib, + .emit_mem_sync = sdma_v5_2_ring_emit_mem_sync, .emit_fence = sdma_v5_2_ring_emit_fence, .emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync, .emit_vm_flush = sdma_v5_2_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 5c5eb3aed1b3..d80e12b80c7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -816,11 +816,12 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev) static void soc15_program_aspm(struct amdgpu_device *adev) { - - if (amdgpu_aspm == 0) + if (amdgpu_aspm != 1) return; - /* todo */ + if (!(adev->flags & AMD_IS_APU) && + (adev->nbio.funcs->program_aspm)) + adev->nbio.funcs->program_aspm(adev); } static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index ea338de5818a..735ebbd1148f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -81,6 +81,30 @@ #include "mxgpu_vi.h" #include "amdgpu_dm.h" +#define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6 +#define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L +#define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L +#define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_1_OVERRIDE_MASK 0x00000004L +#define PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_2_OVERRIDE_MASK 0x00000008L +#define PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_1_OVERRIDE_MASK 0x00000010L +#define ixPCIE_L1_PM_SUB_CNTL 0x378 +#define PCIE_L1_PM_SUB_CNTL__ASPM_L1_2_EN_MASK 0x00000004L +#define PCIE_L1_PM_SUB_CNTL__ASPM_L1_1_EN_MASK 0x00000008L +#define PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_2_EN_MASK 0x00000001L +#define PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_1_EN_MASK 0x00000002L +#define PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK 0x00200000L +#define LINK_CAP 0x64 +#define PCIE_LINK_CAP__CLOCK_POWER_MANAGEMENT_MASK 0x00040000L +#define ixCPM_CONTROL 0x1400118 +#define ixPCIE_LC_CNTL7 0x100100BC +#define PCIE_LC_CNTL7__LC_L1_SIDEBAND_CLKREQ_PDWN_EN_MASK 0x00000400L +#define PCIE_LC_CNTL__LC_L0S_INACTIVITY_DEFAULT 0x00000007 +#define PCIE_LC_CNTL__LC_L1_INACTIVITY_DEFAULT 0x00000009 +#define CPM_CONTROL__CLKREQb_UNGATE_TXCLK_ENABLE_MASK 0x01000000L +#define PCIE_L1_PM_SUB_CNTL 0x378 +#define ASIC_IS_P22(asic_type, rid) ((asic_type >= CHIP_POLARIS10) && \ + (asic_type <= 
CHIP_POLARIS12) && \ + (rid >= 0x6E)) /* Topaz */ static const struct amdgpu_video_codecs topaz_video_codecs_encode = { @@ -1091,13 +1115,178 @@ static void vi_pcie_gen3_enable(struct amdgpu_device *adev) /* todo */ } +static void vi_enable_aspm(struct amdgpu_device *adev) +{ + u32 data, orig; + + orig = data = RREG32_PCIE(ixPCIE_LC_CNTL); + data |= PCIE_LC_CNTL__LC_L0S_INACTIVITY_DEFAULT << + PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + data |= PCIE_LC_CNTL__LC_L1_INACTIVITY_DEFAULT << + PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + data |= PCIE_LC_CNTL__LC_DELAY_L1_EXIT_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_CNTL, data); +} + static void vi_program_aspm(struct amdgpu_device *adev) { + u32 data, data1, orig; + bool bL1SS = false; + bool bClkReqSupport = true; - if (amdgpu_aspm == 0) + if (amdgpu_aspm != 1) return; - /* todo */ + if (adev->flags & AMD_IS_APU || + adev->asic_type < CHIP_POLARIS10) + return; + + orig = data = RREG32_PCIE(ixPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_CNTL, data); + + orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL); + data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK; + data |= 0x0024 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT; + data |= PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_N_FTS_CNTL, data); + + orig = data = RREG32_PCIE(ixPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_CNTL3, data); + + orig = data = RREG32_PCIE(ixPCIE_P_CNTL); + data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_P_CNTL, data); + + data = RREG32_PCIE(ixPCIE_LC_L1_PM_SUBSTATE); + pci_read_config_dword(adev->pdev, PCIE_L1_PM_SUB_CNTL, &data1); + if (data & PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK && + (data & (PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK | + PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_1_OVERRIDE_MASK | + PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_2_OVERRIDE_MASK | + PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_1_OVERRIDE_MASK))) { + bL1SS = true; + } else if (data1 & (PCIE_L1_PM_SUB_CNTL__ASPM_L1_2_EN_MASK | + PCIE_L1_PM_SUB_CNTL__ASPM_L1_1_EN_MASK | + PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_2_EN_MASK | + PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_1_EN_MASK)) { + bL1SS = true; + } + + orig = data = RREG32_PCIE(ixPCIE_LC_CNTL6); + data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_CNTL6, data); + + orig = data = RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL); + data |= PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL, data); + + pci_read_config_dword(adev->pdev, LINK_CAP, &data); + if (!(data & PCIE_LINK_CAP__CLOCK_POWER_MANAGEMENT_MASK)) + bClkReqSupport = false; + + if (bClkReqSupport) { + orig = data = RREG32_SMC(ixTHM_CLK_CNTL); + data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK); + data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | + (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT); + if (orig != data) + WREG32_SMC(ixTHM_CLK_CNTL, data); + + orig = data = RREG32_SMC(ixMISC_CLK_CTRL); + data &= ~(MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK | + MISC_CLK_CTRL__ZCLK_SEL_MASK | MISC_CLK_CTRL__DFT_SMS_PG_CLK_SEL_MASK); + data |= (1 << MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT) | + (1 << MISC_CLK_CTRL__ZCLK_SEL__SHIFT); + data |= (0x20 << 
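The L1 substates probe above checks the controller's override register first and only then falls back to the standard PCI config window. Pulled out as a helper, using only the register layouts this patch defines, the decision reads as follows (a sketch, not part of the patch):

static bool vi_l1ss_enabled(struct amdgpu_device *adev)
{
	u32 lc, cfg;

	lc = RREG32_PCIE(ixPCIE_LC_L1_PM_SUBSTATE);
	pci_read_config_dword(adev->pdev, PCIE_L1_PM_SUB_CNTL, &cfg);

	/* Controller-side override enabled with any L1.1/L1.2 override bit set */
	if ((lc & PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK) &&
	    (lc & (PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK |
		   PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_1_OVERRIDE_MASK |
		   PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_2_OVERRIDE_MASK |
		   PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_1_OVERRIDE_MASK)))
		return true;

	/* Otherwise honor the standard L1 PM substates control in config space */
	return !!(cfg & (PCIE_L1_PM_SUB_CNTL__ASPM_L1_2_EN_MASK |
			 PCIE_L1_PM_SUB_CNTL__ASPM_L1_1_EN_MASK |
			 PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_2_EN_MASK |
			 PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_1_EN_MASK)));
}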
MISC_CLK_CTRL__DFT_SMS_PG_CLK_SEL__SHIFT); + if (orig != data) + WREG32_SMC(ixMISC_CLK_CTRL, data); + + orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL); + data |= CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK; + if (orig != data) + WREG32_SMC(ixCG_CLKPIN_CNTL, data); + + orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL_2); + data |= CG_CLKPIN_CNTL_2__ENABLE_XCLK_MASK; + if (orig != data) + WREG32_SMC(ixCG_CLKPIN_CNTL_2, data); + + orig = data = RREG32_SMC(ixMPLL_BYPASSCLK_SEL); + data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK; + data |= (4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT); + if (orig != data) + WREG32_SMC(ixMPLL_BYPASSCLK_SEL, data); + + orig = data = RREG32_PCIE(ixCPM_CONTROL); + data |= (CPM_CONTROL__REFCLK_XSTCLK_ENABLE_MASK | + CPM_CONTROL__CLKREQb_UNGATE_TXCLK_ENABLE_MASK); + if (orig != data) + WREG32_PCIE(ixCPM_CONTROL, data); + + orig = data = RREG32_PCIE(ixPCIE_CONFIG_CNTL); + data &= ~PCIE_CONFIG_CNTL__DYN_CLK_LATENCY_MASK; + data |= (0xE << PCIE_CONFIG_CNTL__DYN_CLK_LATENCY__SHIFT); + if (orig != data) + WREG32_PCIE(ixPCIE_CONFIG_CNTL, data); + + orig = data = RREG32(mmBIF_CLK_CTRL); + data |= BIF_CLK_CTRL__BIF_XSTCLK_READY_MASK; + if (orig != data) + WREG32(mmBIF_CLK_CTRL, data); + + orig = data = RREG32_PCIE(ixPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_L1_SIDEBAND_CLKREQ_PDWN_EN_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_CNTL7, data); + + orig = data = RREG32_PCIE(ixPCIE_HW_DEBUG); + data |= PCIE_HW_DEBUG__HW_01_DEBUG_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_HW_DEBUG, data); + + orig = data = RREG32_PCIE(ixPCIE_LC_CNTL2); + data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK; + if (bL1SS) + data &= ~PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_CNTL2, data); + + } + + vi_enable_aspm(adev); + + data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL); + data1 = RREG32_PCIE(ixPCIE_LC_STATUS1); + if (((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) && + data1 & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK && + data1 & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK) { + orig = data = RREG32_PCIE(ixPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_CNTL, data); + } + + if ((adev->asic_type == CHIP_POLARIS12 && + !(ASICID_IS_P23(adev->pdev->device, adev->pdev->revision))) || + ASIC_IS_P22(adev->asic_type, adev->external_rev_id)) { + orig = data = RREG32_PCIE(ixPCIE_LC_TRAINING_CNTL); + data &= ~PCIE_LC_TRAINING_CNTL__LC_DISABLE_TRAINING_BIT_ARCH_MASK; + if (orig != data) + WREG32_PCIE(ixPCIE_LC_TRAINING_CNTL, data); + } } static void vi_enable_doorbell_aperture(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 1c20458f3962..696944fa0177 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -25,6 +25,70 @@ #include "soc15_int.h" #include "kfd_device_queue_manager.h" #include "kfd_smi_events.h" +#include "amdgpu.h" + +enum SQ_INTERRUPT_WORD_ENCODING { + SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, + SQ_INTERRUPT_WORD_ENCODING_INST, + SQ_INTERRUPT_WORD_ENCODING_ERROR, +}; + +enum SQ_INTERRUPT_ERROR_TYPE { + SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0, + SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST, + SQ_INTERRUPT_ERROR_TYPE_MEMVIOL, + SQ_INTERRUPT_ERROR_TYPE_EDC_FED, +}; + +/* SQ_INTERRUPT_WORD_AUTO_CTXID */ +#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE__SHIFT 0 +#define 
SQ_INTERRUPT_WORD_AUTO_CTXID__WLT__SHIFT 1 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_BUF_FULL__SHIFT 2 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__REG_TIMESTAMP__SHIFT 3 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__CMD_TIMESTAMP__SHIFT 4 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_CMD_OVERFLOW__SHIFT 5 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_REG_OVERFLOW__SHIFT 6 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__IMMED_OVERFLOW__SHIFT 7 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_UTC_ERROR__SHIFT 8 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__SE_ID__SHIFT 24 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__ENCODING__SHIFT 26 + +#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_MASK 0x00000001 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__WLT_MASK 0x00000002 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_BUF_FULL_MASK 0x00000004 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__REG_TIMESTAMP_MASK 0x00000008 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__CMD_TIMESTAMP_MASK 0x00000010 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_CMD_OVERFLOW_MASK 0x00000020 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_REG_OVERFLOW_MASK 0x00000040 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__IMMED_OVERFLOW_MASK 0x00000080 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_UTC_ERROR_MASK 0x00000100 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__SE_ID_MASK 0x03000000 +#define SQ_INTERRUPT_WORD_AUTO_CTXID__ENCODING_MASK 0x0c000000 + +/* SQ_INTERRUPT_WORD_WAVE_CTXID */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID__DATA__SHIFT 0 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__SH_ID__SHIFT 12 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV__SHIFT 13 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__WAVE_ID__SHIFT 14 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__SIMD_ID__SHIFT 18 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__CU_ID__SHIFT 20 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID__SHIFT 24 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING__SHIFT 26 + +#define SQ_INTERRUPT_WORD_WAVE_CTXID__DATA_MASK 0x00000fff +#define SQ_INTERRUPT_WORD_WAVE_CTXID__SH_ID_MASK 0x00001000 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK 0x00002000 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__WAVE_ID_MASK 0x0003c000 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__SIMD_ID_MASK 0x000c0000 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__CU_ID_MASK 0x00f00000 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID_MASK 0x03000000 +#define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING_MASK 0x0c000000 + +#define KFD_CONTEXT_ID_GET_SQ_INT_DATA(ctx0, ctx1) \ + ((ctx0 & 0xfff) | ((ctx0 >> 16) & 0xf000) | ((ctx1 << 16) & 0xff0000)) + +#define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000 +#define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 static bool event_interrupt_isr_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -108,13 +172,15 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { uint16_t source_id, client_id, pasid, vmid; - uint32_t context_id; + uint32_t context_id0, context_id1; + uint32_t sq_intr_err, sq_int_data, encoding; source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); - context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry); if (client_id == SOC15_IH_CLIENTID_GRBM_CP || client_id == SOC15_IH_CLIENTID_SE0SH || @@ -122,10 +188,59 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, client_id == SOC15_IH_CLIENTID_SE2SH || client_id == 
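The KFD_CONTEXT_ID_GET_SQ_INT_DATA() macro above reassembles the 24-bit SQ interrupt payload that the hardware scatters across the two IH context words: bits 11:0 come from ctx0[11:0], bits 15:12 from ctx0[31:28], and bits 23:16 from ctx1[7:0]. A worked example with hypothetical register values:

/* ctx0 = 0xa0000abc, ctx1 = 0x00000025 (hypothetical values)
 *
 *   sq_int_data = (0xa0000abc & 0xfff)            ->  0x000abc
 *               | ((0xa0000abc >> 16) & 0xf000)   ->  0x00a000
 *               | ((0x00000025 << 16) & 0xff0000) ->  0x250000
 *               = 0x25aabc
 *
 *   err_type = (0x25aabc & KFD_SQ_INT_DATA__ERR_TYPE_MASK)
 *              >> KFD_SQ_INT_DATA__ERR_TYPE__SHIFT
 *            = 0x2 (SQ_INTERRUPT_ERROR_TYPE_MEMVIOL)
 */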
SOC15_IH_CLIENTID_SE3SH) { if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, context_id, 32); - else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_signal_event_interrupt(pasid, context_id0, 32); + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { + sq_int_data = KFD_CONTEXT_ID_GET_SQ_INT_DATA(context_id0, context_id1); + encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); + switch (encoding) { + case SQ_INTERRUPT_WORD_ENCODING_AUTO: + pr_debug( + "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR)); + break; + case SQ_INTERRUPT_WORD_ENCODING_INST: + pr_debug("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + sq_int_data); + break; + case SQ_INTERRUPT_WORD_ENCODING_ERROR: + sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE); + pr_warn("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + sq_intr_err); + if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && + sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { + kfd_signal_hw_exception_event(pasid); + amdgpu_amdkfd_gpu_reset(dev->kgd); + return; + } + break; + default: + break; + } + kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); } else if (client_id == SOC15_IH_CLIENTID_SDMA0 || client_id == SOC15_IH_CLIENTID_SDMA1 || @@ -136,7 +251,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, client_id == SOC15_IH_CLIENTID_SDMA6 || client_id == 
SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || client_id == SOC15_IH_CLIENTID_UTCL2) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d4241d29ea94..d97e330a5022 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -935,9 +935,6 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pdd->dev->kgd, pdd->vm); fput(pdd->drm_file); } - else if (pdd->vm) - amdgpu_amdkfd_gpuvm_destroy_process_vm( - pdd->dev->kgd, pdd->vm); if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, @@ -1375,19 +1372,18 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, struct kfd_dev *dev; int ret; + if (!drm_file) + return -EINVAL; + if (pdd->vm) - return drm_file ? -EBUSY : 0; + return -EBUSY; p = pdd->process; dev = pdd->dev; - if (drm_file) - ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( - dev->kgd, drm_file, p->pasid, - &pdd->vm, &p->kgd_process_info, &p->ef); - else - ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid, - &pdd->vm, &p->kgd_process_info, &p->ef); + ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( + dev->kgd, drm_file, p->pasid, + &pdd->vm, &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; @@ -1409,8 +1405,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, err_init_cwsr: err_reserve_ib_mem: kfd_process_device_free_bos(pdd); - if (!drm_file) - amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm); pdd->vm = NULL; return ret; @@ -1435,6 +1429,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } + if (!pdd->vm) + return ERR_PTR(-ENODEV); + /* * signal runtime-pm system to auto resume and prevent * further runtime suspend once device pdd is created until @@ -1452,10 +1449,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, if (err) goto out; - err = kfd_process_device_init_vm(pdd, NULL); - if (err) - goto out; - /* * make sure that runtime_usage counter is incremented just once * per pdd diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3c3b3441ad2..b34ab76c5f4c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2552,11 +2552,14 @@ static void handle_hpd_irq(void *param) struct drm_connector *connector = &aconnector->base; struct drm_device *dev = connector->dev; enum dc_connection_type new_connection_type = dc_connection_none; -#ifdef CONFIG_DRM_AMD_DC_HDCP struct amdgpu_device *adev = drm_to_adev(dev); +#ifdef CONFIG_DRM_AMD_DC_HDCP struct dm_connector_state *dm_con_state = to_dm_connector_state(connector->state); #endif + if (adev->dm.disable_hpd_irq) + return; + /* * In case of failure or MST no need to update connector status or notify the OS * since (for MST case) MST does this in its own context. 
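The same two-line guard now heads both HPD handlers: when adev->dm.disable_hpd_irq is set (toggled through the amdgpu_dm_disable_hpd debugfs file added in a later hunk, e.g. echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_disable_hpd), hotplug processing is suppressed entirely. As a sketch of the pattern, any additional HPD entry point would open the same way:

/* Sketch only: mirrors the guard used by handle_hpd_irq()/handle_hpd_rx_irq() */
static void example_hpd_entry(struct amdgpu_dm_connector *aconnector)
{
	struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);

	if (adev->dm.disable_hpd_irq)
		return;	/* HPD handling disabled for debugging */

	/* ... normal hotplug work ... */
}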
@@ -2696,6 +2699,10 @@ static void handle_hpd_rx_irq(void *param) memset(&hpd_irq_data, 0, sizeof(hpd_irq_data)); + if (adev->dm.disable_hpd_irq) + return; + + + /* * TODO:Temporary add mutex to protect hpd interrupt not have a gpio * conflict, after implement i2c helper, this mutex should be @@ -4225,6 +4232,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, { struct amdgpu_device *adev = drm_to_adev(plane->dev); const struct drm_format_info *info = drm_format_info(format); + int i; enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3; @@ -4232,11 +4240,22 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, return false; /* - * We always have to allow this modifier, because core DRM still - * checks LINEAR support if userspace does not provide modifers. + * We always have to allow these modifiers: + * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers. + * 2. Not passing any modifiers is the same as explicitly passing INVALID. */ - if (modifier == DRM_FORMAT_MOD_LINEAR) + if (modifier == DRM_FORMAT_MOD_LINEAR || + modifier == DRM_FORMAT_MOD_INVALID) { return true; + } + + /* Check that the modifier is on the list of the plane's supported modifiers. */ + for (i = 0; i < plane->modifier_count; i++) { + if (modifier == plane->modifiers[i]) + break; + } + if (i == plane->modifier_count) + return false; /* * For D swizzle the canonical modifier depends on the bpp, so check diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 018943113025..b2f2ccfc20bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -1,5 +1,5 @@ /* - * Copyright 2015 Advanced Micro Devices, Inc. + * Copyright (C) 2015-2020 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -410,6 +410,7 @@ struct amdgpu_display_manager { */ struct amdgpu_encoder mst_encoders[AMDGPU_DM_MAX_CRTC]; bool force_timing_sync; + bool disable_hpd_irq; bool dmcub_trace_event_en; /** * @da_list: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 9a13f47022df..529545045a3e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3077,6 +3077,37 @@ static int force_timing_sync_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(force_timing_sync_ops, force_timing_sync_get, force_timing_sync_set, "%llu\n"); + +/* + * Disables all HPD and HPD RX interrupt handling in the + * driver when set to 1. Default is 0. + */ +static int disable_hpd_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + + adev->dm.disable_hpd_irq = (bool)val; + + return 0; +} + + +/* + * Returns 1 if HPD and HPD RX interrupt handling is disabled, + * 0 otherwise. + */ +static int disable_hpd_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = data; + + *val = adev->dm.disable_hpd_irq; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(disable_hpd_ops, disable_hpd_get, + disable_hpd_set, "%llu\n"); + /* * Sets the DC visual confirm debug option from the given string. 
* Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_visual_confirm @@ -3213,4 +3244,8 @@ void dtn_debugfs_init(struct amdgpu_device *adev) debugfs_create_file_unsafe("amdgpu_dm_dcc_en", 0644, root, adev, &dcc_en_bits_fops); + + debugfs_create_file_unsafe("amdgpu_dm_disable_hpd", 0644, root, adev, + &disable_hpd_ops); + } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 60f91853bd82..616f5b1ea3a8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -434,6 +434,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) int link_index = aconnector->dc_link->link_index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; + struct drm_connector_state *conn_state; if (config->dpms_off) { hdcp_remove_display(hdcp_work, link_index, aconnector); @@ -459,8 +460,13 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; link->adjust.auth_delay = 3; link->adjust.hdcp1.disable = 0; + conn_state = aconnector->base.state; - hdcp_update_display(hdcp_work, link_index, aconnector, DRM_MODE_HDCP_CONTENT_TYPE0, false); + pr_debug("[HDCP_DM] display %d, CP %d, type %d\n", aconnector->base.index, + (!!aconnector->base.state) ? aconnector->base.state->content_protection : -1, + (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); + + hdcp_update_display(hdcp_work, link_index, aconnector, conn_state->hdcp_content_type, false); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 103e29905b57..e8b325a828c1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -711,3 +711,28 @@ bool dm_helpers_dmub_outbox0_interrupt_control(struct dc_context *ctx, bool enab enable ? "en" : "dis", ret); return ret; } + +void dm_helpers_mst_enable_stream_features(const struct dc_stream_state *stream) +{ + /* TODO: virtual DPCD */ + struct dc_link *link = stream->link; + union down_spread_ctrl old_downspread; + union down_spread_ctrl new_downspread; + + if (link->aux_access_disabled) + return; + + if (!dm_helpers_dp_read_dpcd(link->ctx, link, DP_DOWNSPREAD_CTRL, + &old_downspread.raw, + sizeof(old_downspread))) + return; + + new_downspread.raw = old_downspread.raw; + new_downspread.bits.IGNORE_MSA_TIMING_PARAM = + (stream->ignore_msa_timing_param) ? 
1 : 0; + + if (new_downspread.raw != old_downspread.raw) + dm_helpers_dp_write_dpcd(link->ctx, link, DP_DOWNSPREAD_CTRL, + &new_downspread.raw, + sizeof(new_downspread)); +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 73cdb9fe981a..9b221db526dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -229,6 +229,11 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) (aconnector->edid->extensions + 1) * EDID_LENGTH, &init_params); + if (!dc_sink) { + DRM_ERROR("Unable to add a remote sink\n"); + return 0; + } + dc_sink->priv = aconnector; /* dc_link_add_remote_sink returns a new reference */ aconnector->dc_sink = dc_sink; @@ -745,8 +750,8 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (!dc_dsc_compute_bandwidth_range( stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, - dsc_policy.min_target_bpp, - dsc_policy.max_target_bpp, + dsc_policy.min_target_bpp * 16, + dsc_policy.max_target_bpp * 16, &stream->sink->dsc_caps.dsc_dec_caps, &stream->timing, &params[count].bw_range)) params[count].bw_range.stream_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 887a54246bde..a06e86853bb9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -128,7 +128,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; + int display_count, i; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; @@ -210,6 +210,14 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.dppclk_khz, safe_to_lower); + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->signal == SIGNAL_TYPE_EDP && + context->streams[i]->apply_seamless_boot_optimization) { + dc_wait_for_vblank(dc, context->streams[i]); + break; + } + } + clk_mgr_base->clks.actual_dppclk_khz = rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); @@ -761,6 +769,43 @@ static struct wm_table ddr4_wm_table_rn = { } }; +static struct wm_table ddr4_1R_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + } +}; + static struct wm_table lpddr4_wm_table_rn = { .entries = { { @@ -945,8 +990,12 @@ void rn_clk_mgr_construct( } else { if (is_green_sardine) rn_bw_params.wm_table = ddr4_wm_table_gs; - else - rn_bw_params.wm_table = 
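The hunk that continues below selects Renoir's DDR4 watermark table; with this patch the choice keys off both the ASIC variant and the new is_single_rank_dimm flag. Condensed into a helper for clarity (a sketch, assuming the table symbols defined above):

/* Sketch: Renoir watermark-table selection after this patch */
static struct wm_table *rn_select_ddr4_wm_table(const struct dc *dc,
						bool is_green_sardine)
{
	if (is_green_sardine)
		return &ddr4_wm_table_gs;
	if (dc->config.is_single_rank_dimm)
		return &ddr4_1R_wm_table_rn;
	return &ddr4_wm_table_rn;
}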
ddr4_wm_table_rn; + else { + if (ctx->dc->config.is_single_rank_dimm) + rn_bw_params.wm_table = ddr4_1R_wm_table_rn; + else + rn_bw_params.wm_table = ddr4_wm_table_rn; + } } /* Saved clocks configured at boot for debug purposes */ rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info); @@ -964,6 +1013,9 @@ void rn_clk_mgr_construct( if (status == PP_SMU_RESULT_OK && ctx->dc_bios && ctx->dc_bios->integrated_info) { rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info); + /* treat memory config as single channel if memory is asymmetrics. */ + if (ctx->dc->config.is_asymmetric_memory) + clk_mgr->base.bw_params->num_channels = 1; } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 577e7f97045e..652fa89fae5f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -432,7 +432,7 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); } -static bool dcn3_is_smu_prsent(struct clk_mgr *clk_mgr_base) +static bool dcn3_is_smu_present(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); return clk_mgr->smu_present; @@ -500,7 +500,7 @@ static struct clk_mgr_funcs dcn3_funcs = { .are_clock_states_equal = dcn3_are_clock_states_equal, .enable_pme_wa = dcn3_enable_pme_wa, .notify_link_rate_change = dcn30_notify_link_rate_change, - .is_smu_present = dcn3_is_smu_prsent + .is_smu_present = dcn3_is_smu_present }; static void dcn3_init_clocks_fpga(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 8f0a13807d05..4713f09bcbf1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -55,6 +55,7 @@ #include "link_encoder.h" #include "link_enc_cfg.h" +#include "dc_link.h" #include "dc_link_ddc.h" #include "dm_helpers.h" #include "mem_input.h" @@ -1322,11 +1323,10 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc, struct dc_link *link = sink->link; unsigned int i, enc_inst, tg_inst = 0; - // Seamless port only support single DP and EDP so far - if ((sink->sink_signal != SIGNAL_TYPE_DISPLAY_PORT && - sink->sink_signal != SIGNAL_TYPE_EDP) || - sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) + /* Support seamless boot on EDP displays only */ + if (sink->sink_signal != SIGNAL_TYPE_EDP) { return false; + } /* Check for enabled DIG to identify enabled display */ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) @@ -1399,6 +1399,10 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc, if (crtc_timing->v_sync_width != hw_crtc_timing.v_sync_width) return false; + /* block DSC for now, as VBIOS does not currently support DSC timings */ + if (crtc_timing->flags.DSC) + return false; + if (dc_is_dp_signal(link->connector_signal)) { unsigned int pix_clk_100hz; @@ -1429,6 +1433,11 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc, return false; } + if (is_edp_ilr_optimization_required(link, crtc_timing)) { + DC_LOG_EVENT_LINK_TRAINING("Seamless boot disabled to optimize eDP link rate\n"); + return false; + } + return true; } @@ -2678,6 +2687,10 @@ static void commit_planes_for_stream(struct dc *dc, plane_state->triplebuffer_flips = true; } } + if (update_type == UPDATE_TYPE_FULL) { + /* force 
vsync flip when reconfiguring pipes to prevent underflow */ + plane_state->flip_immediate = false; + } } } @@ -2821,7 +2834,8 @@ static void commit_planes_for_stream(struct dc *dc, if (pipe_ctx->bottom_pipe || pipe_ctx->next_odm_pipe || !pipe_ctx->stream || pipe_ctx->stream != stream || - !pipe_ctx->plane_state->update_flags.bits.addr_update) + !pipe_ctx->plane_state->update_flags.bits.addr_update || + pipe_ctx->plane_state->skip_manual_trigger) continue; if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger) @@ -3205,6 +3219,19 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink) } } +void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) { + struct timing_generator *tg = + dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; + tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK); + break; + } +} + void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info) { info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 29bc2874f6a7..f4374d83662a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1679,21 +1679,27 @@ void link_destroy(struct dc_link **link) static void enable_stream_features(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->link; - union down_spread_ctrl old_downspread; - union down_spread_ctrl new_downspread; - core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL, - &old_downspread.raw, sizeof(old_downspread)); + if (pipe_ctx->stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) { + struct dc_link *link = stream->link; + union down_spread_ctrl old_downspread; + union down_spread_ctrl new_downspread; + + core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL, + &old_downspread.raw, sizeof(old_downspread)); - new_downspread.raw = old_downspread.raw; + new_downspread.raw = old_downspread.raw; - new_downspread.bits.IGNORE_MSA_TIMING_PARAM = - (stream->ignore_msa_timing_param) ? 1 : 0; + new_downspread.bits.IGNORE_MSA_TIMING_PARAM = + (stream->ignore_msa_timing_param) ? 1 : 0; + + if (new_downspread.raw != old_downspread.raw) { + core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, + &new_downspread.raw, sizeof(new_downspread)); + } - if (new_downspread.raw != old_downspread.raw) { - core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, - &new_downspread.raw, sizeof(new_downspread)); + } else { + dm_helpers_mst_enable_stream_features(stream); } } @@ -2813,12 +2819,9 @@ bool dc_link_setup_psr(struct dc_link *link, psr_context->psr_level.u32all = 0; -#if defined(CONFIG_DRM_AMD_DC_DCN) /*skip power down the single pipe since it blocks the cstate*/ - if ((link->ctx->asic_id.chip_family == FAMILY_RV) && - ASICREV_IS_RAVEN(link->ctx->asic_id.hw_internal_rev)) + if (link->ctx->asic_id.chip_family >= FAMILY_RV) psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true; -#endif /* SMU will perform additional powerdown sequence. 
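The SST branch of enable_stream_features() above and the new dm_helpers_mst_enable_stream_features() from an earlier hunk now carry the same DPCD read-modify-write of DP_DOWNSPREAD_CTRL; only the accessors differ (core_link_*_dpcd vs. dm_helpers_dp_*_dpcd). If a later cleanup wanted to deduplicate them, the shared core would look roughly like this (a sketch, not part of the patch):

/* Sketch: common read-modify-write of the IGNORE_MSA_TIMING_PARAM bit */
static void set_ignore_msa_timing_param(struct dc_link *link, bool ignore)
{
	union down_spread_ctrl old_ds, new_ds;

	core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL,
			    &old_ds.raw, sizeof(old_ds));

	new_ds.raw = old_ds.raw;
	new_ds.bits.IGNORE_MSA_TIMING_PARAM = ignore ? 1 : 0;

	/* Avoid a DPCD write when nothing actually changed */
	if (new_ds.raw != old_ds.raw)
		core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
				     &new_ds.raw, sizeof(new_ds));
}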
* For unsupported ASICs, set psr_level flag to skip PSR @@ -3139,50 +3142,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) return DC_OK; } -enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link) -{ - int i; - struct pipe_ctx *pipe_ctx; - - // Clear all of MST payload then reallocate - for (i = 0; i < MAX_PIPES; i++) { - pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - - /* driver enable split pipe for external monitors - * we have to check pipe_ctx is split pipe or not - * If it's split pipe, driver using top pipe to - * reaallocate. - */ - if (!pipe_ctx || pipe_ctx->top_pipe) - continue; - - if (pipe_ctx->stream && pipe_ctx->stream->link == link && - pipe_ctx->stream->dpms_off == false && - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - deallocate_mst_payload(pipe_ctx); - } - } - - for (i = 0; i < MAX_PIPES; i++) { - pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - - if (!pipe_ctx || pipe_ctx->top_pipe) - continue; - - if (pipe_ctx->stream && pipe_ctx->stream->link == link && - pipe_ctx->stream->dpms_off == false && - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - /* enable/disable PHY will clear connection between BE and FE - * need to restore it. - */ - link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc, - pipe_ctx->stream_res.stream_enc->id, true); - dc_link_allocate_mst_payload(pipe_ctx); - } - } - - return DC_OK; -} #if defined(CONFIG_DRM_AMD_DC_HDCP) static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) @@ -3296,7 +3255,8 @@ void core_link_enable_stream( /* eDP lit up by bios already, no need to enable again. */ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && - apply_edp_fast_boot_optimization) { + apply_edp_fast_boot_optimization && + !pipe_ctx->stream->timing.flags.DSC) { pipe_ctx->stream->dpms_off = false; #if defined(CONFIG_DRM_AMD_DC_HDCP) update_psp_stream_config(pipe_ctx, false); @@ -3358,8 +3318,10 @@ void core_link_enable_stream( /* Set DPS PPS SDP (AKA "info frames") */ if (pipe_ctx->stream->timing.flags.DSC) { if (dc_is_dp_signal(pipe_ctx->stream->signal) || - dc_is_virtual_signal(pipe_ctx->stream->signal)) + dc_is_virtual_signal(pipe_ctx->stream->signal)) { + dp_set_dsc_on_rx(pipe_ctx, true); dp_set_dsc_pps_sdp(pipe_ctx, true); + } } if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) @@ -3754,7 +3716,8 @@ bool dc_link_should_enable_fec(const struct dc_link *link) if ((link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT_MST && link->local_sink && link->local_sink->edid_caps.panel_patch.disable_fec) || - link->connector_signal == SIGNAL_TYPE_EDP) // Disable FEC for eDP + (link->connector_signal == SIGNAL_TYPE_EDP && + link->dc->debug.force_enable_edp_fec == false)) // Disable FEC for eDP is_fec_disable = true; if (dc_link_is_fec_supported(link) && !link->dc->debug.disable_fec && !is_fec_disable) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 7d2e433c2275..3ff3d9e90983 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1132,11 +1132,6 @@ static inline enum link_training_result perform_link_training_int( enum link_training_result status) { union lane_count_set lane_count_set = { {0} }; - union dpcd_training_pattern dpcd_pattern = { {0} }; - - /* 3. 
set training not in progress*/ - dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE; - dpcd_set_training_pattern(link, dpcd_pattern); /* 4. mainlink output idle pattern*/ dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); @@ -1560,6 +1555,7 @@ enum link_training_result dc_link_dp_perform_link_training( { enum link_training_result status = LINK_TRAINING_SUCCESS; struct link_training_settings lt_settings; + union dpcd_training_pattern dpcd_pattern = { { 0 } }; bool fec_enable; uint8_t repeater_cnt; @@ -1624,6 +1620,9 @@ enum link_training_result dc_link_dp_perform_link_training( } } + /* 3. set training not in progress*/ + dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE; + dpcd_set_training_pattern(link, dpcd_pattern); if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) { status = perform_link_training_int(link, &lt_settings, @@ -2490,7 +2489,7 @@ static bool decide_dp_link_settings(struct dc_link *link, struct dc_link_setting return false; } -static bool decide_edp_link_settings(struct dc_link *link, struct dc_link_settings *link_setting, uint32_t req_bw) +bool decide_edp_link_settings(struct dc_link *link, struct dc_link_settings *link_setting, uint32_t req_bw) { struct dc_link_settings initial_link_setting; struct dc_link_settings current_link_setting; @@ -3582,6 +3581,8 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.lttpr_caps.revision.raw >= 0x14); if (is_lttpr_present) CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); + else + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; } if (!is_lttpr_present) @@ -3892,7 +3893,7 @@ void detect_edp_sink_caps(struct dc_link *link) memset(supported_link_rates, 0, sizeof(supported_link_rates)); if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 && - (link->dc->config.optimize_edp_link_rate || + (link->dc->debug.optimize_edp_link_rate || link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN)) { // Read DPCD 00010h - 0001Fh 16 bytes at one shot core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES, @@ -4718,3 +4719,51 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link) } return false; } + +bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing) +{ + struct dc_link_settings link_setting; + uint8_t link_bw_set; + uint8_t link_rate_set; + uint32_t req_bw; + union lane_count_set lane_count_set = { {0} }; + + ASSERT(link && crtc_timing); // invalid input + + if (link->dpcd_caps.edp_supported_link_rates_count == 0 || + !link->dc->debug.optimize_edp_link_rate) + return false; + + + // Read DPCD 00100h to find if standard link rates are set + core_link_read_dpcd(link, DP_LINK_BW_SET, + &link_bw_set, sizeof(link_bw_set)); + + if (link_bw_set) { + DC_LOG_EVENT_LINK_TRAINING("eDP ILR: Optimization required, VBIOS used link_bw_set\n"); + return true; + } + + // Read DPCD 00115h to find the edp link rate set used + core_link_read_dpcd(link, DP_LINK_RATE_SET, + &link_rate_set, sizeof(link_rate_set)); + + // Read DPCD 00101h to find out the number of lanes currently set + core_link_read_dpcd(link, DP_LANE_COUNT_SET, + &lane_count_set.raw, sizeof(lane_count_set)); + + req_bw = dc_bandwidth_in_kbps_from_timing(crtc_timing); + + decide_edp_link_settings(link, &link_setting, req_bw); + + if (link->dpcd_caps.edp_supported_link_rates[link_rate_set] != link_setting.link_rate || + lane_count_set.bits.LANE_COUNT_SET != link_setting.lane_count) { + DC_LOG_EVENT_LINK_TRAINING("eDP ILR: Optimization required, VBIOS 
link_rate_set not optimal\n"); + return true; + } + + DC_LOG_EVENT_LINK_TRAINING("eDP ILR: No optimization required, VBIOS set optimal link_rate_set\n"); + return false; +} + + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 48ad1a8d4a74..b426f878fb99 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -431,7 +431,7 @@ static void dsc_optc_config_log(struct display_stream_compressor *dsc, DC_LOG_DSC("\tslice_width %d", config->slice_width); } -static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable) +bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable) { struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; @@ -541,7 +541,7 @@ bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable) goto out; if (enable) { - if (dp_set_dsc_on_rx(pipe_ctx, true)) { + { dp_set_dsc_on_stream(pipe_ctx, true); result = true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ac7a75887f95..8cb937c046aa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2506,26 +2506,31 @@ static void set_avi_info_frame( hdmi_info.bits.ITC = itc_value; } + if (stream->qs_bit == 1) { + if (color_space == COLOR_SPACE_SRGB || + color_space == COLOR_SPACE_2020_RGB_FULLRANGE) + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE; + else if (color_space == COLOR_SPACE_SRGB_LIMITED || + color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE; + else + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; + } else + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; + /* TODO : We should handle YCC quantization */ /* but we do not have matrix calculation */ - if (stream->qs_bit == 1 && - stream->qy_bit == 1) { + if (stream->qy_bit == 1) { if (color_space == COLOR_SPACE_SRGB || - color_space == COLOR_SPACE_2020_RGB_FULLRANGE) { - hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE; + color_space == COLOR_SPACE_2020_RGB_FULLRANGE) hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - } else if (color_space == COLOR_SPACE_SRGB_LIMITED || - color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) { - hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE; + else if (color_space == COLOR_SPACE_SRGB_LIMITED || + color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - } else { - hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; + else hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - } - } else { - hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - } + } else + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; ///VIC format = stream->timing.timing_3d_format; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8108b82bac60..100d434f7a03 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ /* forward declaration */ struct aux_payload; -#define DC_VER "3.2.130" +#define DC_VER "3.2.132" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -293,7 +293,6 @@ struct dc_config { bool gpu_vm_support; bool disable_disp_pll_sharing; bool fbc_support; - bool optimize_edp_link_rate; bool disable_fractional_pwm; bool 
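The reworked set_avi_info_frame() logic decouples the RGB quantization bits from the YCC ones: Q0_Q1 now depends only on the sink's qs bit plus the stream color space, while YQ0_YQ1 stays limited-range in every branch until YCC matrix handling exists. The RGB side, reduced to a table-style helper (a sketch using the same enums as the hunk above):

/* Sketch: Q0_Q1 selection as rewritten by this patch */
static int avi_rgb_quantization(int qs_bit, enum dc_color_space cs)
{
	if (qs_bit != 1)
		return RGB_QUANTIZATION_DEFAULT_RANGE;

	switch (cs) {
	case COLOR_SPACE_SRGB:
	case COLOR_SPACE_2020_RGB_FULLRANGE:
		return RGB_QUANTIZATION_FULL_RANGE;
	case COLOR_SPACE_SRGB_LIMITED:
	case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
		return RGB_QUANTIZATION_LIMITED_RANGE;
	default:
		return RGB_QUANTIZATION_DEFAULT_RANGE;
	}
}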
allow_seamless_boot_optimization; bool power_down_display_on_boot; @@ -309,6 +308,8 @@ struct dc_config { #endif uint64_t vblank_alignment_dto_params; uint8_t vblank_alignment_max_frame_time_diff; + bool is_asymmetric_memory; + bool is_single_rank_dimm; }; enum visual_confirm { @@ -541,6 +542,11 @@ struct dc_debug_options { /* Enable dmub aux for legacy ddc */ bool enable_dmub_aux_for_legacy_ddc; + bool optimize_edp_link_rate; /* eDP ILR */ + /* force enable edp FEC */ + bool force_enable_edp_fec; + /* FEC/PSR1 sequence enable delay in 100us */ + uint8_t fec_enable_delay_in100us; }; struct dc_debug_data { @@ -713,6 +719,7 @@ void dc_init_callbacks(struct dc *dc, void dc_deinit_callbacks(struct dc *dc); void dc_destroy(struct dc **dc); +void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream); /******************************************************************************* * Surface Interfaces ******************************************************************************/ @@ -900,6 +907,8 @@ struct dc_plane_state { union surface_update_flags update_flags; bool flip_int_enabled; + bool skip_manual_trigger; + /* private to DC core */ struct dc_plane_status status; struct dc_context *ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index b0013e674864..054bab45ee17 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -276,7 +276,6 @@ enum dc_detect_reason { bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); bool dc_link_get_hpd_state(struct dc_link *dc_link); enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx); -enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link); /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt). 
* Return: diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index b0297f07f9de..13dae7238a58 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -238,7 +238,6 @@ struct dc_stream_state { bool apply_seamless_boot_optimization; uint32_t stream_id; - bool is_dsc_enabled; struct test_pattern test_pattern; union stream_update_flags update_flags; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 28ff059aa7f3..5e99553fcdd4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -284,6 +284,8 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.u32All = 0; copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR; copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; + copy_settings_data->fec_enable_status = (link->fec_state == dc_link_fec_enabled); + copy_settings_data->fec_enable_delay_in100us = link->dc->debug.fec_enable_delay_in100us; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 873c6f2d2cd9..5ddeee96bf23 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -48,6 +48,7 @@ #include "stream_encoder.h" #include "link_encoder.h" #include "link_hwss.h" +#include "dc_link_dp.h" #include "clock_source.h" #include "clk_mgr.h" #include "abm.h" @@ -1694,6 +1695,8 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) bool can_apply_edp_fast_boot = false; bool can_apply_seamless_boot = false; bool keep_edp_vdd_on = false; + DC_LOGGER_INIT(); + get_edp_links_with_sink(dc, edp_links_with_sink, &edp_with_sink_num); get_edp_links(dc, edp_links, &edp_num); @@ -1714,8 +1717,11 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) /* Set optimization flag on eDP stream*/ if (edp_stream_num && edp_link->link_status.link_active) { edp_stream = edp_streams[0]; - edp_stream->apply_edp_fast_boot_optimization = true; - can_apply_edp_fast_boot = true; + can_apply_edp_fast_boot = !is_edp_ilr_optimization_required(edp_stream->link, &edp_stream->timing); + edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot; + if (!can_apply_edp_fast_boot) + DC_LOG_EVENT_LINK_TRAINING("eDP fast boot disabled to optimize link rate\n"); + break; } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index bec7059f6d5d..a1318c31bcfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1,5 +1,5 @@ /* - * Copyright 2012-17 Advanced Micro Devices, Inc. + * Copyright 2012-2021 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -181,11 +181,14 @@ void hubp2_vready_at_or_After_vsync(struct hubp *hubp, else Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 0 */ - if ((pipe_dest->vstartup_start - (pipe_dest->vready_offset+pipe_dest->vupdate_width - + pipe_dest->vupdate_offset) / pipe_dest->htotal) <= pipe_dest->vblank_end) { - value = 1; - } else - value = 0; + if (pipe_dest->htotal != 0) { + if ((pipe_dest->vstartup_start - (pipe_dest->vready_offset+pipe_dest->vupdate_width + + pipe_dest->vupdate_offset) / pipe_dest->htotal) <= pipe_dest->vblank_end) { + value = 1; + } else + value = 0; + } + REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index f65a6904d09c..527e56c353cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2201,10 +2201,11 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3.0) / 2; break; case PIXEL_ENCODING_YCBCR422: - if (true) /* todo */ - pipes[pipe_cnt].dout.output_format = dm_s422; - else + if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC && + !res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.ycbcr422_simple) pipes[pipe_cnt].dout.output_format = dm_n422; + else + pipes[pipe_cnt].dout.output_format = dm_s422; pipes[pipe_cnt].dout.output_bpp = output_bpc * 2; break; default: diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c index d3b643089603..8fccee5a3036 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c @@ -218,6 +218,8 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT; cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp; cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_pwm_u16_16; + cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_1; + cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_cntl->inst); cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c index 0d90523c7cdc..70b053d9ba40 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c @@ -99,6 +99,8 @@ static const struct hw_sequencer_funcs dcn301_funcs = { .set_pipe = dcn21_set_pipe, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, }; static const struct hwseq_private_funcs dcn301_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index f41db27c44de..7617fab9e1f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -147,6 +147,8 @@ bool dm_helpers_dp_write_dsc_enable( bool dm_helpers_is_dp_sink_present( struct dc_link *link); +void 
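hubp2_vready_at_or_After_vsync() above now refuses to evaluate the VREADY placement formula when htotal is zero, avoiding a divide-by-zero on not-yet-programmed timings. Isolated, and assuming the register value keeps its prior default of 0 when the guard trips and the same pipe_dest parameter type, the decision is:

/* Sketch: VREADY_AT_OR_AFTER_VSYNC decision with the new htotal guard */
static uint32_t vready_at_or_after_vsync(const struct _vcs_dpi_display_pipe_dest_params_st *d)
{
	if (d->htotal == 0)
		return 0;	/* assumed default while the timing is not yet valid */

	return (d->vstartup_start -
		(d->vready_offset + d->vupdate_width + d->vupdate_offset) /
		d->htotal) <= d->vblank_end ? 1 : 0;
}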
dm_helpers_mst_enable_stream_features(const struct dc_stream_state *stream); + enum dc_edid_status dm_helpers_read_local_edid( struct dc_context *ctx, struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index b970a32177af..3ae05c96d557 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -52,6 +52,10 @@ bool dp_validate_mode_timing( struct dc_link *link, const struct dc_crtc_timing *timing); +bool decide_edp_link_settings(struct dc_link *link, + struct dc_link_settings *link_setting, + uint32_t req_bw); + void decide_link_settings( struct dc_stream_state *stream, struct dc_link_settings *link_setting); @@ -71,6 +75,8 @@ void detect_edp_sink_caps(struct dc_link *link); bool is_dp_active_dongle(const struct dc_link *link); +bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); + void dp_enable_mst_on_sink(struct dc_link *link, bool enable); enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); @@ -86,5 +92,7 @@ bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable); bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable); void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable); bool dp_update_dsc_config(struct pipe_ctx *pipe_ctx); +bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable); + #endif /* __DC_LINK_DP_H__ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 44003836fafd..4195ff10c514 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -47,10 +47,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x7f2db1846 +#define DMUB_FW_VERSION_GIT_HASH 0x23db9b126 #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 59 +#define DMUB_FW_VERSION_REVISION 62 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 @@ -121,6 +121,16 @@ #define TRACE_BUFFER_ENTRY_OFFSET 16 /** + * ABM backlight control version legacy + */ +#define DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_UNKNOWN 0x0 + +/** + * ABM backlight control version with multi edp support + */ +#define DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_1 0x1 + +/** * Physical framebuffer address location, 64-bit. */ #ifndef PHYSICAL_ADDRESS_LOC @@ -1625,6 +1635,23 @@ struct dmub_cmd_abm_set_backlight_data { * Requested backlight level from user. */ uint32_t backlight_user_level; + + /** + * Backlight data version. + */ + uint8_t version; + + /** + * Panel Control HW instance mask. + * Bit 0 is Panel Control HW instance 0. + * Bit 1 is Panel Control HW instance 1. + */ + uint8_t panel_mask; + + /** + * Explicit padding to 4 byte boundary. 
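With the new version and panel_mask fields, a caller identifies which panel-control instance a backlight update targets; the dcn21 hunk earlier in this series shows the driver side. A condensed fill of the command, assuming the usual DMUB_CMD__ABM header type, a single panel instance panel_inst, and caller-provided frame_ramp/backlight_pwm_u16_16 values:

/* Sketch: populating a versioned ABM set-backlight command */
union dmub_rb_cmd cmd;

memset(&cmd, 0, sizeof(cmd));
cmd.abm_set_backlight.header.type = DMUB_CMD__ABM;
cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT;
cmd.abm_set_backlight.header.payload_bytes =
	sizeof(struct dmub_cmd_abm_set_backlight_data);
cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp;
cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_pwm_u16_16;
cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_1;
cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = 0x01 << panel_inst;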
+ */ + uint8_t pad[2]; }; /** diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index eeac14300a2a..2cbd931363bd 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -427,8 +427,6 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, event_ctx->unexpected_event = 1; goto out; } - if (!mod_hdcp_is_link_encryption_enabled(hdcp)) - goto out; if (status == MOD_HDCP_STATUS_SUCCESS) mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index f164f6a5d4dc..c1331facdcb4 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -564,8 +564,6 @@ static enum mod_hdcp_status authenticated(struct mod_hdcp *hdcp, event_ctx->unexpected_event = 1; goto out; } - if (!mod_hdcp_is_link_encryption_enabled(hdcp)) - goto out; process_rxstatus(hdcp, event_ctx, input, &status); diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index 9d7ca316dc3f..26f96c05e0ec 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -791,6 +791,8 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_rx_id_list(struct mod_hdcp *hdcp) TA_HDCP2_MSG_AUTHENTICATION_STATUS__RECEIVERID_REVOKED) { hdcp->connection.is_hdcp2_revoked = 1; status = MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED; + } else { + status = MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE; } } mutex_unlock(&psp->hdcp_context.mutex); diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h index 4089cfa081f5..849450caca15 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h @@ -617,6 +617,22 @@ #define GCEA_EDC_CNT3__MAM_A3MEM_SEC_COUNT_MASK 0x30000000L #define GCEA_EDC_CNT3__MAM_A3MEM_DED_COUNT_MASK 0xC0000000L +//GCEA_ERR_STATUS +#define GCEA_ERR_STATUS__SDP_RDRSP_STATUS__SHIFT 0x0 +#define GCEA_ERR_STATUS__SDP_WRRSP_STATUS__SHIFT 0x4 +#define GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS__SHIFT 0x8 +#define GCEA_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR__SHIFT 0xa +#define GCEA_ERR_STATUS__CLEAR_ERROR_STATUS__SHIFT 0xb +#define GCEA_ERR_STATUS__BUSY_ON_ERROR__SHIFT 0xc +#define GCEA_ERR_STATUS__FUE_FLAG__SHIFT 0xd +#define GCEA_ERR_STATUS__SDP_RDRSP_STATUS_MASK 0x0000000FL +#define GCEA_ERR_STATUS__SDP_WRRSP_STATUS_MASK 0x000000F0L +#define GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK 0x00000300L +#define GCEA_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR_MASK 0x00000400L +#define GCEA_ERR_STATUS__CLEAR_ERROR_STATUS_MASK 0x00000800L +#define GCEA_ERR_STATUS__BUSY_ON_ERROR_MASK 0x00001000L +#define GCEA_ERR_STATUS__FUE_FLAG_MASK 0x00002000L + // addressBlock: gc_gfxudec //GRBM_GFX_INDEX #define GRBM_GFX_INDEX__INSTANCE_INDEX__SHIFT 0x0 diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index c77ed38c20fb..f2564ba21c0b 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -3336,6 +3336,47 @@ enum atom_smu11_syspll3_1_clock_id { SMU11_SYSPLL3_1_LCLK_ID = 2, // LCLK }; +enum atom_smu12_syspll_id { + SMU12_SYSPLL0_ID 
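The new GCEA_ERR_STATUS fields pack several SDP response codes and status flags into one register; a decode helper using just the masks and shifts added above might look like this (a sketch, not part of the patch):

/* Sketch: unpack the GCEA_ERR_STATUS fields defined above */
static void gcea_log_err_status(uint32_t status)
{
	uint32_t rdrsp = (status & GCEA_ERR_STATUS__SDP_RDRSP_STATUS_MASK) >>
			 GCEA_ERR_STATUS__SDP_RDRSP_STATUS__SHIFT;
	uint32_t wrrsp = (status & GCEA_ERR_STATUS__SDP_WRRSP_STATUS_MASK) >>
			 GCEA_ERR_STATUS__SDP_WRRSP_STATUS__SHIFT;
	bool parity = status & GCEA_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR_MASK;
	bool fue = status & GCEA_ERR_STATUS__FUE_FLAG_MASK;

	pr_debug("GCEA err_status: rdrsp %u, wrrsp %u, rd parity %d, fue %d\n",
		 rdrsp, wrrsp, parity, fue);
}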
= 0, + SMU12_SYSPLL1_ID = 1, + SMU12_SYSPLL2_ID = 2, + SMU12_SYSPLL3_0_ID = 3, + SMU12_SYSPLL3_1_ID = 4, +}; + +enum atom_smu12_syspll0_clock_id { + SMU12_SYSPLL0_SMNCLK_ID = 0, // SMNCLK + SMU12_SYSPLL0_SOCCLK_ID = 1, // SOCCLK + SMU12_SYSPLL0_MP0CLK_ID = 2, // MP0CLK + SMU12_SYSPLL0_MP1CLK_ID = 3, // MP1CLK + SMU12_SYSPLL0_MP2CLK_ID = 4, // MP2CLK + SMU12_SYSPLL0_VCLK_ID = 5, // VCLK + SMU12_SYSPLL0_LCLK_ID = 6, // LCLK + SMU12_SYSPLL0_DCLK_ID = 7, // DCLK + SMU12_SYSPLL0_ACLK_ID = 8, // ACLK + SMU12_SYSPLL0_ISPCLK_ID = 9, // ISPCLK + SMU12_SYSPLL0_SHUBCLK_ID = 10, // SHUBCLK +}; + +enum atom_smu12_syspll1_clock_id { + SMU12_SYSPLL1_DISPCLK_ID = 0, // DISPCLK + SMU12_SYSPLL1_DPPCLK_ID = 1, // DPPCLK + SMU12_SYSPLL1_DPREFCLK_ID = 2, // DPREFCLK + SMU12_SYSPLL1_DCFCLK_ID = 3, // DCFCLK +}; + +enum atom_smu12_syspll2_clock_id { + SMU12_SYSPLL2_Pre_GFXCLK_ID = 0, // Pre_GFXCLK +}; + +enum atom_smu12_syspll3_0_clock_id { + SMU12_SYSPLL3_0_FCLK_ID = 0, // FCLK +}; + +enum atom_smu12_syspll3_1_clock_id { + SMU12_SYSPLL3_1_UMCCLK_ID = 0, // UMCCLK +}; + struct atom_get_smu_clock_info_output_parameters_v3_1 { union { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 204e34549013..8128603ef495 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1844,7 +1844,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (asic_type < CHIP_VEGA10) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { - if (asic_type < CHIP_VEGA10 || asic_type == CHIP_ARCTURUS) + if (asic_type < CHIP_VEGA10 || + asic_type == CHIP_ARCTURUS || + asic_type == CHIP_ALDEBARAN) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) { if (asic_type < CHIP_VEGA20) diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h index 6e23a3f803a7..8361ebd8d876 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h @@ -26,7 +26,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU13_DRIVER_IF_VERSION 2 +#define SMU13_DRIVER_IF_VERSION 3 typedef struct { int32_t value; @@ -191,6 +191,44 @@ typedef struct { uint16_t SocTemperature; //[centi-Celsius] uint16_t EdgeTemperature; uint16_t ThrottlerStatus; +} SmuMetrics_legacy_t; + +typedef struct { + uint16_t GfxclkFrequency; //[MHz] + uint16_t SocclkFrequency; //[MHz] + uint16_t VclkFrequency; //[MHz] + uint16_t DclkFrequency; //[MHz] + uint16_t MemclkFrequency; //[MHz] + uint16_t spare; + + uint16_t GfxActivity; //[centi] + uint16_t UvdActivity; //[centi] + uint16_t C0Residency[4]; //percentage + + uint16_t Voltage[3]; //[mV] indices: VDDCR_VDD, VDDCR_SOC, VDDCR_GFX + uint16_t Current[3]; //[mA] indices: VDDCR_VDD, VDDCR_SOC, VDDCR_GFX + uint16_t Power[3]; //[mW] indices: VDDCR_VDD, VDDCR_SOC, VDDCR_GFX + uint16_t CurrentSocketPower; //[mW] + + //3rd party tools in Windows need info in the case of APUs + uint16_t CoreFrequency[4]; //[MHz] + uint16_t CorePower[4]; //[mW] + uint16_t CoreTemperature[4]; //[centi-Celsius] + uint16_t L3Frequency[1]; //[MHz] + uint16_t L3Temperature[1]; //[centi-Celsius] + + uint16_t GfxTemperature; //[centi-Celsius] + uint16_t SocTemperature; //[centi-Celsius] + uint16_t EdgeTemperature; + uint16_t ThrottlerStatus; +} SmuMetricsTable_t; + +typedef struct { + SmuMetricsTable_t Current; + 
SmuMetricsTable_t Average; + //uint32_t AccCnt; + uint32_t SampleStartTime; + uint32_t SampleStopTime; } SmuMetrics_t; diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index d5182bbaa598..bb55a96f98e9 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -32,7 +32,7 @@ #define SMU11_DRIVER_IF_VERSION_NV14 0x38 #define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3D #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE -#define SMU11_DRIVER_IF_VERSION_VANGOGH 0x02 +#define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF /* MP Apertures */ diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v12_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v12_0.h index 02de3b6199e5..1ad2dff71090 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v12_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v12_0.h @@ -60,5 +60,7 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ int smu_v12_0_set_driver_table_location(struct smu_context *smu); +int smu_v12_0_get_vbios_bootup_values(struct smu_context *smu); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index e0eb7ca112e2..c29d8b3131b7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2221,6 +2221,7 @@ static int smu_set_power_limit(void *handle, uint32_t limit) dev_err(smu->adev->dev, "New power limit (%d) is over the max allowed %d\n", limit, smu->max_power_limit); + ret = -EINVAL; goto out; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 7bcd35840bf2..77f532a49e37 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -194,18 +194,34 @@ static int vangogh_tables_init(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; + struct amdgpu_device *adev = smu->adev; + uint32_t if_version; + uint32_t ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + if (ret) { + dev_err(adev->dev, "Failed to get smu if version!\n"); + goto err0_out; + } SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU11_TOOL_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_ACTIVITY_MONITOR_COEFF, sizeof(DpmActivityMonitorCoeffExt_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); + + if (if_version < 0x3) { + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_legacy_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_legacy_t), GFP_KERNEL); + } else { + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); + } if (!smu_table->metrics_table) goto err0_out; smu_table->metrics_time = 0; @@ -235,13 +251,12 @@ err0_out: return -ENOMEM; } -static int vangogh_get_smu_metrics_data(struct smu_context *smu, +static int vangogh_get_legacy_smu_metrics_data(struct 
smu_context *smu, MetricsMember_t member, uint32_t *value) { struct smu_table_context *smu_table = &smu->smu_table; - - SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; + SmuMetrics_legacy_t *metrics = (SmuMetrics_legacy_t *)smu_table->metrics_table; int ret = 0; mutex_lock(&smu->metrics_lock); @@ -255,7 +270,7 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, } switch (member) { - case METRICS_AVERAGE_GFXCLK: + case METRICS_CURR_GFXCLK: *value = metrics->GfxclkFrequency; break; case METRICS_AVERAGE_SOCCLK: @@ -267,7 +282,7 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_DCLK: *value = metrics->DclkFrequency; break; - case METRICS_AVERAGE_UCLK: + case METRICS_CURR_UCLK: *value = metrics->MemclkFrequency; break; case METRICS_AVERAGE_GFXACTIVITY: @@ -311,6 +326,103 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, return ret; } +static int vangogh_get_smu_metrics_data(struct smu_context *smu, + MetricsMember_t member, + uint32_t *value) +{ + struct smu_table_context *smu_table = &smu->smu_table; + SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; + int ret = 0; + + mutex_lock(&smu->metrics_lock); + + ret = smu_cmn_get_metrics_table_locked(smu, + NULL, + false); + if (ret) { + mutex_unlock(&smu->metrics_lock); + return ret; + } + + switch (member) { + case METRICS_CURR_GFXCLK: + *value = metrics->Current.GfxclkFrequency; + break; + case METRICS_AVERAGE_SOCCLK: + *value = metrics->Current.SocclkFrequency; + break; + case METRICS_AVERAGE_VCLK: + *value = metrics->Current.VclkFrequency; + break; + case METRICS_AVERAGE_DCLK: + *value = metrics->Current.DclkFrequency; + break; + case METRICS_CURR_UCLK: + *value = metrics->Current.MemclkFrequency; + break; + case METRICS_AVERAGE_GFXACTIVITY: + *value = metrics->Current.GfxActivity; + break; + case METRICS_AVERAGE_VCNACTIVITY: + *value = metrics->Current.UvdActivity; + break; + case METRICS_AVERAGE_SOCKETPOWER: + *value = (metrics->Current.CurrentSocketPower << 8) / + 1000; + break; + case METRICS_TEMPERATURE_EDGE: + *value = metrics->Current.GfxTemperature / 100 * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_HOTSPOT: + *value = metrics->Current.SocTemperature / 100 * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_THROTTLER_STATUS: + *value = metrics->Current.ThrottlerStatus; + break; + case METRICS_VOLTAGE_VDDGFX: + *value = metrics->Current.Voltage[2]; + break; + case METRICS_VOLTAGE_VDDSOC: + *value = metrics->Current.Voltage[1]; + break; + case METRICS_AVERAGE_CPUCLK: + memcpy(value, &metrics->Current.CoreFrequency[0], + smu->cpu_core_num * sizeof(uint16_t)); + break; + default: + *value = UINT_MAX; + break; + } + + mutex_unlock(&smu->metrics_lock); + + return ret; +} + +static int vangogh_common_get_smu_metrics_data(struct smu_context *smu, + MetricsMember_t member, + uint32_t *value) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t if_version; + int ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + if (ret) { + dev_err(adev->dev, "Failed to get smu if version!\n"); + return ret; + } + + if (if_version < 0x3) + ret = vangogh_get_legacy_smu_metrics_data(smu, member, value); + else + ret = vangogh_get_smu_metrics_data(smu, member, value); + + return ret; +} + static int vangogh_allocate_dpm_context(struct smu_context *smu) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; @@ -447,11 +559,11 @@ static int vangogh_get_dpm_clk_limited(struct smu_context *smu, enum 
smu_clk_typ return 0; } -static int vangogh_print_clk_levels(struct smu_context *smu, +static int vangogh_print_legacy_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { DpmClocks_t *clk_table = smu->smu_table.clocks_table; - SmuMetrics_t metrics; + SmuMetrics_legacy_t metrics; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; @@ -546,6 +658,126 @@ static int vangogh_print_clk_levels(struct smu_context *smu, return size; } +static int vangogh_print_clk_levels(struct smu_context *smu, + enum smu_clk_type clk_type, char *buf) +{ + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + SmuMetrics_t metrics; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + int i, size = 0, ret = 0; + uint32_t cur_value = 0, value = 0, count = 0; + bool cur_value_match_level = false; + + memset(&metrics, 0, sizeof(metrics)); + + ret = smu_cmn_get_metrics_table(smu, &metrics, false); + if (ret) + return ret; + + switch (clk_type) { + case SMU_OD_SCLK: + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { + size = sprintf(buf, "%s:\n", "OD_SCLK"); + size += sprintf(buf + size, "0: %10uMhz\n", + (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); + size += sprintf(buf + size, "1: %10uMhz\n", + (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); + } + break; + case SMU_OD_CCLK: + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { + size = sprintf(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sprintf(buf + size, "0: %10uMhz\n", + (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); + size += sprintf(buf + size, "1: %10uMhz\n", + (smu->cpu_actual_soft_max_freq > 0) ? smu->cpu_actual_soft_max_freq : smu->cpu_default_soft_max_freq); + } + break; + case SMU_OD_RANGE: + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { + size = sprintf(buf, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); + size += sprintf(buf + size, "CCLK: %7uMhz %10uMhz\n", + smu->cpu_default_soft_min_freq, smu->cpu_default_soft_max_freq); + } + break; + case SMU_SOCCLK: + /* the level 3 ~ 6 of socclk use the same frequency for vangogh */ + count = clk_table->NumSocClkLevelsEnabled; + cur_value = metrics.Current.SocclkFrequency; + break; + case SMU_VCLK: + count = clk_table->VcnClkLevelsEnabled; + cur_value = metrics.Current.VclkFrequency; + break; + case SMU_DCLK: + count = clk_table->VcnClkLevelsEnabled; + cur_value = metrics.Current.DclkFrequency; + break; + case SMU_MCLK: + count = clk_table->NumDfPstatesEnabled; + cur_value = metrics.Current.MemclkFrequency; + break; + case SMU_FCLK: + count = clk_table->NumDfPstatesEnabled; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetFclkFrequency, 0, &cur_value); + if (ret) + return ret; + break; + default: + break; + } + + switch (clk_type) { + case SMU_SOCCLK: + case SMU_VCLK: + case SMU_DCLK: + case SMU_MCLK: + case SMU_FCLK: + for (i = 0; i < count; i++) { + ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value); + if (ret) + return ret; + if (!value) + continue; + size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + cur_value == value ? 
"*" : ""); + if (cur_value == value) + cur_value_match_level = true; + } + + if (!cur_value_match_level) + size += sprintf(buf + size, " %uMhz *\n", cur_value); + break; + default: + break; + } + + return size; +} + +static int vangogh_common_print_clk_levels(struct smu_context *smu, + enum smu_clk_type clk_type, char *buf) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t if_version; + int ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + if (ret) { + dev_err(adev->dev, "Failed to get smu if version!\n"); + return ret; + } + + if (if_version < 0x3) + ret = vangogh_print_legacy_clk_levels(smu, clk_type, buf); + else + ret = vangogh_print_clk_levels(smu, clk_type, buf); + + return ret; +} + static int vangogh_get_profiling_clk_mask(struct smu_context *smu, enum amd_dpm_forced_level level, uint32_t *vclk_mask, @@ -860,7 +1092,6 @@ static int vangogh_set_soft_freq_limited_range(struct smu_context *smu, return ret; break; case SMU_FCLK: - case SMU_MCLK: ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min, NULL); @@ -948,7 +1179,6 @@ static int vangogh_force_clk_levels(struct smu_context *smu, if (ret) return ret; break; - case SMU_MCLK: case SMU_FCLK: ret = vangogh_get_dpm_clk_limited(smu, clk_type, soft_min_level, &min_freq); @@ -1035,7 +1265,6 @@ static int vangogh_force_dpm_limit_value(struct smu_context *smu, bool highest) SMU_SOCCLK, SMU_VCLK, SMU_DCLK, - SMU_MCLK, SMU_FCLK, }; @@ -1064,7 +1293,6 @@ static int vangogh_unforce_dpm_levels(struct smu_context *smu) enum smu_clk_type clk_type; uint32_t feature; } clk_feature_map[] = { - {SMU_MCLK, SMU_FEATURE_DPM_FCLK_BIT}, {SMU_FCLK, SMU_FEATURE_DPM_FCLK_BIT}, {SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT}, {SMU_VCLK, SMU_FEATURE_VCN_DPM_BIT}, @@ -1196,7 +1424,6 @@ static int vangogh_set_performance_level(struct smu_context *smu, if (ret) return ret; - vangogh_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask); vangogh_force_clk_levels(smu, SMU_FCLK, 1 << fclk_mask); vangogh_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask); vangogh_force_clk_levels(smu, SMU_VCLK, 1 << vclk_mask); @@ -1236,7 +1463,6 @@ static int vangogh_set_performance_level(struct smu_context *smu, if (ret) return ret; - vangogh_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask); vangogh_force_clk_levels(smu, SMU_FCLK, 1 << fclk_mask); break; case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: @@ -1278,57 +1504,57 @@ static int vangogh_read_sensor(struct smu_context *smu, mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = vangogh_get_smu_metrics_data(smu, + ret = vangogh_common_get_smu_metrics_data(smu, METRICS_AVERAGE_GFXACTIVITY, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_POWER: - ret = vangogh_get_smu_metrics_data(smu, + ret = vangogh_common_get_smu_metrics_data(smu, METRICS_AVERAGE_SOCKETPOWER, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_EDGE_TEMP: - ret = vangogh_get_smu_metrics_data(smu, + ret = vangogh_common_get_smu_metrics_data(smu, METRICS_TEMPERATURE_EDGE, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: - ret = vangogh_get_smu_metrics_data(smu, + ret = vangogh_common_get_smu_metrics_data(smu, METRICS_TEMPERATURE_HOTSPOT, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GFX_MCLK: - ret = vangogh_get_smu_metrics_data(smu, - METRICS_AVERAGE_UCLK, + ret = vangogh_common_get_smu_metrics_data(smu, + METRICS_CURR_UCLK, (uint32_t *)data); *(uint32_t *)data *= 100; *size = 4; break; case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = 
vangogh_get_smu_metrics_data(smu, - METRICS_AVERAGE_GFXCLK, + ret = vangogh_common_get_smu_metrics_data(smu, + METRICS_CURR_GFXCLK, (uint32_t *)data); *(uint32_t *)data *= 100; *size = 4; break; case AMDGPU_PP_SENSOR_VDDGFX: - ret = vangogh_get_smu_metrics_data(smu, + ret = vangogh_common_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDGFX, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_VDDNB: - ret = vangogh_get_smu_metrics_data(smu, + ret = vangogh_common_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDSOC, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_CPU_CLK: - ret = vangogh_get_smu_metrics_data(smu, + ret = vangogh_common_get_smu_metrics_data(smu, METRICS_AVERAGE_CPUCLK, (uint32_t *)data); *size = smu->cpu_core_num * sizeof(uint16_t); @@ -1402,13 +1628,13 @@ static int vangogh_set_watermarks_table(struct smu_context *smu, return 0; } -static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, +static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; struct gpu_metrics_v2_1 *gpu_metrics = (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table; - SmuMetrics_t metrics; + SmuMetrics_legacy_t metrics; int ret = 0; ret = smu_cmn_get_metrics_table(smu, &metrics, true); @@ -1421,9 +1647,8 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, gpu_metrics->temperature_soc = metrics.SocTemperature; memcpy(&gpu_metrics->temperature_core[0], &metrics.CoreTemperature[0], - sizeof(uint16_t) * 8); + sizeof(uint16_t) * 4); gpu_metrics->temperature_l3[0] = metrics.L3Temperature[0]; - gpu_metrics->temperature_l3[1] = metrics.L3Temperature[1]; gpu_metrics->average_gfx_activity = metrics.GfxActivity; gpu_metrics->average_mm_activity = metrics.UvdActivity; @@ -1434,7 +1659,7 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_power = metrics.Power[2]; memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], - sizeof(uint16_t) * 8); + sizeof(uint16_t) * 4); gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency; gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; @@ -1445,9 +1670,8 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], - sizeof(uint16_t) * 8); + sizeof(uint16_t) * 4); gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0]; - gpu_metrics->current_l3clk[1] = metrics.L3Frequency[1]; gpu_metrics->throttle_status = metrics.ThrottlerStatus; @@ -1458,6 +1682,88 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v2_1); } +static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v2_1 *gpu_metrics = + (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table; + SmuMetrics_t metrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, &metrics, true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1); + + gpu_metrics->temperature_gfx = metrics.Current.GfxTemperature; + gpu_metrics->temperature_soc = metrics.Current.SocTemperature; + memcpy(&gpu_metrics->temperature_core[0], + &metrics.Current.CoreTemperature[0], + sizeof(uint16_t) * 4); + gpu_metrics->temperature_l3[0] = metrics.Current.L3Temperature[0]; + + gpu_metrics->average_gfx_activity = metrics.Current.GfxActivity; + gpu_metrics->average_mm_activity = metrics.Current.UvdActivity; + + 
gpu_metrics->average_socket_power = metrics.Current.CurrentSocketPower; + gpu_metrics->average_cpu_power = metrics.Current.Power[0]; + gpu_metrics->average_soc_power = metrics.Current.Power[1]; + gpu_metrics->average_gfx_power = metrics.Current.Power[2]; + memcpy(&gpu_metrics->average_core_power[0], + &metrics.Average.CorePower[0], + sizeof(uint16_t) * 4); + + gpu_metrics->average_gfxclk_frequency = metrics.Average.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.Average.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.Average.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.Average.DclkFrequency; + + gpu_metrics->current_gfxclk = metrics.Current.GfxclkFrequency; + gpu_metrics->current_socclk = metrics.Current.SocclkFrequency; + gpu_metrics->current_uclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_fclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_vclk = metrics.Current.VclkFrequency; + gpu_metrics->current_dclk = metrics.Current.DclkFrequency; + + memcpy(&gpu_metrics->current_coreclk[0], + &metrics.Current.CoreFrequency[0], + sizeof(uint16_t) * 4); + gpu_metrics->current_l3clk[0] = metrics.Current.L3Frequency[0]; + + gpu_metrics->throttle_status = metrics.Current.ThrottlerStatus; + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v2_1); +} + +static ssize_t vangogh_common_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t if_version; + int ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + if (ret) { + dev_err(adev->dev, "Failed to get smu if version!\n"); + return ret; + } + + if (if_version < 0x3) + ret = vangogh_get_legacy_gpu_metrics(smu, table); + else + ret = vangogh_get_gpu_metrics(smu, table); + + return ret; +} + static int vangogh_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) { @@ -1876,9 +2182,9 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .set_watermarks_table = vangogh_set_watermarks_table, .set_driver_table_location = smu_v11_0_set_driver_table_location, .interrupt_work = smu_v11_0_interrupt_work, - .get_gpu_metrics = vangogh_get_gpu_metrics, + .get_gpu_metrics = vangogh_common_get_gpu_metrics, .od_edit_dpm_table = vangogh_od_edit_dpm_table, - .print_clk_levels = vangogh_print_clk_levels, + .print_clk_levels = vangogh_common_print_clk_levels, .set_default_dpm_table = vangogh_set_default_dpm_tables, .set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters, .system_features_control = vangogh_system_features_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index e3232295f2bf..f43b4c623685 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1332,6 +1332,7 @@ static const struct pptable_funcs renoir_ppt_funcs = { .gfx_state_change_set = renoir_gfx_state_change_set, .set_fine_grain_gfx_freq_parameters = renoir_set_fine_grain_gfx_freq_parameters, .od_edit_dpm_table = renoir_od_edit_dpm_table, + .get_vbios_bootup_values = smu_v12_0_get_vbios_bootup_values, }; void renoir_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 6cc4855c8a37..d60b8c5e8715 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -27,6 +27,7 @@ #include "amdgpu_smu.h" #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" +#include "amdgpu_atombios.h" #include "smu_v12_0.h" #include "soc15_common.h" #include "atom.h" @@ -278,3 +279,125 @@ int smu_v12_0_set_driver_table_location(struct smu_context *smu) return ret; } + +static int smu_v12_0_atom_get_smu_clockinfo(struct amdgpu_device *adev, + uint8_t clk_id, + uint8_t syspll_id, + uint32_t *clk_freq) +{ + struct atom_get_smu_clock_info_parameters_v3_1 input = {0}; + struct atom_get_smu_clock_info_output_parameters_v3_1 *output; + int ret, index; + + input.clk_id = clk_id; + input.syspll_id = syspll_id; + input.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; + index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, + getsmuclockinfo); + + ret = amdgpu_atom_execute_table(adev->mode_info.atom_context, index, + (uint32_t *)&input); + if (ret) + return -EINVAL; + + output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&input; + *clk_freq = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; + + return 0; +} + +int smu_v12_0_get_vbios_bootup_values(struct smu_context *smu) +{ + int ret, index; + uint16_t size; + uint8_t frev, crev; + struct atom_common_table_header *header; + struct atom_firmware_info_v3_1 *v_3_1; + struct atom_firmware_info_v3_3 *v_3_3; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + + ret = amdgpu_atombios_get_data_table(smu->adev, index, &size, &frev, &crev, + (uint8_t **)&header); + if (ret) + return ret; + + if (header->format_revision != 3) { + dev_err(smu->adev->dev, "unknown atom_firmware_info version! 
for smu12\n"); + return -EINVAL; + } + + switch (header->content_revision) { + case 0: + case 1: + case 2: + v_3_1 = (struct atom_firmware_info_v3_1 *)header; + smu->smu_table.boot_values.revision = v_3_1->firmware_revision; + smu->smu_table.boot_values.gfxclk = v_3_1->bootup_sclk_in10khz; + smu->smu_table.boot_values.uclk = v_3_1->bootup_mclk_in10khz; + smu->smu_table.boot_values.socclk = 0; + smu->smu_table.boot_values.dcefclk = 0; + smu->smu_table.boot_values.vddc = v_3_1->bootup_vddc_mv; + smu->smu_table.boot_values.vddci = v_3_1->bootup_vddci_mv; + smu->smu_table.boot_values.mvddc = v_3_1->bootup_mvddc_mv; + smu->smu_table.boot_values.vdd_gfx = v_3_1->bootup_vddgfx_mv; + smu->smu_table.boot_values.cooling_id = v_3_1->coolingsolution_id; + smu->smu_table.boot_values.pp_table_id = 0; + smu->smu_table.boot_values.firmware_caps = v_3_1->firmware_capability; + break; + case 3: + case 4: + default: + v_3_3 = (struct atom_firmware_info_v3_3 *)header; + smu->smu_table.boot_values.revision = v_3_3->firmware_revision; + smu->smu_table.boot_values.gfxclk = v_3_3->bootup_sclk_in10khz; + smu->smu_table.boot_values.uclk = v_3_3->bootup_mclk_in10khz; + smu->smu_table.boot_values.socclk = 0; + smu->smu_table.boot_values.dcefclk = 0; + smu->smu_table.boot_values.vddc = v_3_3->bootup_vddc_mv; + smu->smu_table.boot_values.vddci = v_3_3->bootup_vddci_mv; + smu->smu_table.boot_values.mvddc = v_3_3->bootup_mvddc_mv; + smu->smu_table.boot_values.vdd_gfx = v_3_3->bootup_vddgfx_mv; + smu->smu_table.boot_values.cooling_id = v_3_3->coolingsolution_id; + smu->smu_table.boot_values.pp_table_id = v_3_3->pplib_pptable_id; + smu->smu_table.boot_values.firmware_caps = v_3_3->firmware_capability; + } + + smu->smu_table.boot_values.format_revision = header->format_revision; + smu->smu_table.boot_values.content_revision = header->content_revision; + + smu_v12_0_atom_get_smu_clockinfo(smu->adev, + (uint8_t)SMU12_SYSPLL0_SOCCLK_ID, + (uint8_t)SMU12_SYSPLL0_ID, + &smu->smu_table.boot_values.socclk); + + smu_v12_0_atom_get_smu_clockinfo(smu->adev, + (uint8_t)SMU12_SYSPLL1_DCFCLK_ID, + (uint8_t)SMU12_SYSPLL1_ID, + &smu->smu_table.boot_values.dcefclk); + + smu_v12_0_atom_get_smu_clockinfo(smu->adev, + (uint8_t)SMU12_SYSPLL0_VCLK_ID, + (uint8_t)SMU12_SYSPLL0_ID, + &smu->smu_table.boot_values.vclk); + + smu_v12_0_atom_get_smu_clockinfo(smu->adev, + (uint8_t)SMU12_SYSPLL0_DCLK_ID, + (uint8_t)SMU12_SYSPLL0_ID, + &smu->smu_table.boot_values.dclk); + + if ((smu->smu_table.boot_values.format_revision == 3) && + (smu->smu_table.boot_values.content_revision >= 2)) + smu_v12_0_atom_get_smu_clockinfo(smu->adev, + (uint8_t)SMU12_SYSPLL3_0_FCLK_ID, + (uint8_t)SMU12_SYSPLL3_0_ID, + &smu->smu_table.boot_values.fclk); + + smu_v12_0_atom_get_smu_clockinfo(smu->adev, + (uint8_t)SMU12_SYSPLL0_LCLK_ID, + (uint8_t)SMU12_SYSPLL0_ID, + &smu->smu_table.boot_values.lclk); + + return 0; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index bca02a9fb489..dcbe3a72da09 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -78,6 +78,8 @@ #define smnPCIE_ESM_CTRL 0x111003D0 +#define CLOCK_VALID (1 << 31) + static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -405,6 +407,9 @@ static int aldebaran_setup_pptable(struct smu_context *smu) { int ret = 0; + /* VBIOS pptable is the first 
choice */ + smu->smu_table.boot_values.pp_table_id = 0; + ret = smu_v13_0_setup_pptable(smu); if (ret) return ret; @@ -670,6 +675,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, struct smu_13_0_dpm_context *dpm_context = NULL; uint32_t display_levels; uint32_t freq_values[3] = {0}; + uint32_t min_clk, max_clk; if (amdgpu_ras_intr_triggered()) return snprintf(buf, PAGE_SIZE, "unavailable\n"); @@ -697,12 +703,20 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, display_levels = clocks.num_levels; + min_clk = smu->gfx_actual_hard_min_freq & CLOCK_VALID ? + smu->gfx_actual_hard_min_freq & ~CLOCK_VALID : + single_dpm_table->dpm_levels[0].value; + max_clk = smu->gfx_actual_soft_max_freq & CLOCK_VALID ? + smu->gfx_actual_soft_max_freq & ~CLOCK_VALID : + single_dpm_table->dpm_levels[1].value; + + freq_values[0] = min_clk; + freq_values[1] = max_clk; + /* fine-grained dpm has only 2 levels */ - if (now > single_dpm_table->dpm_levels[0].value && - now < single_dpm_table->dpm_levels[1].value) { + if (now > min_clk && now < max_clk) { display_levels = clocks.num_levels + 1; - freq_values[0] = single_dpm_table->dpm_levels[0].value; - freq_values[2] = single_dpm_table->dpm_levels[1].value; + freq_values[2] = max_clk; freq_values[1] = now; } @@ -712,12 +726,15 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, */ if (display_levels == clocks.num_levels) { for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", i, - clocks.data[i].clocks_in_khz / 1000, - (clocks.num_levels == 1) ? "*" : + size += sprintf( + buf + size, "%d: %uMhz %s\n", i, + freq_values[i], + (clocks.num_levels == 1) ? + "*" : (aldebaran_freqs_in_same_level( - clocks.data[i].clocks_in_khz / 1000, - now) ? "*" : "")); + freq_values[i], now) ? 
+ "*" : + "")); } else { for (i = 0; i < display_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", i, @@ -1117,6 +1134,9 @@ static int aldebaran_set_performance_level(struct smu_context *smu, && (level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) smu_cmn_send_smc_msg(smu, SMU_MSG_DisableDeterminism, NULL); + /* Reset user min/max gfx clock */ + smu->gfx_actual_hard_min_freq = 0; + smu->gfx_actual_soft_max_freq = 0; switch (level) { @@ -1158,7 +1178,14 @@ static int aldebaran_set_soft_freq_limited_range(struct smu_context *smu, if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { min_clk = max(min, dpm_context->dpm_tables.gfx_table.min); max_clk = min(max, dpm_context->dpm_tables.gfx_table.max); - return smu_v13_0_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk); + ret = smu_v13_0_set_soft_freq_limited_range(smu, SMU_GFXCLK, + min_clk, max_clk); + + if (!ret) { + smu->gfx_actual_hard_min_freq = min_clk | CLOCK_VALID; + smu->gfx_actual_soft_max_freq = max_clk | CLOCK_VALID; + } + return ret; } if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { @@ -1178,9 +1205,15 @@ static int aldebaran_set_soft_freq_limited_range(struct smu_context *smu, ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableDeterminism, max, NULL); - if (ret) + if (ret) { dev_err(adev->dev, "Failed to enable determinism at GFX clock %d MHz\n", max); + } else { + smu->gfx_actual_hard_min_freq = + min_clk | CLOCK_VALID; + smu->gfx_actual_soft_max_freq = + max | CLOCK_VALID; + } } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 30c9ac635105..0864083e7435 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -276,8 +276,6 @@ int smu_v13_0_setup_pptable(struct smu_context *smu) void *table; uint16_t version_major, version_minor; - /* temporarily hardcode to use vbios pptable */ - smu->smu_table.boot_values.pp_table_id = 0; if (amdgpu_smu_pptable_id >= 0) { smu->smu_table.boot_values.pp_table_id = amdgpu_smu_pptable_id; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d74b263c5f4e..64e9107d70f7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1403,7 +1403,8 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) * require the entire fb to accommodate that to avoid * potential runtime errors at plane configuration time. */ - if (IS_DISPLAY_VER(dev_priv, 9) && color_plane == 0 && fb->width > 3840) + if ((IS_DISPLAY_VER(dev_priv, 9) || IS_GEMINILAKE(dev_priv)) && + color_plane == 0 && fb->width > 3840) tile_width *= 4; /* * The main surface pitch must be padded to a multiple of four diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index cbcfb0c4c370..02a003fd48fb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -96,7 +96,7 @@ static bool intel_dp_read_lttpr_common_caps(struct intel_dp *intel_dp) * Detecting LTTPRs must be avoided on platforms with an AUX timeout * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1). 
*/ - if (DISPLAY_VER(i915) < 10) + if (DISPLAY_VER(i915) < 10 || IS_GEMINILAKE(i915)) return false; if (drm_dp_read_lttpr_common_caps(&intel_dp->aux, diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 986bbbe3b12f..957252b695d7 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -597,7 +597,7 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv, return false; /* Display WA #1105: skl,bxt,kbl,cfl,glk */ - if (IS_DISPLAY_VER(dev_priv, 9) && + if ((IS_DISPLAY_VER(dev_priv, 9) || IS_GEMINILAKE(dev_priv)) && modifier == DRM_FORMAT_MOD_LINEAR && stride & 511) return false; diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index e477b6114a60..e5dadde422f7 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -803,8 +803,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, atomic_inc(&dev_priv->gpu_error.pending_fb_pin); vma = intel_overlay_pin_fb(new_bo); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto out_pin_section; + } i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 1d561812fcad..8ada4f829cab 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1519,8 +1519,7 @@ void intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state) u32 psr_status; mutex_lock(&intel_dp->psr.lock); - if (!intel_dp->psr.enabled || - (intel_dp->psr.enabled && intel_dp->psr.psr2_enabled)) { + if (!intel_dp->psr.enabled || intel_dp->psr.psr2_enabled) { mutex_unlock(&intel_dp->psr.lock); continue; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 3e248d3bd869..4f9c8d3021ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -70,6 +70,7 @@ static void try_to_writeback(struct drm_i915_gem_object *obj, /** * i915_gem_shrink - Shrink buffer object caches + * @ww: i915 gem ww acquire ctx, or NULL * @i915: i915 device * @target: amount of memory to make available, in pages * @nr_scanned: optional output for number of pages scanned (incremental) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 01c1d1b36acd..ca9c9e27a43d 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -948,11 +948,6 @@ static int cmd_reg_handler(struct parser_exec_state *s, /* below are all lri handlers */ vreg = &vgpu_vreg(s->vgpu, offset); - if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { - gvt_vgpu_err("%s access to non-render register (%x)\n", - cmd, offset); - return -EBADRQC; - } if (is_cmd_update_pdps(offset, s) && cmd_pdp_mmio_update_handler(s, offset, index)) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 67a26923aa0e..9478c132d7b6 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -587,12 +587,6 @@ static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, entry, index, false, 0, mm->vgpu); } -static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm, - struct intel_gvt_gtt_entry *entry, unsigned long index) -{ - _ppgtt_set_root_entry(mm, entry, index, true); -} - static inline void 
ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_entry *entry, unsigned long index) { diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index a66e7b2531d4..e7c2babcee8b 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -117,7 +117,7 @@ static bool intel_get_gvt_attrs(struct attribute_group ***intel_vgpu_type_groups return true; } -static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) +static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) { int i, j; struct intel_vgpu_type *type; @@ -135,7 +135,7 @@ static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) gvt_vgpu_type_groups[i] = group; } - return true; + return 0; unwind: for (j = 0; j < i; j++) { @@ -143,7 +143,7 @@ unwind: kfree(group); } - return false; + return -ENOMEM; } static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) @@ -364,7 +364,7 @@ int intel_gvt_init_device(struct drm_i915_private *i915) goto out_clean_thread; ret = intel_gvt_init_vgpu_type_groups(gvt); - if (ret == false) { + if (ret) { gvt_err("failed to init vgpu type groups: %d\n", ret); goto out_clean_types; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9165971c3c47..bec9c3652188 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -350,6 +350,8 @@ static void __rq_arm_watchdog(struct i915_request *rq) if (!ce->watchdog.timeout_us) return; + i915_request_get(rq); + hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); wdg->timer.function = __rq_watchdog_expired; hrtimer_start_range_ns(&wdg->timer, @@ -357,7 +359,6 @@ static void __rq_arm_watchdog(struct i915_request *rq) NSEC_PER_USEC), NSEC_PER_MSEC, HRTIMER_MODE_REL); - i915_request_get(rq); } static void __rq_cancel_watchdog(struct i915_request *rq) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 8b7a4f7b7576..42a8afa839cb 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7948,8 +7948,6 @@ restart_ih: DRM_ERROR("Illegal register access in command stream\n"); /* XXX check the bitfield order! */ me_id = (ring_id & 0x60) >> 5; - pipe_id = (ring_id & 0x18) >> 3; - queue_id = (ring_id & 0x7) >> 0; switch (me_id) { case 0: /* This results in a full GPU reset, but all we need to do is soft @@ -7971,8 +7969,6 @@ restart_ih: DRM_ERROR("Illegal instruction in command stream\n"); /* XXX check the bitfield order! 
*/ me_id = (ring_id & 0x60) >> 5; - pipe_id = (ring_id & 0x18) >> 3; - queue_id = (ring_id & 0x7) >> 0; switch (me_id) { case 0: /* This results in a full GPU reset, but all we need to do is soft diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 88731b79c8f5..d0e94b10e4c0 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4511,7 +4511,7 @@ static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx) } else { for (i = 0; i < (command & 0x1fffff); i++) { reg = start_reg + (4 * i); - if (!si_vm_reg_valid(reg)) { + if (!si_vm_reg_valid(reg)) { DRM_ERROR("CP DMA Bad DST register\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index c9385cfd0fc1..f9120dc24682 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -832,10 +832,14 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm, return &plane->base; } -static const u32 tegra_cursor_plane_formats[] = { +static const u32 tegra_legacy_cursor_plane_formats[] = { DRM_FORMAT_RGBA8888, }; +static const u32 tegra_cursor_plane_formats[] = { + DRM_FORMAT_ARGB8888, +}; + static int tegra_cursor_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -875,12 +879,24 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, plane); struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); struct tegra_dc *dc = to_tegra_dc(new_state->crtc); - u32 value = CURSOR_CLIP_DISPLAY; + struct tegra_drm *tegra = plane->dev->dev_private; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + u64 dma_mask = *dc->dev->dma_mask; +#endif + unsigned int x, y; + u32 value = 0; /* no more bets */ if (!new_state->crtc || !new_state->fb) return; + /* + * Legacy display supports hardware clipping of the cursor, but + * nvdisplay relies on software to clip the cursor to the screen. 
+ */ + if (!dc->soc->has_nvdisplay) + value |= CURSOR_CLIP_DISPLAY; + switch (new_state->crtc_w) { case 32: value |= CURSOR_SIZE_32x32; @@ -908,7 +924,7 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - value = (tegra_plane_state->iova[0] >> 32) & 0x3; + value = (tegra_plane_state->iova[0] >> 32) & (dma_mask >> 32); tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); #endif @@ -920,15 +936,39 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL); value &= ~CURSOR_DST_BLEND_MASK; value &= ~CURSOR_SRC_BLEND_MASK; - value |= CURSOR_MODE_NORMAL; + + if (dc->soc->has_nvdisplay) + value &= ~CURSOR_COMPOSITION_MODE_XOR; + else + value |= CURSOR_MODE_NORMAL; + value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC; value |= CURSOR_SRC_BLEND_K1_TIMES_SRC; value |= CURSOR_ALPHA; tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL); + /* nvdisplay relies on software for clipping */ + if (dc->soc->has_nvdisplay) { + struct drm_rect src; + + x = new_state->dst.x1; + y = new_state->dst.y1; + + drm_rect_fp_to_int(&src, &new_state->src); + + value = (src.y1 & tegra->vmask) << 16 | (src.x1 & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR); + + value = (drm_rect_height(&src) & tegra->vmask) << 16 | + (drm_rect_width(&src) & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR); + } else { + x = new_state->crtc_x; + y = new_state->crtc_y; + } + /* position the cursor */ - value = (new_state->crtc_y & 0x3fff) << 16 | - (new_state->crtc_x & 0x3fff); + value = ((y & tegra->vmask) << 16) | (x & tegra->hmask); tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); } @@ -982,8 +1022,13 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, plane->index = 6; plane->dc = dc; - num_formats = ARRAY_SIZE(tegra_cursor_plane_formats); - formats = tegra_cursor_plane_formats; + if (!dc->soc->has_nvdisplay) { + num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats); + formats = tegra_legacy_cursor_plane_formats; + } else { + num_formats = ARRAY_SIZE(tegra_cursor_plane_formats); + formats = tegra_cursor_plane_formats; + } err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, &tegra_plane_funcs, formats, @@ -2035,6 +2080,16 @@ static bool tegra_dc_has_window_groups(struct tegra_dc *dc) return false; } +static int tegra_dc_early_init(struct host1x_client *client) +{ + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + tegra->num_crtcs++; + + return 0; +} + static int tegra_dc_init(struct host1x_client *client) { struct drm_device *drm = dev_get_drvdata(client->host); @@ -2046,6 +2101,12 @@ static int tegra_dc_init(struct host1x_client *client) int err; /* + * DC has been reset by now, so VBLANK syncpoint can be released + * for general use. + */ + host1x_syncpt_release_vblank_reservation(client, 26 + dc->pipe); + + /* * XXX do not register DCs with no window groups because we cannot * assign a primary plane to them, which in turn will cause KMS to * crash. 
@@ -2111,6 +2172,12 @@ static int tegra_dc_init(struct host1x_client *client) if (dc->soc->pitch_align > tegra->pitch_align) tegra->pitch_align = dc->soc->pitch_align; + /* track maximum resolution */ + if (dc->soc->has_nvdisplay) + drm->mode_config.max_width = drm->mode_config.max_height = 16384; + else + drm->mode_config.max_width = drm->mode_config.max_height = 4096; + err = tegra_dc_rgb_init(drm, dc); if (err < 0 && err != -ENODEV) { dev_err(dc->dev, "failed to initialize RGB output: %d\n", err); @@ -2141,7 +2208,7 @@ cleanup: drm_plane_cleanup(primary); host1x_client_iommu_detach(client); - host1x_syncpt_free(dc->syncpt); + host1x_syncpt_put(dc->syncpt); return err; } @@ -2166,7 +2233,17 @@ static int tegra_dc_exit(struct host1x_client *client) } host1x_client_iommu_detach(client); - host1x_syncpt_free(dc->syncpt); + host1x_syncpt_put(dc->syncpt); + + return 0; +} + +static int tegra_dc_late_exit(struct host1x_client *client) +{ + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + tegra->num_crtcs--; return 0; } @@ -2235,8 +2312,10 @@ put_rpm: } static const struct host1x_client_ops dc_client_ops = { + .early_init = tegra_dc_early_init, .init = tegra_dc_init, .exit = tegra_dc_exit, + .late_exit = tegra_dc_late_exit, .suspend = tegra_dc_runtime_suspend, .resume = tegra_dc_runtime_resume, }; @@ -2246,6 +2325,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, + .supports_sector_layout = false, .has_legacy_blending = true, .pitch_align = 8, .has_powergate = false, @@ -2265,6 +2345,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, + .supports_sector_layout = false, .has_legacy_blending = true, .pitch_align = 8, .has_powergate = false, @@ -2284,6 +2365,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, + .supports_sector_layout = false, .has_legacy_blending = true, .pitch_align = 64, .has_powergate = true, @@ -2303,6 +2385,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = false, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = true, @@ -2322,6 +2405,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = false, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = true, @@ -2375,6 +2459,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = false, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = false, @@ -2423,6 +2508,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = true, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = false, @@ -2532,9 +2618,16 @@ static int tegra_dc_couple(struct tegra_dc *dc) static int tegra_dc_probe(struct platform_device *pdev) { + u64 dma_mask = dma_get_mask(pdev->dev.parent); struct tegra_dc *dc; int err; + err = 
dma_coerce_mask_and_coherent(&pdev->dev, dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL); if (!dc) return -ENOMEM; diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 051d03dcb9b0..29f19c3c6149 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -52,6 +52,7 @@ struct tegra_dc_soc_info { bool supports_interlacing; bool supports_cursor; bool supports_block_linear; + bool supports_sector_layout; bool has_legacy_blending; unsigned int pitch_align; bool has_powergate; @@ -511,6 +512,8 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc); #define DC_DISP_CURSOR_START_ADDR_HI 0x4ec #define DC_DISP_BLEND_CURSOR_CONTROL 0x4f1 +#define CURSOR_COMPOSITION_MODE_BLEND (0 << 25) +#define CURSOR_COMPOSITION_MODE_XOR (1 << 25) #define CURSOR_MODE_LEGACY (0 << 24) #define CURSOR_MODE_NORMAL (1 << 24) #define CURSOR_DST_BLEND_ZERO (0 << 16) @@ -705,6 +708,9 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc); #define PROTOCOL_MASK (0xf << 8) #define PROTOCOL_SINGLE_TMDS_A (0x1 << 8) +#define DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR 0x442 +#define DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR 0x446 + #define DC_WIN_CORE_WINDOWGROUP_SET_CONTROL 0x702 #define OWNER_MASK (0xf << 0) #define OWNER(x) (((x) & 0xf) << 0) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 90709c38c993..0c350b0daab4 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -174,7 +174,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, struct drm_tegra_syncpt syncpt; struct host1x *host1x = dev_get_drvdata(drm->dev->parent); struct drm_gem_object **refs; - struct host1x_syncpt *sp; + struct host1x_syncpt *sp = NULL; struct host1x_job *job; unsigned int num_refs; int err; @@ -301,8 +301,8 @@ int tegra_drm_submit(struct tegra_drm_context *context, goto fail; } - /* check whether syncpoint ID is valid */ - sp = host1x_syncpt_get(host1x, syncpt.id); + /* Syncpoint ref will be dropped on job release. 
*/ + sp = host1x_syncpt_get_by_id(host1x, syncpt.id); if (!sp) { err = -ENOENT; goto fail; } @@ -311,7 +311,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, job->is_addr_reg = context->client->ops->is_addr_reg; job->is_valid_class = context->client->ops->is_valid_class; job->syncpt_incrs = syncpt.incrs; - job->syncpt_id = syncpt.id; + job->syncpt = sp; job->timeout = 10000; if (args->timeout && args->timeout < 10000) @@ -383,7 +383,7 @@ static int tegra_syncpt_read(struct drm_device *drm, void *data, struct drm_tegra_syncpt_read *args = data; struct host1x_syncpt *sp; - sp = host1x_syncpt_get(host, args->id); + sp = host1x_syncpt_get_by_id_noref(host, args->id); if (!sp) return -EINVAL; @@ -398,7 +398,7 @@ static int tegra_syncpt_incr(struct drm_device *drm, void *data, struct drm_tegra_syncpt_incr *args = data; struct host1x_syncpt *sp; - sp = host1x_syncpt_get(host1x, args->id); + sp = host1x_syncpt_get_by_id_noref(host1x, args->id); if (!sp) return -EINVAL; @@ -412,7 +412,7 @@ static int tegra_syncpt_wait(struct drm_device *drm, void *data, struct drm_tegra_syncpt_wait *args = data; struct host1x_syncpt *sp; - sp = host1x_syncpt_get(host1x, args->id); + sp = host1x_syncpt_get_by_id_noref(host1x, args->id); if (!sp) return -EINVAL; @@ -1121,9 +1121,8 @@ static int host1x_drm_probe(struct host1x_device *dev) drm->mode_config.min_width = 0; drm->mode_config.min_height = 0; - - drm->mode_config.max_width = 4096; - drm->mode_config.max_height = 4096; + drm->mode_config.max_width = 0; + drm->mode_config.max_height = 0; drm->mode_config.allow_fb_modifiers = true; @@ -1142,6 +1141,14 @@ static int host1x_drm_probe(struct host1x_device *dev) if (err < 0) goto fbdev; + /* + * Now that all display controllers have been initialized, the maximum + * supported resolution is known and the bitmask for horizontal and + * vertical bitfields can be computed. + */ + tegra->hmask = drm->mode_config.max_width - 1; + tegra->vmask = drm->mode_config.max_height - 1; + if (tegra->use_explicit_iommu) { u64 carveout_start, carveout_end, gem_start, gem_end; u64 dma_mask = dma_get_mask(&dev->dev); diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index f38de08e0c95..87df251c1fcf 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -24,6 +24,9 @@ #include "hub.h" #include "trace.h" +/* XXX move to include/uapi/drm/drm_fourcc.h? 
*/ +#define DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT BIT(22) + struct reset_control; #ifdef CONFIG_DRM_FBDEV_EMULATION @@ -54,7 +57,9 @@ struct tegra_drm { struct tegra_fbdev *fbdev; #endif + unsigned int hmask, vmask; unsigned int pitch_align; + unsigned int num_crtcs; struct tegra_display_hub *hub; }; diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 01939c57fc74..cae8b8cbe9dd 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -44,6 +44,15 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, { uint64_t modifier = framebuffer->modifier; + if ((modifier >> 56) == DRM_FORMAT_MOD_VENDOR_NVIDIA) { + if ((modifier & DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT) == 0) + tiling->sector_layout = TEGRA_BO_SECTOR_LAYOUT_TEGRA; + else + tiling->sector_layout = TEGRA_BO_SECTOR_LAYOUT_GPU; + + modifier &= ~DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT; + } + switch (modifier) { case DRM_FORMAT_MOD_LINEAR: tiling->mode = TEGRA_BO_TILING_MODE_PITCH; @@ -86,6 +95,7 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, break; default: + DRM_DEBUG_KMS("unknown format modifier: %llx\n", modifier); return -EINVAL; } diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h index fafb5724499b..c15fd99d6cb2 100644 --- a/drivers/gpu/drm/tegra/gem.h +++ b/drivers/gpu/drm/tegra/gem.h @@ -21,9 +21,15 @@ enum tegra_bo_tiling_mode { TEGRA_BO_TILING_MODE_BLOCK, }; +enum tegra_bo_sector_layout { + TEGRA_BO_SECTOR_LAYOUT_TEGRA, + TEGRA_BO_SECTOR_LAYOUT_GPU, +}; + struct tegra_bo_tiling { enum tegra_bo_tiling_mode mode; unsigned long value; + enum tegra_bo_sector_layout sector_layout; }; struct tegra_bo { diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index adbe2ddcda19..de288cba3905 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -67,7 +67,7 @@ static int gr2d_init(struct host1x_client *client) detach: host1x_client_iommu_detach(client); free: - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); put: host1x_channel_put(gr2d->channel); return err; @@ -86,7 +86,7 @@ static int gr2d_exit(struct host1x_client *client) return err; host1x_client_iommu_detach(client); - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr2d->channel); return 0; diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index b0b8154e8104..24442ade0da3 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -76,7 +76,7 @@ static int gr3d_init(struct host1x_client *client) detach: host1x_client_iommu_detach(client); free: - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); put: host1x_channel_put(gr3d->channel); return err; @@ -94,7 +94,7 @@ static int gr3d_exit(struct host1x_client *client) return err; host1x_client_iommu_detach(client); - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr3d->channel); return 0; diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 8e6d329d062b..79bff8b48271 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -55,6 +55,18 @@ static const u64 tegra_shared_plane_modifiers[] = { DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3), DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4), DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5), + /* + * The GPU sector layout is only supported on Tegra194, but these will + * be filtered out later on by ->format_mod_supported() on SoCs where + * it isn't 
supported. + */ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + /* sentinel */ DRM_FORMAT_MOD_INVALID }; @@ -366,6 +378,12 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane, return -EINVAL; } + if (tiling->sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU && + !dc->soc->supports_sector_layout) { + DRM_ERROR("hardware doesn't support GPU sector layout\n"); + return -EINVAL; + } + /* * Tegra doesn't support different strides for U and V planes so we * error out if the user tries to display a framebuffer with such a @@ -485,6 +503,16 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, base = tegra_plane_state->iova[0] + fb->offsets[0]; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + /* + * Physical address bit 39 in Tegra194 is used as a switch for special + * logic that swizzles the memory using either the legacy Tegra or the + * dGPU sector layout. + */ + if (tegra_plane_state->tiling.sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU) + base |= BIT(39); +#endif + tegra_plane_writel(p, tegra_plane_state->format, DC_WIN_COLOR_DEPTH); tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS); @@ -562,9 +590,8 @@ struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY; struct tegra_drm *tegra = drm->dev_private; struct tegra_display_hub *hub = tegra->hub; - /* planes can be assigned to arbitrary CRTCs */ - unsigned int possible_crtcs = 0x7; struct tegra_shared_plane *plane; + unsigned int possible_crtcs; unsigned int num_formats; const u64 *modifiers; struct drm_plane *p; @@ -583,6 +610,9 @@ struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, p = &plane->base.base; + /* planes can be assigned to arbitrary CRTCs */ + possible_crtcs = BIT(tegra->num_crtcs) - 1; + num_formats = ARRAY_SIZE(tegra_shared_plane_formats); formats = tegra_shared_plane_formats; modifiers = tegra_shared_plane_modifiers; @@ -848,12 +878,19 @@ static const struct host1x_client_ops tegra_display_hub_ops = { static int tegra_display_hub_probe(struct platform_device *pdev) { + u64 dma_mask = dma_get_mask(pdev->dev.parent); struct device_node *child = NULL; struct tegra_display_hub *hub; struct clk *clk; unsigned int i; int err; + err = dma_coerce_mask_and_coherent(&pdev->dev, dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + hub = devm_kzalloc(&pdev->dev, sizeof(*hub), GFP_KERNEL); if (!hub) return -ENOMEM; diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 19e8847a164b..2e11b4b1f702 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -83,6 +83,22 @@ static void tegra_plane_atomic_destroy_state(struct drm_plane *plane, kfree(state); } +static bool tegra_plane_supports_sector_layout(struct drm_plane *plane) +{ + struct drm_crtc *crtc; + + drm_for_each_crtc(crtc, plane->dev) { + if (plane->possible_crtcs & drm_crtc_mask(crtc)) { + struct tegra_dc *dc = to_tegra_dc(crtc); + + if (!dc->soc->supports_sector_layout) + return false; + } + } + + return true; +} + static bool tegra_plane_format_mod_supported(struct 
drm_plane *plane, uint32_t format, uint64_t modifier) @@ -92,6 +108,14 @@ static bool tegra_plane_format_mod_supported(struct drm_plane *plane, if (modifier == DRM_FORMAT_MOD_LINEAR) return true; + /* check for the sector layout bit */ + if ((modifier >> 56) == DRM_FORMAT_MOD_VENDOR_NVIDIA) { + if (modifier & DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT) { + if (!tegra_plane_supports_sector_layout(plane)) + return false; + } + } + if (info->num_planes == 1) return true; @@ -119,6 +143,14 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) dma_addr_t phys_addr, *phys; struct sg_table *sgt; + /* + * If we're not attached to a domain, we already stored the + * physical address when the buffer was allocated. If we're + * part of a group that's shared between all display + * controllers, we've also already mapped the framebuffer + * through the SMMU. In both cases we can short-circuit the + * code below and retrieve the stored IOV address. + */ if (!domain || dc->client.group) phys = &phys_addr; else diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 77e128832920..72aea1cc0cfa 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -214,7 +214,7 @@ static int vic_init(struct host1x_client *client) return 0; free_syncpt: - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); free_channel: host1x_channel_put(vic->channel); detach: @@ -238,7 +238,7 @@ static int vic_exit(struct host1x_client *client) if (err < 0) return err; - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(vic->channel); host1x_client_iommu_detach(client); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 9b787b3caeb5..510e3e001dab 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -112,7 +112,7 @@ int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; struct ttm_device *bdev; - int ret = -EBUSY; + int ret = 0; mutex_lock(&ttm_global_mutex); list_for_each_entry(bdev, &glob->device_list, device_list) { diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index eecc930e97ab..a1a25410ec74 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -329,6 +329,8 @@ int ttm_tt_populate(struct ttm_device *bdev, ttm_dma32_pages_limit) { ret = ttm_global_swapout(ctx, GFP_KERNEL); + if (ret == 0) + break; if (ret < 0) goto error; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index 104b95a8c7a2..aeb0a22a2c34 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -280,7 +280,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, spin_unlock(&glob->lock); ret = ttm_global_swapout(ctx, GFP_KERNEL); spin_lock(&glob->lock); - if (unlikely(ret < 0)) + if (unlikely(ret <= 0)) break; } diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 68a766ff0e9d..46f69c532b6b 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -197,6 +197,17 @@ int host1x_device_init(struct host1x_device *device) mutex_lock(&device->clients_lock); list_for_each_entry(client, &device->clients, list) { + if (client->ops && client->ops->early_init) { + err = client->ops->early_init(client); + if (err < 0) { + dev_err(&device->dev, "failed to early initialize %s: %d\n", + dev_name(client->dev), err); + goto teardown_late; + } + } + } + 
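+ /*
+ * All early_init callbacks have run at this point; only then are the
+ * regular init callbacks invoked. On failure, the teardown_late path
+ * below unwinds early_init in reverse by pointing the cursor at the
+ * list head so that list_for_each_entry_continue_reverse() visits
+ * every client.
+ */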
+ list_for_each_entry(client, &device->clients, list) { if (client->ops && client->ops->init) { err = client->ops->init(client); if (err < 0) { @@ -217,6 +228,14 @@ teardown: if (client->ops->exit) client->ops->exit(client); + /* reset client to end of list for late teardown */ + client = list_entry(&device->clients, struct host1x_client, list); + +teardown_late: + list_for_each_entry_continue_reverse(client, &device->clients, list) + if (client->ops->late_exit) + client->ops->late_exit(client); + mutex_unlock(&device->clients_lock); return err; } @@ -251,6 +270,18 @@ int host1x_device_exit(struct host1x_device *device) } } + list_for_each_entry_reverse(client, &device->clients, list) { + if (client->ops && client->ops->late_exit) { + err = client->ops->late_exit(client); + if (err < 0) { + dev_err(&device->dev, "failed to late cleanup %s: %d\n", + dev_name(client->dev), err); + mutex_unlock(&device->clients_lock); + return err; + } + } + } + mutex_unlock(&device->clients_lock); return 0; diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index e8d3fda91d8a..6e6ca774f68d 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -273,15 +273,13 @@ static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, static void cdma_start_timer_locked(struct host1x_cdma *cdma, struct host1x_job *job) { - struct host1x *host = cdma_to_host1x(cdma); - if (cdma->timeout.client) { /* timer already started */ return; } cdma->timeout.client = job->client; - cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id); + cdma->timeout.syncpt = job->syncpt; cdma->timeout.syncpt_val = job->syncpt_end; cdma->timeout.start_ktime = ktime_get(); @@ -312,7 +310,6 @@ static void stop_cdma_timer_locked(struct host1x_cdma *cdma) static void update_cdma_locked(struct host1x_cdma *cdma) { bool signal = false; - struct host1x *host1x = cdma_to_host1x(cdma); struct host1x_job *job, *n; /* If CDMA is stopped, queue is cleared and we can return */ @@ -324,8 +321,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma) * to consume as many sync queue entries as possible without blocking */ list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { - struct host1x_syncpt *sp = - host1x_syncpt_get(host1x, job->syncpt_id); + struct host1x_syncpt *sp = job->syncpt; /* Check whether this syncpt has completed, and bail if not */ if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { @@ -499,8 +495,7 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) if (!cdma->timeout.initialized) { int err; - err = host1x_hw_cdma_timeout_init(host1x, cdma, - job->syncpt_id); + err = host1x_hw_cdma_timeout_init(host1x, cdma); if (err) { mutex_unlock(&cdma->lock); return err; diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index 1b4997bda1c7..8a14880c61bb 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -69,6 +69,7 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) static void show_syncpts(struct host1x *m, struct output *o) { + struct list_head *pos; unsigned int i; host1x_debug_output(o, "---- syncpts ----\n"); @@ -76,12 +77,19 @@ static void show_syncpts(struct host1x *m, struct output *o) for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { u32 max = host1x_syncpt_read_max(m->syncpt + i); u32 min = host1x_syncpt_load(m->syncpt + i); + unsigned int waiters = 0; - if (!min && !max) + spin_lock(&m->syncpt[i].intr.lock); + list_for_each(pos, &m->syncpt[i].intr.wait_head) + waiters++; + 
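+ /* counted under intr.lock so waiters cannot be added or removed mid-walk */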
spin_unlock(&m->syncpt[i].intr.lock); + + if (!min && !max && !waiters) continue; - host1x_debug_output(o, "id %u (%s) min %d max %d\n", - i, m->syncpt[i].name, min, max); + host1x_debug_output(o, + "id %u (%s) min %d max %d (%d waiters)\n", + i, m->syncpt[i].name, min, max, waiters); } for (i = 0; i < host1x_syncpt_nb_bases(m); i++) { diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index d0ebb70e2fdd..fbb6447b8659 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -77,6 +77,7 @@ static const struct host1x_info host1x01_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = true, }; static const struct host1x_info host1x02_info = { @@ -91,6 +92,7 @@ static const struct host1x_info host1x02_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = true, }; static const struct host1x_info host1x04_info = { @@ -105,6 +107,7 @@ static const struct host1x_info host1x04_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = false, }; static const struct host1x_info host1x05_info = { @@ -119,6 +122,7 @@ static const struct host1x_info host1x05_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = false, }; static const struct host1x_sid_entry tegra186_sid_table[] = { @@ -142,6 +146,7 @@ static const struct host1x_info host1x06_info = { .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, + .reserve_vblank_syncpts = false, }; static const struct host1x_sid_entry tegra194_sid_table[] = { @@ -165,6 +170,7 @@ static const struct host1x_info host1x07_info = { .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), .sid_table = tegra194_sid_table, + .reserve_vblank_syncpts = false, }; static const struct of_device_id host1x_of_match[] = { diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index f781a9b0f39d..fa6d4bc46e98 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -37,7 +37,7 @@ struct host1x_cdma_ops { void (*start)(struct host1x_cdma *cdma); void (*stop)(struct host1x_cdma *cdma); void (*flush)(struct host1x_cdma *cdma); - int (*timeout_init)(struct host1x_cdma *cdma, unsigned int syncpt); + int (*timeout_init)(struct host1x_cdma *cdma); void (*timeout_destroy)(struct host1x_cdma *cdma); void (*freeze)(struct host1x_cdma *cdma); void (*resume)(struct host1x_cdma *cdma, u32 getptr); @@ -101,6 +101,12 @@ struct host1x_info { bool has_hypervisor; /* has hypervisor registers */ unsigned int num_sid_entries; const struct host1x_sid_entry *sid_table; + /* + * On T20-T148, the boot chain may set up DC to increment syncpoints + * 26/27 on VBLANK. As such we cannot use these syncpoints until + * the display driver disables VBLANK increments. 
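+ * Once the display driver has taken over and turned those increments
+ * off, it calls host1x_syncpt_release_vblank_reservation() (added in
+ * syncpt.c below) to make the syncpoint allocatable again.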
+ */ + bool reserve_vblank_syncpts; }; struct host1x { @@ -261,10 +267,9 @@ static inline void host1x_hw_cdma_flush(struct host1x *host, } static inline int host1x_hw_cdma_timeout_init(struct host1x *host, - struct host1x_cdma *cdma, - unsigned int syncpt) + struct host1x_cdma *cdma) { - return host->cdma_op->timeout_init(cdma, syncpt); + return host->cdma_op->timeout_init(cdma); } static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host, diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index 2f3bf94cf365..e49cd5b8f735 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -295,7 +295,7 @@ static void cdma_timeout_handler(struct work_struct *work) /* * Init timeout resources */ -static int cdma_timeout_init(struct host1x_cdma *cdma, unsigned int syncpt) +static int cdma_timeout_init(struct host1x_cdma *cdma) { INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler); cdma->timeout.initialized = true; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 5eaa29d171c9..d4c28faf27d1 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -86,8 +86,7 @@ static void submit_gathers(struct host1x_job *job) static inline void synchronize_syncpt_base(struct host1x_job *job) { - struct host1x *host = dev_get_drvdata(job->channel->dev->parent); - struct host1x_syncpt *sp = host->syncpt + job->syncpt_id; + struct host1x_syncpt *sp = job->syncpt; unsigned int id; u32 value; @@ -118,7 +117,7 @@ static void host1x_channel_set_streamid(struct host1x_channel *channel) static int channel_submit(struct host1x_job *job) { struct host1x_channel *ch = job->channel; - struct host1x_syncpt *sp; + struct host1x_syncpt *sp = job->syncpt; u32 user_syncpt_incrs = job->syncpt_incrs; u32 prev_max = 0; u32 syncval; @@ -126,10 +125,9 @@ static int channel_submit(struct host1x_job *job) struct host1x_waitlist *completed_waiter = NULL; struct host1x *host = dev_get_drvdata(ch->dev->parent); - sp = host->syncpt + job->syncpt_id; trace_host1x_channel_submit(dev_name(ch->dev), job->num_gathers, job->num_relocs, - job->syncpt_id, job->syncpt_incrs); + job->syncpt->id, job->syncpt_incrs); /* before error checks, return current max */ prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); @@ -163,7 +161,7 @@ static int channel_submit(struct host1x_job *job) host1x_cdma_push(&ch->cdma, host1x_opcode_setclass(HOST1X_CLASS_HOST1X, host1x_uclass_wait_syncpt_r(), 1), - host1x_class_host_wait_syncpt(job->syncpt_id, + host1x_class_host_wait_syncpt(job->syncpt->id, host1x_syncpt_read_max(sp))); } diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c index f31bcfa1b837..ceb48229d14b 100644 --- a/drivers/gpu/host1x/hw/debug_hw.c +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -204,7 +204,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) unsigned int i; host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n", - job, job->syncpt_id, job->syncpt_end, + job, job->syncpt->id, job->syncpt_end, job->first_get, job->timeout, job->num_slots, job->num_unpins); diff --git a/drivers/gpu/host1x/hw/hw_host1x07_vm.h b/drivers/gpu/host1x/hw/hw_host1x07_vm.h index 3058b3c9a91d..b766851d5b83 100644 --- a/drivers/gpu/host1x/hw/hw_host1x07_vm.h +++ b/drivers/gpu/host1x/hw/hw_host1x07_vm.h @@ -29,6 +29,6 @@ #define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x652c + 4 * (x)) #define 
HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x6590 + 4 * (x)) #define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) -#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x8d00 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x9980 + 4 * (x)) #define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xa604 + 4 * (x)) #define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 9245add23b5d..6d1f3c0fdbe7 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -235,25 +235,37 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, host1x_hw_intr_enable_syncpt_intr(host, syncpt->id); } - spin_unlock(&syncpt->intr.lock); - if (ref) *ref = waiter; + + spin_unlock(&syncpt->intr.lock); + return 0; } -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref) +void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, + bool flush) { struct host1x_waitlist *waiter = ref; struct host1x_syncpt *syncpt; - while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) == - WLS_REMOVED) - schedule(); + atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED); syncpt = host->syncpt + id; - (void)process_wait_list(host, syncpt, - host1x_syncpt_load(host->syncpt + id)); + + spin_lock(&syncpt->intr.lock); + if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) == + WLS_CANCELLED) { + list_del(&waiter->list); + kref_put(&waiter->refcount, waiter_release); + } + spin_unlock(&syncpt->intr.lock); + + if (flush) { + /* Wait until any concurrently executing handler has finished. */ + while (atomic_read(&waiter->state) != WLS_HANDLED) + schedule(); + } kref_put(&waiter->refcount, waiter_release); } diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index aac38194398f..6ea55e615e3a 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -74,8 +74,10 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, * Unreference an action submitted to host1x_intr_add_action(). * You must call this if you passed non-NULL as ref. * @ref the ref returned from host1x_intr_add_action() + * @flush wait until any pending handlers have completed before returning. 
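+ * When @flush is true this function may block in schedule() until a
+ * concurrently running handler has finished, so it must not be called
+ * from atomic context.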
*/ -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref); +void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, + bool flush); /* Initialize host1x sync point interrupt */ int host1x_intr_init(struct host1x *host, unsigned int irq_sync); diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 82d0a60ba3f7..adbdc225de8d 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -79,6 +79,9 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->syncpt) + host1x_syncpt_put(job->syncpt); + kfree(job); } @@ -674,7 +677,7 @@ EXPORT_SYMBOL(host1x_job_unpin); */ void host1x_job_dump(struct device *dev, struct host1x_job *job) { - dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id); + dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id); dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end); dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get); dev_dbg(dev, " TIMEOUT %d\n", job->timeout); diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index fce7892d5137..e648ebbb2027 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -42,17 +42,32 @@ static void host1x_syncpt_base_free(struct host1x_syncpt_base *base) base->requested = false; } -static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, - struct host1x_client *client, - unsigned long flags) +/** + * host1x_syncpt_alloc() - allocate a syncpoint + * @host: host1x device data + * @flags: bitfield of HOST1X_SYNCPT_* flags + * @name: name for the syncpoint for use in debug prints + * + * Allocates a hardware syncpoint for the caller's use. The caller then has + * the sole authority to mutate the syncpoint's value until it is freed again. + * + * If no free syncpoints are available, or a NULL name was specified, returns + * NULL. + */ +struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, + unsigned long flags, + const char *name) { struct host1x_syncpt *sp = host->syncpt; + char *full_name; unsigned int i; - char *name; + + if (!name) + return NULL; mutex_lock(&host->syncpt_mutex); - for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++) + for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++) ; if (i >= host->info->nb_pts) @@ -64,19 +79,19 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, goto unlock; } - name = kasprintf(GFP_KERNEL, "%02u-%s", sp->id, - client ? 
dev_name(client->dev) : NULL); - if (!name) + full_name = kasprintf(GFP_KERNEL, "%u-%s", sp->id, name); + if (!full_name) goto free_base; - sp->client = client; - sp->name = name; + sp->name = full_name; if (flags & HOST1X_SYNCPT_CLIENT_MANAGED) sp->client_managed = true; else sp->client_managed = false; + kref_init(&sp->ref); + mutex_unlock(&host->syncpt_mutex); return sp; @@ -87,6 +102,7 @@ unlock: mutex_unlock(&host->syncpt_mutex); return NULL; } +EXPORT_SYMBOL(host1x_syncpt_alloc); /** * host1x_syncpt_id() - retrieve syncpoint ID @@ -294,7 +310,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, } } - host1x_intr_put_ref(sp->host, sp->id, ref); + host1x_intr_put_ref(sp->host, sp->id, ref, true); done: return err; @@ -307,59 +323,12 @@ EXPORT_SYMBOL(host1x_syncpt_wait); bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh) { u32 current_val; - u32 future_val; smp_rmb(); current_val = (u32)atomic_read(&sp->min_val); - future_val = (u32)atomic_read(&sp->max_val); - - /* Note the use of unsigned arithmetic here (mod 1<<32). - * - * c = current_val = min_val = the current value of the syncpoint. - * t = thresh = the value we are checking - * f = future_val = max_val = the value c will reach when all - * outstanding increments have completed. - * - * Note that c always chases f until it reaches f. - * - * Dtf = (f - t) - * Dtc = (c - t) - * - * Consider all cases: - * - * A) .....c..t..f..... Dtf < Dtc need to wait - * B) .....c.....f..t.. Dtf > Dtc expired - * C) ..t..c.....f..... Dtf > Dtc expired (Dct very large) - * - * Any case where f==c: always expired (for any t). Dtf == Dcf - * Any case where t==c: always expired (for any f). Dtf >= Dtc (because Dtc==0) - * Any case where t==f!=c: always wait. Dtf < Dtc (because Dtf==0, - * Dtc!=0) - * - * Other cases: - * - * A) .....t..f..c..... Dtf < Dtc need to wait - * A) .....f..c..t..... Dtf < Dtc need to wait - * A) .....f..t..c..... Dtf > Dtc expired - * - * So: - * Dtf >= Dtc implies EXPIRED (return true) - * Dtf < Dtc implies WAIT (return false) - * - * Note: If t is expired then we *cannot* wait on it. We would wait - * forever (hang the system). - * - * Note: do NOT get clever and remove the -thresh from both sides. It - * is NOT the same. - * - * If future valueis zero, we have a client managed sync point. In that - * case we do a direct comparison. - */ - if (!host1x_syncpt_client_managed(sp)) - return future_val - thresh >= current_val - thresh; - else - return (s32)(current_val - thresh) >= 0; + + return ((current_val - thresh) & 0x80000000U) == 0U; } int host1x_syncpt_init(struct host1x *host) @@ -401,10 +370,15 @@ int host1x_syncpt_init(struct host1x *host) host1x_hw_syncpt_enable_protection(host); /* Allocate sync point to use for clearing waits for expired fences */ - host->nop_sp = host1x_syncpt_alloc(host, NULL, 0); + host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop"); if (!host->nop_sp) return -ENOMEM; + if (host->info->reserve_vblank_syncpts) { + kref_init(&host->syncpt[26].ref); + kref_init(&host->syncpt[27].ref); + } + return 0; } @@ -416,44 +390,50 @@ int host1x_syncpt_init(struct host1x *host) * host1x client drivers can use this function to allocate a syncpoint for * subsequent use. A syncpoint returned by this function will be reserved for * use by the client exclusively. When no longer using a syncpoint, a host1x - * client driver needs to release it using host1x_syncpt_free(). + * client driver needs to release it using host1x_syncpt_put(). 
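+ *
+ * A minimal usage sketch (hypothetical client, error handling elided):
+ *
+ *   struct host1x_syncpt *sp;
+ *
+ *   sp = host1x_syncpt_request(client, HOST1X_SYNCPT_CLIENT_MANAGED);
+ *   if (!sp)
+ *       return -ENOMEM;
+ *   ...
+ *   host1x_syncpt_put(sp);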
*/ struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags) { struct host1x *host = dev_get_drvdata(client->host->parent); - return host1x_syncpt_alloc(host, client, flags); + return host1x_syncpt_alloc(host, flags, dev_name(client->dev)); } EXPORT_SYMBOL(host1x_syncpt_request); -/** - * host1x_syncpt_free() - free a requested syncpoint - * @sp: host1x syncpoint - * - * Release a syncpoint previously allocated using host1x_syncpt_request(). A - * host1x client driver should call this when the syncpoint is no longer in - * use. Note that client drivers must ensure that the syncpoint doesn't remain - * under the control of hardware after calling this function, otherwise two - * clients may end up trying to access the same syncpoint concurrently. - */ -void host1x_syncpt_free(struct host1x_syncpt *sp) +static void syncpt_release(struct kref *ref) { - if (!sp) - return; + struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref); + + atomic_set(&sp->max_val, host1x_syncpt_read(sp)); mutex_lock(&sp->host->syncpt_mutex); host1x_syncpt_base_free(sp->base); kfree(sp->name); sp->base = NULL; - sp->client = NULL; sp->name = NULL; sp->client_managed = false; mutex_unlock(&sp->host->syncpt_mutex); } -EXPORT_SYMBOL(host1x_syncpt_free); + +/** + * host1x_syncpt_put() - free a requested syncpoint + * @sp: host1x syncpoint + * + * Release a syncpoint previously allocated using host1x_syncpt_request(). A + * host1x client driver should call this when the syncpoint is no longer in + * use. + */ +void host1x_syncpt_put(struct host1x_syncpt *sp) +{ + if (!sp) + return; + + kref_put(&sp->ref, syncpt_release); +} +EXPORT_SYMBOL(host1x_syncpt_put); void host1x_syncpt_deinit(struct host1x *host) { @@ -520,16 +500,48 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host) } /** - * host1x_syncpt_get() - obtain a syncpoint by ID + * host1x_syncpt_get_by_id() - obtain a syncpoint by ID * @host: host1x controller * @id: syncpoint ID */ -struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id) +struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, + unsigned int id) { if (id >= host->info->nb_pts) return NULL; - return host->syncpt + id; + if (kref_get_unless_zero(&host->syncpt[id].ref)) + return &host->syncpt[id]; + else + return NULL; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id); + +/** + * host1x_syncpt_get_by_id_noref() - obtain a syncpoint by ID but don't + * increase the refcount. + * @host: host1x controller + * @id: syncpoint ID + */ +struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, + unsigned int id) +{ + if (id >= host->info->nb_pts) + return NULL; + + return &host->syncpt[id]; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id_noref); + +/** + * host1x_syncpt_get() - increment syncpoint refcount + * @sp: syncpoint + */ +struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp) +{ + kref_get(&sp->ref); + + return sp; } EXPORT_SYMBOL(host1x_syncpt_get); @@ -552,3 +564,31 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base) return base->id; } EXPORT_SYMBOL(host1x_syncpt_base_id); + +static void do_nothing(struct kref *ref) +{ +} + +/** + * host1x_syncpt_release_vblank_reservation() - Make VBLANK syncpoint + * available for allocation + * + * @client: host1x bus client + * @syncpt_id: syncpoint ID to make available + * + * Makes VBLANK<i> syncpoint available for allocation if it was + * reserved at initialization time. 
This should be called by the display + * driver after it has ensured that any VBLANK increment programming configured + * by the boot chain has been disabled. + */ +void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, + u32 syncpt_id) +{ + struct host1x *host = dev_get_drvdata(client->host->parent); + + if (!host->info->reserve_vblank_syncpts) + return; + + kref_put(&host->syncpt[syncpt_id].ref, do_nothing); +} +EXPORT_SYMBOL(host1x_syncpt_release_vblank_reservation); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index 8e1d04dacaa0..a6766f8d55ee 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -11,6 +11,7 @@ #include <linux/atomic.h> #include <linux/host1x.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/sched.h> #include "intr.h" @@ -26,6 +27,8 @@ struct host1x_syncpt_base { }; struct host1x_syncpt { + struct kref ref; + unsigned int id; atomic_t min_val; atomic_t max_val; @@ -33,7 +36,6 @@ struct host1x_syncpt { const char *name; bool client_managed; struct host1x *host; - struct host1x_client *client; struct host1x_syncpt_base *base; /* interrupt data */ diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c index 7a09061cda57..df5ca3596470 100644 --- a/drivers/staging/media/tegra-video/vi.c +++ b/drivers/staging/media/tegra-video/vi.c @@ -1131,8 +1131,8 @@ static void tegra_channel_host1x_syncpts_free(struct tegra_vi_channel *chan) int i; for (i = 0; i < chan->numgangports; i++) { - host1x_syncpt_free(chan->mw_ack_sp[i]); - host1x_syncpt_free(chan->frame_start_sp[i]); + host1x_syncpt_put(chan->mw_ack_sp[i]); + host1x_syncpt_put(chan->frame_start_sp[i]); } } @@ -1177,7 +1177,7 @@ static int tegra_channel_host1x_syncpt_init(struct tegra_vi_channel *chan) mw_sp = host1x_syncpt_request(&vi->client, flags); if (!mw_sp) { dev_err(vi->dev, "failed to request memory ack syncpoint\n"); - host1x_syncpt_free(fs_sp); + host1x_syncpt_put(fs_sp); ret = -ENOMEM; goto free_syncpts; } diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index f58a545b3bf3..8ea8f079cde2 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -575,7 +575,8 @@ static int efifb_probe(struct platform_device *dev) goto err_fb_dealoc; } fb_info(info, "%s frame buffer device\n", info->fix.id); - pm_runtime_get_sync(&efifb_pci_dev->dev); + if (efifb_pci_dev) + pm_runtime_get_sync(&efifb_pci_dev->dev); return 0; err_fb_dealoc: @@ -602,7 +603,8 @@ static int efifb_remove(struct platform_device *pdev) unregister_framebuffer(info); sysfs_remove_groups(&pdev->dev.kobj, efifb_groups); framebuffer_release(info); - pm_runtime_put(&efifb_pci_dev->dev); + if (efifb_pci_dev) + pm_runtime_put(&efifb_pci_dev->dev); return 0; } diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9eb77c87a83b..232e1bd507a7 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -25,14 +25,18 @@ u64 host1x_get_dma_mask(struct host1x *host1x); /** * struct host1x_client_ops - host1x client operations + * @early_init: host1x client early initialization code * @init: host1x client initialization code * @exit: host1x client tear down code + * @late_exit: host1x client late tear down code * @suspend: host1x client suspend code * @resume: host1x client resume code */ struct host1x_client_ops { + int (*early_init)(struct host1x_client *client); int (*init)(struct host1x_client *client); int (*exit)(struct host1x_client *client); + int (*late_exit)(struct host1x_client 
*client); int (*suspend)(struct host1x_client *client); int (*resume)(struct host1x_client *client); }; @@ -142,7 +146,9 @@ struct host1x_syncpt_base; struct host1x_syncpt; struct host1x; -struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp); u32 host1x_syncpt_id(struct host1x_syncpt *sp); u32 host1x_syncpt_read_min(struct host1x_syncpt *sp); u32 host1x_syncpt_read_max(struct host1x_syncpt *sp); @@ -153,11 +159,17 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value); struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags); -void host1x_syncpt_free(struct host1x_syncpt *sp); +void host1x_syncpt_put(struct host1x_syncpt *sp); +struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, + unsigned long flags, + const char *name); struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp); u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); +void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, + u32 syncpt_id); + /* * host1x channel */ @@ -218,7 +230,7 @@ struct host1x_job { dma_addr_t *reloc_addr_phys; /* Sync point id, number of increments and end related to the submit */ - u32 syncpt_id; + struct host1x_syncpt *syncpt; u32 syncpt_incrs; u32 syncpt_end;
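The simplified expiry test adopted in host1x_syncpt_is_expired() above replaces the old future_val reasoning with a single mod-2^32 comparison. A minimal standalone sketch (a hypothetical user-space test, not kernel code) of why it stays correct across 32-bit wrap-around:

#include <assert.h>
#include <stdint.h>

/* thresh is expired once it lies at most 2^31 increments behind current_val */
static int syncpt_is_expired(uint32_t current_val, uint32_t thresh)
{
	return ((current_val - thresh) & 0x80000000u) == 0u;
}

int main(void)
{
	assert(syncpt_is_expired(100, 50));        /* threshold already passed */
	assert(!syncpt_is_expired(50, 100));       /* still pending */
	assert(syncpt_is_expired(5, 0xfffffff0u)); /* counter wrapped past the threshold */
	return 0;
}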