Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 191
1 file changed, 168 insertions, 23 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 5e0066cd6c51..36b1ca73c2ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -30,6 +30,7 @@
 #include "amdgpu_xcp.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
 
 #include "sdma/sdma_4_4_2_offset.h"
 #include "sdma/sdma_4_4_2_sh_mask.h"
@@ -44,6 +45,7 @@
 #include "amdgpu_ras.h"
 
 MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin");
 MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin");
 
 static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = {
@@ -105,6 +107,11 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+                                         u32 instance_id);
 
 static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
                u32 instance, u32 offset)
@@ -486,7 +493,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
 {
        struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
        u32 doorbell_offset, doorbell;
-       u32 rb_cntl, ib_cntl;
+       u32 rb_cntl, ib_cntl, sdma_cntl;
        int i;
 
        for_each_inst(i, inst_mask) {
@@ -498,6 +505,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
                ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
                WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+               sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+               sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0);
+               WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
 
                if (sdma[i]->use_doorbell) {
                        doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
@@ -681,6 +691,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b
        u32 doorbell;
        u32 doorbell_offset;
        u64 wptr_gpu_addr;
+       u64 rwptr;
 
        wb_offset = (ring->rptr_offs * 4);
 
@@ -706,12 +717,20 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b
        /* before programing wptr to a less value, need set minor_ptr_update first */
        WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
 
+       /* For the guilty queue, set RPTR to the current wptr to skip bad commands,
+        * It is not a guilty queue, restore cache_rptr and continue execution.
+        */
+       if (adev->sdma.instance[i].gfx_guilty)
+               rwptr = ring->wptr;
+       else
+               rwptr = ring->cached_rptr;
+
        /* Initialize the ring buffer's read and write pointers */
        if (restore) {
-               WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring->wptr << 2));
-               WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(ring->wptr << 2));
-               WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring->wptr << 2));
-               WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(ring->wptr << 2));
+               WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(rwptr << 2));
+               WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+               WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(rwptr << 2));
+               WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(rwptr << 2));
        } else {
                WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
                WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
@@ -778,6 +797,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i,
        u32 doorbell;
        u32 doorbell_offset;
        u64 wptr_gpu_addr;
+       u64 rwptr;
 
        wb_offset = (ring->rptr_offs * 4);
 
@@ -785,12 +805,20 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i,
        rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
        WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
 
+       /* For the guilty queue, set RPTR to the current wptr to skip bad commands,
+        * It is not a guilty queue, restore cache_rptr and continue execution.
+        */
+       if (adev->sdma.instance[i].page_guilty)
+               rwptr = ring->wptr;
+       else
+               rwptr = ring->cached_rptr;
+
        /* Initialize the ring buffer's read and write pointers */
        if (restore) {
-               WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring->wptr << 2));
-               WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(ring->wptr << 2));
-               WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring->wptr << 2));
-               WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(ring->wptr << 2));
+               WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(rwptr << 2));
+               WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+               WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(rwptr << 2));
+               WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(rwptr << 2));
        } else {
                WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
                WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
@@ -973,6 +1001,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
                /* set utc l1 enable flag always to 1 */
                temp = RREG32_SDMA(i, regSDMA_CNTL);
                temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+               WREG32_SDMA(i, regSDMA_CNTL, temp);
 
                if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
                        /* enable context empty interrupt during initialization */
@@ -1312,6 +1341,12 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
        }
 }
 
+static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = {
+       .stop_kernel_queue = &sdma_v4_4_2_stop_queue,
+       .start_kernel_queue = &sdma_v4_4_2_restore_queue,
+       .soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine,
+};
+
 static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
 {
        struct amdgpu_device *adev = ip_block->adev;
@@ -1330,7 +1365,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
        sdma_v4_4_2_set_vm_pte_funcs(adev);
        sdma_v4_4_2_set_irq_funcs(adev);
        sdma_v4_4_2_set_ras_funcs(adev);
-
        return 0;
 }
 
@@ -1351,6 +1385,12 @@ static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
        if (!amdgpu_persistent_edc_harvesting_supported(adev))
                amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
 
+       /* The initialization is done in the late_init stage to ensure that the SMU
+        * initialization and capability setup are completed before we check the SDMA
+        * reset capability
+        */
+       sdma_v4_4_2_update_reset_mask(adev);
+
        return 0;
 }
 
@@ -1415,6 +1455,12 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
        }
 
        for (i = 0; i < adev->sdma.num_instances; i++) {
+               mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+               /* Initialize guilty flags for GFX and PAGE queues */
+               adev->sdma.instance[i].gfx_guilty = false;
+               adev->sdma.instance[i].page_guilty = false;
+               adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs;
+
                ring = &adev->sdma.instance[i].ring;
                ring->ring_obj = NULL;
                ring->use_doorbell = true;
@@ -1458,7 +1504,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
                }
        }
 
-       /* TODO: Add queue reset mask when FW fully supports it */
        adev->sdma.supported_reset =
                amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
 
@@ -1561,9 +1606,9 @@ static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block)
        return sdma_v4_4_2_hw_init(ip_block);
 }
 
-static bool sdma_v4_4_2_is_idle(void *handle)
+static bool sdma_v4_4_2_is_idle(struct amdgpu_ip_block *ip_block)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       struct amdgpu_device *adev = ip_block->adev;
        u32 i;
 
        for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1602,25 +1647,74 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
        return 0;
 }
 
-static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t instance_id, bool is_page_queue)
+{
+       uint32_t reg_offset = is_page_queue ? regSDMA_PAGE_CONTEXT_STATUS : regSDMA_GFX_CONTEXT_STATUS;
+       uint32_t context_status = RREG32(sdma_v4_4_2_get_reg_offset(adev, instance_id, reg_offset));
+
+       /* Check if the SELECTED bit is set */
+       return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
+}
+
+static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
+                                  unsigned int vmid,
+                                  struct amdgpu_fence *timedout_fence)
 {
        struct amdgpu_device *adev = ring->adev;
-       int i, r;
+       u32 id = ring->me;
+       int r;
+
+       amdgpu_amdkfd_suspend(adev, true);
+       r = amdgpu_sdma_reset_engine(adev, id, false);
+       amdgpu_amdkfd_resume(adev, true);
+       return r;
+}
+
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+       u32 instance_id = ring->me;
        u32 inst_mask;
+       uint64_t rptr;
 
        if (amdgpu_sriov_vf(adev))
                return -EINVAL;
 
+       /* Check if this queue is the guilty one */
+       adev->sdma.instance[instance_id].gfx_guilty =
+               sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
+       if (adev->sdma.has_page_queue)
+               adev->sdma.instance[instance_id].page_guilty =
+                       sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
+
+       /* Cache the rptr before reset, after the reset,
+        * all of the registers will be reset to 0
+        */
+       rptr = amdgpu_ring_get_rptr(ring);
+       ring->cached_rptr = rptr;
+       /* Cache the rptr for the page queue if it exists */
+       if (adev->sdma.has_page_queue) {
+               struct amdgpu_ring *page_ring = &adev->sdma.instance[instance_id].page;
+               rptr = amdgpu_ring_get_rptr(page_ring);
+               page_ring->cached_rptr = rptr;
+       }
+
        /* stop queue */
        inst_mask = 1 << ring->me;
        sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
        if (adev->sdma.has_page_queue)
                sdma_v4_4_2_inst_page_stop(adev, inst_mask);
 
-       r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, ring->me));
-       if (r)
-               return r;
+       return 0;
+}
 
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+       u32 inst_mask;
+       int i, r;
+
+       inst_mask = 1 << ring->me;
        udelay(50);
 
        for (i = 0; i < adev->usec_timeout; i++) {
@@ -1635,7 +1729,18 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
                return -ETIMEDOUT;
        }
 
-       return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+       r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
+
+       return r;
+}
+
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+                                        u32 instance_id)
+{
+       /* For SDMA 4.x, use the existing DPM interface for backward compatibility
+        * we need to convert the logical instance ID to physical instance ID before reset.
+        */
+       return amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id));
 }
 
 static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
@@ -1683,6 +1788,9 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
        case 0:
                amdgpu_fence_process(&adev->sdma.instance[i].ring);
                break;
+       case 1:
+               amdgpu_fence_process(&adev->sdma.instance[i].page);
+               break;
        default:
                break;
        }
@@ -1774,7 +1882,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
        if (task_info) {
                dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n",
                                    task_info->process_name, task_info->tgid,
-                                   task_info->task_name, task_info->pid);
+                                   task_info->task.comm, task_info->task.pid);
                amdgpu_vm_put_task_info(task_info);
        }
 
@@ -1920,9 +2028,9 @@ static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
        return 0;
 }
 
-static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v4_4_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       struct amdgpu_device *adev = ip_block->adev;
        int data;
 
        if (amdgpu_sriov_vf(adev))
@@ -2060,6 +2168,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
        .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
        .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+       .reset = sdma_v4_4_2_reset_queue,
 };
 
 static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
@@ -2231,6 +2340,40 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
        adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
+/**
+ * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * This function update reset mask for SDMA and sets the supported
+ * reset types based on the IP version and firmware versions.
+ *
+ */
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
+{
+       /* per queue reset not supported for SRIOV */
+       if (amdgpu_sriov_vf(adev))
+               return;
+
+       /*
+        * the user queue relies on MEC fw and pmfw when the sdma queue do reset.
+        * it needs to check both of them at here to skip old mec and pmfw.
+        */
+       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+       case IP_VERSION(9, 4, 3):
+       case IP_VERSION(9, 4, 4):
+               if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+               break;
+       case IP_VERSION(9, 5, 0):
+               if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
+                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+               break;
+       default:
+               break;
+       }
+
+}
+
 const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_SDMA,
        .major = 4,
@@ -2392,11 +2535,13 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban
        misc0 = bank->regs[ACA_REG_IDX_MISC0];
        switch (type) {
        case ACA_SMU_TYPE_UE:
+               bank->aca_err_type = ACA_ERROR_TYPE_UE;
                ret = aca_error_cache_log_bank_error(handle, &info,
                                                     ACA_ERROR_TYPE_UE, 1ULL);
                break;
        case ACA_SMU_TYPE_CE:
-               ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+               bank->aca_err_type = ACA_ERROR_TYPE_CE;
+               ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
                                                     ACA_REG__MISC0__ERRCNT(misc0));
                break;
        default:
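The core of this change is the per-queue reset path: sdma_v4_4_2_reset_queue() suspends KFD and calls amdgpu_sdma_reset_engine(), and the three callbacks registered in sdma_v4_4_2_sdma_funcs split the work into stopping the queue (which records which of the GFX and page queues was selected as guilty and caches their rptr), soft-resetting the engine through the DPM interface, and restoring the queue. On re-init, a guilty queue has its ring pointers programmed to the current wptr so the hung submission is skipped, while an innocent queue resumes from cached_rptr. The stand-alone C sketch below only models that stop / soft-reset / restore ordering and the rptr selection; the types and helpers in it (struct sdma_queue, queue_stop, engine_soft_reset, queue_restore) are illustrative stand-ins, not amdgpu driver API.

/*
 * Minimal user-space model of the stop / soft-reset / restore flow above.
 * All names here are illustrative stand-ins, not amdgpu driver API.
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sdma_queue {
        uint64_t rptr;        /* hardware read pointer */
        uint64_t wptr;        /* software write pointer */
        uint64_t cached_rptr; /* rptr saved before the reset */
        bool guilty;          /* queue that owned the hung submission */
};

/* stop step: remember where the engine was before its registers are lost */
static void queue_stop(struct sdma_queue *q)
{
        q->cached_rptr = q->rptr;
}

/* reset step: the engine comes back with its ring registers zeroed,
 * which is why the rptr had to be cached first */
static void engine_soft_reset(struct sdma_queue *q)
{
        q->rptr = 0;
}

/* restore step: a guilty queue jumps its read pointer to wptr so the
 * offending packets are skipped; an innocent queue resumes from the
 * rptr cached before the reset */
static void queue_restore(struct sdma_queue *q)
{
        q->rptr = q->guilty ? q->wptr : q->cached_rptr;
}

int main(void)
{
        struct sdma_queue gfx  = { .rptr = 16, .wptr = 64, .guilty = true };
        struct sdma_queue page = { .rptr = 8,  .wptr = 32, .guilty = false };

        queue_stop(&gfx);
        queue_stop(&page);
        engine_soft_reset(&gfx);
        engine_soft_reset(&page);
        queue_restore(&gfx);
        queue_restore(&page);

        printf("gfx  queue: rptr=%" PRIu64 " (bad commands skipped)\n", gfx.rptr);
        printf("page queue: rptr=%" PRIu64 " (resumes from cached rptr)\n", page.rptr);
        return 0;
}

In the driver itself, sdma_v4_4_2_stop_queue() performs both cache steps for one engine instance, since a single SDMA engine backs both the GFX and page rings, and the guilty decision is read back from the SELECTED bit of the per-queue CONTEXT_STATUS register.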