Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 196
1 file changed, 110 insertions, 86 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b4b85e550bc2..f4c4eea62526 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -309,6 +309,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 
 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -1312,9 +1313,9 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
 }
 
 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
-				  u32 me, u32 pipe, u32 q)
+				  u32 me, u32 pipe, u32 q, u32 vm)
 {
-	soc15_grbm_select(adev, me, pipe, q, 0);
+	soc15_grbm_select(adev, me, pipe, q, vm);
 }
 
 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
@@ -1724,7 +1725,7 @@ static int gfx_v9_0_sw_init(void *handle)
 		ring->use_doorbell = true;
 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
 		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
+				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
 		if (r)
 			return r;
 	}
@@ -1760,7 +1761,7 @@ static int gfx_v9_0_sw_init(void *handle)
 		return r;
 
 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
-	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
+	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
 	if (r)
 		return r;
 
@@ -1802,7 +1803,7 @@ static int gfx_v9_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-	amdgpu_gfx_compute_mqd_sw_fini(adev);
+	amdgpu_gfx_mqd_sw_fini(adev);
 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
 	amdgpu_gfx_kiq_fini(adev);
 
@@ -1941,11 +1942,15 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
 		if (i == 0) {
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+					    !!amdgpu_noretry);
 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
 		} else {
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+					    !!amdgpu_noretry);
 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
 				(adev->gmc.private_aperture_start >> 48));
@@ -1959,25 +1964,6 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
 	mutex_unlock(&adev->srbm_mutex);
 
 	gfx_v9_0_init_compute_vmid(adev);
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	/*
-	 * making sure that the following register writes will be broadcasted
-	 * to all the shaders
-	 */
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-
-	WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
-		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
-			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
-		   (adev->gfx.config.sc_prim_fifo_size_backend <<
-			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
-		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
-			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
-		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
-			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
-	mutex_unlock(&adev->grbm_idx_mutex);
-
 }
 
 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
@@ -2092,11 +2078,10 @@ static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
 	u32 tmp = 0;
 
 	u32 *register_list_format =
-		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+		kmemdup(adev->gfx.rlc.register_list_format,
+			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
 	if (!register_list_format)
 		return -ENOMEM;
-	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
-		adev->gfx.rlc.reg_list_format_size_bytes);
 
 	/* setup unique_indirect_regs array and indirect_start_offsets array */
 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
@@ -3600,45 +3585,89 @@ static const struct soc15_reg_entry sgpr_init_regs[] = {
 };
 
 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
-   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
-   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
-   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
-   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
-   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
-   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
-   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
+   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
+   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
+   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
+   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
+   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
 };
 
+static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+	int i, r;
+
+	r = amdgpu_ring_alloc(ring, 7);
+	if (r) {
+		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
+			ring->name, r);
+		return r;
+	}
+
+	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
+	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
+	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
+				PACKET3_DMA_DATA_DST_SEL(1) |
+				PACKET3_DMA_DATA_SRC_SEL(2) |
+				PACKET3_DMA_DATA_ENGINE(0)));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
+				adev->gds.gds_size);
+
+	amdgpu_ring_commit(ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
+
+	return r;
+}
+
 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
 	struct amdgpu_ib ib;
 	struct dma_fence *f = NULL;
-	int r, i, j;
+	int r, i, j, k;
 	unsigned total_size, vgpr_offset, sgpr_offset;
 	u64 gpu_addr;
 
@@ -3750,19 +3779,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 	/* read back registers to clear the counters */
 	mutex_lock(&adev->grbm_idx_mutex);
-	for (j = 0; j < 16; j++) {
-		gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
-		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
-			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
-		gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
-		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
-			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
-		gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
-		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
-			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
-		gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
-		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
-			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
+		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
+			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
+				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+			}
+		}
 	}
 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
 	mutex_unlock(&adev->grbm_idx_mutex);
@@ -3815,6 +3838,10 @@ static int gfx_v9_0_ecc_late_init(void *handle)
 		return 0;
 	}
 
+	r = gfx_v9_0_do_edc_gds_workarounds(adev);
+	if (r)
+		return r;
+
 	/* requires IBs so do in late init after IB pool is initialized */
 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
 	if (r)
@@ -3864,9 +3891,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
 	if (r)
 		goto interrupt;
 
-	r = amdgpu_ras_debugfs_create(adev, &fs_info);
-	if (r)
-		goto debugfs;
+	amdgpu_ras_debugfs_create(adev, &fs_info);
 
 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
 	if (r)
@@ -3881,7 +3906,6 @@ irq:
 	amdgpu_ras_sysfs_remove(adev, *ras_if);
 sysfs:
 	amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
 interrupt:
 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
@@ -4542,7 +4566,7 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
 	struct amdgpu_ring *iring;
 
 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
-	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
 	if (acquire)
 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
 	else
@@ -4561,20 +4585,20 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
 		/* Lower all pipes without a current reservation */
 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
 			iring = &adev->gfx.gfx_ring[i];
-			pipe = amdgpu_gfx_queue_to_bit(adev,
-						       iring->me,
-						       iring->pipe,
-						       0);
+			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+							   iring->me,
+							   iring->pipe,
+							   0);
 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
 		}
 
 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
 			iring = &adev->gfx.compute_ring[i];
-			pipe = amdgpu_gfx_queue_to_bit(adev,
-						       iring->me,
-						       iring->pipe,
-						       0);
+			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+							   iring->me,
+							   iring->pipe,
+							   0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
 		}
@@ -4989,7 +5013,7 @@ static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
 					    enum amdgpu_interrupt_state state)
 {
 	switch (type) {
-	case AMDGPU_CP_IRQ_GFX_EOP:
+	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
 		break;
 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: