summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c196
1 files changed, 110 insertions, 86 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b4b85e550bc2..f4c4eea62526 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -309,6 +309,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
@@ -1312,9 +1313,9 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q)
+ u32 me, u32 pipe, u32 q, u32 vm)
{
- soc15_grbm_select(adev, me, pipe, q, 0);
+ soc15_grbm_select(adev, me, pipe, q, vm);
}
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
@@ -1724,7 +1725,7 @@ static int gfx_v9_0_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
+ &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
if (r)
return r;
}
@@ -1760,7 +1761,7 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
- r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
if (r)
return r;
@@ -1802,7 +1803,7 @@ static int gfx_v9_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_compute_mqd_sw_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
amdgpu_gfx_kiq_fini(adev);
@@ -1941,11 +1942,15 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
if (i == 0) {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!amdgpu_noretry);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
} else {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!amdgpu_noretry);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
(adev->gmc.private_aperture_start >> 48));
@@ -1959,25 +1964,6 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v9_0_init_compute_vmid(adev);
-
- mutex_lock(&adev->grbm_idx_mutex);
- /*
- * making sure that the following register writes will be broadcasted
- * to all the shaders
- */
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-
- WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
- (adev->gfx.config.sc_prim_fifo_size_frontend <<
- PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_prim_fifo_size_backend <<
- PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_hiz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_earlyz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
- mutex_unlock(&adev->grbm_idx_mutex);
-
}
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
@@ -2092,11 +2078,10 @@ static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
u32 tmp = 0;
u32 *register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+ kmemdup(adev->gfx.rlc.register_list_format,
+ adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
if (!register_list_format)
return -ENOMEM;
- memcpy(register_list_format, adev->gfx.rlc.register_list_format,
- adev->gfx.rlc.reg_list_format_size_bytes);
/* setup unique_indirect_regs array and indirect_start_offsets array */
unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
@@ -3600,45 +3585,89 @@ static const struct soc15_reg_entry sgpr_init_regs[] = {
};
static const struct soc15_reg_entry sec_ded_counter_registers[] = {
- { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
- { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
- { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
- { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
- { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
- { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
- { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
+ { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
};
+static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+ int i, r;
+
+ r = amdgpu_ring_alloc(ring, 7);
+ if (r) {
+ DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
+ ring->name, r);
+ return r;
+ }
+
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
+ amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
+ PACKET3_DMA_DATA_DST_SEL(1) |
+ PACKET3_DMA_DATA_SRC_SEL(2) |
+ PACKET3_DMA_DATA_ENGINE(0)));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
+ adev->gds.gds_size);
+
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
+
+ return r;
+}
+
static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- int r, i, j;
+ int r, i, j, k;
unsigned total_size, vgpr_offset, sgpr_offset;
u64 gpu_addr;
@@ -3750,19 +3779,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* read back registers to clear the counters */
mutex_lock(&adev->grbm_idx_mutex);
- for (j = 0; j < 16; j++) {
- gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
- for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
- RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
- gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
- for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
- RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
- gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
- for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
- RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
- gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
- for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
- RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
+ for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
+ for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
+ gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+ RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+ }
+ }
}
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
mutex_unlock(&adev->grbm_idx_mutex);
@@ -3815,6 +3838,10 @@ static int gfx_v9_0_ecc_late_init(void *handle)
return 0;
}
+ r = gfx_v9_0_do_edc_gds_workarounds(adev);
+ if (r)
+ return r;
+
/* requires IBs so do in late init after IB pool is initialized */
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
if (r)
@@ -3864,9 +3891,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
if (r)
goto interrupt;
- r = amdgpu_ras_debugfs_create(adev, &fs_info);
- if (r)
- goto debugfs;
+ amdgpu_ras_debugfs_create(adev, &fs_info);
r = amdgpu_ras_sysfs_create(adev, &fs_info);
if (r)
@@ -3881,7 +3906,6 @@ irq:
amdgpu_ras_sysfs_remove(adev, *ras_if);
sysfs:
amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
interrupt:
amdgpu_ras_feature_enable(adev, *ras_if, 0);
@@ -4542,7 +4566,7 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
struct amdgpu_ring *iring;
mutex_lock(&adev->gfx.pipe_reserve_mutex);
- pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
if (acquire)
set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
else
@@ -4561,20 +4585,20 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
/* Lower all pipes without a current reservation */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
iring = &adev->gfx.gfx_ring[i];
- pipe = amdgpu_gfx_queue_to_bit(adev,
- iring->me,
- iring->pipe,
- 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
iring = &adev->gfx.compute_ring[i];
- pipe = amdgpu_gfx_queue_to_bit(adev,
- iring->me,
- iring->pipe,
- 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
@@ -4989,7 +5013,7 @@ static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
switch (type) {
- case AMDGPU_CP_IRQ_GFX_EOP:
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
break;
case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: