diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 187 |
1 files changed, 146 insertions, 41 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 02702597ddeb..c8f2aa1db13b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -35,6 +35,8 @@ #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" #include "navi10_enum.h" #include "hdp/hdp_5_0_0_offset.h" #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" @@ -222,6 +224,49 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) }; +static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) +{ + static void *scratch_reg0; + static void *scratch_reg1; + static void *scratch_reg2; + static void *scratch_reg3; + static void *spare_int; + static uint32_t grbm_cntl; + static uint32_t grbm_idx; + uint32_t i = 0; + uint32_t retries = 50000; + + scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; + scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; + scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; + scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4; + spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; + + grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; + grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; + + if (amdgpu_sriov_runtime(adev)) { + pr_err("shouldn't call rlcg write register during runtime\n"); + return; + } + + writel(v, scratch_reg0); + writel(offset | 0x80000000, scratch_reg1); + writel(1, spare_int); + for (i = 0; i < retries; i++) { + u32 tmp; + + tmp = readl(scratch_reg1); + if (!(tmp & 0x80000000)) + break; + + udelay(10); + } + + if (i >= retries) + pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); +} + static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = { /* Pending on emulation bring up */ @@ -500,29 +545,28 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct dma_fence *f = NULL; - uint32_t scratch; - uint32_t tmp = 0; + unsigned index; + uint64_t gpu_addr; + uint32_t tmp; long r; - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + r = amdgpu_device_wb_get(adev, &index); + if (r) return r; - } - - WREG32(scratch, 0xCAFEDEAD); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + r = amdgpu_ib_get(adev, NULL, 16, &ib); + if (r) goto err1; - } - ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); - ib.ptr[2] = 0xDEADBEEF; - ib.length_dw = 3; + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) @@ -530,15 +574,13 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } - tmp = RREG32(scratch); + tmp = adev->wb.wb[index]; if (tmp == 0xDEADBEEF) r = 0; else @@ -547,8 +589,7 @@ err2: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err1: - amdgpu_gfx_scratch_free(adev, scratch); - + amdgpu_device_wb_free(adev, index); return r; } @@ -1016,6 +1057,10 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) return r; } + /* init spm vmid with 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + return 0; } @@ -1783,11 +1828,11 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); return 0; } @@ -1895,6 +1940,11 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ gfx_v10_0_rlc_enable_srm(adev); } else { + if (amdgpu_sriov_vf(adev)) { + gfx_v10_0_init_csb(adev); + return 0; + } + adev->gfx.rlc.funcs->stop(adev); /* disable CG */ @@ -2395,7 +2445,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].sched.ready = false; } - WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) @@ -3168,12 +3218,7 @@ static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); - r = amdgpu_ring_test_ring(kiq_ring); - if (r) { - DRM_ERROR("kfq enable failed\n"); - kiq_ring->sched.ready = false; - } - return r; + return amdgpu_ring_test_helper(kiq_ring); } #endif @@ -3216,6 +3261,22 @@ done: return r; } +static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; + ring->has_high_prio = true; + mqd->cp_hqd_queue_priority = + AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; + } else { + ring->has_high_prio = false; + } + } +} + static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3341,6 +3402,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; + /* set static priority for a compute queue/ring */ + gfx_v10_0_compute_mqd_set_priority(ring, mqd); + /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. */ @@ -3790,7 +3854,7 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], PREEMPT_QUEUES, 0, 0); - return amdgpu_ring_test_ring(kiq_ring); + return amdgpu_ring_test_helper(kiq_ring); } #endif @@ -3930,9 +3994,8 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | + ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); return clock; @@ -4220,6 +4283,45 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } +static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +{ + u32 data; + + data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); + + data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; + + WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); +} + +static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev, + uint32_t offset, + struct soc15_reg_rlcg *entries, int arr_size) +{ + int i; + uint32_t reg; + + if (!entries) + return false; + + for (i = 0; i < arr_size; i++) { + const struct soc15_reg_rlcg *entry; + + entry = &entries[i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + if (offset == reg) + return true; + } + + return false; +} + +static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) +{ + return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0); +} + static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, .set_safe_mode = gfx_v10_0_set_safe_mode, @@ -4230,7 +4332,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { .resume = gfx_v10_0_rlc_resume, .stop = gfx_v10_0_rlc_stop, .reset = gfx_v10_0_rlc_reset, - .start = gfx_v10_0_rlc_start + .start = gfx_v10_0_rlc_start, + .update_spm_vmid = gfx_v10_0_update_spm_vmid, + .rlcg_wreg = gfx_v10_rlcg_wreg, + .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; static int gfx_v10_0_set_powergating_state(void *handle, @@ -4419,15 +4524,15 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) control |= INDIRECT_BUFFER_PRE_RESUME(1); - if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) gfx_v10_0_ring_emit_de_meta(ring, - flags & AMDGPU_IB_PREEMPTED ? true : false); + (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } amdgpu_ring_write(ring, header); @@ -4574,9 +4679,9 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flag { uint32_t dw2 = 0; - if (amdgpu_mcbp) + if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev)) gfx_v10_0_ring_emit_ce_meta(ring, - flags & AMDGPU_IB_PREEMPTED ? true : false); + (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); gfx_v10_0_ring_emit_tmz(ring, true); |