diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 304 |
1 files changed, 143 insertions, 161 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 46009db3d195..e92fb372bc99 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -27,15 +27,14 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" -#include "vega10/soc15ip.h" -#include "vega10/SDMA0/sdma0_4_0_offset.h" -#include "vega10/SDMA0/sdma0_4_0_sh_mask.h" -#include "vega10/SDMA1/sdma1_4_0_offset.h" -#include "vega10/SDMA1/sdma1_4_0_sh_mask.h" -#include "vega10/MMHUB/mmhub_1_0_offset.h" -#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" -#include "vega10/HDP/hdp_4_0_offset.h" -#include "raven1/SDMA0/sdma0_4_1_default.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma0/sdma0_4_0_sh_mask.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "sdma1/sdma1_4_0_sh_mask.h" +#include "mmhub/mmhub_1_0_offset.h" +#include "mmhub/mmhub_1_0_sh_mask.h" +#include "hdp/hdp_4_0_offset.h" +#include "sdma0/sdma0_4_1_default.h" #include "soc15_common.h" #include "soc15.h" @@ -53,97 +52,85 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static const u32 golden_settings_sdma_4[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0x003ff006, 0x0003c000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CHICKEN_BITS), 0xfe931f07, 0x02831f07, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), 0xffffffff, 0x3f000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), 0x003ff000, 0x0003c000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0 +static const struct soc15_reg_golden golden_settings_sdma_4[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; -static const u32 golden_settings_sdma_vg10[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 +static const struct soc15_reg_golden golden_settings_sdma_vg10[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002) }; -static const u32 golden_settings_sdma_4_1[] = +static const struct soc15_reg_golden golden_settings_sdma_4_1[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0 + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; -static const u32 golden_settings_sdma_rv1[] = +static const struct soc15_reg_golden golden_settings_sdma_rv1[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002 + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002) }; -static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) +static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, + u32 instance, u32 offset) { - u32 base = 0; - - switch (instance) { - case 0: - base = SDMA0_BASE.instance[0].segment[0]; - break; - case 1: - base = SDMA1_BASE.instance[0].segment[0]; - break; - default: - BUG(); - break; - } - - return base + internal_offset; + return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) : + (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); } static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_4, - (const u32)ARRAY_SIZE(golden_settings_sdma_4)); - amdgpu_program_register_sequence(adev, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, golden_settings_sdma_vg10, - (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); + ARRAY_SIZE(golden_settings_sdma_vg10)); break; case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_4_1, - (const u32)ARRAY_SIZE(golden_settings_sdma_4_1)); - amdgpu_program_register_sequence(adev, + ARRAY_SIZE(golden_settings_sdma_4_1)); + soc15_program_register_sequence(adev, golden_settings_sdma_rv1, - (const u32)ARRAY_SIZE(golden_settings_sdma_rv1)); + ARRAY_SIZE(golden_settings_sdma_rv1)); break; default: break; @@ -265,8 +252,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; wptr = &local_wptr; - lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; + lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; + highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", me, highbit, lowbit); @@ -315,8 +302,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } } @@ -343,15 +330,13 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); @@ -370,13 +355,9 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, */ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg; - - if (ring->adev->flags & AMD_IS_APU) - nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; - else - nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring == &ring->adev->sdma.instance[0].ring) ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; @@ -386,8 +367,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_done_offset << 2); - amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_req_offset << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); amdgpu_ring_write(ring, ref_and_mask); /* reference */ amdgpu_ring_write(ring, ref_and_mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | @@ -396,6 +377,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE)); @@ -460,12 +443,12 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); for (i = 0; i < adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); } sdma0->ready = false; @@ -522,18 +505,18 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), phase_quantum); } - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); } } @@ -557,9 +540,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), f32_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); } } @@ -587,48 +570,48 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) ring = &adev->sdma.instance[i].ring; wb_offset = (ring->rptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); /* set the wb address whether it's enabled or not */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); } - doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); if (ring->use_doorbell) { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); @@ -637,55 +620,53 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } else { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); } - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - if (adev->flags & AMD_IS_APU) - nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); - else - nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index); if (amdgpu_sriov_vf(adev)) sdma_v4_0_ring_set_wptr(ring); /* set minor_ptr_update to 0 after wptr programed */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), temp); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); if (!amdgpu_sriov_vf(adev)) { /* unhalt engine */ - temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); } /* setup the wptr shadow polling */ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); if (amdgpu_sriov_vf(adev)) wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); else wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); #endif /* enable DMA IBs */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); ring->ready = true; @@ -816,12 +797,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); for (j = 0; j < fw_size; j++) - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); } return 0; @@ -886,7 +867,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -899,7 +880,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -919,13 +900,13 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -948,7 +929,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u32 tmp = 0; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -990,7 +971,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); @@ -1000,7 +981,7 @@ err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1152,23 +1133,24 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VEGA10). */ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); /* flush TLB */ @@ -1183,8 +1165,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); /* reference */ - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, 1 << vmid); /* reference */ + amdgpu_ring_write(ring, 1 << vmid); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } @@ -1317,7 +1299,7 @@ static bool sdma_v4_0_is_idle(void *handle) u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { - u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG)); + u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) return false; @@ -1333,8 +1315,8 @@ static int sdma_v4_0_wait_for_idle(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG)); - sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG)); + sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) return 0; @@ -1358,8 +1340,8 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? - sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) : - sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); + sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : + sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); sdma_cntl = RREG32(reg_offset); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, |