Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c')
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 268 ++++++++++++++++++++------------
 1 file changed, 173 insertions(+), 95 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index d46128b0ec92..e6d8eddda2bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -43,6 +43,8 @@
 #include "sdma_common.h"
 #include "sdma_v6_0.h"
 #include "v11_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
 
 MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
@@ -51,6 +53,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin");
 
 #define SDMA1_REG_OFFSET 0x600
 #define SDMA0_HYP_DEC_REG_START 0x5880
@@ -375,11 +378,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 	}
 
 	if (flags & AMDGPU_FENCE_FLAG_INT) {
-		uint32_t ctx = ring->is_mes_queue ?
-			(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
 		/* generate an interrupt */
 		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
-		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
 	}
 }
 
@@ -890,6 +891,12 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
 	m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
 	m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
 
+	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+	m->sdmax_rlcx_f32_dbg0 = lower_32_bits(prop->fence_address);
+	m->sdmax_rlcx_f32_dbg1 = upper_32_bits(prop->fence_address);
+
 	return 0;
 }
 
@@ -916,33 +923,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 	int r;
 	u32 tmp;
 	u64 gpu_addr;
-	volatile uint32_t *cpu_ptr = NULL;
 
 	tmp = 0xCAFEDEAD;
 
-	if (ring->is_mes_queue) {
-		uint32_t offset = 0;
-		offset = amdgpu_mes_ctx_get_offs(ring,
-						 AMDGPU_MES_CTX_PADDING_OFFS);
-		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
-		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-		*cpu_ptr = tmp;
-	} else {
-		r = amdgpu_device_wb_get(adev, &index);
-		if (r) {
-			dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
-			return r;
-		}
-
-		gpu_addr = adev->wb.gpu_addr + (index * 4);
-		adev->wb.wb[index] = cpu_to_le32(tmp);
+	r = amdgpu_device_wb_get(adev, &index);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		return r;
 	}
 
+	gpu_addr = adev->wb.gpu_addr + (index * 4);
+	adev->wb.wb[index] = cpu_to_le32(tmp);
+
 	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		if (!ring->is_mes_queue)
-			amdgpu_device_wb_free(adev, index);
+		amdgpu_device_wb_free(adev, index);
 		return r;
 	}
 
@@ -955,10 +951,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (ring->is_mes_queue)
-			tmp = le32_to_cpu(*cpu_ptr);
-		else
-			tmp = le32_to_cpu(adev->wb.wb[index]);
+		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
 		if (amdgpu_emu_mode == 1)
@@ -970,8 +963,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 	if (i >= adev->usec_timeout)
 		r = -ETIMEDOUT;
 
-	if (!ring->is_mes_queue)
-		amdgpu_device_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
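The ring-test hunks above implement the driver's standard liveness check: seed a CPU-visible writeback slot with 0xCAFEDEAD, submit an SDMA write of 0xDEADBEEF to that address, and poll until the value flips or the timeout expires; the change simply drops the MES-queue branch so the slot always comes from the writeback allocator. A minimal standalone sketch of the same handshake (plain C; fake_engine_write() and the other names are illustrative stand-ins, not driver API):

#include <stdint.h>
#include <stdio.h>

#define SEED_VALUE 0xCAFEDEADu
#define TEST_VALUE 0xDEADBEEFu
#define TIMEOUT_US 100000

/* Stand-in for the device: the real driver emits an SDMA_OP_WRITE packet
 * and rings the doorbell; here we flip the slot directly. */
static void fake_engine_write(volatile uint32_t *wb_slot)
{
        *wb_slot = TEST_VALUE;
}

/* Poll a writeback slot until the engine stores the magic value. */
static int ring_test(volatile uint32_t *wb_slot)
{
        int i;

        *wb_slot = SEED_VALUE;      /* seed so a stale read cannot pass */
        fake_engine_write(wb_slot); /* submission + commit in the driver */

        for (i = 0; i < TIMEOUT_US; i++) {
                if (*wb_slot == TEST_VALUE)
                        return 0;   /* engine is alive */
                /* the driver sleeps ~1us per iteration here */
        }
        return -1;                  /* -ETIMEDOUT in the driver */
}

int main(void)
{
        volatile uint32_t slot;

        printf("ring test: %s\n", ring_test(&slot) ? "timed out" : "passed");
        return 0;
}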
@@ -994,37 +986,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	long r;
 	u32 tmp = 0;
 	u64 gpu_addr;
-	volatile uint32_t *cpu_ptr = NULL;
 
 	tmp = 0xCAFEDEAD;
 	memset(&ib, 0, sizeof(ib));
 
-	if (ring->is_mes_queue) {
-		uint32_t offset = 0;
-		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
-		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
-		ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
-		offset = amdgpu_mes_ctx_get_offs(ring,
-						 AMDGPU_MES_CTX_PADDING_OFFS);
-		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
-		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-		*cpu_ptr = tmp;
-	} else {
-		r = amdgpu_device_wb_get(adev, &index);
-		if (r) {
-			dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
-			return r;
-		}
+	r = amdgpu_device_wb_get(adev, &index);
+	if (r) {
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+		return r;
+	}
 
-		gpu_addr = adev->wb.gpu_addr + (index * 4);
-		adev->wb.wb[index] = cpu_to_le32(tmp);
+	gpu_addr = adev->wb.gpu_addr + (index * 4);
+	adev->wb.wb[index] = cpu_to_le32(tmp);
 
-		r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
-		if (r) {
-			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
-			goto err0;
-		}
+	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+		goto err0;
 	}
 
 	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
@@ -1052,10 +1030,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		goto err1;
 	}
 
-	if (ring->is_mes_queue)
-		tmp = le32_to_cpu(*cpu_ptr);
-	else
-		tmp = le32_to_cpu(adev->wb.wb[index]);
+	tmp = le32_to_cpu(adev->wb.wb[index]);
 
 	if (tmp == 0xDEADBEEF)
 		r = 0;
@@ -1063,11 +1038,10 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		r = -EINVAL;
 
 err1:
-	amdgpu_ib_free(adev, &ib, NULL);
+	amdgpu_ib_free(&ib, NULL);
 	dma_fence_put(f);
 err0:
-	if (!ring->is_mes_queue)
-		amdgpu_device_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -1299,6 +1273,23 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
 	struct amdgpu_device *adev = ip_block->adev;
 	int r;
 
+	switch (amdgpu_user_queue) {
+	case -1:
+	case 0:
+	default:
+		adev->sdma.no_user_submission = false;
+		adev->sdma.disable_uq = true;
+		break;
+	case 1:
+		adev->sdma.no_user_submission = false;
+		adev->sdma.disable_uq = false;
+		break;
+	case 2:
+		adev->sdma.no_user_submission = true;
+		adev->sdma.disable_uq = false;
+		break;
+	}
+
 	r = amdgpu_sdma_init_microcode(adev, 0, true);
 	if (r)
 		return r;
@@ -1328,11 +1319,19 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 	if (r)
 		return r;
 
+	/* SDMA user fence event */
+	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+			      GFX_11_0_0__SRCID__SDMA_FENCE,
+			      &adev->sdma.fence_irq);
+	if (r)
+		return r;
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
 		ring->me = i;
+		ring->no_user_submission = adev->sdma.no_user_submission;
 
 		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
 			  ring->use_doorbell?"true":"false");
@@ -1356,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 	case IP_VERSION(6, 0, 0):
 	case IP_VERSION(6, 0, 2):
 	case IP_VERSION(6, 0, 3):
-		if (adev->sdma.instance[0].fw_version >= 21)
+		if ((adev->sdma.instance[0].fw_version >= 21) &&
+		    !amdgpu_sriov_vf(adev))
 			adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
 		break;
 	default:
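The early_init hunk above folds the amdgpu_user_queue module parameter into two independent flags: whether kernel rings reject user submission, and whether user queue functions get registered at all. A compact standalone reading of that switch (a hedged sketch; the struct and function names are invented for illustration, the mapping itself is lifted from the hunk):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the two flags set in sdma_v6_0_early_init(). */
struct uq_mode {
        bool no_user_submission; /* kernel ring refuses user submission */
        bool disable_uq;         /* user queue funcs never registered   */
};

/* amdgpu_user_queue: -1/0 = off (default), 1 = user queues alongside
 * kernel rings, 2 = user queues only. */
static struct uq_mode decode_user_queue_param(int amdgpu_user_queue)
{
        switch (amdgpu_user_queue) {
        case 1:
                return (struct uq_mode){ .no_user_submission = false, .disable_uq = false };
        case 2:
                return (struct uq_mode){ .no_user_submission = true,  .disable_uq = false };
        case -1:
        case 0:
        default:
                return (struct uq_mode){ .no_user_submission = false, .disable_uq = true };
        }
}

int main(void)
{
        for (int p = -1; p <= 2; p++) {
                struct uq_mode m = decode_user_queue_param(p);

                printf("param %2d -> no_user_submission=%d disable_uq=%d\n",
                       p, m.no_user_submission, m.disable_uq);
        }
        return 0;
}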
@@ -1375,6 +1375,43 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 	else
 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
+	switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+	case IP_VERSION(6, 0, 0):
+		if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	case IP_VERSION(6, 0, 1):
+		if ((adev->sdma.instance[0].fw_version >= 18) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	case IP_VERSION(6, 0, 2):
+		if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	case IP_VERSION(6, 0, 3):
+		if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	case IP_VERSION(6, 1, 0):
+		if ((adev->sdma.instance[0].fw_version >= 14) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	case IP_VERSION(6, 1, 1):
+		if ((adev->sdma.instance[0].fw_version >= 17) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	case IP_VERSION(6, 1, 2):
+		if ((adev->sdma.instance[0].fw_version >= 15) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	case IP_VERSION(6, 1, 3):
+		if ((adev->sdma.instance[0].fw_version >= 10) && !adev->sdma.disable_uq)
+			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+		break;
+	default:
+		break;
+	}
+
 	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
 	if (r)
 		return r;
@@ -1398,11 +1435,39 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
 	return 0;
 }
 
+static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+					       bool enable)
+{
+	unsigned int irq_type;
+	int i, r;
+
+	if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) {
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i;
+			if (enable)
+				r = amdgpu_irq_get(adev, &adev->sdma.trap_irq,
+						   irq_type);
+			else
+				r = amdgpu_irq_put(adev, &adev->sdma.trap_irq,
+						   irq_type);
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
 static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
+	int r;
 
-	return sdma_v6_0_start(adev);
+	r = sdma_v6_0_start(adev);
+	if (r)
+		return r;
+
+	return sdma_v6_0_set_userq_trap_interrupts(adev, true);
 }
 
 static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
@@ -1414,6 +1479,7 @@ static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
 
 	sdma_v6_0_ctxempty_int_enable(adev, false);
 	sdma_v6_0_enable(adev, false);
+	sdma_v6_0_set_userq_trap_interrupts(adev, false);
 
 	return 0;
 }
@@ -1428,9 +1494,9 @@ static int sdma_v6_0_resume(struct amdgpu_ip_block *ip_block)
 	return sdma_v6_0_hw_init(ip_block);
 }
 
-static bool sdma_v6_0_is_idle(void *handle)
+static bool sdma_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	u32 i;
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
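The sw_init switch above registers userq_mes_funcs only when the SDMA firmware on a given IP revision meets a per-revision minimum version. The same gate reads naturally as a version-floor lookup table, which makes the floors easy to audit (a sketch assuming IP_VERSION packs major/minor/rev as (mj << 16 | mn << 8 | rv), which matches the kernel's encoding; the helper names are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same encoding the kernel uses for IP discovery versions. */
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

/* Minimum SDMA firmware version with MES user queue support, per IP
 * revision -- values lifted from the switch in the hunk above. */
static const struct {
        uint32_t ip_version;
        uint32_t min_fw;
} sdma_uq_fw_floor[] = {
        { IP_VERSION(6, 0, 0), 24 },
        { IP_VERSION(6, 0, 1), 18 },
        { IP_VERSION(6, 0, 2), 21 },
        { IP_VERSION(6, 0, 3), 25 },
        { IP_VERSION(6, 1, 0), 14 },
        { IP_VERSION(6, 1, 1), 17 },
        { IP_VERSION(6, 1, 2), 15 },
        { IP_VERSION(6, 1, 3), 10 },
};

static bool sdma_uq_supported(uint32_t ip_version, uint32_t fw_version,
                              bool disable_uq)
{
        size_t i;

        if (disable_uq)
                return false;
        for (i = 0; i < sizeof(sdma_uq_fw_floor) / sizeof(sdma_uq_fw_floor[0]); i++)
                if (sdma_uq_fw_floor[i].ip_version == ip_version)
                        return fw_version >= sdma_uq_fw_floor[i].min_fw;
        return false; /* unknown revision: leave user queues off */
}

int main(void)
{
        printf("6.0.0 fw 23: %d\n", sdma_uq_supported(IP_VERSION(6, 0, 0), 23, false));
        printf("6.0.0 fw 24: %d\n", sdma_uq_supported(IP_VERSION(6, 0, 0), 24, false));
        return 0;
}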
@@ -1505,29 +1571,29 @@ static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
 	return r;
 }
 
-static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
+				 unsigned int vmid,
+				 struct amdgpu_fence *timedout_fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	int i, r;
+	int r;
 
-	if (amdgpu_sriov_vf(adev))
+	if (ring->me >= adev->sdma.num_instances) {
+		dev_err(adev->dev, "sdma instance not found\n");
 		return -EINVAL;
-
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		if (ring == &adev->sdma.instance[i].ring)
-			break;
 	}
 
-	if (i == adev->sdma.num_instances) {
-		DRM_ERROR("sdma instance not found\n");
-		return -EINVAL;
-	}
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
 
 	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
 	if (r)
 		return r;
 
-	return sdma_v6_0_gfx_resume_instance(adev, i, true);
+	r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
+	if (r)
+		return r;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
 }
 
 static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
@@ -1554,25 +1620,9 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
 				      struct amdgpu_iv_entry *entry)
 {
 	int instances, queue;
-	uint32_t mes_queue_id = entry->src_data[0];
 
 	DRM_DEBUG("IH: SDMA trap\n");
 
-	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
-		struct amdgpu_mes_queue *queue;
-
-		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
-		spin_lock(&adev->mes.queue_id_lock);
-		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
-		if (queue) {
-			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
-			amdgpu_fence_process(queue->ring);
-		}
-		spin_unlock(&adev->mes.queue_id_lock);
-		return 0;
-	}
-
 	queue = entry->ring_id & 0xf;
 	instances = (entry->ring_id & 0xf0) >> 4;
 	if (instances > 1) {
@@ -1594,6 +1644,29 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev,
+				       struct amdgpu_irq_src *source,
+				       struct amdgpu_iv_entry *entry)
+{
+	u32 doorbell_offset = entry->src_data[0];
+
+	if (adev->enable_mes && doorbell_offset) {
+		struct amdgpu_userq_fence_driver *fence_drv = NULL;
+		struct xarray *xa = &adev->userq_xa;
+		unsigned long flags;
+
+		doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+		xa_lock_irqsave(xa, flags);
+		fence_drv = xa_load(xa, doorbell_offset);
+		if (fence_drv)
+			amdgpu_userq_fence_driver_process(fence_drv);
+		xa_unlock_irqrestore(xa, flags);
+	}
+
+	return 0;
+}
+
 static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
 					      struct amdgpu_irq_src *source,
 					      struct amdgpu_iv_entry *entry)
@@ -1601,19 +1674,19 @@ static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int sdma_v6_0_set_clockgating_state(void *handle,
+static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
 					   enum amd_clockgating_state state)
 {
 	return 0;
 }
 
-static int sdma_v6_0_set_powergating_state(void *handle,
+static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
 					   enum amd_powergating_state state)
 {
 	return 0;
 }
 
-static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
 {
 }
 
@@ -1730,6 +1803,10 @@ static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
 	.process = sdma_v6_0_process_trap_irq,
 };
 
+static const struct amdgpu_irq_src_funcs sdma_v6_0_fence_irq_funcs = {
+	.process = sdma_v6_0_process_fence_irq,
+};
+
 static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
 	.process = sdma_v6_0_process_illegal_inst_irq,
 };
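sdma_v6_0_process_fence_irq above dispatches on the doorbell offset carried in src_data[0]: shift it down, use it as the key into the device's userq xarray, and hand the matching fence driver its completions. A standalone sketch of that dispatch, with a flat array standing in for the xarray and a placeholder shift constant (the real one is the SDMA0_QUEUE0_DOORBELL_OFFSET register field shift; all names here are illustrative):

#include <stdint.h>
#include <stdio.h>

#define DOORBELL_SHIFT 2        /* placeholder, not the register definition */
#define MAX_DOORBELLS  64

struct fence_driver {
        void (*process)(struct fence_driver *drv);
        uint32_t queue_id;
};

/* Stand-in for adev->userq_xa: doorbell index -> owning fence driver. */
static struct fence_driver *doorbell_map[MAX_DOORBELLS];

static void fence_process(struct fence_driver *drv)
{
        printf("signalling fences for user queue %u\n", drv->queue_id);
}

/* Mirrors the IRQ handler: decode the doorbell from the IV payload,
 * look up the owning fence driver, and let it process completions. */
static void handle_fence_irq(uint32_t src_data0)
{
        uint32_t index = src_data0 >> DOORBELL_SHIFT;
        struct fence_driver *drv;

        if (index >= MAX_DOORBELLS)
                return;
        drv = doorbell_map[index];  /* xa_load() under xa_lock in the driver */
        if (drv)
                drv->process(drv);
}

int main(void)
{
        struct fence_driver drv = { .process = fence_process, .queue_id = 7 };

        doorbell_map[3] = &drv;
        handle_fence_irq(3 << DOORBELL_SHIFT); /* finds queue 7 */
        handle_fence_irq(5 << DOORBELL_SHIFT); /* no driver registered: ignored */
        return 0;
}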
@@ -1739,6 +1816,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
 					adev->sdma.num_instances;
 	adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
+	adev->sdma.fence_irq.funcs = &sdma_v6_0_fence_irq_funcs;
 	adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
 }
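Finally, the reworked sdma_v6_0_reset_queue brackets the MES queue reset and ring resume between amdgpu_ring_reset_helper_begin/end instead of scanning for the instance by hand. The control flow reduces to the shape below (a sketch of the ordering only; the stubs are placeholders and the helpers' actual internals live in the common ring code):

#include <stdio.h>

/* Stubs standing in for the driver calls; only the ordering matters here. */
static int  stop_queue_and_save_fences(void) { puts("begin: quiesce ring");          return 0; }
static int  reset_queue_in_firmware(void)    { puts("ask MES to reset the queue");   return 0; }
static int  resume_queue(void)               { puts("re-program and restart ring");  return 0; }
static int  verify_and_signal_fences(void)   { puts("end: test ring, settle fences"); return 0; }

/* Shape of the reworked reset path: every step returns early on error,
 * and the begin/end helpers own the fence bookkeeping. */
static int reset_queue(void)
{
        int r;

        stop_queue_and_save_fences();      /* amdgpu_ring_reset_helper_begin() */

        r = reset_queue_in_firmware();     /* amdgpu_mes_reset_legacy_queue()  */
        if (r)
                return r;

        r = resume_queue();                /* sdma_v6_0_gfx_resume_instance()  */
        if (r)
                return r;

        return verify_and_signal_fences(); /* amdgpu_ring_reset_helper_end()   */
}

int main(void)
{
        return reset_queue();
}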