diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 98 |
1 files changed, 64 insertions, 34 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index a988dfb1d394..14e87234171a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -146,6 +146,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma[i].feature_version >= 20) + adev->sdma[i].burst_nop = true; if (adev->firmware.smu_load) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; @@ -218,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); } +static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->nop); +} + /** * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine * @@ -245,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 2) - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP)); + sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); /* base must be 32 byte aligned */ @@ -673,6 +688,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; + struct fence *f = NULL; unsigned i; unsigned index; int r; @@ -688,12 +704,11 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); - return r; + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -707,19 +722,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); - DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r); - return r; - } - r = amdgpu_fence_wait(ib.fence, false); + r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); + if (r) + goto err1; + + r = fence_wait(f, false); if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); - return r; + goto err1; } for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); @@ -729,12 +741,17 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { DRM_INFO("ib test on ring %d succeeded in %u usecs\n", - ib.fence->ring->idx, i); + ring->idx, i); + goto err1; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } + +err1: + fence_put(f); amdgpu_ib_free(adev, &ib); +err0: amdgpu_wb_free(adev, index); return r; } @@ -877,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib) { - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); } /** @@ -1312,6 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .test_ring = sdma_v2_4_ring_test_ring, .test_ib = sdma_v2_4_ring_test_ib, .is_lockup = sdma_v2_4_ring_is_lockup, + .insert_nop = sdma_v2_4_ring_insert_nop, }; static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) @@ -1348,19 +1377,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. */ -static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, +static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); - amdgpu_ring_write(ring, byte_count); - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ - amdgpu_ring_write(ring, lower_32_bits(src_offset)); - amdgpu_ring_write(ring, upper_32_bits(src_offset)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); } /** @@ -1373,16 +1402,16 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, * * Fill GPU buffers using the DMA engine (VI). */ -static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ring *ring, +static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); - amdgpu_ring_write(ring, src_data); - amdgpu_ring_write(ring, byte_count); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count; } static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = { @@ -1415,5 +1444,6 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; + adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; } } |