summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c98
1 files changed, 64 insertions, 34 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index a988dfb1d394..14e87234171a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -146,6 +146,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma[i].feature_version >= 20)
+ adev->sdma[i].burst_nop = true;
if (adev->firmware.smu_load) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -218,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
}
+static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->nop);
+}
+
/**
* sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
*
@@ -245,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */
- while ((ring->wptr & 7) != 2)
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+ sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
+
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
/* base must be 32 byte aligned */
@@ -673,6 +688,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
+ struct fence *f = NULL;
unsigned i;
unsigned index;
int r;
@@ -688,12 +704,11 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
-
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
- amdgpu_wb_free(adev, index);
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
- return r;
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@@ -707,19 +722,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
ib.length_dw = 8;
- r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
- if (r) {
- amdgpu_ib_free(adev, &ib);
- amdgpu_wb_free(adev, index);
- DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
- return r;
- }
- r = amdgpu_fence_wait(ib.fence, false);
+ r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ &f);
+ if (r)
+ goto err1;
+
+ r = fence_wait(f, false);
if (r) {
- amdgpu_ib_free(adev, &ib);
- amdgpu_wb_free(adev, index);
DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
- return r;
+ goto err1;
}
for (i = 0; i < adev->usec_timeout; i++) {
tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -729,12 +741,17 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
}
if (i < adev->usec_timeout) {
DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
- ib.fence->ring->idx, i);
+ ring->idx, i);
+ goto err1;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
r = -EINVAL;
}
+
+err1:
+ fence_put(f);
amdgpu_ib_free(adev, &ib);
+err0:
amdgpu_wb_free(adev, index);
return r;
}
@@ -877,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
*/
static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
{
- while (ib->length_dw & 0x7)
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
/**
@@ -1312,6 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.test_ring = sdma_v2_4_ring_test_ring,
.test_ib = sdma_v2_4_ring_test_ib,
.is_lockup = sdma_v2_4_ring_is_lockup,
+ .insert_nop = sdma_v2_4_ring_insert_nop,
};
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1348,19 +1377,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
-static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
- amdgpu_ring_write(ring, byte_count);
- amdgpu_ring_write(ring, 0); /* src/dst endian swap */
- amdgpu_ring_write(ring, lower_32_bits(src_offset));
- amdgpu_ring_write(ring, upper_32_bits(src_offset));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
@@ -1373,16 +1402,16 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring,
*
* Fill GPU buffers using the DMA engine (VI).
*/
-static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
uint32_t src_data,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
- amdgpu_ring_write(ring, src_data);
- amdgpu_ring_write(ring, byte_count);
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
}
static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
@@ -1415,5 +1444,6 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring;
+ adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
}
}