Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 616
1 file changed, 425 insertions(+), 191 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 62a257a4a3e9..f09d96bfee16 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -44,6 +44,8 @@
#include "gfx_v12_0.h"
#include "nbif_v6_3_1.h"
#include "mes_v12_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
#define GFX12_NUM_GFX_RINGS 1
#define GFX12_MEC_HPD_SIZE 2048
@@ -133,11 +135,14 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
-
/* cp header registers */
- SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
@@ -186,7 +191,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
};
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
@@ -215,7 +229,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};
static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
@@ -475,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t padding, offset;
-
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- padding = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
-
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
- *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r)
- return r;
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- cpu_ptr = &adev->wb.wb[index];
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
- r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
+ r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -528,12 +544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- if (!ring->is_mes_queue)
- amdgpu_ib_free(&ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
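Note: with the is_mes_queue branch gone, the IB ring test always uses the device writeback (WB) area: reserve a WB slot, seed it with the 0xCAFEDEAD sentinel, submit an IB that overwrites it, then poll for the new value. A minimal sketch of that pattern, with error handling and the actual packet/poll steps elided (the helper names appear in the hunk above; the surrounding scaffolding is abbreviated):

    uint32_t index;
    uint64_t gpu_addr;
    volatile uint32_t *cpu_ptr;
    struct amdgpu_ib ib;

    r = amdgpu_device_wb_get(adev, &index);        /* reserve a 32-bit WB slot */
    gpu_addr = adev->wb.gpu_addr + (index * 4);    /* GPU address of the slot */
    adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);  /* sentinel the IB must replace */
    cpu_ptr = &adev->wb.wb[index];

    r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
    /* ... fill ib.ptr[] with a PACKET3_WRITE_DATA targeting gpu_addr,
     * schedule the IB, wait for its fence, then check *cpu_ptr ... */

    amdgpu_ib_free(&ib, NULL);
    amdgpu_device_wb_free(adev, index);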
@@ -881,6 +895,34 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
soc24_grbm_select(adev, me, pipe, q, vm);
}
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+ return 0;
+ }
+
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -EINVAL;
+}
+
static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v12_0_select_se_sh,
@@ -889,6 +931,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
.update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
};
static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
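Note: the new get_gfx_shadow_info callback reports how much per-queue shadow and firmware work-area (CSA) memory a queue must provide, and at what alignment. A hedged sketch of a caller sizing an allocation from it (the callback and struct fields come from the hunk above; the allocation step itself is illustrative):

    struct amdgpu_gfx_shadow_info info;

    if (!gfx_v12_0_get_gfx_shadow_info(adev, &info, false)) {
        /* per the #defines above: 73728-byte shadow area and 484-byte
         * FW work area, each at 256-byte alignment */
        size_t shadow_bytes = ALIGN(info.shadow_size, info.shadow_alignment);
        size_t csa_bytes = ALIGN(info.csa_size, info.csa_alignment);
        /* allocate shadow_bytes + csa_bytes of GPU memory here */
    }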
@@ -1346,6 +1389,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
unsigned num_compute_rings;
int xcc_id = 0;
struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
@@ -1354,7 +1398,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(12, 0, 1):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 2;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -1372,6 +1416,22 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2780 &&
+ adev->gfx.pfp_fw_version >= 2840 &&
+ adev->gfx.mec_fw_version >= 3050 &&
+ adev->mes.fw_version[0] >= 123) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
if (adev->gfx.me_fw_version >= 2480 &&
adev->gfx.pfp_fw_version >= 2530 &&
adev->gfx.mec_fw_version >= 2680 &&
@@ -1383,11 +1443,13 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
}
- /* recalculate compute rings to use based on hardware configuration */
- num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
- adev->gfx.mec.num_queue_per_pipe) / 2;
- adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
- num_compute_rings);
+ if (adev->gfx.num_compute_rings) {
+ /* recalculate compute rings to use based on hardware configuration */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+ }
/* EOP Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
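Note: with the MEC topology set earlier in this function (num_pipe_per_mec = 2, num_queue_per_pipe = 4), the recalculation caps the kernel compute rings at (2 * 4) / 2 = 4; the new guard simply skips the clamp when kernel queues are disabled and num_compute_rings is already 0. Worked with the values above:

    num_compute_rings = (2 * 4) / 2;  /* = 4 */
    adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
                                      num_compute_rings);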
@@ -1433,37 +1495,41 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
return r;
}
- /* set up the gfx ring */
- for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
- continue;
-
- r = gfx_v12_0_gfx_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
- ring_id++;
+ if (adev->gfx.num_gfx_rings) {
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v12_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
}
}
}
- ring_id = 0;
- /* set up the compute queues - allocate horizontally across pipes */
- for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
- for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev,
- 0, i, k, j))
- continue;
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev,
+ 0, i, k, j))
+ continue;
- r = gfx_v12_0_compute_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
+ r = gfx_v12_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
- ring_id++;
+ ring_id++;
+ }
}
}
}
@@ -2948,6 +3014,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
mqd->cp_gfx_hqd_cntl = tmp;
/* set up cp_doorbell_control */
@@ -2968,6 +3036,14 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
/* active the queue */
mqd->cp_gfx_hqd_active = 1;
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -3091,6 +3167,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
@@ -3142,6 +3220,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_active = prop->hqd_active;
+ /* set UQ fence address */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -3600,6 +3682,49 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
return r;
}
+static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -3610,6 +3735,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v12_0_set_userq_eop_interrupts(adev, false);
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
@@ -3698,11 +3824,33 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
+
adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
- adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
- AMDGPU_MAX_COMPUTE_RINGS);
+ if (adev->gfx.disable_kq) {
+ adev->gfx.num_gfx_rings = 0;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
gfx_v12_0_set_kiq_pm4_funcs(adev);
gfx_v12_0_set_ring_funcs(adev);
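Note: the switch at the top of gfx_v12_0_early_init maps the amdgpu_user_queue parameter onto the two disable flags; summarizing the cases as read from the hunk:

    /*
     * amdgpu_user_queue    disable_kq  disable_uq  resulting queues
     *   -1 / 0 (default)     false       true      kernel queues only
     *    1                   false       false     kernel + user queues
     *    2                   true        false     user queues only
     */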
@@ -3733,6 +3881,10 @@ static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = gfx_v12_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
return 0;
}
@@ -4172,45 +4324,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always being used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
- lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
- upper_32_bits(ring->wptr));
- }
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
@@ -4235,42 +4359,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG(); /* only DOORBELL method supported on gfx12 now */
- }
+ BUG(); /* only DOORBELL method supported on gfx12 now */
}
}
@@ -4317,10 +4413,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x400000;
-
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -4340,10 +4432,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x40000000;
-
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -4383,8 +4471,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+ amdgpu_ring_write(ring, 0);
}
static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -4412,10 +4499,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- if (ring->is_mes_queue)
- gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
- else
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -4749,25 +4833,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- int i;
+ u32 doorbell_offset = entry->src_data[0];
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
- uint32_t mes_queue_id = entry->src_data[0];
+ int i;
DRM_DEBUG("IH: CP EOP\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -4934,27 +5016,29 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
- switch (me_id) {
- case 0:
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
- }
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
}
- break;
- default:
- BUG();
- break;
}
}
@@ -5160,6 +5244,69 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
amdgpu_gfx_off_ctrl(adev, true);
}
+static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable pipe reset until the CPFW fully supports it. */
+ dev_warn_once(adev->dev, "The CPFW doesn't support pipe reset yet.\n");
+ return false;
+}
+
+static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
+ r == 0 ? "succeeded" : "failed");
+ /* Sometimes the ME start PC counter doesn't latch correctly, so the
+ * PC check serves only as a reference; the pipe reset result relies
+ * on the later ring test.
+ */
+ return 0;
+}
+
static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
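Note: the post-reset check in gfx_v12_reset_gfx_pipe reads the RS64 instruction pointer (a dword index), shifts it left by 2 to get a byte address, and subtracts the firmware start address, so r == 0 means the pipe restarted at the microcode entry point. Per the comment in the function, this is only a heuristic, which is why the function still returns 0 and defers the verdict to the later ring test:

    /* INSTR_PNTR counts dwords; << 2 converts to bytes.  Zero means the
     * PC is back at RS64_FW_UC_START_ADDR_LO, i.e. a clean restart. */
    r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
        RS64_FW_UC_START_ADDR_LO;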
@@ -5170,8 +5317,10 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
if (r) {
- dev_err(adev->dev, "reset via MES failed %d\n", r);
- return r;
+ dev_warn(adev->dev, "reset via MES failed (%d), trying pipe reset\n", r);
+ r = gfx_v12_reset_gfx_pipe(ring);
+ if (r)
+ return r;
}
r = gfx_v12_0_kgq_init_queue(ring, true);
@@ -5189,6 +5338,89 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
return amdgpu_ring_test_ring(ring);
}
+static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r = 0;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ /* No F32 MEC instruction pointer register was found; assume the
+ * driver won't run in F32 mode.
+ */
+ }
+
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
+ r == 0 ? "succeeded" : "failed");
+ /* The ring test is needed to verify the pipe reset result. */
+ return 0;
+}
+
static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
@@ -5199,8 +5431,10 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
if (r) {
- dev_err(adev->dev, "reset via MMIO failed %d\n", r);
- return r;
+ dev_warn(adev->dev, "failed (%d) to reset kcq, trying pipe reset\n", r);
+ r = gfx_v12_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
}
r = gfx_v12_0_kcq_init_queue(ring, true);