From 8f970c46b56235a3965e0965fa3fd60e7567fecc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 14:10:17 -0400 Subject: drm/amdgpu/gfx: decouple the number of kgqs from the hw The driver currently sets up one kgq per pipe. As such adev->gfx.me.num_queue_per_pipe is hardcoded to 1 everywhere. This is fine for kernel queues, but when we enable user queues we need to know that actual number of queues per pipe. Decouple the kgq setup from the actual hardware count. For dev core dumps and user queues, we want to know the actual number of queues per pipe. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d57db42f9536..0c9b28a46050 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1571,6 +1571,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1703,7 +1704,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; -- cgit v1.2.3 From 9cffd67e803a081ce548488161c69f2cddb97fe7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 14:24:47 -0400 Subject: drm/amdgpu/gfx: assign the actual me0 queues per pipe Set the actual number of queues per pipe for ME0 (gfx). This way we will dump all of the queues properly in dev core dumps. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index cba9b973f0a7..d9c2dab17f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4764,7 +4764,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 1, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -4779,7 +4779,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0c9b28a46050..c78458ecb88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1581,7 +1581,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1594,7 +1594,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 7b20ab48f762..ddac26b012bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1355,7 +1355,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; -- cgit v1.2.3 From a9a8bccaa3ba64d509cf7df387cf0b5e1cd06499 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:58:19 -0400 Subject: drm/amdgpu/gfx11: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c78458ecb88b..cedfcd118430 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -859,8 +859,6 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, PACKET3_SET_CONTEXT_REG_START); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- cgit v1.2.3 From dcc8e148e013f0d6b0a715e0aa05f033ca7945e9 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 17 Jul 2024 14:58:00 +0800 Subject: drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe reset Implement the kernel graphics queue pipe reset,and the driver will fallback to pipe reset when the queue reset fails. However, the ME FW hasn't fully supported pipe reset yet so disable the KGQ pipe reset temporarily. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 72 ++++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4eedd92f000b..06fe21e15ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -25,6 +25,8 @@ #include "amdgpu_socbb.h" +#define RS64_FW_UC_START_ADDR_LO 0x3000 + struct common_firmware_header { uint32_t size_bytes; /* size of the entire header+image(s) in bytes */ uint32_t header_size_bytes; /* size of just the header in bytes */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index cedfcd118430..42a08751af5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6608,6 +6608,69 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + + +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, + * so the pipe reset status relies on the later gfx ring test result. + */ + return 0; +} + static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6617,8 +6680,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return -EINVAL; r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); - if (r) - return r; + if (r) { + + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v11_reset_gfx_pipe(ring); + if (r) + return r; + } r = gfx_v11_0_kgq_init_queue(ring, true); if (r) { -- cgit v1.2.3 From d69248cf4c91949a7b83b3fd77c7c229336bb23c Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 21 Feb 2025 20:14:31 +0800 Subject: drm/amdgpu/gfx11: Implement the GFX11 KCQ pipe reset Implement the GFX11 compute pipe reset. As the GFX11 CPFW still hasn't fully supported pipe reset yet, therefore disable the KCQ pipe reset temporarily. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 136 ++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 42a08751af5a..7a9ab4c4b2f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6703,6 +6703,136 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + if (ring->me == 1) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe + * reset status relies on the compute ring test result. + */ + return 0; +} + static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6713,8 +6843,10 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v11_0_reset_compute_pipe(ring); + if (r) + return r; } r = gfx_v11_0_kcq_init_queue(ring, true); -- cgit v1.2.3 From eb15a5d1aef59d1ec3aafe9d9f13953deb9977b6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 11:48:26 -0400 Subject: drm/amdgpu/gfx11: dump full CP packet header FIFOs In dev core dump, dump the full header fifo for each queue. Each FIFO has 8 entries. Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 59 ++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 7a9ab4c4b2f0..77df4452979b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -177,9 +177,13 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -230,7 +234,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { @@ -259,7 +272,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_11_0[] = { @@ -6892,9 +6922,14 @@ static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_11[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -6954,9 +6989,13 @@ static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) /* ME0 is for GFX so start from 1 for CP */ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_11[reg])); + RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_11[reg])); } index += reg_count; } -- cgit v1.2.3 From a1d201e16940775f0e2f0230960d69e40879ec6d Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 2 May 2024 12:37:30 +0200 Subject: drm/amdgpu: enable GFX-V11 userqueue support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables GFX-v11 IP support in the usermode queue base code. It typically: - adds a GFX_v11 specific MQD structure - sets IP functions to create and destroy MQDs - sets MQD objects coming from userspace V10: introduced this spearate patch for GFX V11 enabling (Alex). V11: Addressed review comments: - update the comments in GFX mqd structure informing user about using the INFO IOCTL for object sizes (Alex) - rename struct drm_amdgpu_userq_mqd_gfx_v11 to drm_amdgpu_userq_mqd_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 6 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 28 ++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 19 ++++++++++++++++ 4 files changed, 56 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 64a063ec3b27..5cb984c509c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -188,6 +188,12 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) uint64_t index; int qid, r = 0; + /* Usermode queues are only supported for GFX IP as of now */ + if (args->in.ip_type != AMDGPU_HW_IP_GFX) { + DRM_ERROR("Usermode queue doesn't support IP type %u\n", args->in.ip_type); + return -EINVAL; + } + if (args->in.flags) { DRM_ERROR("Usermode queue flags not supported yet\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 77df4452979b..ec487fbeaec5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,6 +48,7 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" +#include "mes_v11_0_userqueue.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -1613,6 +1614,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -1626,6 +1628,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index bc9ce5233a7d..bcfa0d1ef7bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -180,6 +180,34 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return r; } + /* Shadow, GDS and CSA objects come directly from userspace */ + if (mqd_user->ip_type == AMDGPU_HW_IP_GFX) { + struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + return -EINVAL; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + amdgpu_userqueue_destroy_object(uq_mgr, ctx); + return -ENOMEM; + } + + mqd->shadow_base_lo = mqd_gfx_v11->shadow_va & 0xFFFFFFFC; + mqd->shadow_base_hi = upper_32_bits(mqd_gfx_v11->shadow_va); + + mqd->gds_bkup_base_lo = mqd_gfx_v11->gds_va & 0xFFFFFFFC; + mqd->gds_bkup_base_hi = upper_32_bits(mqd_gfx_v11->gds_va); + + mqd->fw_work_area_base_lo = mqd_gfx_v11->csa_va & 0xFFFFFFFC; + mqd->fw_work_area_base_hi = upper_32_bits(mqd_gfx_v11->csa_va); + kfree(mqd_gfx_v11); + } + return 0; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 53081050cb3e..4e07e15d5076 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -409,6 +409,25 @@ union drm_amdgpu_userq { struct drm_amdgpu_userq_out out; }; +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 gds_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 2c695d7c072067cd53fb52e52aa0b48277120314 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 9 May 2024 14:31:15 +0200 Subject: drm/amdgpu: enable compute/gfx usermode queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does the necessary changes required to enable compute workload support using the existing usermode queues infrastructure. V9: Patch introduced V10: Add custom IP specific mqd strcuture for compute (Alex) V11: Rename drm_amdgpu_userq_mqd_compute_gfx_v11 to drm_amdgpu_userq_mqd_compute_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 4 +++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 23 +++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 10 ++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 2c5747cc492e..5173718c3848 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -189,7 +189,9 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) int qid, r = 0; /* Usermode queues are only supported for GFX IP as of now */ - if (args->in.ip_type != AMDGPU_HW_IP_GFX && args->in.ip_type != AMDGPU_HW_IP_DMA) { + if (args->in.ip_type != AMDGPU_HW_IP_GFX && + args->in.ip_type != AMDGPU_HW_IP_DMA && + args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { DRM_ERROR("Usermode queue doesn't support IP type %u\n", args->in.ip_type); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ec487fbeaec5..ed57c3e4c0c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1615,6 +1615,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -1629,6 +1630,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index dc5359742774..e70b8e429e9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -268,6 +268,29 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->use_doorbell = true; userq_props->doorbell_index = queue->doorbell_index; + if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { + struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; + + if (mqd_user->mqd_size != sizeof(*compute_mqd)) { + DRM_ERROR("Invalid compute IP MQD size\n"); + r = -EINVAL; + goto free_mqd; + } + + compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(compute_mqd)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->eop_gpu_addr = compute_mqd->eop_va; + userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; + userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; + userq_props->hqd_active = false; + kfree(compute_mqd); + } + queue->userq_prop = userq_props; r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6ae988574084..59f0818e8dcd 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -438,6 +438,16 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 { __u64 csa_va; }; +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and must be at least 1 page + * sized. + */ + __u64 eop_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From f540f69256a3a05973e87bcdd25881a9fe731b61 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 27 Aug 2024 14:55:35 +0530 Subject: drm/amdgpu: add kernel config for gfx-userqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch: - adds a kernel config option "CONFIG_DRM_AMDGPU_NAVI3X_USERQ" - moves the usequeue initialization code for all IPs under this flag - cover the core userqueue functions under this config - adds stub function for userqueue ioctl. so that the userqueue works only when the config is enabled. V9: Introduce this patch V10: Call it CONFIG_DRM_AMDGPU_NAVI3X_USERQ instead of CONFIG_DRM_AMDGPU_USERQ_GFX (Christian) V11: Add GFX in the config help description message. V12: Add depends on BROKEN for this config, remove this when the rest of the code is available. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 9 +++++++++ drivers/gpu/drm/amd/amdgpu/Makefile | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 ++- 5 files changed, 26 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 1a11cab741ac..5bb44ea80164 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -96,6 +96,15 @@ config DRM_AMDGPU_WERROR Add -Werror to the build flags for amdgpu.ko. Only enable this if you are warning code for amdgpu.ko. +config DRM_AMDGPU_NAVI3X_USERQ + bool "Enable Navi 3x gfx usermode queues" + depends on DRM_AMDGPU + depends on BROKEN + default n + help + Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute + workload submission. This feature is experimental and supported on Navi 3X only. + source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/amdkfd/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f42b9b4be9a4..376e8f5396ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -175,7 +175,9 @@ amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ mes_v12_0.o \ - mes_v11_0_userqueue.o + +# add GFX userqueue support +amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_v11_0_userqueue.o # add UVD block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 5173718c3848..21e805530a6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -39,6 +39,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, kfree(queue); } +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ static struct amdgpu_usermode_queue * amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { @@ -279,6 +280,13 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, return r; } +#else +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + return 0; +} +#endif int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ed57c3e4c0c4..62a6da083a8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1614,8 +1614,10 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; +#endif break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -1629,8 +1631,10 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; +#ifdef CONFIG_DRM_AMD_USERQ_GFX adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; +#endif break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index f163b253c200..d4e1b2cd1f35 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1377,8 +1377,9 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs; - +#endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; -- cgit v1.2.3 From 8949843762631d9d0fc526dfb61a272dca29fc6f Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:38:46 +0530 Subject: drm/amdgpu: Enable userq fence interrupt support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to handle the userqueue protected fence signal hardware interrupt. Create a xarray which maps the doorbell index to the fence driver address. This would help to retrieve the fence driver information when an userq fence interrupt is triggered. Firmware sends the doorbell offset value and this info is compared with the queue's mqd doorbell offset value. If they are same, we process the userq fence interrupt. v1:(Christian): - use xa_load to extract the fence driver. - move the amdgpu_userq_fence_driver_process call within the xa_lock as there is a chance that fence_drv might be freed. Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 25 ++++++++++++------------- 3 files changed, 20 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cfaa5d77b20a..fdd271edb0b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4336,6 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); + xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); + INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 1a9565b61266..cd473c985e36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -71,6 +71,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq) { struct amdgpu_userq_fence_driver *fence_drv; + unsigned long flags; int r; fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); @@ -98,6 +99,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, fence_drv->context = dma_fence_context_alloc(1); get_task_comm(fence_drv->timeline_name, current); + xa_lock_irqsave(&adev->userq_xa, flags); + __xa_store(&adev->userq_xa, userq->doorbell_index, + fence_drv, GFP_KERNEL); + xa_unlock_irqrestore(&adev->userq_xa, flags); + userq->fence_drv = fence_drv; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 62a6da083a8f..b061b654d1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -49,6 +49,7 @@ #include "nbio_v4_3.h" #include "mes_v11_0.h" #include "mes_v11_0_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -6334,25 +6335,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; -- cgit v1.2.3 From d9e697f19bda777a7934e3bbdc67889b06d58019 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 30 Oct 2024 15:32:27 +0100 Subject: drm/amdgpu: bypass SRIOV check for shadow size info Currently, the shadow FW space size and alignment information is protected under a flag (adev->gfx.cp_gfx_shadow) which gets set only in case of SRIOV setups. if (amdgpu_sriov_vf(adev)) adev->gfx.cp_gfx_shadow = true; But we need this information for GFX Userqueues, so that user can create these objects while creating userqueue. This patch series creates a method to get this information bypassing the dependency on this check. This patch: - adds a new input parameter flag to the gfx.funcs->get_gfx_shadow_info fptr definition, so that it can accommodate the information without the check (adev->gfx.cp_gfx_shadow) on request. - updates the existing definition of amdgpu_gfx_get_gfx_shadow_info to adjust with this new flag. Next patch in the series is adding a UAPI which will consume this info. V2: split this patch from the new UAPI patch Cc: Alex Deucher Cc: Christian Koenig Cc: Arvind Yadav Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 5d70b3ae3fe1..319e6e547c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -305,7 +305,8 @@ struct amdgpu_gfx_funcs { void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, - struct amdgpu_gfx_shadow_info *shadow_info); + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, @@ -503,7 +504,7 @@ struct amdgpu_gfx_ras_mem_id_entry { #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) -#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) +#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false)) /** * amdgpu_gfx_create_bitmask - create a bitmask diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b061b654d1ec..933c4ad5eff9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1086,14 +1086,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, #define MQD_FWWORKAREA_SIZE 484 #define MQD_FWWORKAREA_ALIGNMENT 256 -static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, +static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info) { - if (adev->gfx.cp_gfx_shadow) { - shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; - shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; - shadow_info->csa_size = MQD_FWWORKAREA_SIZE; - shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); return 0; } else { memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); -- cgit v1.2.3 From 7179439e34bbeef28e1b5083c365e7295b07f9bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:14:34 -0400 Subject: drm/amdgpu/gfx11: update mqd init for UQ Set the addresses for the UQ metadata. V2: Fix lower address (Shashank) V3: Restore lower_32_bits() for MQD addresses (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 933c4ad5eff9..b8f75e1ba72c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4127,6 +4127,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); + mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + return 0; } -- cgit v1.2.3 From 79819d9a0ac38bf83ac712e00be2d53104895b84 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Nov 2024 15:45:19 +0100 Subject: drm/amdgpu/uq: make MES UQ setup generic Now that all of the IP specific code has been moved into the IP specific functions, we can make this code generic. V2: Fixed build errors and porting logics (Shashank) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 386 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/mes_userqueue.h | 30 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 383 ---------------------- drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h | 30 -- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 +- 7 files changed, 424 insertions(+), 421 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c create mode 100644 drivers/gpu/drm/amd/amdgpu/mes_userqueue.h delete mode 100644 drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c delete mode 100644 drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 0b0c8ec46516..513c4d64f554 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -177,7 +177,7 @@ amdgpu-y += \ mes_v12_0.o \ # add GFX userqueue support -amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_v11_0_userqueue.o +amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_userqueue.o # add UVD block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b8f75e1ba72c..63b7c7bfcc4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,7 +48,7 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" #include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 @@ -1623,8 +1623,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif break; case IP_VERSION(11, 0, 1): @@ -1640,8 +1640,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMD_USERQ_GFX - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c new file mode 100644 index 000000000000..9c2fc8ae0d56 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" +#include "v11_structs.h" + +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE + +static int +mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + return 0; +} + +static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct amdgpu_mqd_prop *userq_props) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + struct mes_add_queue_input queue_input; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); + + queue_input.process_va_start = 0; + queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; + + /* set process quantum to 10 ms and gang quantum to 1 ms as default */ + queue_input.process_quantum = 100000; + queue_input.gang_quantum = 10000; + queue_input.paging = false; + + queue_input.process_context_addr = ctx->gpu_addr; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + + queue_input.process_id = queue->vm->pasid; + queue_input.queue_type = queue->queue_type; + queue_input.mqd_addr = queue->mqd.gpu_addr; + queue_input.wptr_addr = userq_props->wptr_gpu_addr; + queue_input.queue_size = userq_props->queue_size >> 2; + queue_input.doorbell_offset = userq_props->doorbell_index; + queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + DRM_ERROR("Failed to map queue in HW, err (%d)\n", r); + return r; + } + + queue->queue_active = true; + DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); + return 0; +} + +static void mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_remove_queue_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); + queue_input.doorbell_offset = queue->doorbell_index; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) + DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); + queue->queue_active = false; +} + +static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *mqd_user) +{ + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r, size; + + /* + * The FW expects at least one page space allocated for + * process ctx and gang ctx each. Create an object + * for the same. + */ + size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; + r = amdgpu_userqueue_create_object(uq_mgr, ctx, size); + if (r) { + DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); + return r; + } + + return 0; +} + +static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue) +{ + struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; + + mqd->fenceaddress_lo = lower_32_bits(queue->fence_drv->gpu_addr); + mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr); +} + +static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args_in, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; + struct drm_amdgpu_userq_in *mqd_user = args_in; + struct amdgpu_mqd_prop *userq_props; + int r; + + /* Structure to initialize MQD for userqueue using generic MQD init function */ + userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL); + if (!userq_props) { + DRM_ERROR("Failed to allocate memory for userq_props\n"); + return -ENOMEM; + } + + if (!mqd_user->wptr_va || !mqd_user->rptr_va || + !mqd_user->queue_va || mqd_user->queue_size == 0) { + DRM_ERROR("Invalid MQD parameters for userqueue\n"); + r = -EINVAL; + goto free_props; + } + + r = amdgpu_userqueue_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); + if (r) { + DRM_ERROR("Failed to create MQD object for userqueue\n"); + goto free_props; + } + + /* Initialize the MQD BO with user given values */ + userq_props->wptr_gpu_addr = mqd_user->wptr_va; + userq_props->rptr_gpu_addr = mqd_user->rptr_va; + userq_props->queue_size = mqd_user->queue_size; + userq_props->hqd_base_gpu_addr = mqd_user->queue_va; + userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; + userq_props->use_doorbell = true; + userq_props->doorbell_index = queue->doorbell_index; + + if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { + struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; + + if (mqd_user->mqd_size != sizeof(*compute_mqd)) { + DRM_ERROR("Invalid compute IP MQD size\n"); + r = -EINVAL; + goto free_mqd; + } + + compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(compute_mqd)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->eop_gpu_addr = compute_mqd->eop_va; + userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; + userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; + userq_props->hqd_active = false; + kfree(compute_mqd); + } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->shadow_addr = mqd_gfx_v11->shadow_va; + userq_props->csa_addr = mqd_gfx_v11->csa_va; + kfree(mqd_gfx_v11); + } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { + struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid SDMA MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_sdma_v11)) { + DRM_ERROR("Failed to read sdma user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->csa_addr = mqd_sdma_v11->csa_va; + kfree(mqd_sdma_v11); + } + + queue->userq_prop = userq_props; + + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); + if (r) { + DRM_ERROR("Failed to initialize MQD for userqueue\n"); + goto free_mqd; + } + + /* Create BO for FW operations */ + r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); + if (r) { + DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); + goto free_mqd; + } + + mes_userq_set_fence_space(queue); + + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + + /* Map userqueue into FW using MES */ + r = mes_userq_map(uq_mgr, queue, userq_props); + if (r) { + DRM_ERROR("Failed to init MQD\n"); + goto free_ctx; + } + + return 0; + +free_ctx: + amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); + +free_mqd: + amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + +free_props: + kfree(userq_props); + + return r; +} + +static void +mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + if (queue->queue_active) + mes_userq_unmap(uq_mgr, queue); + + amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); + kfree(queue->userq_prop); + amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); +} + +static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + if (queue->queue_active) { + mes_userq_unmap(uq_mgr, queue); + queue->queue_active = false; + } + + return 0; +} + +static int mes_userq_resume(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + int ret; + + if (queue->queue_active) + return 0; + + ret = mes_userq_map(uq_mgr, queue, queue->userq_prop); + if (ret) { + DRM_ERROR("Failed to resume queue\n"); + return ret; + } + + queue->queue_active = true; + return 0; +} + +const struct amdgpu_userq_funcs userq_mes_funcs = { + .mqd_create = mes_userq_mqd_create, + .mqd_destroy = mes_userq_mqd_destroy, + .suspend = mes_userq_suspend, + .resume = mes_userq_resume, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h new file mode 100644 index 000000000000..d0a521312ad4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef MES_USERQ_H +#define MES_USERQ_H +#include "amdgpu_userqueue.h" + +extern const struct amdgpu_userq_funcs userq_mes_funcs; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c deleted file mode 100644 index 1ba6b91b03c4..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ /dev/null @@ -1,383 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright 2024 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "amdgpu_gfx.h" -#include "mes_v11_0_userqueue.h" -#include "amdgpu_userq_fence.h" - -#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE -#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE - -static int -mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_bo *bo) -{ - int ret; - - ret = amdgpu_bo_reserve(bo, true); - if (ret) { - DRM_ERROR("Failed to reserve bo. ret %d\n", ret); - goto err_reserve_bo_failed; - } - - ret = amdgpu_ttm_alloc_gart(&bo->tbo); - if (ret) { - DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); - goto err_map_bo_gart_failed; - } - - amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); - - return 0; - -err_map_bo_gart_failed: - amdgpu_bo_unreserve(bo); -err_reserve_bo_failed: - return ret; -} - -static int -mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - uint64_t wptr) -{ - struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; - struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; - int ret; - - wptr_vm = queue->vm; - ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (ret) - return ret; - - wptr &= AMDGPU_GMC_HOLE_MASK; - wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - DRM_ERROR("Failed to lookup wptr bo\n"); - return -EINVAL; - } - - wptr_obj->obj = wptr_mapping->bo_va->base.bo; - if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { - DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); - return -EINVAL; - } - - ret = mes_v11_0_map_gtt_bo_to_gart(wptr_obj->obj); - if (ret) { - DRM_ERROR("Failed to map wptr bo to GART\n"); - return ret; - } - - queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); - return 0; -} - -static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct amdgpu_mqd_prop *userq_props) -{ - struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_userq_obj *ctx = &queue->fw_obj; - struct mes_add_queue_input queue_input; - int r; - - memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); - - queue_input.process_va_start = 0; - queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; - - /* set process quantum to 10 ms and gang quantum to 1 ms as default */ - queue_input.process_quantum = 100000; - queue_input.gang_quantum = 10000; - queue_input.paging = false; - - queue_input.process_context_addr = ctx->gpu_addr; - queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; - queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - - queue_input.process_id = queue->vm->pasid; - queue_input.queue_type = queue->queue_type; - queue_input.mqd_addr = queue->mqd.gpu_addr; - queue_input.wptr_addr = userq_props->wptr_gpu_addr; - queue_input.queue_size = userq_props->queue_size >> 2; - queue_input.doorbell_offset = userq_props->doorbell_index; - queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); - queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; - - amdgpu_mes_lock(&adev->mes); - r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); - amdgpu_mes_unlock(&adev->mes); - if (r) { - DRM_ERROR("Failed to map queue in HW, err (%d)\n", r); - return r; - } - - queue->queue_active = true; - DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); - return 0; -} - -static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) -{ - struct amdgpu_device *adev = uq_mgr->adev; - struct mes_remove_queue_input queue_input; - struct amdgpu_userq_obj *ctx = &queue->fw_obj; - int r; - - memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); - queue_input.doorbell_offset = queue->doorbell_index; - queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; - - amdgpu_mes_lock(&adev->mes); - r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); - amdgpu_mes_unlock(&adev->mes); - if (r) - DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); - queue->queue_active = false; -} - -static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct drm_amdgpu_userq_in *mqd_user) -{ - struct amdgpu_userq_obj *ctx = &queue->fw_obj; - int r, size; - - /* - * The FW expects at least one page space allocated for - * process ctx and gang ctx each. Create an object - * for the same. - */ - size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; - r = amdgpu_userqueue_create_object(uq_mgr, ctx, size); - if (r) { - DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); - return r; - } - - return 0; -} - -static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue) -{ - struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; - - mqd->fenceaddress_lo = lower_32_bits(queue->fence_drv->gpu_addr); - mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr); -} - -static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, - struct drm_amdgpu_userq_in *args_in, - struct amdgpu_usermode_queue *queue) -{ - struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; - struct drm_amdgpu_userq_in *mqd_user = args_in; - struct amdgpu_mqd_prop *userq_props; - int r; - - /* Structure to initialize MQD for userqueue using generic MQD init function */ - userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL); - if (!userq_props) { - DRM_ERROR("Failed to allocate memory for userq_props\n"); - return -ENOMEM; - } - - if (!mqd_user->wptr_va || !mqd_user->rptr_va || - !mqd_user->queue_va || mqd_user->queue_size == 0) { - DRM_ERROR("Invalid MQD parameters for userqueue\n"); - r = -EINVAL; - goto free_props; - } - - r = amdgpu_userqueue_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); - if (r) { - DRM_ERROR("Failed to create MQD object for userqueue\n"); - goto free_props; - } - - /* Initialize the MQD BO with user given values */ - userq_props->wptr_gpu_addr = mqd_user->wptr_va; - userq_props->rptr_gpu_addr = mqd_user->rptr_va; - userq_props->queue_size = mqd_user->queue_size; - userq_props->hqd_base_gpu_addr = mqd_user->queue_va; - userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; - userq_props->use_doorbell = true; - userq_props->doorbell_index = queue->doorbell_index; - - if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { - struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; - - if (mqd_user->mqd_size != sizeof(*compute_mqd)) { - DRM_ERROR("Invalid compute IP MQD size\n"); - r = -EINVAL; - goto free_mqd; - } - - compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); - if (IS_ERR(compute_mqd)) { - DRM_ERROR("Failed to read user MQD\n"); - r = -ENOMEM; - goto free_mqd; - } - - userq_props->eop_gpu_addr = compute_mqd->eop_va; - userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; - userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; - userq_props->hqd_active = false; - kfree(compute_mqd); - } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { - struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; - - if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { - DRM_ERROR("Invalid GFX MQD\n"); - return -EINVAL; - } - - mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); - if (IS_ERR(mqd_gfx_v11)) { - DRM_ERROR("Failed to read user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; - } - - userq_props->shadow_addr = mqd_gfx_v11->shadow_va; - userq_props->csa_addr = mqd_gfx_v11->csa_va; - kfree(mqd_gfx_v11); - } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { - struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; - - if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { - DRM_ERROR("Invalid SDMA MQD\n"); - return -EINVAL; - } - - mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); - if (IS_ERR(mqd_sdma_v11)) { - DRM_ERROR("Failed to read sdma user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; - } - - userq_props->csa_addr = mqd_sdma_v11->csa_va; - kfree(mqd_sdma_v11); - } - - queue->userq_prop = userq_props; - - r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); - if (r) { - DRM_ERROR("Failed to initialize MQD for userqueue\n"); - goto free_mqd; - } - - /* Create BO for FW operations */ - r = mes_v11_0_userq_create_ctx_space(uq_mgr, queue, mqd_user); - if (r) { - DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); - goto free_mqd; - } - - mes_v11_0_userq_set_fence_space(queue); - - /* FW expects WPTR BOs to be mapped into GART */ - r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); - if (r) { - DRM_ERROR("Failed to create WPTR mapping\n"); - goto free_ctx; - } - - /* Map userqueue into FW using MES */ - r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); - if (r) { - DRM_ERROR("Failed to init MQD\n"); - goto free_ctx; - } - - return 0; - -free_ctx: - amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); - -free_mqd: - amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); - -free_props: - kfree(userq_props); - - return r; -} - -static void -mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) -{ - if (queue->queue_active) - mes_v11_0_userq_unmap(uq_mgr, queue); - - amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); - kfree(queue->userq_prop); - amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); -} - -static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) -{ - if (queue->queue_active) { - mes_v11_0_userq_unmap(uq_mgr, queue); - queue->queue_active = false; - } - - return 0; -} - -static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) -{ - int ret; - - if (queue->queue_active) - return 0; - - ret = mes_v11_0_userq_map(uq_mgr, queue, queue->userq_prop); - if (ret) { - DRM_ERROR("Failed to resume queue\n"); - return ret; - } - - queue->queue_active = true; - return 0; -} - -const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = { - .mqd_create = mes_v11_0_userq_mqd_create, - .mqd_destroy = mes_v11_0_userq_mqd_destroy, - .suspend = mes_v11_0_userq_suspend, - .resume = mes_v11_0_userq_resume, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h deleted file mode 100644 index 2c102361ca82..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2024 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef MES_V11_0_USERQ_H -#define MES_V11_0_USERQ_H -#include "amdgpu_userqueue.h" - -extern const struct amdgpu_userq_funcs userq_mes_v11_0_funcs; -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index fe389379e890..9bc3c7a35d18 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -43,7 +43,7 @@ #include "sdma_common.h" #include "sdma_v6_0.h" #include "v11_structs.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); @@ -1381,7 +1381,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) -- cgit v1.2.3 From dd5a376cd234c3fff79667598a1e06300cf75026 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Tue, 26 Nov 2024 15:51:08 +0100 Subject: drm/amdgpu: enable userqueue secure sem for GFX 12 - Add a field in struct amdgpu_mqd_prop for userqueue secure sem fence address since now we have a generic file for mes_userqueue.c - Add secure sem fence address mqd support to gfx12 into their corresponding init functions. - Enable secure semaphore IRQ handling V2: Address review comment from Alex: Use fence_address instead of fenceaddress (Shashank) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam Signed-off-by: Somalapuram Amaranath Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 ++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 11 +---------- drivers/gpu/drm/amd/include/v11_structs.h | 4 ++-- drivers/gpu/drm/amd/include/v12_structs.h | 4 ++-- 6 files changed, 22 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ea5a1e6f01c3..3cdb5f8325aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -833,6 +833,7 @@ struct amdgpu_mqd_prop { uint64_t shadow_addr; uint64_t gds_bkup_addr; uint64_t csa_addr; + uint64_t fence_address; }; struct amdgpu_mqd { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 63b7c7bfcc4a..114284e65c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4134,6 +4134,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index ee65f4057872..1030f2985c7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -45,6 +45,7 @@ #include "nbif_v6_3_1.h" #include "mes_v12_0.h" #include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -3037,6 +3038,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); return 0; } @@ -4819,25 +4822,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 9c2fc8ae0d56..1dde099382ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -185,14 +185,6 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue) -{ - struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; - - mqd->fenceaddress_lo = lower_32_bits(queue->fence_drv->gpu_addr); - mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr); -} - static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct drm_amdgpu_userq_in *args_in, struct amdgpu_usermode_queue *queue) @@ -231,6 +223,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; userq_props->use_doorbell = true; userq_props->doorbell_index = queue->doorbell_index; + userq_props->fence_address = queue->fence_drv->gpu_addr; if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; @@ -307,8 +300,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } - mes_userq_set_fence_space(queue); - /* FW expects WPTR BOs to be mapped into GART */ r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); if (r) { diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index 797ce6a1e56e..f6d4dab849eb 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -535,8 +535,8 @@ struct v11_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t fenceaddress_lo; // offset: 510 (0x1FE) - uint32_t fenceaddress_hi; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v11_sdma_mqd { diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h index 5eabab611b02..5787c8a51b7c 100644 --- a/drivers/gpu/drm/amd/include/v12_structs.h +++ b/drivers/gpu/drm/amd/include/v12_structs.h @@ -535,8 +535,8 @@ struct v12_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v12_sdma_mqd { -- cgit v1.2.3 From 9ed335d9398475675e26fcf92d7cf956e5b8a605 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 9 Dec 2024 19:20:56 +0530 Subject: drm/amdgpu: Add mqd for userq compute queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add mqd for userq compute queue for gfx11/gfx12 Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 4 ++++ drivers/gpu/drm/amd/include/v11_structs.h | 4 ++-- drivers/gpu/drm/amd/include/v12_structs.h | 4 ++-- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 114284e65c7c..d3b4018580c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4312,6 +4312,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 1030f2985c7e..0e95c1cfeca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3215,6 +3215,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index f6d4dab849eb..3728389fc3be 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -1118,8 +1118,8 @@ struct v11_compute_mqd { uint32_t reserved_443; // offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h index 5787c8a51b7c..03a35f8a65b0 100644 --- a/drivers/gpu/drm/amd/include/v12_structs.h +++ b/drivers/gpu/drm/amd/include/v12_structs.h @@ -1118,8 +1118,8 @@ struct v12_compute_mqd { uint32_t reserved_443; // offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) -- cgit v1.2.3 From f36e4876c8e1ea61c48f6048bfcfd540c7abef2f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:45:37 -0500 Subject: drm/amdgpu/gfx11: fix config guard s/CONFIG_DRM_AMD_USERQ_GFX/CONFIG_DRM_AMDGPU_NAVI3X_USERQ/ Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d3b4018580c6..80af0b92d6b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1639,7 +1639,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; -#ifdef CONFIG_DRM_AMD_USERQ_GFX +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif -- cgit v1.2.3 From 5ca4095960a8a8b7f24f02af6e5ce7b284e04559 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:50:11 -0500 Subject: drm/amdgpu: add userq firmware version checks Currently disabled until the firmwares are officially released. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 14 ++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 4 +++- 4 files changed, 21 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 80af0b92d6b1..f98c9c8253f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1623,8 +1623,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; case IP_VERSION(11, 0, 1): @@ -1640,8 +1643,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0e95c1cfeca6..58f17d95b2f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1420,8 +1420,11 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 9bc3c7a35d18..3aa4fec4d9e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1381,7 +1381,9 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 3514089acd94..8e3aa4536e5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1383,7 +1383,9 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif -- cgit v1.2.3 From 4220d2c7c41b8ea3fd154dc5678b05575653cba0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2025 13:47:33 -0400 Subject: drm/amdgpu: remove is_mes_queue flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was leftover from MES bring up when we had MES user queues in the kernel. It's no longer used so remove it. Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 112 +++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 14 --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 84 ++++------------ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 143 ++++++-------------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 67 +++---------- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 - drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 - drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 4 - drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 166 ++++++++++--------------------- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 83 +++++----------- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 83 +++++----------- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 164 ++++++++++-------------------- 15 files changed, 259 insertions(+), 697 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2ea98ec60220..802743efa3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -163,12 +163,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow = false; } - if (!ring->sched.ready && !ring->is_mes_queue) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } - if (vm && !job->vmid && !ring->is_mes_queue) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 59acdbfe28d8..426834806fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -187,14 +187,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } #define amdgpu_ring_get_gpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : \ - (ring->adev->wb.gpu_addr + offset * 4)) + (ring->adev->wb.gpu_addr + offset * 4) #define amdgpu_ring_get_cpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - (&ring->adev->wb.wb[offset])) + (&ring->adev->wb.wb[offset]) /** * amdgpu_ring_init - init driver ring struct. @@ -243,57 +239,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->sched_score = sched_score; ring->vmid_wait = dma_fence_get_stub(); - if (!ring->is_mes_queue) { - ring->idx = adev->num_rings++; - adev->rings[ring->idx] = ring; - } + ring->idx = adev->num_rings++; + adev->rings[ring->idx] = ring; r = amdgpu_fence_driver_init_ring(ring); if (r) return r; } - if (ring->is_mes_queue) { - ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RPTR_OFFS); - ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_WPTR_OFFS); - ring->fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_FENCE_OFFS); - ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_TRAIL_FENCE_OFFS); - ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_COND_EXE_OFFS); - } else { - r = amdgpu_device_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); - if (r) { - dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; } ring->fence_gpu_addr = @@ -353,18 +334,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->cached_rptr = 0; /* Allocate ring buffer */ - if (ring->is_mes_queue) { - int offset = 0; - - BUG_ON(ring->ring_size > PAGE_SIZE*4); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RING_OFFS); - ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - amdgpu_ring_clear_ring(ring); - - } else if (ring->ring_obj == NULL) { + if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, @@ -401,32 +371,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { /* Not to finish a ring which is not initialized */ - if (!(ring->adev) || - (!ring->is_mes_queue && !(ring->adev->rings[ring->idx]))) + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; ring->sched.ready = false; - if (!ring->is_mes_queue) { - amdgpu_device_wb_free(ring->adev, ring->rptr_offs); - amdgpu_device_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_device_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); - amdgpu_bo_free_kernel(&ring->ring_obj, - &ring->gpu_addr, - (void **)&ring->ring); - } else { - kfree(ring->fence_drv.fences); - } + amdgpu_bo_free_kernel(&ring->ring_obj, + &ring->gpu_addr, + (void **)&ring->ring); dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - if (!ring->is_mes_queue) - ring->adev->rings[ring->idx] = NULL; + ring->adev->rings[ring->idx] = NULL; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bb2b66385223..615c3d5c5a8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -301,11 +301,6 @@ struct amdgpu_ring { unsigned num_hw_submission; atomic_t *sched_score; - /* used for mes */ - bool is_mes_queue; - uint32_t hw_queue_id; - struct amdgpu_mes_ctx_data *mes_ctx; - bool is_sw_ring; unsigned int entry_index; /* store the cached rptr to restore after reset */ @@ -435,15 +430,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, ring->ring[offset] = cur - offset; } -#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) - -#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - NULL) - int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 807da4595893..c3c6f03190c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -76,22 +76,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; - if (ring->is_mes_queue) { - uint32_t offset = 0; + r = amdgpu_sdma_get_index_from_ring(ring, &index); - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - sdma[ring->idx].sdma_meta_data); - csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - r = amdgpu_sdma_get_index_from_ring(ring, &index); - - if (r || index > 31) - csa_mc_addr = 0; - else - csa_mc_addr = amdgpu_csa_vaddr(adev) + - AMDGPU_CSA_SDMA_OFFSET + - index * AMDGPU_CSA_SDMA_SIZE; - } + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; return csa_mc_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ce52b4d75e94..aa65d64fb15c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -817,7 +817,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + if (ring->funcs->emit_gds_switch && gds_switch_needed) { amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f98c9c8253f8..23f4d097fa8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -612,33 +612,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -665,12 +650,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -5757,10 +5740,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -5780,10 +5759,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -5841,8 +5816,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -5870,10 +5844,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -6102,28 +6073,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 58f17d95b2f5..1c7022103c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -506,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -559,12 +544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -4252,45 +4235,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -4315,42 +4270,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx12 now */ - } + BUG(); /* only DOORBELL method supported on gfx12 now */ } } @@ -4397,10 +4324,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4420,10 +4343,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4463,8 +4382,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -4492,10 +4410,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c6125abddbb9..e62c77b3934a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5466,16 +5466,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_ce_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); @@ -5498,16 +5490,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_de_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = IB_COMPLETION_STATUS_PREEMPTED; @@ -5697,19 +5681,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -5795,28 +5769,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v9_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); if (usegds) { de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 95d894a231fc..8ae4c031162b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -428,10 +428,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index ad099f136f84..5c91d4445418 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -393,10 +393,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 05c026d0b0d9..6e15cff6d548 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -413,10 +413,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 0dce59f4f6e2..e1348b6d9c6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -369,67 +369,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -575,11 +544,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -1046,33 +1013,22 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1085,10 +1041,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1100,8 +1053,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1124,38 +1076,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, - AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1183,10 +1121,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1197,8 +1132,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b39a03ff0c1..964f12afac9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -394,11 +394,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if ((flags & AMDGPU_FENCE_FLAG_INT)) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -903,33 +901,22 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -942,10 +929,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -957,8 +941,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -981,37 +964,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1039,10 +1008,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1053,8 +1019,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3aa4fec4d9e4..00dd7bfff01a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -377,11 +377,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -921,33 +919,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -960,10 +947,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -975,8 +959,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -999,37 +982,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1057,10 +1026,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1071,8 +1037,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 8e3aa4536e5a..99744728f5cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -205,66 +205,39 @@ static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -408,11 +381,9 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -965,33 +936,22 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1004,10 +964,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1019,8 +976,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1043,37 +999,23 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1101,10 +1043,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1115,8 +1054,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } -- cgit v1.2.3 From 9983ed969365329e22f3ea00838a6ad4afb65539 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 12:09:12 -0400 Subject: drm/amdgpu/gfx11: clean up and consolidate sw_init With the ME details fixed, we can now consolidate this state. Also split out the userq setup into a separate switch statement so that we can set them per IP version when the firmwares are ready. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 23f4d097fa8c..29793caa6466 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1597,14 +1597,35 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; + break; + default: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0) { @@ -1619,12 +1640,6 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 2; - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0) { @@ -1634,12 +1649,6 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) #endif break; default: - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 8; break; } -- cgit v1.2.3 From 1e63ebc0d443c3181ec725826f0abb8b72e9a8c3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 12:16:26 -0500 Subject: drm/amdgpu/gfx11: add support for disable_kq Plumb in support for disabling kernel queues in GFX11. We have to bring up a GFX queue briefly in order to initialize the clear state. After that we can disable it. v2: use ring counts per Felix' suggestion v3: fix stream fault handler, enable EOP interrupts v4: fix MEC interrupt offset (Sunil) Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 191 +++++++++++++++++++++++---------- 1 file changed, 136 insertions(+), 55 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 29793caa6466..91d29f482c3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1156,6 +1156,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; + if (adev->gfx.disable_kq) { + ring->no_scheduler = true; + ring->no_user_submission = true; + } if (!ring_id) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; @@ -1588,7 +1592,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - int i, j, k, r, ring_id = 0; + int i, j, k, r, ring_id; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ @@ -1745,37 +1749,42 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v11_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + ring_id = 0; + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v11_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, - k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) + continue; - r = gfx_v11_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v11_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -4567,11 +4576,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) return r; } - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->gfx.disable_kq) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we don't want to set ring->ready */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + } + if (amdgpu_async_gfx_ring) + amdgpu_gfx_disable_kgq(adev, 0); + } else { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } for (i = 0; i < adev->gfx.num_compute_rings; i++) { @@ -4780,6 +4801,46 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + if (adev->gfx.disable_kq) { + unsigned int irq_type; + int m, p, r; + + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + return 0; +} + static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -4789,9 +4850,11 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v11_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { - if (amdgpu_async_gfx_ring) { + if (amdgpu_async_gfx_ring && + !adev->gfx.disable_kq) { if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } @@ -5117,11 +5180,22 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + if (amdgpu_disable_kq == 1) + adev->gfx.disable_kq = true; + adev->gfx.funcs = &gfx_v11_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + /* We need one GFX ring temporarily to set up + * the clear state. + */ + adev->gfx.num_gfx_rings = 1; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v11_0_set_kiq_pm4_funcs(adev); gfx_v11_0_set_ring_funcs(adev); @@ -5152,6 +5226,11 @@ static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); if (r) return r; + + r = gfx_v11_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -6510,27 +6589,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } -- cgit v1.2.3 From e62a8bc5d68718c0d1e0366e2e636dd41e7ee5e4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 10 Apr 2025 19:37:06 +0530 Subject: drm/amdgpu/gfx11: Add Cleaner Shader Support for GFX11.5.2/11.5.3 GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable the cleaner shader for additional GFX11.5.2/11.5.3 series GPUs to ensure data isolation among GPU tasks. The cleaner shader is tasked with clearing the Local Data Store (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs), which helps avoid data leakage and guarantees the accuracy of computational results. This update extends cleaner shader support to GFX11.5.2/11.5.3 GPUs, previously available for GFX11.0.3. It enhances security by clearing GPU memory between processes and maintains a consistent GPU state across KGD and KFD workloads. Cc: Mario Sopena-Novales Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 91d29f482c3c..06ad10d06ca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1688,6 +1688,34 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 2): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 12 && + adev->gfx.pfp_fw_version >= 15 && + adev->gfx.mec_fw_version >= 15) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 7 && + adev->gfx.pfp_fw_version >= 8 && + adev->gfx.mec_fw_version >= 8) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; -- cgit v1.2.3 From 1d9bff4cf8c53d33ee2ff1b11574e5da739ce61c Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 11 Apr 2025 17:40:26 +0530 Subject: drm/amdgpu: Use the right function for hdp flush There are a few prechecks made before HDP flush like a flush is not required on APU bare metal. Using hdp callback directly bypasses those checks. Use amdgpu_device_flush_hdp which takes care of prechecks. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 2 +- 10 files changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f5dcb72a6bf5..00eb4cfecf8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6172,7 +6172,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -6250,7 +6250,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -6327,7 +6327,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -6702,7 +6702,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 06ad10d06ca1..ac90f823e596 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -2509,7 +2509,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2553,7 +2553,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2598,7 +2598,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -3234,7 +3234,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -3452,7 +3452,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -4648,7 +4648,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 2474006b1a34..f347921fa909 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2389,7 +2389,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -2533,7 +2533,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -3503,7 +3503,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index b6ac4c7adc8a..7648e977b44b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -965,7 +965,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) adev->hdp.funcs->init_registers(adev); /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 5c91d4445418..7f5ca170f141 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -895,7 +895,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index d544419e3b44..b645d3e6a6c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -297,7 +297,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -877,7 +877,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8d3560314e5b..53050176c244 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2425,7 +2425,7 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->hdp.funcs->init_registers(adev); /* After HDP is initialized, flush HDP.*/ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bb5dfc410a66..215543575f47 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -533,7 +533,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 17f1ccd8bd53..f5f616ab20e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -610,7 +610,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 7c49c3f3c388..256288c6cd78 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -498,7 +498,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { -- cgit v1.2.3 From ac9984cee7e17fad030708f6462f272cf82a5f74 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 13 Apr 2025 10:16:58 -0400 Subject: drm/amdgpu/gfx11: properly reference EOP interrupts for userqs Regardless of whether we disable kernel queues, we need to take an extra reference to the pipe interrupts for user queues to make sure they stay enabled in case we disable them for kernel queues. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ac90f823e596..a528afe38e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4832,10 +4832,10 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, bool enable) { - if (adev->gfx.disable_kq) { - unsigned int irq_type; - int m, p, r; + unsigned int irq_type; + int m, p, r; + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { for (m = 0; m < adev->gfx.me.num_me; m++) { for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; @@ -4849,7 +4849,9 @@ static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, return r; } } + } + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { for (m = 0; m < adev->gfx.mec.num_mec; ++m) { for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4866,6 +4868,7 @@ static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, } } } + return 0; } -- cgit v1.2.3 From fb20954c9717d1d07d8b8b8f34ac2a2755aec5ff Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Apr 2025 14:18:03 -0400 Subject: drm/amdgpu/userq: rework driver parameter Replace disable_kq parameter with user_queue parameter. The parameter has the following logic: -1 = auto (ASIC specific default) 0 = user queues disabled 1 = user queues enabled and kernel queues enabled (if supported) 2 = user queues enabled and kernel queues disabled The default behavior (-1) is currently the same as 0 for current ASICs. To enable user queues (in addition to kernel queues) set user_queue=1. To enable user queues and disable kernel queues (to make all resources available to user queues), set user_queue=2. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 15 +++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 20 +++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 18 ++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 18 ++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 18 ++++++++++++++++-- 8 files changed, 77 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b156e31ac86a..3212fd78b012 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -271,7 +271,7 @@ extern int amdgpu_agp; extern int amdgpu_rebar; extern int amdgpu_wbrf; -extern int amdgpu_disable_kq; +extern int amdgpu_user_queue; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index a117cd95b9dc..e24b0c730baf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -242,7 +242,7 @@ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; int amdgpu_rebar = -1; /* auto */ -int amdgpu_disable_kq = -1; +int amdgpu_user_queue = -1; DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1114,12 +1114,15 @@ MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = en module_param_named(rebar, amdgpu_rebar, int, 0444); /** - * DOC: disable_kq (int) - * Disable kernel queues on systems that support user queues. - * (0 = kernel queues enabled, 1 = kernel queues disabled, -1 = auto (default setting)) + * DOC: user_queue (int) + * Enable user queues on systems that support user queues. + * -1 = auto (ASIC specific default) + * 0 = user queues disabled + * 1 = user queues enabled and kernel queues enabled (if supported) + * 2 = user queues enabled and kernel queues disabled */ -MODULE_PARM_DESC(disable_kq, "Disable kernel queues (-1 = auto (default), 0 = enable KQ, 1 = disable KQ)"); -module_param_named(disable_kq, amdgpu_disable_kq, int, 0444); +MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)"); +module_param_named(user_queue, amdgpu_user_queue, int, 0444); /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index caaddab31023..91dd365cb1e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -487,6 +487,7 @@ struct amdgpu_gfx { struct mutex workload_profile_mutex; bool disable_kq; + bool disable_uq; }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index a6c8f07a0da4..59778b978eb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -138,6 +138,7 @@ struct amdgpu_sdma { uint32_t supported_reset; struct list_head reset_callback_list; bool no_user_submission; + bool disable_uq; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index a528afe38e0b..70c38e53e691 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1632,7 +1632,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 3): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) { + if (0 && !adev->gfx.disable_uq) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } @@ -1646,7 +1646,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) { + if (0 && !adev->gfx.disable_uq) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } @@ -5211,8 +5211,22 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } adev->gfx.funcs = &gfx_v11_0_gfx_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 4c8958f91eda..1523f5b352c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1418,7 +1418,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) { + if (0 && !adev->gfx.disable_uq) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } @@ -3819,8 +3819,22 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } adev->gfx.funcs = &gfx_v12_0_gfx_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index c3d53974e7f5..6bb36187a53d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1269,8 +1269,22 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) @@ -1351,7 +1365,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) + if (0 && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index e1a6b1533850..943c6446a0a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1254,8 +1254,22 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) { @@ -1326,7 +1340,7 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) + if (0 && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif -- cgit v1.2.3 From 9486875408e775fb1c808744be761af2c0acfc46 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 21:08:56 -0500 Subject: drm/amdgpu/gfx11: add support for TMZ queues to mqd_init Set up TMZ for queues. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 70c38e53e691..496e83cb8917 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4142,6 +4142,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4296,6 +4298,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ -- cgit v1.2.3 From 61ca97e9590ceac8c96a40b8526332c1c36409dc Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 9 Apr 2025 15:41:59 +0530 Subject: drm/amdgpu/gfx11: Add fw minimum version check for usermode queue This patch is load usermode queue based on FW support for gfx11. CP Ucode FW version: [PFP = 2530, ME = 2390, MEC = 2600, MES = 120] v2: Addressed review comments from Alex. - Just check the firmware versions directly. v3: Firmware version checks only for Navi3x(by Alex). Cc: Alex Deucher Cc: Christian Koenig Cc: Shashank Sharma Cc: Sunil Khatri Acked-by: Alex Deucher Reviewed-by: Sunil Khatri Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 496e83cb8917..ac7ac58e25a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1631,8 +1631,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - /* add firmware version checks here */ - if (0 && !adev->gfx.disable_uq) { + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2390 && + adev->gfx.pfp_fw_version >= 2530 && + adev->gfx.mec_fw_version >= 2600 && + adev->mes.fw_version[0] >= 120) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } -- cgit v1.2.3 From 106172df6ece4fc0a53471c71e0d05e9140fa2cc Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 21 Apr 2025 16:12:19 -0600 Subject: drm/amdgpu/gfx: Use CSB helpers in gfx_v11_0_get_csb_buffer Part of the code in gfx_v11_0_get_csb_buffer can be removed in favor of some GFX CSB helpers. This commit removes the duplicated part for the GFX 11 CSB function. Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ac7ac58e25a6..2df11f4127cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -848,9 +848,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -858,37 +856,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) -- cgit v1.2.3 From 56801cb83c8c95ae23ea576570c75a01c1f07774 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 22 Apr 2025 19:29:03 +0530 Subject: drm/amdgpu: remove DRM_AMDGPU_NAVI3X_USERQ config for UQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM_AMDGPU_NAVI3X_USERQ config support is not required for usermode queue. v2: rebase. Cc: Arunpravin Paneer Selvam Reviewed-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 8 -------- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +------ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 5 +---- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 8 -------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 18 ------------------ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ---- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 --- 10 files changed, 4 insertions(+), 56 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 7b95221d2f3d..1a11cab741ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -96,14 +96,6 @@ config DRM_AMDGPU_WERROR Add -Werror to the build flags for amdgpu.ko. Only enable this if you are warning code for amdgpu.ko. -config DRM_AMDGPU_NAVI3X_USERQ - bool "Enable amdgpu usermode queues" - depends on DRM_AMDGPU - default n - help - Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute - workload submission. This feature is experimental and supported on GFX11+. - source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/amdkfd/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 8595e05c691b..87080c06e5fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -177,7 +177,7 @@ amdgpu-y += \ mes_v12_0.o \ # add GFX userqueue support -amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_userqueue.o +amdgpu-y += mes_userqueue.o # add UVD block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b96e0613ea7e..fe68ba9997ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3513,9 +3513,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); amdgpu_amdkfd_suspend(adev, false); -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ amdgpu_userq_suspend(adev); -#endif /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); @@ -5086,9 +5084,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) if (!adev->in_s0ix) { amdgpu_amdkfd_suspend(adev, adev->in_runpm); -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ amdgpu_userq_suspend(adev); -#endif } r = amdgpu_device_evict_resources(adev); @@ -5156,11 +5152,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + r = amdgpu_userq_resume(adev); if (r) goto exit; -#endif } r = amdgpu_device_ip_late_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e0cc2bb083cb..8f1a2f7b03c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1981,9 +1981,7 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, if (adev->gfx.userq_sch_req_count[idx] == 0) { cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); if (!adev->gfx.userq_sch_inactive[idx]) { -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx); -#endif if (adev->kfd.init_complete) amdgpu_amdkfd_stop_sched(adev, idx); adev->gfx.userq_sch_inactive[idx] = true; @@ -2041,9 +2039,8 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) /* Tell KFD to resume the runqueue */ WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]); WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]); -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + amdgpu_userq_start_sched_for_enforce_isolation(adev, idx); -#endif if (adev->kfd.init_complete) amdgpu_amdkfd_start_sched(adev, idx); adev->gfx.userq_sch_inactive[idx] = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index b0e8098a3988..451890ee3fb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -129,7 +129,6 @@ amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr) return ret; } -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ static struct amdgpu_usermode_queue * amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { @@ -520,13 +519,6 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, return r; } -#else -int amdgpu_userq_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - return -ENOTSUPP; -} -#endif static int amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index be068e8e37d1..3288c2ff692e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -216,7 +216,6 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); } -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) { *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); @@ -288,7 +287,6 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, return 0; } -#endif static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) { @@ -343,7 +341,6 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = { .release = amdgpu_userq_fence_release, }; -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /** * amdgpu_userq_fence_read_wptr - Read the userq wptr value * @@ -594,15 +591,7 @@ free_syncobj_handles: return r; } -#else -int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - return -ENOTSUPP; -} -#endif -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -968,10 +957,3 @@ free_bo_handles_read: return r; } -#else -int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - return -ENOTSUPP; -} -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2df11f4127cc..3f4ee4b3b0a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1606,7 +1606,6 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ if (!adev->gfx.disable_uq && adev->gfx.me_fw_version >= 2390 && adev->gfx.pfp_fw_version >= 2530 && @@ -1615,7 +1614,6 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } -#endif break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -1623,13 +1621,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0 && !adev->gfx.disable_uq) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } -#endif break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index dfa0830a4eb1..f09d96bfee16 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1416,7 +1416,6 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ if (!adev->gfx.disable_uq && adev->gfx.me_fw_version >= 2780 && adev->gfx.pfp_fw_version >= 2840 && @@ -1425,7 +1424,6 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } -#endif break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 6bb36187a53d..da5b5d64f137 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1363,11 +1363,10 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0 && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; -#endif + r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 943c6446a0a7..befe013b11a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1338,12 +1338,9 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); -#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0 && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; -#endif - return r; } -- cgit v1.2.3 From c2a3bac7c8ee95f23b235db8cd5520b9d574ff8b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 May 2025 14:48:48 +0530 Subject: drm/amdgpu: fix the indentation fix the indentation drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:6992 gfx_v11_ip_dump compiler: gcc-11 (Debian 11.3.0-12) 11.3.0 Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202505071619.7sHTLpNg-lkp@intel.com/ Signed-off-by: Sunil Khatri Reviewed-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3f4ee4b3b0a4..afd6d59164bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -7094,9 +7094,12 @@ static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) /* ME0 is for GFX so start from 1 for CP */ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME2_HEADER_DUMP)); + if (i && + gc_cp_reg_list_11[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, + regCP_MEC_ME2_HEADER_DUMP)); else adev->gfx.ip_dump_compute_queues[index + reg] = RREG32(SOC15_REG_ENTRY_OFFSET( -- cgit v1.2.3