summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c452
1 files changed, 265 insertions, 187 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 24dce803a829..7bd506f06eb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,11 +40,11 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
-#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
/*
@@ -368,11 +368,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
/* cp header registers */
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
@@ -421,7 +416,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS)
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
};
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
@@ -448,7 +452,32 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI)
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
};
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
@@ -3673,7 +3702,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid);
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@@ -3789,12 +3818,65 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+ uint32_t tmp;
+
+ /* enter save mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -4036,7 +4118,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -4138,18 +4220,21 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
@@ -4173,6 +4258,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
@@ -4180,6 +4266,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
@@ -4238,9 +4325,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -4248,39 +4333,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
@@ -4694,6 +4755,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
int i, j, k, r, ring_id = 0;
int xcc_id = 0;
struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
@@ -4703,7 +4767,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(10, 1, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -4718,7 +4782,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 2;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -4733,6 +4797,69 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
}
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
default:
adev->gfx.enable_cleaner_shader = false;
break;
@@ -4793,7 +4920,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
@@ -4825,11 +4952,15 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
}
}
}
- /* TODO: Add queue reset mask when FW fully supports it */
+
adev->gfx.gfx_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
@@ -5952,7 +6083,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- if (adev->job_hang && !enable)
+ if (amdgpu_in_reset(adev) && !enable)
return 0;
for (i = 0; i < adev->usec_timeout; i++) {
@@ -6021,7 +6152,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -6099,7 +6230,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -6176,7 +6307,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -6551,7 +6682,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -6599,17 +6730,13 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
@@ -6762,22 +6889,9 @@ static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kgq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
return r;
}
@@ -7084,55 +7198,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v10_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v10_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
@@ -7448,6 +7531,8 @@ static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
@@ -7457,7 +7542,7 @@ static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
* otherwise the gfxoff disallowing will be failed to set.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
- gfx_v10_0_set_powergating_state(ip_block->adev, AMD_PG_STATE_UNGATE);
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
@@ -7492,9 +7577,9 @@ static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
return gfx_v10_0_hw_init(ip_block);
}
-static bool gfx_v10_0_is_idle(void *handle)
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -8345,10 +8430,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -8383,10 +8468,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -8412,9 +8497,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_FGCG */
@@ -8965,21 +9050,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
-static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned int vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- WREG32_SOC15(GC, 0, mmSQ_CMD, value);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-}
-
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -9441,7 +9511,9 @@ static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
amdgpu_ring_insert_nop(ring, num_nop - 1);
}
-static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
+static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
@@ -9451,15 +9523,14 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
u64 addr;
int r;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
-
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
spin_lock_irqsave(&kiq->ring_lock, flags);
- if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) {
+ if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
return -ENOMEM;
}
@@ -9479,37 +9550,37 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
0, 1, 0x20);
gfx_v10_0_ring_emit_reg_wait(kiq_ring,
SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff);
- kiq->pmf->kiq_map_queues(kiq_ring, ring);
amdgpu_ring_commit(kiq_ring);
-
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r)
return r;
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- DRM_ERROR("fail to resv mqd_obj\n");
+ r = gfx_v10_0_kgq_init_queue(ring, true);
+ if (r) {
+ DRM_ERROR("fail to init kgq\n");
return r;
}
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kgq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
}
- amdgpu_bo_unreserve(ring->mqd_obj);
- if (r) {
- DRM_ERROR("fail to unresv mqd_obj\n");
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
return r;
- }
- return amdgpu_ring_test_ring(ring);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
- unsigned int vmid)
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
@@ -9517,12 +9588,11 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
unsigned long flags;
int i, r;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
-
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -9533,9 +9603,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
0, 0);
amdgpu_ring_commit(kiq_ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r)
return r;
@@ -9558,20 +9627,9 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
return r;
}
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- dev_err(adev->dev, "fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(ring, true);
if (r) {
- dev_err(adev->dev, "fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "fail to init kcq\n");
return r;
}
@@ -9582,13 +9640,12 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
}
kiq->pmf->kiq_map_queues(kiq_ring, ring);
amdgpu_ring_commit(kiq_ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r)
return r;
- return amdgpu_ring_test_ring(ring);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
@@ -9620,9 +9677,14 @@ static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
- drm_printf(p, "%-50s \t 0x%08x\n",
- gc_cp_reg_list_10[reg].reg_name,
- adev->gfx.ip_dump_compute_queues[index + reg]);
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
@@ -9683,9 +9745,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
for (reg = 0; reg < reg_count; reg++) {
- adev->gfx.ip_dump_compute_queues[index + reg] =
- RREG32(SOC15_REG_ENTRY_OFFSET(
- gc_cp_reg_list_10[reg]));
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
}
index += reg_count;
}
@@ -9729,6 +9795,20 @@ static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
}
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -9800,12 +9880,11 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
.reset = gfx_v10_0_reset_kgq,
.emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
- .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
- .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -9841,12 +9920,11 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
.reset = gfx_v10_0_reset_kcq,
.emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
- .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
- .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {