 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c               |   8
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c               | 335
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h               |  85
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c                |   6
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c                |   7
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c                |  69
 drivers/gpu/drm/amd/{amdgpu => include}/mes_api_def.h | 167
 7 files changed, 526 insertions(+), 151 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 40df1e04d682..5d6b04fc6206 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -367,7 +367,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
/* create MQD for KIQ */
ring = &adev->gfx.kiq.ring;
- if (!ring->mqd_obj) {
+ if (!adev->enable_mes_kiq && !ring->mqd_obj) {
/* originally the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
* otherwise hypervisor triggers SAVE_VF failure after driver unload, which means MQD is
* deallocated and gart_unbind is called; to restrict divergence we decide to use VRAM domain for
@@ -464,7 +464,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i, r;
+ int i, r = 0;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@@ -479,7 +479,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
RESET_QUEUES, 0, 0);
- r = amdgpu_ring_test_helper(kiq_ring);
+
+ if (adev->gfx.kiq.ring.sched.ready)
+ r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
return r;
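
The two amdgpu_gfx.c hunks above share one idea: once MES owns the KIQ ring (mes_v10_1_hw_init() below clears kiq.ring.sched.ready), the driver must neither ring-test the KIQ nor treat the skipped test as an error. A minimal sketch of that guard, assuming only the fields shown in this patch:

static int example_disable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	int r = 0;	/* stays 0 when the ring test is skipped */

	/* ... unmap compute queues via kiq->pmf->kiq_unmap_queues() ... */

	if (kiq_ring->sched.ready)	/* false once MES has taken the KIQ */
		r = amdgpu_ring_test_helper(kiq_ring);
	return r;
}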
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 5be30bf68b0c..72bafba1c470 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -150,7 +150,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
- mutex_init(&adev->mes.mutex);
+ mutex_init(&adev->mes.mutex_hidden);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xffffff00;
@@ -166,8 +166,12 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
- for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
+ adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ else
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
+ }
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
adev->mes.agreegated_doorbells[i] = 0xffffffff;
@@ -207,7 +211,7 @@ error_ids:
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
return r;
}
@@ -219,7 +223,14 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
+{
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
}
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
@@ -228,13 +239,10 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_process *process;
int r;
- mutex_lock(&adev->mes.mutex);
-
/* allocate the mes process buffer */
process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
if (!process) {
DRM_ERROR("no more memory to create mes process\n");
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
@@ -244,18 +252,9 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
if (!process->doorbell_bitmap) {
DRM_ERROR("failed to allocate doorbell bitmap\n");
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes process to idr list */
- r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to lock pasid=%d\n", pasid);
- goto clean_up_memory;
- }
-
/* allocate the process context bo and map it */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
@@ -264,15 +263,29 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
&process->proc_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_pasid;
+ goto clean_up_memory;
}
memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ /* add the mes process to idr list */
+ r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to lock pasid=%d\n", pasid);
+ goto clean_up_ctx;
+ }
+
/* allocate the starting doorbell index of the process */
r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
if (r < 0) {
DRM_ERROR("failed to allocate doorbell for process\n");
- goto clean_up_ctx;
+ goto clean_up_pasid;
}
DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
@@ -283,19 +296,19 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
process->process_quantum = adev->mes.default_process_quantum;
process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
+clean_up_pasid:
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
clean_up_ctx:
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
-clean_up_pasid:
- idr_remove(&adev->mes.pasid_idr, pasid);
clean_up_memory:
kfree(process->doorbell_bitmap);
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return r;
}
@@ -308,18 +321,21 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
process = idr_find(&adev->mes.pasid_idr, pasid);
if (!process) {
DRM_WARN("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return;
}
- /* free all gangs in the process */
+ /* Remove all queues from hardware */
list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- /* free all queues in the gang */
list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
@@ -332,29 +348,35 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
&queue_input);
if (r)
DRM_WARN("failed to remove hardware queue\n");
+ }
+
+ idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ }
+ amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
+
+ /* free all memory allocated by the process */
+ list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
+ /* free all queues in the gang */
+ list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
+ amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
kfree(queue);
}
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
list_del(&gang->list);
kfree(gang);
- }
- amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
-
- idr_remove(&adev->mes.pasid_idr, pasid);
+ }
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
kfree(process->doorbell_bitmap);
kfree(process);
-
- mutex_unlock(&adev->mes.mutex);
}
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
@@ -365,34 +387,12 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_gang *gang;
int r;
- mutex_lock(&adev->mes.mutex);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_ERROR("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes gang buffer */
gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
if (!gang) {
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes gang to idr list */
- r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
- GFP_KERNEL);
- if (r < 0) {
- kfree(gang);
- mutex_unlock(&adev->mes.mutex);
- return r;
- }
-
- gang->gang_id = r;
- *gang_id = r;
-
/* allocate the gang context bo and map it to cpu space */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
@@ -401,10 +401,34 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
&gang->gang_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up;
+ goto clean_up_mem;
}
memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ process = idr_find(&adev->mes.pasid_idr, pasid);
+ if (!process) {
+ DRM_ERROR("pasid %d doesn't exist\n", pasid);
+ r = -EINVAL;
+ goto clean_up_ctx;
+ }
+
+ /* add the mes gang to idr list */
+ r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to allocate idr for gang\n");
+ goto clean_up_ctx;
+ }
+
+ gang->gang_id = r;
+ *gang_id = r;
+
INIT_LIST_HEAD(&gang->queue_list);
gang->process = process;
gang->priority = gprops->priority;
@@ -414,13 +438,16 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
list_add_tail(&gang->list, &process->gang_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up:
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+clean_up_ctx:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+clean_up_mem:
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return r;
}
@@ -428,29 +455,35 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
{
struct amdgpu_mes_gang *gang;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
if (!list_empty(&gang->queue_list)) {
DRM_ERROR("queue list is not empty\n");
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EBUSY;
}
idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ list_del(&gang->list);
+ amdgpu_mes_unlock(&adev->mes);
+
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
- list_del(&gang->list);
+
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
@@ -462,7 +495,11 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
struct mes_suspend_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
@@ -475,7 +512,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
@@ -487,7 +524,11 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
struct mes_resume_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
@@ -500,17 +541,16 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
-static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
struct amdgpu_mes_queue *q,
struct amdgpu_mes_queue_properties *p)
{
struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
u32 mqd_size = mqd_mgr->mqd_size;
- struct amdgpu_mqd_prop mqd_prop = {0};
int r;
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
@@ -523,6 +563,26 @@ static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
}
memset(q->mqd_cpu_ptr, 0, mqd_size);
+ r = amdgpu_bo_reserve(q->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto clean_up;
+
+ return 0;
+
+clean_up:
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
+ return r;
+}
+
+static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+ struct amdgpu_mes_queue *q,
+ struct amdgpu_mes_queue_properties *p)
+{
+ struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
+ struct amdgpu_mqd_prop mqd_prop = {0};
+
mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
@@ -535,27 +595,9 @@ static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
mqd_prop.hqd_active = false;
- r = amdgpu_bo_reserve(q->mqd_obj, false);
- if (unlikely(r != 0))
- goto clean_up;
-
mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
amdgpu_bo_unreserve(q->mqd_obj);
- return 0;
-
-clean_up:
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
- return r;
-}
-
-static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
-{
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
}
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
@@ -568,29 +610,38 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes queue buffer */
queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
if (!queue) {
- mutex_unlock(&adev->mes.mutex);
+ DRM_ERROR("Failed to allocate memory for queue\n");
return -ENOMEM;
}
+ /* Allocate the queue mqd */
+ r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
+ if (r)
+ goto clean_up_memory;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+ if (!gang) {
+ DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+ r = -EINVAL;
+ goto clean_up_mqd;
+ }
+
/* add the mes gang to idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
GFP_ATOMIC);
if (r < 0) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- goto clean_up_memory;
+ goto clean_up_mqd;
}
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
*queue_id = queue->queue_id = r;
@@ -603,13 +654,15 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
goto clean_up_queue_id;
/* initialize the queue mqd */
- r = amdgpu_mes_queue_init_mqd(adev, queue, qprops);
- if (r)
- goto clean_up_doorbell;
+ amdgpu_mes_queue_init_mqd(adev, queue, qprops);
/* add hw queue to mes */
queue_input.process_id = gang->process->pasid;
- queue_input.page_table_base_addr = gang->process->pd_gpu_addr;
+
+ queue_input.page_table_base_addr =
+ adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
+ adev->gmc.vram_start;
+
queue_input.process_va_start = 0;
queue_input.process_va_end =
(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
@@ -629,7 +682,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
if (r) {
DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
qprops->doorbell_off);
- goto clean_up_mqd;
+ goto clean_up_doorbell;
}
DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
@@ -645,11 +698,9 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
queue->gang = gang;
list_add_tail(&queue->list, &gang->queue_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up_mqd:
- amdgpu_mes_queue_free_mqd(queue);
clean_up_doorbell:
amdgpu_mes_queue_doorbell_free(adev, gang->process,
qprops->doorbell_off);
@@ -657,9 +708,11 @@ clean_up_queue_id:
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+clean_up_mqd:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_mes_queue_free_mqd(queue);
clean_up_memory:
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return r;
}
@@ -671,7 +724,11 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
struct mes_remove_queue_input queue_input;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
/* remove the mes gang from idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
@@ -679,7 +736,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
queue = idr_find(&adev->mes.queue_id_idr, queue_id);
if (!queue) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
DRM_ERROR("queue id %d doesn't exist\n", queue_id);
return -EINVAL;
}
@@ -699,15 +756,42 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
queue_id);
- amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
amdgpu_mes_queue_doorbell_free(adev, gang->process,
queue->doorbell_off);
+ amdgpu_mes_unlock(&adev->mes);
+
+ amdgpu_mes_queue_free_mqd(queue);
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct mes_unmap_legacy_queue_input queue_input;
+ int r;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
+
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ if (r)
+ DRM_ERROR("failed to unmap legacy queue\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+ return r;
+}
+
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
@@ -771,18 +855,22 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
struct amdgpu_mes_queue_properties qprops = {0};
int r, queue_id, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
pasid = gang->process->pasid;
ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
if (!ring) {
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -ENOMEM;
}
@@ -823,7 +911,7 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
dma_fence_wait(gang->process->vm->last_update, false);
dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
if (r)
@@ -850,7 +938,7 @@ clean_up_ring:
amdgpu_ring_fini(ring);
clean_up_memory:
kfree(ring);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return r;
}
@@ -1086,9 +1174,10 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
}
for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
- /* On sienna cichlid+, fw hasn't supported to map sdma queue. */
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
- i == AMDGPU_RING_TYPE_SDMA)
+ /* On GFX v10.3, fw doesn't support mapping SDMA queues yet. */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
+ adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
+ queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
continue;
r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
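
Note the page_table_base_addr change in amdgpu_mes_add_hw_queue() above: the caller now translates the page-directory bus address into an MC address itself, and the matching subtraction is removed from mes_v10_1_add_hw_queue() below. A hypothetical helper restating that arithmetic:

static inline uint64_t example_vram_bus_to_mc(struct amdgpu_device *adev,
					      uint64_t bus_addr)
{
	/* bus_addr is expected to lie within [gmc.vram_start, gmc.vram_end] */
	return adev->vm_manager.vram_base_offset + bus_addr -
	       adev->gmc.vram_start;
}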
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 548015bb6ee7..25590b301f25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -56,7 +56,7 @@ enum admgpu_mes_pipe {
struct amdgpu_mes {
struct amdgpu_device *adev;
- struct mutex mutex;
+ struct mutex mutex_hidden;
struct idr pasid_idr;
struct idr gang_id_idr;
@@ -109,9 +109,11 @@ struct amdgpu_mes {
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
+ uint32_t saved_flags;
/* initialize kiq pipe */
int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
@@ -198,6 +200,10 @@ struct mes_add_queue_input {
uint64_t wptr_addr;
uint32_t queue_type;
uint32_t paging;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
struct mes_remove_queue_input {
@@ -205,6 +211,16 @@ struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
+struct mes_unmap_legacy_queue_input {
+ enum amdgpu_unmap_queues_action action;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t trail_fence_addr;
+ uint64_t trail_fence_data;
+};
+
struct mes_suspend_gang_input {
bool suspend_all_gangs;
uint64_t gang_context_addr;
@@ -224,6 +240,9 @@ struct amdgpu_mes_funcs {
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
int (*suspend_gang)(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input);
@@ -232,6 +251,7 @@ struct amdgpu_mes_funcs {
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
@@ -255,6 +275,11 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
struct amdgpu_mes_ctx_data *ctx_data,
@@ -280,4 +305,62 @@ unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
uint32_t doorbell_index,
unsigned int doorbell_id);
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why reclaim is disabled while the MES lock is held:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is, that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_noreclaim_save/restore calls below guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_noreclaim_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems; we
+ * have no control over them or their lock dependencies. So the only workable
+ * solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
#endif /* __AMDGPU_MES_H__ */
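
A caller-side sketch of the discipline the comment above prescribes. The function is hypothetical; the ordering mirrors the reworked amdgpu_mes_create_process(): allocate (which may enter reclaim) before taking the MES lock, keep only IDR/list updates inside the critical section, and free memory only after unlocking.

static int example_register_object(struct amdgpu_device *adev, int id)
{
	void *obj;
	int r;

	/* kzalloc() may trigger reclaim, so allocate before locking */
	obj = kzalloc(64, GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	amdgpu_mes_lock(&adev->mes);	/* reclaim is disabled from here */
	r = idr_alloc(&adev->mes.pasid_idr, obj, id, id + 1, GFP_KERNEL);
	amdgpu_mes_unlock(&adev->mes);

	if (r < 0) {
		kfree(obj);	/* clean up outside the lock */
		return r;
	}
	return 0;
}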
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9042e0b480ce..3c4f2a94ad9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3551,8 +3551,14 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+ if (!adev->gfx.kiq.ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(action) |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index b80b5f70ecf1..61db2a378008 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -274,7 +274,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
- if (adev->gfx.kiq.ring.sched.ready &&
+ if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
@@ -411,6 +411,10 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
+ /* MES fw manages IH_VMID_x_LUT updating */
+ if (ring->is_mes_queue)
+ return;
+
if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
else
@@ -803,6 +807,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
}
amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
r = adev->mmhub.funcs->gart_enable(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 622aa17b18e7..030a92b3a0da 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -133,6 +133,8 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
{
struct amdgpu_device *adev = mes->adev;
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
@@ -141,8 +143,7 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_add_queue_pkt.process_id = input->process_id;
- mes_add_queue_pkt.page_table_base_addr =
- input->page_table_base_addr - adev->gmc.vram_start;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
mes_add_queue_pkt.process_va_start = input->process_va_start;
mes_add_queue_pkt.process_va_end = input->process_va_end;
mes_add_queue_pkt.process_quantum = input->process_quantum;
@@ -159,6 +160,10 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
@@ -192,6 +197,44 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}
+static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ if (input->queue_type == AMDGPU_RING_TYPE_GFX)
+ mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
+ else
+ mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
+ }
+
+ mes_remove_queue_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_remove_queue_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+}
+
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input)
{
@@ -254,9 +297,21 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.agreegated_doorbells[i] =
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
mes->agreegated_doorbells[i];
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_set_hw_res_pkt.api_status.api_completion_fence_value =
@@ -269,6 +324,7 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.add_hw_queue = mes_v10_1_add_hw_queue,
.remove_hw_queue = mes_v10_1_remove_hw_queue,
+ .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
.suspend_gang = mes_v10_1_suspend_gang,
.resume_gang = mes_v10_1_resume_gang,
};
@@ -1097,6 +1153,13 @@ static int mes_v10_1_hw_init(void *handle)
goto failure;
}
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq.ring.sched.ready = false;
+
return 0;
failure:
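
mes_v10_1_unmap_legacy_queue() above distinguishes two flavors selected by the caller's action: PREEMPT_QUEUES_NO_UNMAP preempts the queue and has the firmware write a trailing fence, while any other action (e.g. RESET_QUEUES) unmaps the legacy or KIQ utility queue outright. A hedged usage sketch (the caller function is hypothetical; the actions and the (0, 0) fence arguments follow this patch):

static int example_preempt_then_unmap(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      u64 fence_gpu_addr, u64 fence_seq)
{
	int r;

	/* preempt: MES writes fence_seq to fence_gpu_addr when done */
	r = amdgpu_mes_unmap_legacy_queue(adev, ring, PREEMPT_QUEUES_NO_UNMAP,
					  fence_gpu_addr, fence_seq);
	if (r)
		return r;

	/* later, tear the queue down for good; no trailing fence needed */
	return amdgpu_mes_unmap_legacy_queue(adev, ring, RESET_QUEUES, 0, 0);
}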
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h b/drivers/gpu/drm/amd/include/mes_api_def.h
index 3f4fca5fd1da..b2a8503feec0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_api_def.h
@@ -59,6 +59,8 @@ enum MES_SCH_API_OPCODE {
MES_SCH_API_PROGRAM_GDS = 12,
MES_SCH_API_SET_DEBUG_VMID = 13,
MES_SCH_API_MISC = 14,
+ MES_SCH_API_UPDATE_ROOT_PAGE_TABLE = 15,
+ MES_SCH_API_AMD_LOG = 16,
MES_SCH_API_MAX = 0xFF
};
@@ -116,7 +118,12 @@ enum { MAX_VMID_GCHUB = 16 };
enum { MAX_VMID_MMHUB = 16 };
enum MES_LOG_OPERATION {
- MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
+ MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0,
+ MES_LOG_OPERATION_QUEUE_NEW_WORK = 1,
+ MES_LOG_OPERATION_QUEUE_UNWAIT_SYNC_OBJECT = 2,
+ MES_LOG_OPERATION_QUEUE_NO_MORE_WORK = 3,
+ MES_LOG_OPERATION_QUEUE_WAIT_SYNC_OBJECT = 4,
+ MES_LOG_OPERATION_QUEUE_INVALID = 0xF,
};
enum MES_LOG_CONTEXT_STATE {
@@ -124,6 +131,7 @@ enum MES_LOG_CONTEXT_STATE {
MES_LOG_CONTEXT_STATE_RUNNING = 1,
MES_LOG_CONTEXT_STATE_READY = 2,
MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
+ MES_LOG_CONTEXT_STATE_INVALID = 0xF,
};
struct MES_LOG_CONTEXT_STATE_CHANGE {
@@ -131,6 +139,26 @@ struct MES_LOG_CONTEXT_STATE_CHANGE {
enum MES_LOG_CONTEXT_STATE new_context_state;
};
+struct MES_LOG_QUEUE_NEW_WORK {
+ uint64_t h_queue;
+ uint64_t reserved;
+};
+
+struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT {
+ uint64_t h_queue;
+ uint64_t h_sync_object;
+};
+
+struct MES_LOG_QUEUE_NO_MORE_WORK {
+ uint64_t h_queue;
+ uint64_t reserved;
+};
+
+struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT {
+ uint64_t h_queue;
+ uint64_t h_sync_object;
+};
+
struct MES_LOG_ENTRY_HEADER {
uint32_t first_free_entry_index;
uint32_t wraparound_count;
@@ -143,8 +171,12 @@ struct MES_LOG_ENTRY_DATA {
uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
uint32_t reserved_operation_type_bits;
union {
- struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
- uint64_t reserved_operation_data[2];
+ struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
+ struct MES_LOG_QUEUE_NEW_WORK queue_new_work;
+ struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT queue_unwait_sync_object;
+ struct MES_LOG_QUEUE_NO_MORE_WORK queue_no_more_work;
+ struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT queue_wait_sync_object;
+ uint64_t all[2];
};
};
@@ -153,6 +185,10 @@ struct MES_LOG_BUFFER {
struct MES_LOG_ENTRY_DATA entries[1];
};
+enum MES_SWIP_TO_HWIP_DEF {
+ MES_MAX_HWIP_SEGMENT = 6,
+};
+
union MESAPI_SET_HW_RESOURCES {
struct {
union MES_API_HEADER header;
@@ -163,14 +199,26 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
+ uint32_t aggregated_doorbells[AMD_PRIORITY_NUM_LEVELS];
uint64_t g_sch_ctx_gpu_mc_ptr;
uint64_t query_status_fence_gpu_mc_ptr;
+ uint32_t gc_base[MES_MAX_HWIP_SEGMENT];
+ uint32_t mmhub_base[MES_MAX_HWIP_SEGMENT];
+ uint32_t osssys_base[MES_MAX_HWIP_SEGMENT];
struct MES_API_STATUS api_status;
union {
struct {
uint32_t disable_reset : 1;
- uint32_t reserved : 31;
+ uint32_t use_different_vmid_compute : 1;
+ uint32_t disable_mes_log : 1;
+ uint32_t apply_mmhub_pgvm_invalidate_ack_loss_wa : 1;
+ uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
+ uint32_t second_gfx_pipe_enabled : 1;
+ uint32_t enable_level_process_quantum_check : 1;
+ uint32_t apply_cwsr_program_all_vmid_sq_shader_tba_registers_wa : 1;
+ uint32_t enable_mqd_active_poll : 1;
+ uint32_t disable_timer_int : 1;
+ uint32_t reserved : 22;
};
uint32_t uint32_t_all;
};
@@ -195,12 +243,16 @@ union MESAPI__ADD_QUEUE {
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
+ uint64_t h_context;
+ uint64_t h_queue;
enum MES_QUEUE_TYPE queue_type;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_mask;
+ uint64_t trap_handler_addr;
+ uint32_t vm_context_cntl;
struct {
uint32_t paging : 1;
@@ -208,7 +260,8 @@ union MESAPI__ADD_QUEUE {
uint32_t program_gds : 1;
uint32_t is_gang_suspended : 1;
uint32_t is_tmz_queue : 1;
- uint32_t reserved : 24;
+ uint32_t map_kiq_utility_queue : 1;
+ uint32_t reserved : 23;
};
struct MES_API_STATUS api_status;
};
@@ -223,10 +276,18 @@ union MESAPI__REMOVE_QUEUE {
uint64_t gang_context_addr;
struct {
- uint32_t unmap_legacy_gfx_queue : 1;
- uint32_t reserved : 31;
+ uint32_t unmap_legacy_gfx_queue : 1;
+ uint32_t unmap_kiq_utility_queue : 1;
+ uint32_t preempt_legacy_gfx_queue : 1;
+ uint32_t reserved : 29;
};
- struct MES_API_STATUS api_status;
+ struct MES_API_STATUS api_status;
+
+ uint32_t pipe_id;
+ uint32_t queue_id;
+
+ uint64_t tf_addr;
+ uint32_t tf_data;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -321,16 +382,45 @@ union MESAPI__RESUME {
union MESAPI__RESET {
struct {
- union MES_API_HEADER header;
+ union MES_API_HEADER header;
struct {
- uint32_t reset_queue : 1;
- uint32_t reserved : 31;
+ /* Only reset the queue given by doorbell_offset (not entire gang) */
+ uint32_t reset_queue_only : 1;
+ /* Hang detection first then reset any queues that are hung */
+ uint32_t hang_detect_then_reset : 1;
+ /* Only do hang detection (no reset) */
+ uint32_t hang_detect_only : 1;
+ /* Reset HP and LP kernel queues not managed by MES */
+ uint32_t reset_legacy_gfx : 1;
+ uint32_t reserved : 28;
};
- uint64_t gang_context_addr;
- uint32_t doorbell_offset; /* valid only if reset_queue = true */
- struct MES_API_STATUS api_status;
+ uint64_t gang_context_addr;
+
+ /* valid only if reset_queue_only = true */
+ uint32_t doorbell_offset;
+
+ /* valid only if hang_detect_then_reset = true */
+ uint64_t doorbell_offset_addr;
+ enum MES_QUEUE_TYPE queue_type;
+
+ /* valid only if reset_legacy_gfx = true */
+ uint32_t pipe_id_lp;
+ uint32_t queue_id_lp;
+ uint32_t vmid_id_lp;
+ uint64_t mqd_mc_addr_lp;
+ uint32_t doorbell_offset_lp;
+ uint64_t wptr_addr_lp;
+
+ uint32_t pipe_id_hp;
+ uint32_t queue_id_hp;
+ uint32_t vmid_id_hp;
+ uint64_t mqd_mc_addr_hp;
+ uint32_t doorbell_offset_hp;
+ uint64_t wptr_addr_hp;
+
+ struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -408,6 +498,8 @@ union MESAPI__SET_DEBUG_VMID {
enum MESAPI_MISC_OPCODE {
MESAPI_MISC__MODIFY_REG,
+ MESAPI_MISC__INV_GART,
+ MESAPI_MISC__QUERY_STATUS,
MESAPI_MISC__MAX,
};
@@ -420,6 +512,21 @@ enum MODIFY_REG_SUBCODE {
enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
+struct MODIFY_REG {
+ enum MODIFY_REG_SUBCODE subcode;
+ uint32_t reg_offset;
+ uint32_t reg_value;
+};
+
+struct INV_GART {
+ uint64_t inv_range_va_start;
+ uint64_t inv_range_size;
+};
+
+struct QUERY_STATUS {
+ uint32_t context_id;
+};
+
union MESAPI__MISC {
struct {
union MES_API_HEADER header;
@@ -427,11 +534,9 @@ union MESAPI__MISC {
struct MES_API_STATUS api_status;
union {
- struct {
- enum MODIFY_REG_SUBCODE subcode;
- uint32_t reg_offset;
- uint32_t reg_value;
- } modify_reg;
+ struct MODIFY_REG modify_reg;
+ struct INV_GART inv_gart;
+ struct QUERY_STATUS query_status;
uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
};
};
@@ -439,5 +544,27 @@ union MESAPI__MISC {
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
+union MESAPI__UPDATE_ROOT_PAGE_TABLE {
+ struct {
+ union MES_API_HEADER header;
+ uint64_t page_table_base_addr;
+ uint64_t process_context_addr;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI_AMD_LOG {
+ struct {
+ union MES_API_HEADER header;
+ uint64_t p_buffer_memory;
+ uint64_t p_buffer_size_used;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
#pragma pack(pop)
#endif
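
To show how the log declarations above compose, a decoder sketch; it is illustrative only (the iteration bound and printf-style output are assumptions, and only fields visible in this header are touched):

#include <stdio.h>
#include <stdint.h>
/* assumes mes_api_def.h has been included for the MES_LOG_* types */

static void example_dump_mes_log(const struct MES_LOG_BUFFER *buf,
				 uint32_t num_entries)
{
	uint32_t i;

	for (i = 0; i < num_entries; i++) {
		const struct MES_LOG_ENTRY_DATA *e = &buf->entries[i];

		switch (e->operation_type) {
		case MES_LOG_OPERATION_QUEUE_NEW_WORK:
			printf("queue %#llx: new work\n",
			       (unsigned long long)e->queue_new_work.h_queue);
			break;
		case MES_LOG_OPERATION_QUEUE_WAIT_SYNC_OBJECT:
			printf("queue %#llx waits on sync object %#llx\n",
			       (unsigned long long)e->queue_wait_sync_object.h_queue,
			       (unsigned long long)e->queue_wait_sync_object.h_sync_object);
			break;
		default:
			break;
		}
	}
}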