-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  |   8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  | 335
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h  |  85
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |   7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v10_1.c   |  69
-rw-r--r--  drivers/gpu/drm/amd/include/mes_api_def.h (renamed from drivers/gpu/drm/amd/amdgpu/mes_api_def.h) | 167
7 files changed, 526 insertions(+), 151 deletions(-)
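The central change in this series is how the MES (Micro Engine Scheduler) lock is taken. The mutex is renamed to mutex_hidden so that callers must go through amdgpu_mes_lock()/amdgpu_mes_unlock(), which additionally enter a no-reclaim context for the duration of the hold; the long comment added to amdgpu_mes.h below explains why (the lock is also taken from MMU notifiers, which can run in reclaim-FS context). What follows is a minimal, self-contained sketch of the same pattern; the struct and function names here are illustrative, not part of the driver:

        #include <linux/mutex.h>
        #include <linux/sched/mm.h>

        struct sched_lock {
                /* "hidden" so users cannot take the mutex without the wrapper */
                struct mutex mutex_hidden;
                /* memalloc flags saved while the lock is held */
                unsigned int saved_flags;
        };

        static inline void sched_lock_acquire(struct sched_lock *l)
        {
                mutex_lock(&l->mutex_hidden);
                /* no memory reclaim may be triggered while the lock is held */
                l->saved_flags = memalloc_noreclaim_save();
        }

        static inline void sched_lock_release(struct sched_lock *l)
        {
                memalloc_noreclaim_restore(l->saved_flags);
                mutex_unlock(&l->mutex_hidden);
        }

With reclaim disabled under the lock, an MMU notifier that runs in reclaim context and takes the same lock cannot recurse into reclaim and deadlock; the remaining requirement, enforced by the refactoring of amdgpu_mes.c below, is that no other lock (for example a reservation lock) is ever taken while the MES lock is held.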
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 40df1e04d682..5d6b04fc6206 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -367,7 +367,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, /* create MQD for KIQ */ ring = &adev->gfx.kiq.ring; - if (!ring->mqd_obj) { + if (!adev->enable_mes_kiq && !ring->mqd_obj) { /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for @@ -464,7 +464,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r; + int i, r = 0; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -479,7 +479,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_compute_rings; i++) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], RESET_QUEUES, 0, 0); - r = amdgpu_ring_test_helper(kiq_ring); + + if (adev->gfx.kiq.ring.sched.ready) + r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&adev->gfx.kiq.ring_lock); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 5be30bf68b0c..72bafba1c470 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -150,7 +150,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - mutex_init(&adev->mes.mutex); + mutex_init(&adev->mes.mutex_hidden); adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; @@ -166,8 +166,12 @@ int amdgpu_mes_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; - for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { + if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0)) + adev->mes.sdma_hqd_mask[i] = i ? 
0 : 0x3fc; + else + adev->mes.sdma_hqd_mask[i] = 0xfc; + } for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) adev->mes.agreegated_doorbells[i] = 0xffffffff; @@ -207,7 +211,7 @@ error_ids: idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); ida_destroy(&adev->mes.doorbell_ida); - mutex_destroy(&adev->mes.mutex); + mutex_destroy(&adev->mes.mutex_hidden); return r; } @@ -219,7 +223,14 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); ida_destroy(&adev->mes.doorbell_ida); - mutex_destroy(&adev->mes.mutex); + mutex_destroy(&adev->mes.mutex_hidden); +} + +static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) +{ + amdgpu_bo_free_kernel(&q->mqd_obj, + &q->mqd_gpu_addr, + &q->mqd_cpu_ptr); } int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, @@ -228,13 +239,10 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, struct amdgpu_mes_process *process; int r; - mutex_lock(&adev->mes.mutex); - /* allocate the mes process buffer */ process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); if (!process) { DRM_ERROR("no more memory to create mes process\n"); - mutex_unlock(&adev->mes.mutex); return -ENOMEM; } @@ -244,18 +252,9 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, if (!process->doorbell_bitmap) { DRM_ERROR("failed to allocate doorbell bitmap\n"); kfree(process); - mutex_unlock(&adev->mes.mutex); return -ENOMEM; } - /* add the mes process to idr list */ - r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to lock pasid=%d\n", pasid); - goto clean_up_memory; - } - /* allocate the process context bo and map it */ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, @@ -264,15 +263,29 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, &process->proc_ctx_cpu_ptr); if (r) { DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_pasid; + goto clean_up_memory; } memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. 
+ */ + amdgpu_mes_lock(&adev->mes); + + /* add the mes process to idr list */ + r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, + GFP_KERNEL); + if (r < 0) { + DRM_ERROR("failed to lock pasid=%d\n", pasid); + goto clean_up_ctx; + } + /* allocate the starting doorbell index of the process */ r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index); if (r < 0) { DRM_ERROR("failed to allocate doorbell for process\n"); - goto clean_up_ctx; + goto clean_up_pasid; } DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index); @@ -283,19 +296,19 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, process->process_quantum = adev->mes.default_process_quantum; process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return 0; +clean_up_pasid: + idr_remove(&adev->mes.pasid_idr, pasid); + amdgpu_mes_unlock(&adev->mes); clean_up_ctx: amdgpu_bo_free_kernel(&process->proc_ctx_bo, &process->proc_ctx_gpu_addr, &process->proc_ctx_cpu_ptr); -clean_up_pasid: - idr_remove(&adev->mes.pasid_idr, pasid); clean_up_memory: kfree(process->doorbell_bitmap); kfree(process); - mutex_unlock(&adev->mes.mutex); return r; } @@ -308,18 +321,21 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) unsigned long flags; int r; - mutex_lock(&adev->mes.mutex); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); process = idr_find(&adev->mes.pasid_idr, pasid); if (!process) { DRM_WARN("pasid %d doesn't exist\n", pasid); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return; } - /* free all gangs in the process */ + /* Remove all queues from hardware */ list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - /* free all queues in the gang */ list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { spin_lock_irqsave(&adev->mes.queue_id_lock, flags); idr_remove(&adev->mes.queue_id_idr, queue->queue_id); @@ -332,29 +348,35 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) &queue_input); if (r) DRM_WARN("failed to remove hardware queue\n"); + } + + idr_remove(&adev->mes.gang_id_idr, gang->gang_id); + } + amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); + idr_remove(&adev->mes.pasid_idr, pasid); + amdgpu_mes_unlock(&adev->mes); + + /* free all memory allocated by the process */ + list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { + /* free all queues in the gang */ + list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { + amdgpu_mes_queue_free_mqd(queue); list_del(&queue->list); kfree(queue); } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); amdgpu_bo_free_kernel(&gang->gang_ctx_bo, &gang->gang_ctx_gpu_addr, &gang->gang_ctx_cpu_ptr); list_del(&gang->list); kfree(gang); - } - amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); - - idr_remove(&adev->mes.pasid_idr, pasid); + } amdgpu_bo_free_kernel(&process->proc_ctx_bo, &process->proc_ctx_gpu_addr, &process->proc_ctx_cpu_ptr); kfree(process->doorbell_bitmap); kfree(process); - - mutex_unlock(&adev->mes.mutex); } int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, @@ -365,34 +387,12 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, struct amdgpu_mes_gang *gang; int r; - mutex_lock(&adev->mes.mutex); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_ERROR("pasid %d doesn't exist\n", pasid); - 
mutex_unlock(&adev->mes.mutex); - return -EINVAL; - } - /* allocate the mes gang buffer */ gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); if (!gang) { - mutex_unlock(&adev->mes.mutex); return -ENOMEM; } - /* add the mes gang to idr list */ - r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, - GFP_KERNEL); - if (r < 0) { - kfree(gang); - mutex_unlock(&adev->mes.mutex); - return r; - } - - gang->gang_id = r; - *gang_id = r; - /* allocate the gang context bo and map it to cpu space */ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, @@ -401,10 +401,34 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, &gang->gang_ctx_cpu_ptr); if (r) { DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up; + goto clean_up_mem; } memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); + + process = idr_find(&adev->mes.pasid_idr, pasid); + if (!process) { + DRM_ERROR("pasid %d doesn't exist\n", pasid); + r = -EINVAL; + goto clean_up_ctx; + } + + /* add the mes gang to idr list */ + r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, + GFP_KERNEL); + if (r < 0) { + DRM_ERROR("failed to allocate idr for gang\n"); + goto clean_up_ctx; + } + + gang->gang_id = r; + *gang_id = r; + INIT_LIST_HEAD(&gang->queue_list); gang->process = process; gang->priority = gprops->priority; @@ -414,13 +438,16 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, gang->inprocess_gang_priority = gprops->inprocess_gang_priority; list_add_tail(&gang->list, &process->gang_list); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return 0; -clean_up: - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); +clean_up_ctx: + amdgpu_mes_unlock(&adev->mes); + amdgpu_bo_free_kernel(&gang->gang_ctx_bo, + &gang->gang_ctx_gpu_addr, + &gang->gang_ctx_cpu_ptr); +clean_up_mem: kfree(gang); - mutex_unlock(&adev->mes.mutex); return r; } @@ -428,29 +455,35 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) { struct amdgpu_mes_gang *gang; - mutex_lock(&adev->mes.mutex); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); gang = idr_find(&adev->mes.gang_id_idr, gang_id); if (!gang) { DRM_ERROR("gang id %d doesn't exist\n", gang_id); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return -EINVAL; } if (!list_empty(&gang->queue_list)) { DRM_ERROR("queue list is not empty\n"); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return -EBUSY; } idr_remove(&adev->mes.gang_id_idr, gang->gang_id); + list_del(&gang->list); + amdgpu_mes_unlock(&adev->mes); + amdgpu_bo_free_kernel(&gang->gang_ctx_bo, &gang->gang_ctx_gpu_addr, &gang->gang_ctx_cpu_ptr); - list_del(&gang->list); + kfree(gang); - mutex_unlock(&adev->mes.mutex); return 0; } @@ -462,7 +495,11 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev) struct mes_suspend_gang_input input; int r, pasid; - mutex_lock(&adev->mes.mutex); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. 
+ */ + amdgpu_mes_lock(&adev->mes); idp = &adev->mes.pasid_idr; @@ -475,7 +512,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev) } } - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return 0; } @@ -487,7 +524,11 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) struct mes_resume_gang_input input; int r, pasid; - mutex_lock(&adev->mes.mutex); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); idp = &adev->mes.pasid_idr; @@ -500,17 +541,16 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) } } - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return 0; } -static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, +static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, struct amdgpu_mes_queue *q, struct amdgpu_mes_queue_properties *p) { struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; u32 mqd_size = mqd_mgr->mqd_size; - struct amdgpu_mqd_prop mqd_prop = {0}; int r; r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, @@ -523,6 +563,26 @@ static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, } memset(q->mqd_cpu_ptr, 0, mqd_size); + r = amdgpu_bo_reserve(q->mqd_obj, false); + if (unlikely(r != 0)) + goto clean_up; + + return 0; + +clean_up: + amdgpu_bo_free_kernel(&q->mqd_obj, + &q->mqd_gpu_addr, + &q->mqd_cpu_ptr); + return r; +} + +static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, + struct amdgpu_mes_queue *q, + struct amdgpu_mes_queue_properties *p) +{ + struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; + struct amdgpu_mqd_prop mqd_prop = {0}; + mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; @@ -535,27 +595,9 @@ static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; - r = amdgpu_bo_reserve(q->mqd_obj, false); - if (unlikely(r != 0)) - goto clean_up; - mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); amdgpu_bo_unreserve(q->mqd_obj); - return 0; - -clean_up: - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); - return r; -} - -static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) -{ - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); } int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, @@ -568,29 +610,38 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, unsigned long flags; int r; - mutex_lock(&adev->mes.mutex); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - mutex_unlock(&adev->mes.mutex); - return -EINVAL; - } - /* allocate the mes queue buffer */ queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); if (!queue) { - mutex_unlock(&adev->mes.mutex); + DRM_ERROR("Failed to allocate memory for queue\n"); return -ENOMEM; } + /* Allocate the queue mqd */ + r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); + if (r) + goto clean_up_memory; + + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. 
+ */ + amdgpu_mes_lock(&adev->mes); + + gang = idr_find(&adev->mes.gang_id_idr, gang_id); + if (!gang) { + DRM_ERROR("gang id %d doesn't exist\n", gang_id); + r = -EINVAL; + goto clean_up_mqd; + } + /* add the mes gang to idr list */ spin_lock_irqsave(&adev->mes.queue_id_lock, flags); r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, GFP_ATOMIC); if (r < 0) { spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - goto clean_up_memory; + goto clean_up_mqd; } spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); *queue_id = queue->queue_id = r; @@ -603,13 +654,15 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, goto clean_up_queue_id; /* initialize the queue mqd */ - r = amdgpu_mes_queue_init_mqd(adev, queue, qprops); - if (r) - goto clean_up_doorbell; + amdgpu_mes_queue_init_mqd(adev, queue, qprops); /* add hw queue to mes */ queue_input.process_id = gang->process->pasid; - queue_input.page_table_base_addr = gang->process->pd_gpu_addr; + + queue_input.page_table_base_addr = + adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - + adev->gmc.vram_start; + queue_input.process_va_start = 0; queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; @@ -629,7 +682,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, if (r) { DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", qprops->doorbell_off); - goto clean_up_mqd; + goto clean_up_doorbell; } DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " @@ -645,11 +698,9 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, queue->gang = gang; list_add_tail(&queue->list, &gang->queue_list); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return 0; -clean_up_mqd: - amdgpu_mes_queue_free_mqd(queue); clean_up_doorbell: amdgpu_mes_queue_doorbell_free(adev, gang->process, qprops->doorbell_off); @@ -657,9 +708,11 @@ clean_up_queue_id: spin_lock_irqsave(&adev->mes.queue_id_lock, flags); idr_remove(&adev->mes.queue_id_idr, queue->queue_id); spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); +clean_up_mqd: + amdgpu_mes_unlock(&adev->mes); + amdgpu_mes_queue_free_mqd(queue); clean_up_memory: kfree(queue); - mutex_unlock(&adev->mes.mutex); return r; } @@ -671,7 +724,11 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) struct mes_remove_queue_input queue_input; int r; - mutex_lock(&adev->mes.mutex); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. 
+ */ + amdgpu_mes_lock(&adev->mes); /* remove the mes gang from idr list */ spin_lock_irqsave(&adev->mes.queue_id_lock, flags); @@ -679,7 +736,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) queue = idr_find(&adev->mes.queue_id_idr, queue_id); if (!queue) { spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); DRM_ERROR("queue id %d doesn't exist\n", queue_id); return -EINVAL; } @@ -699,15 +756,42 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) DRM_ERROR("failed to remove hardware queue, queue id = %d\n", queue_id); - amdgpu_mes_queue_free_mqd(queue); list_del(&queue->list); amdgpu_mes_queue_doorbell_free(adev, gang->process, queue->doorbell_off); + amdgpu_mes_unlock(&adev->mes); + + amdgpu_mes_queue_free_mqd(queue); kfree(queue); - mutex_unlock(&adev->mes.mutex); return 0; } +int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + struct mes_unmap_legacy_queue_input queue_input; + int r; + + amdgpu_mes_lock(&adev->mes); + + queue_input.action = action; + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.trail_fence_addr = gpu_addr; + queue_input.trail_fence_data = seq; + + r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to unmap legacy queue\n"); + + amdgpu_mes_unlock(&adev->mes); + return r; +} + static void amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, struct amdgpu_ring *ring, @@ -771,18 +855,22 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, struct amdgpu_mes_queue_properties qprops = {0}; int r, queue_id, pasid; - mutex_lock(&adev->mes.mutex); + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); gang = idr_find(&adev->mes.gang_id_idr, gang_id); if (!gang) { DRM_ERROR("gang id %d doesn't exist\n", gang_id); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return -EINVAL; } pasid = gang->process->pasid; ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); if (!ring) { - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return -ENOMEM; } @@ -823,7 +911,7 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, dma_fence_wait(gang->process->vm->last_update, false); dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); if (r) @@ -850,7 +938,7 @@ clean_up_ring: amdgpu_ring_fini(ring); clean_up_memory: kfree(ring); - mutex_unlock(&adev->mes.mutex); + amdgpu_mes_unlock(&adev->mes); return r; } @@ -1086,9 +1174,10 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) } for (i = 0; i < ARRAY_SIZE(queue_types); i++) { - /* On sienna cichlid+, fw hasn't supported to map sdma queue. */ - if (adev->asic_type >= CHIP_SIENNA_CICHLID && - i == AMDGPU_RING_TYPE_SDMA) + /* On GFX v10.3, fw hasn't supported to map sdma queue. 
*/ + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) && + adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) && + queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) continue; r = amdgpu_mes_test_create_gang_and_queues(adev, pasid, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 548015bb6ee7..25590b301f25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -56,7 +56,7 @@ enum admgpu_mes_pipe { struct amdgpu_mes { struct amdgpu_device *adev; - struct mutex mutex; + struct mutex mutex_hidden; struct idr pasid_idr; struct idr gang_id_idr; @@ -109,9 +109,11 @@ struct amdgpu_mes { uint32_t query_status_fence_offs; uint64_t query_status_fence_gpu_addr; uint64_t *query_status_fence_ptr; + uint32_t saved_flags; /* initialize kiq pipe */ int (*kiq_hw_init)(struct amdgpu_device *adev); + int (*kiq_hw_fini)(struct amdgpu_device *adev); /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; @@ -198,6 +200,10 @@ struct mes_add_queue_input { uint64_t wptr_addr; uint32_t queue_type; uint32_t paging; + uint32_t gws_base; + uint32_t gws_size; + uint64_t tba_addr; + uint64_t tma_addr; }; struct mes_remove_queue_input { @@ -205,6 +211,16 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_unmap_legacy_queue_input { + enum amdgpu_unmap_queues_action action; + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t trail_fence_addr; + uint64_t trail_fence_data; +}; + struct mes_suspend_gang_input { bool suspend_all_gangs; uint64_t gang_context_addr; @@ -224,6 +240,9 @@ struct amdgpu_mes_funcs { int (*remove_hw_queue)(struct amdgpu_mes *mes, struct mes_remove_queue_input *input); + int (*unmap_legacy_queue)(struct amdgpu_mes *mes, + struct mes_unmap_legacy_queue_input *input); + int (*suspend_gang)(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input); @@ -232,6 +251,7 @@ struct amdgpu_mes_funcs { }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) +#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs); @@ -255,6 +275,11 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq); + int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, struct amdgpu_mes_ctx_data *ctx_data, @@ -280,4 +305,62 @@ unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( uint32_t doorbell_index, unsigned int doorbell_id); int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); + +/* + * MES lock can be taken in MMU notifiers. + * + * A bit more detail about why to set no-FS reclaim with MES lock: + * + * The purpose of the MMU notifier is to stop GPU access to memory so + * that the Linux VM subsystem can move pages around safely. This is + * done by preempting user mode queues for the affected process. When + * MES is used, MES lock needs to be taken to preempt the queues. + * + * The MMU notifier callback entry point in the driver is + * amdgpu_mn_invalidate_range_start_hsa. 
The relevant call chain from + * there is: + * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm -> + * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues + * + * The last part of the chain is a function pointer where we take the + * MES lock. + * + * The problem with taking locks in the MMU notifier is, that MMU + * notifiers can be called in reclaim-FS context. That's where the + * kernel frees up pages to make room for new page allocations under + * memory pressure. While we are running in reclaim-FS context, we must + * not trigger another memory reclaim operation because that would + * recursively reenter the reclaim code and cause a deadlock. The + * memalloc_nofs_save/restore calls guarantee that. + * + * In addition we also need to avoid lock dependencies on other locks taken + * under the MES lock, for example reservation locks. Here is a possible + * scenario of a deadlock: + * Thread A: takes and holds reservation lock | triggers reclaim-FS | + * MMU notifier | blocks trying to take MES lock + * Thread B: takes and holds MES lock | blocks trying to take reservation lock + * + * In this scenario Thread B gets involved in a deadlock even without + * triggering a reclaim-FS operation itself. + * To fix this and break the lock dependency chain you'd need to either: + * 1. protect reservation locks with memalloc_nofs_save/restore, or + * 2. avoid taking reservation locks under the MES lock. + * + * Reservation locks are taken all over the kernel in different subsystems, we + * have no control over them and their lock dependencies.So the only workable + * solution is to avoid taking other locks under the MES lock. + * As a result, make sure no reclaim-FS happens while holding this lock anywhere + * to prevent deadlocks when an MMU notifier runs in reclaim-FS context. + */ +static inline void amdgpu_mes_lock(struct amdgpu_mes *mes) +{ + mutex_lock(&mes->mutex_hidden); + mes->saved_flags = memalloc_noreclaim_save(); +} + +static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) +{ + memalloc_noreclaim_restore(mes->saved_flags); + mutex_unlock(&mes->mutex_hidden); +} #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9042e0b480ce..3c4f2a94ad9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3551,8 +3551,14 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq) { + struct amdgpu_device *adev = kiq_ring->adev; uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; + if (!adev->gfx.kiq.ring.sched.ready) { + amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); + return; + } + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ PACKET3_UNMAP_QUEUES_ACTION(action) | diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b80b5f70ecf1..61db2a378008 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -274,7 +274,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* For SRIOV run time, driver shouldn't access the register through MMIO * Directly use kiq to do the vm invalidation instead */ - if (adev->gfx.kiq.ring.sched.ready && + if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -411,6 +411,10 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; + /* MES fw manages IH_VMID_x_LUT updating */ + if (ring->is_mes_queue) + return; + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else @@ -803,6 +807,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) } amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); + r = adev->mmhub.funcs->gart_enable(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 622aa17b18e7..030a92b3a0da 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -133,6 +133,8 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, { struct amdgpu_device *adev = mes->adev; union MESAPI__ADD_QUEUE mes_add_queue_pkt; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -141,8 +143,7 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_add_queue_pkt.process_id = input->process_id; - mes_add_queue_pkt.page_table_base_addr = - input->page_table_base_addr - adev->gmc.vram_start; + mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; mes_add_queue_pkt.process_va_start = input->process_va_start; mes_add_queue_pkt.process_va_end = input->process_va_end; mes_add_queue_pkt.process_quantum = input->process_quantum; @@ -159,6 +160,10 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.paging = input->paging; + mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl; + mes_add_queue_pkt.gws_base = input->gws_base; + mes_add_queue_pkt.gws_size = input->gws_size; + mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.api_status.api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; @@ -192,6 +197,44 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); } +static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, + struct mes_unmap_legacy_queue_input *input) +{ + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + 
mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = 0; + + mes_remove_queue_pkt.pipe_id = input->pipe_id; + mes_remove_queue_pkt.queue_id = input->queue_id; + + if (input->action == PREEMPT_QUEUES_NO_UNMAP) { + mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; + mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; + mes_remove_queue_pkt.tf_data = + lower_32_bits(input->trail_fence_data); + } else { + if (input->queue_type == AMDGPU_RING_TYPE_GFX) + mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1; + else + mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; + } + + mes_remove_queue_pkt.api_status.api_completion_fence_addr = + mes->ring.fence_drv.gpu_addr; + mes_remove_queue_pkt.api_status.api_completion_fence_value = + ++mes->ring.fence_drv.sync_seq; + + return mes_v10_1_submit_pkt_and_poll_completion(mes, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); +} + static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { @@ -254,9 +297,21 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.agreegated_doorbells[i] = + mes_set_hw_res_pkt.aggregated_doorbells[i] = mes->agreegated_doorbells[i]; + for (i = 0; i < 5; i++) { + mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; + mes_set_hw_res_pkt.mmhub_base[i] = + adev->reg_offset[MMHUB_HWIP][0][i]; + mes_set_hw_res_pkt.osssys_base[i] = + adev->reg_offset[OSSSYS_HWIP][0][i]; + } + + mes_set_hw_res_pkt.disable_reset = 1; + mes_set_hw_res_pkt.disable_mes_log = 1; + mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.api_status.api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; mes_set_hw_res_pkt.api_status.api_completion_fence_value = @@ -269,6 +324,7 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) static const struct amdgpu_mes_funcs mes_v10_1_funcs = { .add_hw_queue = mes_v10_1_add_hw_queue, .remove_hw_queue = mes_v10_1_remove_hw_queue, + .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue, .suspend_gang = mes_v10_1_suspend_gang, .resume_gang = mes_v10_1_resume_gang, }; @@ -1097,6 +1153,13 @@ static int mes_v10_1_hw_init(void *handle) goto failure; } + /* + * Disable KIQ ring usage from the driver once MES is enabled. + * MES uses KIQ ring exclusively so driver cannot access KIQ ring + * with MES enabled. 
+ */ + adev->gfx.kiq.ring.sched.ready = false; + return 0; failure: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h b/drivers/gpu/drm/amd/include/mes_api_def.h index 3f4fca5fd1da..b2a8503feec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_api_def.h @@ -59,6 +59,8 @@ enum MES_SCH_API_OPCODE { MES_SCH_API_PROGRAM_GDS = 12, MES_SCH_API_SET_DEBUG_VMID = 13, MES_SCH_API_MISC = 14, + MES_SCH_API_UPDATE_ROOT_PAGE_TABLE = 15, + MES_SCH_API_AMD_LOG = 16, MES_SCH_API_MAX = 0xFF }; @@ -116,7 +118,12 @@ enum { MAX_VMID_GCHUB = 16 }; enum { MAX_VMID_MMHUB = 16 }; enum MES_LOG_OPERATION { - MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0 + MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0, + MES_LOG_OPERATION_QUEUE_NEW_WORK = 1, + MES_LOG_OPERATION_QUEUE_UNWAIT_SYNC_OBJECT = 2, + MES_LOG_OPERATION_QUEUE_NO_MORE_WORK = 3, + MES_LOG_OPERATION_QUEUE_WAIT_SYNC_OBJECT = 4, + MES_LOG_OPERATION_QUEUE_INVALID = 0xF, }; enum MES_LOG_CONTEXT_STATE { @@ -124,6 +131,7 @@ enum MES_LOG_CONTEXT_STATE { MES_LOG_CONTEXT_STATE_RUNNING = 1, MES_LOG_CONTEXT_STATE_READY = 2, MES_LOG_CONTEXT_STATE_READY_STANDBY = 3, + MES_LOG_CONTEXT_STATE_INVALID = 0xF, }; struct MES_LOG_CONTEXT_STATE_CHANGE { @@ -131,6 +139,26 @@ struct MES_LOG_CONTEXT_STATE_CHANGE { enum MES_LOG_CONTEXT_STATE new_context_state; }; +struct MES_LOG_QUEUE_NEW_WORK { + uint64_t h_queue; + uint64_t reserved; +}; + +struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT { + uint64_t h_queue; + uint64_t h_sync_object; +}; + +struct MES_LOG_QUEUE_NO_MORE_WORK { + uint64_t h_queue; + uint64_t reserved; +}; + +struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT { + uint64_t h_queue; + uint64_t h_sync_object; +}; + struct MES_LOG_ENTRY_HEADER { uint32_t first_free_entry_index; uint32_t wraparound_count; @@ -143,8 +171,12 @@ struct MES_LOG_ENTRY_DATA { uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */ uint32_t reserved_operation_type_bits; union { - struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change; - uint64_t reserved_operation_data[2]; + struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change; + struct MES_LOG_QUEUE_NEW_WORK queue_new_work; + struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT queue_unwait_sync_object; + struct MES_LOG_QUEUE_NO_MORE_WORK queue_no_more_work; + struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT queue_wait_sync_object; + uint64_t all[2]; }; }; @@ -153,6 +185,10 @@ struct MES_LOG_BUFFER { struct MES_LOG_ENTRY_DATA entries[1]; }; +enum MES_SWIP_TO_HWIP_DEF { + MES_MAX_HWIP_SEGMENT = 6, +}; + union MESAPI_SET_HW_RESOURCES { struct { union MES_API_HEADER header; @@ -163,14 +199,26 @@ union MESAPI_SET_HW_RESOURCES { uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES]; uint32_t gfx_hqd_mask[MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[MAX_SDMA_PIPES]; - uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS]; + uint32_t aggregated_doorbells[AMD_PRIORITY_NUM_LEVELS]; uint64_t g_sch_ctx_gpu_mc_ptr; uint64_t query_status_fence_gpu_mc_ptr; + uint32_t gc_base[MES_MAX_HWIP_SEGMENT]; + uint32_t mmhub_base[MES_MAX_HWIP_SEGMENT]; + uint32_t osssys_base[MES_MAX_HWIP_SEGMENT]; struct MES_API_STATUS api_status; union { struct { uint32_t disable_reset : 1; - uint32_t reserved : 31; + uint32_t use_different_vmid_compute : 1; + uint32_t disable_mes_log : 1; + uint32_t apply_mmhub_pgvm_invalidate_ack_loss_wa : 1; + uint32_t apply_grbm_remote_register_dummy_read_wa : 1; + uint32_t second_gfx_pipe_enabled : 1; + uint32_t enable_level_process_quantum_check : 1; + uint32_t apply_cwsr_program_all_vmid_sq_shader_tba_registers_wa : 1; + uint32_t 
enable_mqd_active_poll : 1; + uint32_t disable_timer_int : 1; + uint32_t reserved : 22; }; uint32_t uint32_t_all; }; @@ -195,12 +243,16 @@ union MESAPI__ADD_QUEUE { uint32_t doorbell_offset; uint64_t mqd_addr; uint64_t wptr_addr; + uint64_t h_context; + uint64_t h_queue; enum MES_QUEUE_TYPE queue_type; uint32_t gds_base; uint32_t gds_size; uint32_t gws_base; uint32_t gws_size; uint32_t oa_mask; + uint64_t trap_handler_addr; + uint32_t vm_context_cntl; struct { uint32_t paging : 1; @@ -208,7 +260,8 @@ union MESAPI__ADD_QUEUE { uint32_t program_gds : 1; uint32_t is_gang_suspended : 1; uint32_t is_tmz_queue : 1; - uint32_t reserved : 24; + uint32_t map_kiq_utility_queue : 1; + uint32_t reserved : 23; }; struct MES_API_STATUS api_status; }; @@ -223,10 +276,18 @@ union MESAPI__REMOVE_QUEUE { uint64_t gang_context_addr; struct { - uint32_t unmap_legacy_gfx_queue : 1; - uint32_t reserved : 31; + uint32_t unmap_legacy_gfx_queue : 1; + uint32_t unmap_kiq_utility_queue : 1; + uint32_t preempt_legacy_gfx_queue : 1; + uint32_t reserved : 29; }; - struct MES_API_STATUS api_status; + struct MES_API_STATUS api_status; + + uint32_t pipe_id; + uint32_t queue_id; + + uint64_t tf_addr; + uint32_t tf_data; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -321,16 +382,45 @@ union MESAPI__RESUME { union MESAPI__RESET { struct { - union MES_API_HEADER header; + union MES_API_HEADER header; struct { - uint32_t reset_queue : 1; - uint32_t reserved : 31; + /* Only reset the queue given by doorbell_offset (not entire gang) */ + uint32_t reset_queue_only : 1; + /* Hang detection first then reset any queues that are hung */ + uint32_t hang_detect_then_reset : 1; + /* Only do hang detection (no reset) */ + uint32_t hang_detect_only : 1; + /* Rest HP and LP kernel queues not managed by MES */ + uint32_t reset_legacy_gfx : 1; + uint32_t reserved : 28; }; - uint64_t gang_context_addr; - uint32_t doorbell_offset; /* valid only if reset_queue = true */ - struct MES_API_STATUS api_status; + uint64_t gang_context_addr; + + /* valid only if reset_queue_only = true */ + uint32_t doorbell_offset; + + /* valid only if hang_detect_then_reset = true */ + uint64_t doorbell_offset_addr; + enum MES_QUEUE_TYPE queue_type; + + /* valid only if reset_legacy_gfx = true */ + uint32_t pipe_id_lp; + uint32_t queue_id_lp; + uint32_t vmid_id_lp; + uint64_t mqd_mc_addr_lp; + uint32_t doorbell_offset_lp; + uint64_t wptr_addr_lp; + + uint32_t pipe_id_hp; + uint32_t queue_id_hp; + uint32_t vmid_id_hp; + uint64_t mqd_mc_addr_hp; + uint32_t doorbell_offset_hp; + uint64_t wptr_addr_hp; + + struct MES_API_STATUS api_status; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -408,6 +498,8 @@ union MESAPI__SET_DEBUG_VMID { enum MESAPI_MISC_OPCODE { MESAPI_MISC__MODIFY_REG, + MESAPI_MISC__INV_GART, + MESAPI_MISC__QUERY_STATUS, MESAPI_MISC__MAX, }; @@ -420,6 +512,21 @@ enum MODIFY_REG_SUBCODE { enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 }; +struct MODIFY_REG { + enum MODIFY_REG_SUBCODE subcode; + uint32_t reg_offset; + uint32_t reg_value; +}; + +struct INV_GART { + uint64_t inv_range_va_start; + uint64_t inv_range_size; +}; + +struct QUERY_STATUS { + uint32_t context_id; +}; + union MESAPI__MISC { struct { union MES_API_HEADER header; @@ -427,11 +534,9 @@ union MESAPI__MISC { struct MES_API_STATUS api_status; union { - struct { - enum MODIFY_REG_SUBCODE subcode; - uint32_t reg_offset; - uint32_t reg_value; - } modify_reg; + struct MODIFY_REG modify_reg; + struct INV_GART inv_gart; + struct QUERY_STATUS query_status; uint32_t 
data[MISC_DATA_MAX_SIZE_IN_DWORDS]; }; }; @@ -439,5 +544,27 @@ union MESAPI__MISC { uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; }; +union MESAPI__UPDATE_ROOT_PAGE_TABLE { + struct { + union MES_API_HEADER header; + uint64_t page_table_base_addr; + uint64_t process_context_addr; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI_AMD_LOG { + struct { + union MES_API_HEADER header; + uint64_t p_buffer_memory; + uint64_t p_buffer_size_used; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + #pragma pack(pop) #endif
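The refactoring of amdgpu_mes_create_process(), amdgpu_mes_add_gang() and amdgpu_mes_add_hw_queue() above all follows the same ordering rule: every allocation (kzalloc, BO creation and mapping) happens before the MES lock is taken, only the IDR and list bookkeeping happens under it, and the error-unwind labels are reordered to match. Below is a rough sketch of that shape, using plain mutex_lock() in place of the driver's amdgpu_mes_lock() wrapper and hypothetical names throughout:

        #include <linux/idr.h>
        #include <linux/mutex.h>
        #include <linux/slab.h>

        struct obj {
                void *ctx;      /* stands in for the context BO */
                int id;
        };

        static int add_object(struct idr *idr, struct mutex *lock,
                              size_t ctx_size, struct obj **out)
        {
                struct obj *o;
                int r;

                /*
                 * Allocations can trigger reclaim and take reservation
                 * locks, so they must complete before the scheduler lock
                 * is taken.
                 */
                o = kzalloc(sizeof(*o), GFP_KERNEL);
                if (!o)
                        return -ENOMEM;

                o->ctx = kzalloc(ctx_size, GFP_KERNEL);
                if (!o->ctx) {
                        r = -ENOMEM;
                        goto clean_up_obj;
                }

                /* Only lookup/insert bookkeeping under the lock. */
                mutex_lock(lock);
                r = idr_alloc(idr, o, 1, 0, GFP_KERNEL);
                if (r < 0)
                        goto clean_up_ctx;
                o->id = r;
                mutex_unlock(lock);

                *out = o;
                return 0;

        clean_up_ctx:
                mutex_unlock(lock);     /* drop the lock before freeing, as the patch does */
                kfree(o->ctx);
        clean_up_obj:
                kfree(o);
                return r;
        }

amdgpu_mes_destroy_process() applies the inverse order: it detaches everything from the IDRs and lists under the lock, drops the lock, and only then frees MQDs and context BOs, since amdgpu_bo_free_kernel() reserves the BO and must therefore not run under the MES lock.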