diff options
author | Dave Airlie <airlied@redhat.com> | 2022-05-19 07:09:46 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-05-19 07:09:54 +0300 |
commit | 00df0514ab13813655a6fbaba85425f8f4780be2 (patch) | |
tree | 85e9e8908b702575ff4a7e4a58cf36dcca93c204 /drivers/gpu/drm/amd/amdkfd | |
parent | f8122500a039abeabfff41b0ad8b6a2c94c1107d (diff) | |
parent | 0223e516470aa0589da6c03e6d177c10594cabbd (diff) | |
download | linux-00df0514ab13813655a6fbaba85425f8f4780be2.tar.xz |
Merge tag 'amd-drm-next-5.19-2022-05-18' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.19-2022-05-18:
amdgpu:
- Misc code cleanups
- Additional SMU 13.x enablement
- Smartshift fixes
- GFX11 fixes
- Support for SMU 13.0.4
- SMU mutex fix
- Suspend/resume fix
amdkfd:
- static checker fix
- Doorbell/MMIO resource handling fix
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220518205621.5741-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 82 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 38 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 300 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c | 81 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 56 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 384 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 508 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 13 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 19 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 21 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/soc15_int.h | 3 |
16 files changed, 1473 insertions, 61 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 19cfbf9577b4..e758c2a24cd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -37,6 +37,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_mqd_manager_vi.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \ + $(AMDKFD_PATH)/kfd_mqd_manager_v11.o \ $(AMDKFD_PATH)/kfd_kernel_queue.o \ $(AMDKFD_PATH)/kfd_packet_manager.o \ $(AMDKFD_PATH)/kfd_packet_manager_vi.o \ @@ -47,10 +48,12 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \ + $(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \ $(AMDKFD_PATH)/kfd_interrupt.o \ $(AMDKFD_PATH)/kfd_events.o \ $(AMDKFD_PATH)/cik_event_interrupt.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ + $(AMDKFD_PATH)/kfd_int_process_v11.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 90c70adc946c..5e9adbc71bbd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1315,6 +1315,80 @@ static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache, return 1; } +#define KFD_MAX_CACHE_TYPES 6 + +static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, + struct kfd_gpu_cache_info *pcache_info) +{ + struct amdgpu_device *adev = kdev->adev; + int i = 0; + + /* TCP L1 Cache per CU */ + if (adev->gfx.config.gc_tcp_l1_size) { + pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + i++; + } + /* Scalar L1 Instruction Cache per SQC */ + if 
(adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { + pcache_info[i].cache_size = + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + i++; + } + /* Scalar L1 Data Cache per SQC */ + if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { + pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + i++; + } + /* GL1 Data Cache per SA */ + if (adev->gfx.config.gc_gl1c_per_sa && + adev->gfx.config.gc_gl1c_size_per_instance) { + pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * + adev->gfx.config.gc_gl1c_size_per_instance; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + i++; + } + /* L2 Data Cache per GPU (Total Tex Cache) */ + if (adev->gfx.config.gc_gl2c_per_gpu) { + pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; + pcache_info[i].cache_level = 2; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + i++; + } + /* L3 Data Cache per GPU */ + if (adev->gmc.mall_size) { + pcache_info[i].cache_size = adev->gmc.mall_size / 1024; + pcache_info[i].cache_level = 3; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + i++; + } + return i; +} + /* 
kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info * tables * @@ -1336,6 +1410,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, int *num_of_entries) { struct kfd_gpu_cache_info *pcache_info; + struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; int num_of_cache_types = 0; int i, j, k; int ct = 0; @@ -1444,6 +1519,13 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = yellow_carp_cache_info; num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); break; + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + pcache_info = cache_info; + num_of_cache_types = + kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index ed33e95c03e6..f1a225a20719 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -53,6 +53,7 @@ extern const struct kfd2kgd_calls arcturus_kfd2kgd; extern const struct kfd2kgd_calls aldebaran_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; +extern const struct kfd2kgd_calls gfx_v11_kfd2kgd; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); @@ -60,7 +61,7 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); -static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) +static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) { uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0]; @@ -72,6 +73,7 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) case IP_VERSION(4, 1, 2):/* RENOIR */ case IP_VERSION(5, 2, 1):/* VANGOGH */ case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ + case IP_VERSION(6, 0, 1): kfd->device_info.num_sdma_queues_per_engine = 2; break; case IP_VERSION(4, 2, 0):/* VEGA20 */ @@ 
-85,6 +87,8 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */ case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ + case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 2): kfd->device_info.num_sdma_queues_per_engine = 8; break; default: @@ -93,6 +97,19 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) sdma_version); kfd->device_info.num_sdma_queues_per_engine = 8; } + + switch (sdma_version) { + case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 1): + case IP_VERSION(6, 0, 2): + /* Reserve 1 for paging and 1 for gfx */ + kfd->device_info.num_reserved_sdma_queues_per_engine = 2; + /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ + kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL; + break; + default: + break; + } } static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) @@ -121,6 +138,11 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; break; + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; + break; default: dev_warn(kfd_device, "v9 event interrupt handler is set due to " "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version); @@ -145,7 +167,7 @@ static void kfd_device_info_init(struct kfd_dev *kfd, kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); kfd->device_info.supports_cwsr = true; - kfd_device_info_set_sdma_queue_num(kfd); + kfd_device_info_set_sdma_info(kfd); kfd_device_info_set_event_interrupt_class(kfd); @@ -346,6 +368,18 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) if (!vf) f2g = &gfx_v10_3_kfd2kgd; break; + case IP_VERSION(11, 0, 0): + gfx_target_version = 110000; + f2g = &gfx_v11_kfd2kgd; + 
break; + case IP_VERSION(11, 0, 1): + gfx_target_version = 110003; + f2g = &gfx_v11_kfd2kgd; + break; + case IP_VERSION(11, 0, 2): + gfx_target_version = 110002; + f2g = &gfx_v11_kfd2kgd; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 198672264492..e1797657b04c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -35,6 +35,7 @@ #include "cik_regs.h" #include "kfd_kernel_queue.h" #include "amdgpu_amdkfd.h" +#include "mes_api_def.h" /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 @@ -118,6 +119,11 @@ unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) dqm->dev->device_info.num_sdma_queues_per_engine; } +static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info.reserved_sdma_queues_bitmap; +} + void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -129,6 +135,152 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases); } +static void kfd_hws_hang(struct device_queue_manager *dqm) +{ + /* + * Issue a GPU reset if HWS is unresponsive + */ + dqm->is_hws_hang = true; + + /* It's possible we're detecting a HWS hang in the + * middle of a GPU reset. No need to schedule another + * reset in this case. 
+ */ + if (!dqm->is_resetting) + schedule_work(&dqm->hw_exception_work); +} + +static int convert_to_mes_queue_type(int queue_type) +{ + int mes_queue_type; + + switch (queue_type) { + case KFD_QUEUE_TYPE_COMPUTE: + mes_queue_type = MES_QUEUE_TYPE_COMPUTE; + break; + case KFD_QUEUE_TYPE_SDMA: + mes_queue_type = MES_QUEUE_TYPE_SDMA; + break; + default: + WARN(1, "Invalid queue type %d", queue_type); + mes_queue_type = -EINVAL; + break; + } + + return mes_queue_type; +} + +static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + struct mes_add_queue_input queue_input; + int r, queue_type; + + if (dqm->is_hws_hang) + return -EIO; + + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); + queue_input.process_id = qpd->pqm->process->pasid; + queue_input.page_table_base_addr = qpd->page_table_base; + queue_input.process_va_start = 0; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; + /* MES unit for quantum is 100ns */ + queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. 
*/ + queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; + queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ + queue_input.gang_context_addr = q->gang_ctx_gpu_addr; + queue_input.inprocess_gang_priority = q->properties.priority; + queue_input.gang_global_priority_level = + AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + queue_input.doorbell_offset = q->properties.doorbell_off; + queue_input.mqd_addr = q->gart_mqd_addr; + queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; + queue_input.paging = false; + queue_input.tba_addr = qpd->tba_addr; + queue_input.tma_addr = qpd->tma_addr; + + queue_type = convert_to_mes_queue_type(q->properties.type); + if (queue_type < 0) { + pr_err("Queue type not supported with MES, queue:%d\n", + q->properties.type); + return -EINVAL; + } + queue_input.queue_type = (uint32_t)queue_type; + + if (q->gws) { + queue_input.gws_base = 0; + queue_input.gws_size = qpd->num_gws; + } + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + pr_err("failed to add hardware queue to MES, doorbell=0x%x\n", + q->properties.doorbell_off); + pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); +} + + return r; +} + +static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r; + struct mes_remove_queue_input queue_input; + + if (dqm->is_hws_hang) + return -EIO; + + memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); + queue_input.doorbell_offset = q->properties.doorbell_off; + queue_input.gang_context_addr = q->gang_ctx_gpu_addr; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + + if (r) { + pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n", + 
q->properties.doorbell_off); + pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + +static int remove_all_queues_mes(struct device_queue_manager *dqm) +{ + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + int retval = 0; + + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.is_active) { + retval = remove_queue_mes(dqm, q, qpd); + if (retval) { + pr_err("%s: Failed to remove queue %d for dev %d", + __func__, + q->properties.queue_id, + dqm->dev->id); + return retval; + } + } + } + } + + return retval; +} + static void increment_queue_count(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -659,6 +811,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; bool prev_active = false; + bool add_queue = false; dqm_lock(dqm); pdd = kfd_get_process_device_data(q->device, q->process); @@ -674,8 +827,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, /* Make sure the queue is unmapped before updating the MQD */ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { - retval = unmap_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); + if (!dqm->dev->shared_resources.enable_mes) + retval = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); + else if (prev_active) + retval = remove_queue_mes(dqm, q, &pdd->qpd); + if (retval) { pr_err("unmap queue failed\n"); goto out_unlock; @@ -727,9 +884,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, q->properties.is_gws = false; } - if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) - retval = map_queues_cpsch(dqm); - else if (q->properties.is_active && + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { + if 
(!dqm->dev->shared_resources.enable_mes) + retval = map_queues_cpsch(dqm); + else if (add_queue) + retval = add_queue_mes(dqm, q, &pdd->qpd); + } else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { @@ -822,12 +982,22 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, q->properties.is_active = false; decrement_queue_count(dqm, qpd, q); + + if (dqm->dev->shared_resources.enable_mes) { + retval = remove_queue_mes(dqm, q, qpd); + if (retval) { + pr_err("Failed to evict queue %d\n", + q->properties.queue_id); + goto out; + } + } } pdd->last_evict_timestamp = get_jiffies_64(); - retval = execute_queues_cpsch(dqm, - qpd->is_debug ? - KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (!dqm->dev->shared_resources.enable_mes) + retval = execute_queues_cpsch(dqm, + qpd->is_debug ? + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); out: dqm_unlock(dqm); @@ -951,9 +1121,19 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, q->properties.is_active = true; increment_queue_count(dqm, &pdd->qpd, q); + + if (dqm->dev->shared_resources.enable_mes) { + retval = add_queue_mes(dqm, q, qpd); + if (retval) { + pr_err("Failed to restore queue %d\n", + q->properties.queue_id); + goto out; + } + } } - retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (!dqm->dev->shared_resources.enable_mes) + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); qpd->evicted = 0; eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; atomic64_add(eviction_duration, &pdd->evict_duration_counter); @@ -1081,6 +1261,9 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); dqm->sdma_bitmap = ~0ULL >> (64 - 
get_num_sdma_queues(dqm)); + dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm)); + pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap); + dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); return 0; @@ -1277,6 +1460,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm) else dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); + dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm)); + pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap); + num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) dqm->xgmi_sdma_bitmap = ULLONG_MAX; @@ -1295,14 +1481,16 @@ static int start_cpsch(struct device_queue_manager *dqm) retval = 0; dqm_lock(dqm); - retval = pm_init(&dqm->packet_mgr, dqm); - if (retval) - goto fail_packet_manager_init; - retval = set_sched_resources(dqm); - if (retval) - goto fail_set_sched_resources; + if (!dqm->dev->shared_resources.enable_mes) { + retval = pm_init(&dqm->packet_mgr, dqm); + if (retval) + goto fail_packet_manager_init; + retval = set_sched_resources(dqm); + if (retval) + goto fail_set_sched_resources; + } pr_debug("Allocating fence memory\n"); /* allocate fence memory on the gart */ @@ -1321,13 +1509,15 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->is_hws_hang = false; dqm->is_resetting = false; dqm->sched_running = true; - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (!dqm->dev->shared_resources.enable_mes) + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); return 0; fail_allocate_vidmem: fail_set_sched_resources: - pm_uninit(&dqm->packet_mgr, false); + if (!dqm->dev->shared_resources.enable_mes) + pm_uninit(&dqm->packet_mgr, false); fail_packet_manager_init: dqm_unlock(dqm); return retval; @@ -1343,15 +1533,22 @@ static int stop_cpsch(struct device_queue_manager *dqm) return 0; } - if (!dqm->is_hws_hang) - unmap_queues_cpsch(dqm, 
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); + if (!dqm->is_hws_hang) { + if (!dqm->dev->shared_resources.enable_mes) + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); + else + remove_all_queues_mes(dqm); + } + hanging = dqm->is_hws_hang || dqm->is_resetting; dqm->sched_running = false; - pm_release_ib(&dqm->packet_mgr); + if (!dqm->dev->shared_resources.enable_mes) + pm_release_ib(&dqm->packet_mgr); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - pm_uninit(&dqm->packet_mgr, hanging); + if (!dqm->dev->shared_resources.enable_mes) + pm_uninit(&dqm->packet_mgr, hanging); dqm_unlock(dqm); return 0; @@ -1469,8 +1666,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.is_active) { increment_queue_count(dqm, qpd, q); - execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (!dqm->dev->shared_resources.enable_mes) { + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + } else { + retval = add_queue_mes(dqm, q, qpd); + if (retval) + goto cleanup_queue; + } } /* @@ -1485,6 +1688,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, dqm_unlock(dqm); return retval; +cleanup_queue: + qpd->queue_count--; + list_del(&q->list); + if (q->properties.is_active) + decrement_queue_count(dqm, qpd, q); + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_unlock(dqm); out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: @@ -1572,13 +1782,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, queue_preemption_timeout_ms); if (retval) { pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); - dqm->is_hws_hang = true; - /* It's possible we're detecting a HWS hang in the - * middle of a GPU reset. No need to schedule another - * reset in this case. 
- */ - if (!dqm->is_resetting) - schedule_work(&dqm->hw_exception_work); + kfd_hws_hang(dqm); return retval; } @@ -1683,11 +1887,15 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, list_del(&q->list); qpd->queue_count--; if (q->properties.is_active) { - decrement_queue_count(dqm, qpd, q); - retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - if (retval == -ETIME) - qpd->reset_wavefronts = true; + if (!dqm->dev->shared_resources.enable_mes) { + decrement_queue_count(dqm, qpd, q); + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (retval == -ETIME) + qpd->reset_wavefronts = true; + } else { + retval = remove_queue_mes(dqm, q, qpd); + } } /* @@ -1941,9 +2149,17 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - if (q->properties.is_active) + if (q->properties.is_active) { decrement_queue_count(dqm, qpd, q); + if (dqm->dev->shared_resources.enable_mes) { + retval = remove_queue_mes(dqm, q, qpd); + if (retval) + pr_err("Failed to remove queue %d\n", + q->properties.queue_id); + } + } + dqm->total_queue_count--; } @@ -1958,7 +2174,9 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } } - retval = execute_queues_cpsch(dqm, filter, 0); + if (!dqm->dev->shared_resources.enable_mes) + retval = execute_queues_cpsch(dqm, filter, 0); + if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); @@ -2133,7 +2351,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) break; default: - if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) + if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) + device_queue_manager_init_v11(&dqm->asic_ops); + else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 
device_queue_manager_init_v10_navi10(&dqm->asic_ops); else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) device_queue_manager_init_v9(&dqm->asic_ops); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3d539d6483e0..a537b9ef3e16 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -35,6 +35,9 @@ #define VMID_NUM 16 +#define KFD_MES_PROCESS_QUANTUM 100000 +#define KFD_MES_GANG_QUANTUM 10000 + struct device_process_node { struct qcm_process_device *qpd; struct list_head list; @@ -267,6 +270,8 @@ void device_queue_manager_init_v9( struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_v10_navi10( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_v11( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_cp_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c new file mode 100644 index 000000000000..2e129da7acb4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c @@ -0,0 +1,81 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "kfd_device_queue_manager.h" +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" +#include "soc21_enum.h" + +static int update_qpd_v11(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); + +void device_queue_manager_init_v11( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->update_qpd = update_qpd_v11; + asic_ops->init_sdma_vm = init_sdma_vm_v11; + asic_ops->mqd_manager_init = mqd_manager_init_v11; +} + +static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) +{ + uint32_t shared_base = pdd->lds_base >> 48; + uint32_t private_base = pdd->scratch_base >> 48; + + return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) | + private_base; +} + +static int update_qpd_v11(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + + pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + + return 0; +} + +static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + /* Not needed on SDMAv4 onwards any more */ + q->properties.sdma_vm_addr = 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 5401b6317f25..cb3d2ccc5100 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -49,9 +49,13 @@ /* # of doorbell bytes allocated for each process. 
*/ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) { - return roundup(kfd->device_info.doorbell_size * - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - PAGE_SIZE); + if (!kfd->shared_resources.enable_mes) + return roundup(kfd->device_info.doorbell_size * + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + PAGE_SIZE); + else + return amdgpu_mes_doorbell_process_slice( + (struct amdgpu_device *)kfd->adev); } /* Doorbell calculations for device init. */ @@ -62,6 +66,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd) size_t doorbell_process_limit; /* + * With MES enabled, just set the doorbell base as it is needed + * to calculate doorbell physical address. + */ + if (kfd->shared_resources.enable_mes) { + kfd->doorbell_base = + kfd->shared_resources.doorbell_physical_address; + return 0; + } + + /* * We start with calculations in bytes because the input data might * only be byte-aligned. * Only after we have done the rounding can we assume any alignment. @@ -237,10 +251,16 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, * the process's doorbells. The offset returned is in dword * units regardless of the ASIC-dependent doorbell size. 
*/ - return kfd->doorbell_base_dw_offset + - pdd->doorbell_index - * kfd_doorbell_process_slice(kfd) / sizeof(u32) + - doorbell_id * kfd->device_info.doorbell_size / sizeof(u32); + if (!kfd->shared_resources.enable_mes) + return kfd->doorbell_base_dw_offset + + pdd->doorbell_index + * kfd_doorbell_process_slice(kfd) / sizeof(u32) + + doorbell_id * + kfd->device_info.doorbell_size / sizeof(u32); + else + return amdgpu_mes_get_doorbell_dw_offset_in_bar( + (struct amdgpu_device *)kfd->adev, + pdd->doorbell_index, doorbell_id); } uint64_t kfd_get_number_elems(struct kfd_dev *kfd) @@ -261,8 +281,16 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index) { - int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices, - GFP_KERNEL); + int r = 0; + + if (!kfd->shared_resources.enable_mes) + r = ida_simple_get(&kfd->doorbell_ida, 1, + kfd->max_doorbell_slices, GFP_KERNEL); + else + r = amdgpu_mes_alloc_process_doorbells( + (struct amdgpu_device *)kfd->adev, + doorbell_index); + if (r > 0) *doorbell_index = r; @@ -271,6 +299,12 @@ int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_inde void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index) { - if (doorbell_index) - ida_simple_remove(&kfd->doorbell_ida, doorbell_index); + if (doorbell_index) { + if (!kfd->shared_resources.enable_mes) + ida_simple_remove(&kfd->doorbell_ida, doorbell_index); + else + amdgpu_mes_free_process_doorbells( + (struct amdgpu_device *)kfd->adev, + doorbell_index); + } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c new file mode 100644 index 000000000000..a6fcbeeb7428 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -0,0 +1,384 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "kfd_priv.h" +#include "kfd_events.h" +#include "soc15_int.h" +#include "kfd_device_queue_manager.h" +#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" +#include "kfd_smi_events.h" + +/* + * GFX11 SQ Interrupts + * + * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit + * packet to the Interrupt Handler: + * Auto - Generated by the SQG (various cmd overflows, timestamps etc) + * Wave - Generated by S_SENDMSG through a shader program + * Error - HW generated errors (Illegal instructions, Memviols, EDC etc) + * + * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus + * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such: + * + * - context_id1[7:6] + * Encoding type (0 = Auto, 1 = Wave, 2 = Error) + * + * - context_id0[26] + * PRIV bit indicates that Wave S_SEND or error occurred within trap + * + * - context_id0[24:0] + * 25-bit data with the following layout per encoding type: + * Auto - only context_id0[8:0] is used, which reports various interrupts + * generated by SQG. The rest is 0. + * Wave - user data sent from m0 via S_SENDMSG (context_id0[23:0]) + * Error - Error Type (context_id0[24:21]), Error Details (context_id0[20:0]) + * + * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave + * S_SENDMSG and Errors. These are 0 for Auto. 
+ */ + +enum SQ_INTERRUPT_WORD_ENCODING { + SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, + SQ_INTERRUPT_WORD_ENCODING_INST, + SQ_INTERRUPT_WORD_ENCODING_ERROR, +}; + +enum SQ_INTERRUPT_ERROR_TYPE { + SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0, + SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST, + SQ_INTERRUPT_ERROR_TYPE_MEMVIOL, + SQ_INTERRUPT_ERROR_TYPE_EDC_FED, +}; + +/* SQ_INTERRUPT_WORD_AUTO_CTXID */ +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT 0 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT 1 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL__SHIFT 2 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP__SHIFT 3 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP__SHIFT 4 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW__SHIFT 5 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW__SHIFT 6 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW__SHIFT 7 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT 8 +#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT 6 + +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK 0x00000001 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK 0x00000002 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL_MASK 0x00000004 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP_MASK 0x00000008 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP_MASK 0x00000010 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW_MASK 0x00000020 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW_MASK 0x00000040 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW_MASK 0x00000080 +#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK 0x00000100 +#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK 0x000000c0 + +/* SQ_INTERRUPT_WORD_WAVE_CTXID */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT 0 +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID__SHIFT 25 +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT 26 +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT 27 
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID__SHIFT 0 +#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT 2 +#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT 6 + +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK 0x00ffffff /* [23:0] */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID_MASK 0x02000000 /* [25] */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK 0x04000000 /* [26] */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */ +#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */ + +/* SQ_INTERRUPT_WORD_ERROR_CTXID */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL__SHIFT 0 +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE__SHIFT 21 +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID__SHIFT 25 +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV__SHIFT 26 +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID__SHIFT 27 +#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID__SHIFT 0 +#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID__SHIFT 2 +#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING__SHIFT 6 + +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL_MASK 0x001fffff /* [20:0] */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE_MASK 0x01e00000 /* [24:21] */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID_MASK 0x02000000 /* [25] */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV_MASK 0x04000000 /* [26] */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */ +#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */ + +/* + * The debugger will send user data(m0) with PRIV=1 to indicate it requires + * notification from the KFD with the following queue id (DOORBELL_ID) and + * trap code 
(TRAP_CODE). + */ +#define KFD_CTXID0_TRAP_CODE_SHIFT 10 +#define KFD_CTXID0_TRAP_CODE_MASK 0xfffc00 +#define KFD_CTXID0_CP_BAD_OP_ECODE_MASK 0x3ffffff +#define KFD_CTXID0_DOORBELL_ID_MASK 0x0003ff + +#define KFD_CTXID0_TRAP_CODE(ctxid0) (((ctxid0) & \ + KFD_CTXID0_TRAP_CODE_MASK) >> \ + KFD_CTXID0_TRAP_CODE_SHIFT) +#define KFD_CTXID0_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \ + KFD_CTXID0_CP_BAD_OP_ECODE_MASK) >> \ + KFD_CTXID0_TRAP_CODE_SHIFT) +#define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \ + KFD_CTXID0_DOORBELL_ID_MASK) + +static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) +{ + pr_debug( + "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR)); +} + +static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) +{ + pr_debug( + "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, 
SIMD_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID)); +} + +static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) +{ + pr_warn( + "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID)); +} + +static void event_interrupt_poison_consumption_v11(struct kfd_dev *dev, + uint16_t pasid, uint16_t source_id) +{ + int ret = -EINVAL; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + + if (!p) + return; + + /* all queues of a process will be unmapped in one time */ + if (atomic_read(&p->poison)) { + kfd_unref_process(p); + return; + } + + atomic_set(&p->poison, 1); + kfd_unref_process(p); + + switch (source_id) { + case SOC15_INTSRC_SQ_INTERRUPT_MSG: + if (dev->dqm->ops.reset_queues) + ret = dev->dqm->ops.reset_queues(dev->dqm, pasid); + break; + case SOC21_INTSRC_SDMA_ECC: + default: + break; + } + + kfd_signal_poison_consumed_event(dev, pasid); + + /* resetting queue passes, do page retirement without gpu reset + resetting queue fails, fallback to gpu reset solution */ + if (!ret) + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); + else + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); +} + +static bool event_interrupt_isr_v11(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, + bool *patched_flag) +{ + uint16_t source_id, client_id, pasid, vmid; + const uint32_t *data = ih_ring_entry; + uint32_t context_id0; + + source_id = 
SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + /* Only handle interrupts from KFD VMIDs */ + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + if (/*!KFD_IRQ_IS_FENCE(client_id, source_id) &&*/ + (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd)) + return false; + + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + + if ((source_id == SOC15_INTSRC_CP_END_OF_PIPE) && + (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG)) + return false; + + pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", + client_id, source_id, vmid, pasid); + pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); + + /* If there is no valid PASID, it's likely a bug */ + if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) + return false; + + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). 
+ */ + return source_id == SOC15_INTSRC_CP_END_OF_PIPE || + source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || + source_id == SOC15_INTSRC_CP_BAD_OPCODE || + source_id == SOC21_INTSRC_SDMA_TRAP || + /* KFD_IRQ_IS_FENCE(client_id, source_id) || */ + (((client_id == SOC21_IH_CLIENTID_VMC) || + ((client_id == SOC21_IH_CLIENTID_GFX) && + (source_id == UTCL2_1_0__SRCID__FAULT))) && + !amdgpu_no_queue_eviction_on_vm_fault); +} + +static void event_interrupt_wq_v11(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, ring_id, pasid, vmid; + uint32_t context_id0, context_id1; + uint8_t sq_int_enc, sq_int_errtype, sq_int_priv; + struct kfd_vm_fault_info info = {0}; + struct kfd_hsa_memory_exception_data exception_data; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry); + + /* VMC, UTCL2 */ + if (client_id == SOC21_IH_CLIENTID_VMC || + ((client_id == SOC21_IH_CLIENTID_GFX) && + (source_id == UTCL2_1_0__SRCID__FAULT))) { + + info.vmid = vmid; + info.mc_id = client_id; + info.page_addr = ih_ring_entry[4] | + (uint64_t)(ih_ring_entry[5] & 0xf) << 32; + info.prot_valid = ring_id & 0x08; + info.prot_read = ring_id & 0x10; + info.prot_write = ring_id & 0x20; + + memset(&exception_data, 0, sizeof(exception_data)); + exception_data.gpu_id = dev->id; + exception_data.va = (info.page_addr) << PAGE_SHIFT; + exception_data.failure.NotPresent = info.prot_valid ? 1 : 0; + exception_data.failure.NoExecute = info.prot_exec ? 1 : 0; + exception_data.failure.ReadOnly = info.prot_write ? 
1 : 0; + exception_data.failure.imprecise = 0; + + /*kfd_set_dbg_ev_from_interrupt(dev, pasid, -1, + KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION), + &exception_data, sizeof(exception_data));*/ + kfd_smi_event_update_vmfault(dev, pasid); + + /* GRBM, SDMA, SE, PMM */ + } else if (client_id == SOC21_IH_CLIENTID_GRBM_CP || + client_id == SOC21_IH_CLIENTID_GFX) { + + /* CP */ + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) + kfd_signal_event_interrupt(pasid, context_id0, 32); + /*else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_set_dbg_ev_from_interrupt(dev, pasid, + KFD_CTXID0_DOORBELL_ID(context_id0), + KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), + NULL, 0);*/ + + /* SDMA */ + else if (source_id == SOC21_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); + else if (source_id == SOC21_INTSRC_SDMA_ECC) { + event_interrupt_poison_consumption_v11(dev, pasid, source_id); + return; + } + + /* SQ */ + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { + sq_int_enc = REG_GET_FIELD(context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); + switch (sq_int_enc) { + case SQ_INTERRUPT_WORD_ENCODING_AUTO: + print_sq_intr_info_auto(context_id0, context_id1); + break; + case SQ_INTERRUPT_WORD_ENCODING_INST: + print_sq_intr_info_inst(context_id0, context_id1); + sq_int_priv = REG_GET_FIELD(context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV); + if (sq_int_priv /*&& (kfd_set_dbg_ev_from_interrupt(dev, pasid, + KFD_CTXID0_DOORBELL_ID(context_id0), + KFD_CTXID0_TRAP_CODE(context_id0), + NULL, 0))*/) + return; + break; + case SQ_INTERRUPT_WORD_ENCODING_ERROR: + print_sq_intr_info_error(context_id0, context_id1); + sq_int_errtype = REG_GET_FIELD(context_id0, + SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE); + if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && + sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { + event_interrupt_poison_consumption_v11( + dev, pasid, source_id); + return; + } + break; + default: + break; + } + 
kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); + } + + /*} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) { + kfd_process_close_interrupt_drain(pasid);*/ + } +} + +const struct kfd_event_interrupt_class event_interrupt_class_v11 = { + .interrupt_isr = event_interrupt_isr_v11, + .interrupt_wq = event_interrupt_wq_v11, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index f27fe022ef6f..0b75a37b689b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -90,7 +90,7 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000 #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 -static void event_interrupt_poison_consumption(struct kfd_dev *dev, +static void event_interrupt_poison_consumption_v9(struct kfd_dev *dev, uint16_t pasid, uint16_t client_id) { int old_poison, ret = -EINVAL; @@ -316,7 +316,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, client_id); + event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } break; @@ -337,7 +337,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, client_id); + event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || @@ -348,7 +348,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, if (client_id == SOC15_IH_CLIENTID_UTCL2 && amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { - event_interrupt_poison_consumption(dev, pasid, client_id); + event_interrupt_poison_consumption_v9(dev, pasid, 
client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 5ac209209613..49a283be6b57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -100,7 +100,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, { struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; - int i, se, sh, cu; + int i, se, sh, cu, cu_bitmap_sh_mul; amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); @@ -120,6 +120,10 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines); return; } + + cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(mm->dev) < IP_VERSION(12, 0, 0)) ? 2 : 1; + /* Count active CUs per SH. * * Some CUs in an SH may be disabled. HW expects disabled CUs to be @@ -129,10 +133,12 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1. * * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info. + * See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info. */ for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) - cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); + cu_per_sh[se][sh] = hweight32( + cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); /* Symmetrically map cu_mask to all SEs & SHs: * se_mask programs up to 2 SH in the upper and lower 16 bits. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c new file mode 100644 index 000000000000..4e0387f591be --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -0,0 +1,508 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" +#include "v11_structs.h" +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" +#include "amdgpu_amdkfd.h" + +static inline struct v11_compute_mqd *get_mqd(void *mqd) +{ + return (struct v11_compute_mqd *)mqd; +} + +static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v11_sdma_mqd *)mqd; +} + +static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct mqd_update_info *minfo) +{ + struct v11_compute_mqd *m; + uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; + + if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) || + !minfo->cu_mask.ptr) + return; + + mqd_symmetrically_map_cu_mask(mm, + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + + m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + m->compute_static_thread_mgmt_se4 = se_mask[4]; + m->compute_static_thread_mgmt_se5 = se_mask[5]; + m->compute_static_thread_mgmt_se6 = se_mask[6]; + m->compute_static_thread_mgmt_se7 = se_mask[7]; + + pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3, + m->compute_static_thread_mgmt_se4, + m->compute_static_thread_mgmt_se5, + m->compute_static_thread_mgmt_se6, + m->compute_static_thread_mgmt_se7); +} + +static void set_priority(struct v11_compute_mqd *m, struct queue_properties *q) +{ + m->cp_hqd_pipe_priority = pipe_priority_map[q->priority]; + m->cp_hqd_queue_priority = q->priority; +} + +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj; + int size; + + /* + * MES write to areas 
beyond MQD size. So allocate + * 1 PAGE_SIZE memory for MQD is MES is enabled. + */ + if (kfd->shared_resources.enable_mes) + size = PAGE_SIZE; + else + size = sizeof(struct v11_compute_mqd); + + if (kfd_gtt_sa_allocate(kfd, size, &mqd_mem_obj)) + return NULL; + + return mqd_mem_obj; +} + +static void init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + uint64_t addr; + struct v11_compute_mqd *m; + int size; + + m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; + addr = mqd_mem_obj->gpu_addr; + + if (mm->dev->shared_resources.enable_mes) + size = PAGE_SIZE; + else + size = sizeof(struct v11_compute_mqd); + + memset(m, 0, size); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | + 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; + + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | + 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | + 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_aql_control = + 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; + } + + if (mm->dev->cwsr_enabled) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = 
q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + mm->update_mqd(mm, m, q, NULL); +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + int r = 0; + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); + + r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, + wptr_shift, 0, mms); + return r; +} + +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id, + queue_id, p->doorbell_off); +} + +static void update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, + struct mqd_update_info *minfo) +{ + struct v11_compute_mqd *m; + + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= + ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); + + m->cp_hqd_pq_doorbell_control = + q->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT; + + /* 
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 */
+	m->cp_hqd_eop_control = min(0xA,
+		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+	m->cp_hqd_eop_base_addr_lo =
+			lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi =
+			upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = 0;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		/* GC 10 removed WPP_CLAMP from PQ Control */
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT ;
+		m->cp_hqd_pq_doorbell_control |=
+			1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+	}
+	if (mm->dev->cwsr_enabled)
+		m->cp_hqd_ctx_save_control = 0;
+
+	update_cu_mask(mm, mqd, minfo);
+	set_priority(m, q);
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+/*
+ * Return the queue_doorbell_id0 field of the v11 compute MQD at @mqd.
+ */
+static uint32_t read_doorbell_id(void *mqd)
+{
+	struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd;
+
+	return m->queue_doorbell_id0;
+}
+/*
+ * Pass the destroy request for (@pipe_id, @queue_id) to the kfd2kgd
+ * hqd_destroy callback and return that callback's result unchanged.
+ */
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type,
+			unsigned int timeout, uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_destroy
+		(mm->dev->adev, mqd, type, timeout,
+		pipe_id, queue_id);
+}
+/*
+ * Release @mqd_mem_obj through kfd_gtt_sa_free(). @mqd is not used.
+ */
+static void free_mqd(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+/*
+ * Return the answer of the kfd2kgd hqd_is_occupied callback for
+ * @queue_address on (@pipe_id, @queue_id). @mqd is not used.
+ */
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+			uint64_t queue_address, uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_is_occupied(
+		mm->dev->adev, queue_address,
+		pipe_id, queue_id);
+}
+/*
+ * Report the used control stack and save area sizes through
+ * @ctl_stack_used_size and @save_area_used_size, computed from the
+ * MQD's control stack size/offset and workgroup state offset fields.
+ * @ctl_stack is never written because the header copy-out at the end
+ * of the function is commented out. Always returns 0.
+ */
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct v11_compute_mqd *m;
+	/*struct mqd_user_context_save_area_header header;*/
+
+	m = get_mqd(mqd);
+
+	/* Control stack is written backwards, while workgroup context data
+	 * is written forwards. Both start from m->cp_hqd_cntl_stack_size.
+	 * Current position is at m->cp_hqd_cntl_stack_offset and
+	 * m->cp_hqd_wg_state_offset, respectively.
+	 */
+	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+		m->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset -
+		m->cp_hqd_cntl_stack_size;
+
+	/* Control stack is not copied to user mode for GFXv11 because
+	 * it's part of the context save area that is already
+	 * accessible to user mode
+	 */
+/*
+	header.control_stack_size = *ctl_stack_used_size;
+	header.wave_state_size = *save_area_used_size;
+
+	header.wave_state_offset = m->cp_hqd_wg_state_offset;
+	header.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+	if (copy_to_user(ctl_stack, &header, sizeof(header)))
+		return -EFAULT;
+*/
+	return 0;
+}
+/*
+ * Initialise the MQD exactly as init_mqd() does, then additionally set
+ * the PRIV_STATE and KMD_QUEUE bits in cp_hqd_pq_control.
+ */
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v11_compute_mqd *m;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+}
+/*
+ * Zero the SDMA MQD found at mqd_mem_obj->cpu_ptr, return it through
+ * @mqd (and its GPU address through @gart_addr when that is non-NULL),
+ * then fill it in by calling mm->update_mqd() with a NULL update info.
+ */
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	struct v11_sdma_mqd *m;
+
+	m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+
+	memset(m, 0, sizeof(struct v11_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+/*
+ * Load the SDMA MQD through the kfd2kgd hqd_sdma_load callback, passing
+ * p->write_ptr as the __user write pointer and @mms through unchanged.
+ * @pipe_id and @queue_id are not used. Returns the callback's result.
+ */
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		uint32_t pipe_id, uint32_t queue_id,
+		struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+					       (uint32_t __user *)p->write_ptr,
+					       mms);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+/*
+ * Fill the SDMA MQD at @mqd from @q: the ring buffer control word (size
+ * as ffs() - 1 of the queue size in unsigned int units, VMID, and the
+ * rptr write-back enable/timer fields), ring base and read pointer
+ * addresses, doorbell offset and engine/queue ids. q->is_active is then
+ * recomputed with QUEUE_IS_ACTIVE(). @minfo is not used.
+ */
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct queue_properties *q,
+		struct mqd_update_info *minfo)
+{
+	struct v11_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+		<< SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell_offset =
+		q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+/*
+ * The preempt type is ignored here because there is only one way
+ * to preempt an SDMA queue. @pipe_id and @queue_id are not used either;
+ * the result of the hqd_sdma_destroy callback is returned as is.
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type,
+		unsigned int timeout, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+/*
+ * Return the answer of the kfd2kgd hqd_sdma_is_occupied callback for
+ * @mqd. @queue_address, @pipe_id and @queue_id are not used.
+ */
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+		uint64_t queue_address, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * Hex-dump sizeof(struct v11_compute_mqd) bytes starting at @data into
+ * the seq_file @m. Always returns 0.
+ */
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v11_compute_mqd), false);
+	return 0;
+}
+/*
+ * Same as debugfs_show_mqd() but dumps sizeof(struct v11_sdma_mqd) bytes.
+ */
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v11_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+/*
+ * Allocate a zeroed mqd_manager with kzalloc(), bind it to @dev and
+ * install the callback set that matches @type.
+ *
+ * Returns the new manager, or NULL when @type is >= KFD_MQD_TYPE_MAX
+ * (after a WARN_ON), when the allocation fails, or when @type is not
+ * handled by the switch (the manager is freed again in that case).
+ *
+ * The DIQ type reuses init_mqd_hiq. Only the CP type installs
+ * get_wave_state and only the HIQ type installs read_doorbell_id.
+ */
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd;
+		mqd->free_mqd = free_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+		mqd->get_wave_state = get_wave_state;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = hiq_load_mqd_kiq;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->read_doorbell_id = read_doorbell_id;
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_sdma_mqd;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = destroy_mqd_sdma;
+		mqd->is_occupied = is_occupied_sdma;
+		mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 10bb3bb46246..2585d6e61d42 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -229,6 +229,8 @@ struct kfd_device_info {
 	bool needs_pci_atomics;
 	uint32_t no_atomic_fw_version;
 	unsigned int num_sdma_queues_per_engine;
+	unsigned int num_reserved_sdma_queues_per_engine;
+	uint64_t reserved_sdma_queues_bitmap;
 };

 unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev);
@@ -565,6 +567,10 @@ struct queue {

 	/* procfs */
 	struct kobject kobj;
+
+	void *gang_ctx_bo;
+	uint64_t gang_ctx_gpu_addr;
+	void *gang_ctx_cpu_ptr;
 };

 enum KFD_MQD_TYPE {
@@ -780,6 +786,10 @@ struct kfd_process_device {
 	 * checkpointed node to refer to this device.
 	 */
 	uint32_t user_gpu_id;
+
+	void *proc_ctx_bo;
+	uint64_t proc_ctx_gpu_addr;
+	void *proc_ctx_cpu_ptr;
 };

 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1171,6 +1181,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev);
 struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev);
 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
 void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -1293,6 +1305,7 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 /* Events */
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
 extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v11;

 extern const struct kfd_device_global_init_class device_global_init_class_cik;

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index cb8f4a459add..e3d64ec8c353 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1041,6 +1041,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)

 		kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);

+		if (pdd->dev->shared_resources.enable_mes)
+			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+						   pdd->proc_ctx_bo);
 		/*
 		 * before destroying pdd, make sure to report availability
 		 * for auto suspend
@@ -1484,6 +1487,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p)
 {
 	struct kfd_process_device *pdd = NULL;
+	int retval = 0;

 	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
 		return NULL;
@@ -1516,6 +1520,21 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->sdma_past_activity_counter = 0;
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
+
+	if (dev->shared_resources.enable_mes) {
+		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+						AMDGPU_MES_PROC_CTX_SIZE,
+						&pdd->proc_ctx_bo,
+						&pdd->proc_ctx_gpu_addr,
+						&pdd->proc_ctx_cpu_ptr,
+						false);
+		if (retval) {
+			pr_err("failed to allocate process context bo\n");
+			goto err_free_pdd;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
 	p->pdds[p->n_pdds++] = pdd;

 	/* Init idr used for memory handle translation */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 4f58e671d39b..dc00484ff484 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -198,8 +198,26 @@ static int init_user_queue(struct process_queue_manager *pqm,
 	(*q)->device = dev;
 	(*q)->process = pqm->process;

+	if (dev->shared_resources.enable_mes) {
+		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+						AMDGPU_MES_GANG_CTX_SIZE,
+						&(*q)->gang_ctx_bo,
+						&(*q)->gang_ctx_gpu_addr,
+						&(*q)->gang_ctx_cpu_ptr,
+						false);
+		if (retval) {
+			pr_err("failed to allocate gang context bo\n");
+			goto cleanup;
+		}
+		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+	}
+
 	pr_debug("PQM After init queue");
+	return 0;

+cleanup:
+	if (dev->shared_resources.enable_mes)
+		uninit_queue(*q);
 	return retval;
 }

@@ -418,6 +436,9 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 			pdd->qpd.num_gws = 0;
 		}

+		if (dev->shared_resources.enable_mes)
+			amdgpu_amdkfd_free_gtt_mem(dev->adev,
+						   pqn->q->gang_ctx_bo);
 		uninit_queue(pqn->q);
 	}

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 05089f1de4e9..2e20f54bb147 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1412,7 +1412,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.num_sdma_xgmi_engines =
 					kfd_get_num_xgmi_sdma_engines(gpu);
 	dev->node_props.num_sdma_queues_per_engine =
-				gpu->device_info.num_sdma_queues_per_engine;
+				gpu->device_info.num_sdma_queues_per_engine -
+				gpu->device_info.num_reserved_sdma_queues_per_engine;
 	dev->node_props.num_gws = (dev->gpu->gws &&
 		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 		dev->gpu->adev->gds.gws_size : 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
index daf3c44547d3..e3f3b0b93a59 100644
--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -31,7 +31,8 @@
 #define SOC15_INTSRC_VMC_FAULT		0
 #define SOC15_INTSRC_SDMA_TRAP		224
 #define SOC15_INTSRC_SDMA_ECC		220
-
+#define SOC21_INTSRC_SDMA_TRAP		49
+#define SOC21_INTSRC_SDMA_ECC		62

 #define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
 #define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff) |