summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 75
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 4
4 files changed, 82 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c31589043d5b..b5cae48dff66 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1295,6 +1295,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_
void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo);
int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 67242ce051b5..adcda9730c9f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include "kfd_priv.h"
+#include "kfd_topology.h"
#include "kfd_svm.h"
void print_queue_properties(struct queue_properties *q)
@@ -305,3 +306,77 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope
properties->ctx_save_restore_area_size);
return 0;
}
+
+/*
+ * Fixed per-CU components of the workgroup context data. They are summed
+ * with the version-dependent VGPR size in WG_CONTEXT_DATA_SIZE_PER_CU().
+ * NOTE(review): presumably byte counts (the total is later rounded up to
+ * PAGE_SIZE) -- confirm against the hardware documentation.
+ */
+#define SGPR_SIZE_PER_CU 0x4000
+#define LDS_SIZE_PER_CU 0x10000
+#define HWREG_SIZE_PER_CU 0x1000
+/*
+ * Sizing of node_props.debug_memory_size: DEBUGGER_BYTES_PER_WAVE is
+ * multiplied by the wave count and the product is rounded up to a multiple
+ * of DEBUGGER_BYTES_ALIGN.
+ */
+#define DEBUGGER_BYTES_ALIGN 64
+#define DEBUGGER_BYTES_PER_WAVE 32
+
+/*
+ * kfd_get_vgpr_size_per_cu - per-CU VGPR size for a gfx target version
+ * @gfxv: gfx target version number (callers pass node_props.gfx_target_version)
+ *
+ * Returns 0x80000 for any version in 90400..90499, for 90010 and for 90008;
+ * 0x60000 for 110000, 110001, 120000 and 120001; 0x40000 for everything else.
+ */
+static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
+{
+	/* Whole 904xx range: GFX_VERSION_AQUA_VANJARAM */
+	if (gfxv / 100 == 904)
+		return 0x80000;
+
+	switch (gfxv) {
+	case 90010:	/* GFX_VERSION_ALDEBARAN */
+	case 90008:	/* GFX_VERSION_ARCTURUS */
+		return 0x80000;
+	case 110000:	/* GFX_VERSION_PLUM_BONITO */
+	case 110001:	/* GFX_VERSION_WHEAT_NAS */
+	case 120000:	/* GFX_VERSION_GFX1200 */
+	case 120001:	/* GFX_VERSION_GFX1201 */
+		return 0x60000;
+	default:
+		return 0x40000;
+	}
+}
+
+/*
+ * Workgroup context data size for one CU on @gfxv: the version-dependent
+ * VGPR size from kfd_get_vgpr_size_per_cu() plus the fixed SGPR, LDS and
+ * HWREG sizes.
+ */
+#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \
+ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
+ LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
+
+/*
+ * Control stack bytes contributed by each wave: 12 when @gfxv is 100100
+ * (GFX_VERSION_NAVI10) or newer, 8 otherwise.
+ */
+#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
+ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
+
+/*
+ * Constant added to the control stack size before it is rounded up to
+ * PAGE_SIZE in kfd_queue_ctx_save_restore_size().
+ * NOTE(review): presumably the sizeof of the user-mode HSA context save area
+ * header -- confirm against the user-space structure definition.
+ */
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
+/*
+ * kfd_queue_ctx_save_restore_size - compute per-node queue buffer sizes
+ * @dev: topology device whose node_props are read and updated
+ *
+ * Fills in node_props.ctl_stack_size, .cwsr_size, .debug_memory_size and
+ * .eop_buffer_size from the node's SIMD/array counts, its XCC mask and its
+ * gfx target version. Nodes with a gfx target version below 80001 are left
+ * untouched.
+ */
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
+{
+	struct kfd_node_properties *node = &dev->node_props;
+	u32 version = node->gfx_target_version;
+	u32 cus, waves, stack_bytes, wg_bytes;
+
+	/* Nothing is computed before GFX_VERSION_CARRIZO */
+	if (version < 80001)
+		return;
+
+	/* CU count, divided across the XCCs set in xcc_mask */
+	cus = node->simd_count / node->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
+
+	if (version < 100100) {
+		/* Before GFX_VERSION_NAVI10: 40 waves per CU, capped at 512 per engine */
+		waves = min(cus * 40,
+			    node->array_count / node->simd_arrays_per_engine * 512);
+	} else {
+		waves = cus * 32;
+	}
+
+	wg_bytes = ALIGN(cus * WG_CONTEXT_DATA_SIZE_PER_CU(version), PAGE_SIZE);
+
+	/* Header + per-wave entries + 8 extra bytes, rounded up to a page */
+	stack_bytes = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER +
+			    waves * CNTL_STACK_BYTES_PER_WAVE(version) + 8,
+			    PAGE_SIZE);
+
+	/* HW design limits control stack size to 0x7000 on the 10.x family.
+	 * This is insufficient for theoretical PM4 cases
+	 * but sufficient for AQL, limited by SPI events.
+	 */
+	if (version / 10000 == 10 && stack_bytes > 0x7000)
+		stack_bytes = 0x7000;
+
+	node->cwsr_size = stack_bytes + wg_bytes;
+	node->ctl_stack_size = stack_bytes;
+	node->debug_memory_size = ALIGN(waves * DEBUGGER_BYTES_PER_WAVE,
+					DEBUGGER_BYTES_ALIGN);
+
+	/*
+	 * GFX_VERSION_TONGA gets 0x8000; every other version that reaches
+	 * this point (including GFX_VERSION_AQUA_VANJARAM) gets 4096.
+	 */
+	node->eop_buffer_size = (version == 80002) ? 0x8000 : 4096;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 6f89b06f89d3..a9b3eda65a2c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -2120,6 +2120,8 @@ int kfd_topology_add_device(struct kfd_node *gpu)
dev->gpu->adev->gmc.xgmi.connected_to_cpu)
dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
+ kfd_queue_ctx_save_restore_size(dev);
+
kfd_debug_print_topology();
kfd_notify_gpu_change(gpu_id, 1);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 2d1c9d771bef..43ba0d32e5bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -74,6 +74,10 @@ struct kfd_node_properties {
uint32_t num_sdma_xgmi_engines;
uint32_t num_sdma_queues_per_engine;
uint32_t num_cp_queues;
+ uint32_t cwsr_size;
+ uint32_t ctl_stack_size;
+ uint32_t eop_buffer_size;
+ uint32_t debug_memory_size;
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};