Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
22 files changed, 5847 insertions, 175 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index f02c938f75da..8cc0a76ddf9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -12,3 +12,16 @@ config HSA_AMD select DRM_AMDGPU_USERPTR help Enable this if you want to use HSA features on AMD GPU devices. + +config HSA_AMD_SVM + bool "Enable HMM-based shared virtual memory manager" + depends on HSA_AMD && DEVICE_PRIVATE + default y + select HMM_MIRROR + select MMU_NOTIFIER + help + Enable this to use unified memory and managed memory in HIP. This + memory manager supports two modes of operation. One based on + preemptions and one based on page faults. To enable page fault + based memory management on most GFXv9 GPUs, set the module + parameter amdgpu.noretry=0. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index e1e4115dcf78..c4f3aff11072 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -63,3 +63,8 @@ endif ifneq ($(CONFIG_DEBUG_FS),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o endif + +ifneq ($(CONFIG_HSA_AMD_SVM),) +AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \ + $(AMDKFD_PATH)/kfd_migrate.o +endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 43de260b2230..67541c30327a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -38,6 +38,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" +#include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" @@ -1297,7 +1298,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( dev->kgd, args->va_addr, args->size, - pdd->vm, (struct kgd_mem **) &mem, &offset, + pdd->drm_priv, (struct kgd_mem **) &mem, &offset, flags); if (err) @@ -1328,7 +1329,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); return err; @@ -1365,7 +1367,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, } ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, - (struct kgd_mem *)mem, &size); + (struct kgd_mem *)mem, pdd->drm_priv, &size); /* If freeing the buffer failed, leave the handle in place for * clean-up during process tear-down. 
@@ -1391,6 +1393,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, long err = 0; int i; uint32_t *devices_arr = NULL; + bool table_freed = false; dev = kfd_device_by_id(GET_GPU_ID(args->handle)); if (!dev) @@ -1448,7 +1451,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + peer->kgd, (struct kgd_mem *)mem, + peer_pdd->drm_priv, &table_freed); if (err) { pr_err("Failed to map to gpu %d/%d\n", i, args->n_devices); @@ -1466,16 +1470,17 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } /* Flush TLBs after waiting for the page table updates to complete */ - for (i = 0; i < args->n_devices; i++) { - peer = kfd_device_by_id(devices_arr[i]); - if (WARN_ON_ONCE(!peer)) - continue; - peer_pdd = kfd_get_process_device_data(peer, p); - if (WARN_ON_ONCE(!peer_pdd)) - continue; - kfd_flush_tlb(peer_pdd); + if (table_freed) { + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + } } - kfree(devices_arr); return err; @@ -1555,7 +1560,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { pr_err("Failed to unmap from gpu %d/%d\n", i, args->n_devices); @@ -1563,10 +1568,27 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } args->n_success = i+1; } - kfree(devices_arr); - mutex_unlock(&p->mutex); + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true); + if (err) { + pr_debug("Sync memory failed, wait interrupted by user signal\n"); + goto sync_memory_failed; + } + + /* Flush TLBs after waiting for the page table updates to complete */ + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + } + + kfree(devices_arr); + return 0; bind_process_to_device_failed: @@ -1574,6 +1596,7 @@ get_mem_obj_from_handle_failed: unmap_memory_from_gpu_failed: mutex_unlock(&p->mutex); copy_from_user_failed: +sync_memory_failed: kfree(devices_arr); return err; } @@ -1701,7 +1724,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, } r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, - args->va_addr, pdd->vm, + args->va_addr, pdd->drm_priv, (struct kgd_mem **)&mem, &size, NULL); if (r) @@ -1721,7 +1744,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); dma_buf_put(dmabuf); @@ -1742,6 +1766,61 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(dev, &args->anon_fd); } +static int kfd_ioctl_set_xnack_mode(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_xnack_mode_args *args = data; + int r = 0; + + mutex_lock(&p->mutex); + if (args->xnack_enabled >= 0) { + if 
(!list_empty(&p->pqm.queues)) { + pr_debug("Process has user queues running\n"); + mutex_unlock(&p->mutex); + return -EBUSY; + } + if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) + r = -EPERM; + else + p->xnack_enabled = args->xnack_enabled; + } else { + args->xnack_enabled = p->xnack_enabled; + } + mutex_unlock(&p->mutex); + + return r; +} + +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) +static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) +{ + struct kfd_ioctl_svm_args *args = data; + int r = 0; + + pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n", + args->start_addr, args->size, args->op, args->nattr); + + if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK)) + return -EINVAL; + if (!args->start_addr || !args->size) + return -EINVAL; + + mutex_lock(&p->mutex); + + r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr, + args->attrs); + + mutex_unlock(&p->mutex); + + return r; +} +#else +static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) +{ + return -EPERM; +} +#endif + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1840,6 +1919,11 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, kfd_ioctl_smi_events, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, + kfd_ioctl_set_xnack_mode, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index c60e82697385..c6b02aee4993 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -55,7 +55,7 @@ struct kfd_gpu_cache_info { uint32_t cache_level; uint32_t flags; /* Indicates how many Compute Units share this cache - * Value = 1 indicates the cache is not shared + * within a SA. Value = 1 indicates the cache is not shared */ uint32_t num_cu_shared; }; @@ -69,7 +69,6 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = { CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), .num_cu_shared = 1, - }, { /* Scalar L1 Instruction Cache (in SQC module) per bank */ @@ -126,9 +125,6 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { /* TODO: Add L2 Cache information */ }; -/* NOTE: In future if more information is added to struct kfd_gpu_cache_info - * the following ASICs may need a separate table. - */ #define hawaii_cache_info kaveri_cache_info #define tonga_cache_info carrizo_cache_info #define fiji_cache_info carrizo_cache_info @@ -136,13 +132,667 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define polaris11_cache_info carrizo_cache_info #define polaris12_cache_info carrizo_cache_info #define vegam_cache_info carrizo_cache_info -/* TODO - check & update Vega10 cache details */ -#define vega10_cache_info carrizo_cache_info -#define raven_cache_info carrizo_cache_info -#define renoir_cache_info carrizo_cache_info -/* TODO - check & update Navi10 cache details */ -#define navi10_cache_info carrizo_cache_info -#define vangogh_cache_info carrizo_cache_info + +/* NOTE: L1 cache information has been updated and L2/L3 + * cache information has been added for Vega10 and + * newer ASICs. The unit for cache_size is KiB. + * In the future, cache details need to be checked + * and updated for every new ASIC.
+ */ + +static struct kfd_gpu_cache_info vega10_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 4096, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 16, + }, +}; + +static struct kfd_gpu_cache_info raven_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 1024, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 11, + }, +}; + +static struct kfd_gpu_cache_info renoir_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 1024, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, +}; + +static struct kfd_gpu_cache_info vega12_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 2048, + .cache_level = 2, + 
.flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 5, + }, +}; + +static struct kfd_gpu_cache_info vega20_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 3, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 8192, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 16, + }, +}; + +static struct kfd_gpu_cache_info aldebaran_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 8192, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 14, + }, +}; + +static struct kfd_gpu_cache_info navi10_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 4096, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, +}; + +static struct kfd_gpu_cache_info vangogh_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), 
+ .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 1024, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, +}; + +static struct kfd_gpu_cache_info navi14_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 12, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 2048, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 12, + }, +}; + +static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 4096, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, + { + /* L3 Data Cache per GPU */ + .cache_size = 128*1024, + .cache_level = 3, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, +}; + +static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + 
CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 3072, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, + { + /* L3 Data Cache per GPU */ + .cache_size = 96*1024, + .cache_level = 3, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 10, + }, +}; + +static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 2048, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, + { + /* L3 Data Cache per GPU */ + .cache_size = 32*1024, + .cache_level = 3, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, +}; + +static struct kfd_gpu_cache_info beige_goby_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 1024, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, + { + /* L3 Data Cache per GPU */ + .cache_size = 16*1024, + .cache_level = 3, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + 
CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 8, + }, +}; + +static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 6, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 2048, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 6, + }, +}; static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) @@ -544,7 +1194,7 @@ err: } /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ -static int fill_in_pcache(struct crat_subtype_cache *pcache, +static int fill_in_l1_pcache(struct crat_subtype_cache *pcache, struct kfd_gpu_cache_info *pcache_info, struct kfd_cu_info *cu_info, int mem_available, @@ -597,6 +1247,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache, return 1; } +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache, + struct kfd_gpu_cache_info *pcache_info, + struct kfd_cu_info *cu_info, + int mem_available, + int cache_type, unsigned int cu_processor_id) +{ + unsigned int cu_sibling_map_mask; + int first_active_cu; + int i, j, k; + + /* First check if enough memory is available */ + if (sizeof(struct crat_subtype_cache) > mem_available) + return -ENOMEM; + + cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + cu_sibling_map_mask &= + ((1 << pcache_info[cache_type].num_cu_shared) - 1); + first_active_cu = ffs(cu_sibling_map_mask); + + /* CU could be inactive. In case of a shared cache, find the first + * active CU; in case of a non-shared cache, check whether the CU is + * inactive. If inactive, skip it. + */ + if (first_active_cu) { + memset(pcache, 0, sizeof(struct crat_subtype_cache)); + pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; + pcache->length = sizeof(struct crat_subtype_cache); + pcache->flags = pcache_info[cache_type].flags; + pcache->processor_id_low = cu_processor_id + + (first_active_cu - 1); + pcache->cache_level = pcache_info[cache_type].cache_level; + pcache->cache_size = pcache_info[cache_type].cache_size; + + /* Sibling map is w.r.t processor_id_low, so shift out + * inactive CU + */ + cu_sibling_map_mask = + cu_sibling_map_mask >> (first_active_cu - 1); + k = 0; + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; + j++) { + pcache->sibling_map[k] = + (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[k+1] = + (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[k+2] = + (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[k+3] = + (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + k += 4; + cu_sibling_map_mask = + cu_info->cu_bitmap[i % 4][j + i / 4]; + cu_sibling_map_mask &= ( + (1 << pcache_info[cache_type].num_cu_shared) + - 1); + } + } + return 0; + } + return 1; +} + /* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info * tables * @@ -624,6 +1338,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, int mem_available = available_size; unsigned int cu_processor_id; int ret; + unsigned int num_cu_shared; switch (kdev->device_info->asic_family) { case CHIP_KAVERI: @@ -663,12 +1378,21 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, num_of_cache_types = ARRAY_SIZE(vegam_cache_info); break; case CHIP_VEGA10: + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; case CHIP_VEGA12: + pcache_info = vega12_cache_info; + num_of_cache_types = ARRAY_SIZE(vega12_cache_info); + break; case CHIP_VEGA20: case CHIP_ARCTURUS: + pcache_info = vega20_cache_info; + num_of_cache_types = ARRAY_SIZE(vega20_cache_info); + break; case CHIP_ALDEBARAN: - pcache_info = vega10_cache_info; - num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + pcache_info = aldebaran_cache_info; + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); break; case CHIP_RAVEN: pcache_info = raven_cache_info; @@ -680,17 +1404,37 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, break; case CHIP_NAVI10: case CHIP_NAVI12: + pcache_info = navi10_cache_info; + num_of_cache_types = ARRAY_SIZE(navi10_cache_info); + break; case CHIP_NAVI14: + pcache_info = navi14_cache_info; + num_of_cache_types = ARRAY_SIZE(navi14_cache_info); + break; case CHIP_SIENNA_CICHLID: + pcache_info = sienna_cichlid_cache_info; + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); + break; case CHIP_NAVY_FLOUNDER: + pcache_info = navy_flounder_cache_info; + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); + break; case CHIP_DIMGREY_CAVEFISH: - pcache_info = navi10_cache_info; - num_of_cache_types = ARRAY_SIZE(navi10_cache_info); + pcache_info = dimgrey_cavefish_cache_info; + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); break; case CHIP_VANGOGH: pcache_info = vangogh_cache_info; num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); break; + case CHIP_BEIGE_GOBY: + pcache_info = beige_goby_cache_info; + num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); + break; + case CHIP_YELLOW_CARP: + pcache_info = yellow_carp_cache_info; + num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); + break;
default: return -EINVAL; } @@ -709,40 +1453,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, */ for (ct = 0; ct < num_of_cache_types; ct++) { - cu_processor_id = gpu_processor_id; - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; - j++) { - for (k = 0; k < cu_info->num_cu_per_sh; - k += pcache_info[ct].num_cu_shared) { - - ret = fill_in_pcache(pcache, - pcache_info, - cu_info, - mem_available, - cu_info->cu_bitmap[i % 4][j + i / 4], - ct, - cu_processor_id, - k); - - if (ret < 0) - break; - - if (!ret) { - pcache++; - (*num_of_entries)++; - mem_available -= - sizeof(*pcache); - (*size_filled) += - sizeof(*pcache); - } - - /* Move to next CU block */ - cu_processor_id += - pcache_info[ct].num_cu_shared; - } - } + cu_processor_id = gpu_processor_id; + if (pcache_info[ct].cache_level == 1) { + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { + for (k = 0; k < cu_info->num_cu_per_sh; + k += pcache_info[ct].num_cu_shared) { + ret = fill_in_l1_pcache(pcache, + pcache_info, + cu_info, + mem_available, + cu_info->cu_bitmap[i % 4][j + i / 4], + ct, + cu_processor_id, + k); + + if (ret < 0) + break; + + if (!ret) { + pcache++; + (*num_of_entries)++; + mem_available -= sizeof(*pcache); + (*size_filled) += sizeof(*pcache); + } + + /* Move to next CU block */ + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= + cu_info->num_cu_per_sh) ? + pcache_info[ct].num_cu_shared : + (cu_info->num_cu_per_sh - k); + cu_processor_id += num_cu_shared; } + } + } + } else { + ret = fill_in_l2_l3_pcache(pcache, + pcache_info, + cu_info, + mem_available, + ct, + cu_processor_id); + + if (ret < 0) + break; + + if (!ret) { + pcache++; + (*num_of_entries)++; + mem_available -= sizeof(*pcache); + (*size_filled) += sizeof(*pcache); + } + } } pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); @@ -1100,6 +1862,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size, return 0; } +#ifdef CONFIG_ACPI_NUMA +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) +{ + struct acpi_table_header *table_header = NULL; + struct acpi_subtable_header *sub_header = NULL; + unsigned long table_end, subtable_len; + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 | + pci_dev_id(kdev->pdev); + u32 bdf; + acpi_status status; + struct acpi_srat_cpu_affinity *cpu; + struct acpi_srat_generic_affinity *gpu; + int pxm = 0, max_pxm = 0; + int numa_node = NUMA_NO_NODE; + bool found = false; + + /* Fetch the SRAT table from ACPI */ + status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header); + if (status == AE_NOT_FOUND) { + pr_warn("SRAT table not found\n"); + return; + } else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); + pr_err("SRAT table error: %s\n", err); + return; + } + + table_end = (unsigned long)table_header + table_header->length; + + /* Parse all entries looking for a match. */ + sub_header = (struct acpi_subtable_header *) + ((unsigned long)table_header + + sizeof(struct acpi_table_srat)); + subtable_len = sub_header->length; + + while (((unsigned long)sub_header) + subtable_len < table_end) { + /* + * If length is 0, break from this loop to avoid + * infinite loop. 
+ */ + if (subtable_len == 0) { + pr_err("SRAT invalid zero length\n"); + break; + } + + switch (sub_header->type) { + case ACPI_SRAT_TYPE_CPU_AFFINITY: + cpu = (struct acpi_srat_cpu_affinity *)sub_header; + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 | + cpu->proximity_domain_lo; + if (pxm > max_pxm) + max_pxm = pxm; + break; + case ACPI_SRAT_TYPE_GENERIC_AFFINITY: + gpu = (struct acpi_srat_generic_affinity *)sub_header; + bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | + *((u16 *)(&gpu->device_handle[2])); + if (bdf == pci_id) { + found = true; + numa_node = pxm_to_node(gpu->proximity_domain); + } + break; + default: + break; + } + + if (found) + break; + + sub_header = (struct acpi_subtable_header *) + ((unsigned long)sub_header + subtable_len); + subtable_len = sub_header->length; + } + + acpi_put_table(table_header); + + /* Workaround bad cpu-gpu binding case */ + if (found && (numa_node < 0 || + numa_node > pxm_to_node(max_pxm))) + numa_node = 0; + + if (numa_node != NUMA_NO_NODE) + set_dev_node(&kdev->pdev->dev, numa_node); +} +#endif + /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU * to its NUMA node * @avail_size: Available size in the memory @@ -1140,11 +1988,17 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, */ sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; + sub_type_hdr->num_hops_xgmi = 1; } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; } sub_type_hdr->proximity_domain_from = proximity_domain; + +#ifdef CONFIG_ACPI_NUMA + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) + kfd_find_numa_node_in_srat(kdev); +#endif #ifdef CONFIG_NUMA if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) sub_type_hdr->proximity_domain_to = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 357b9bf62a1c..6b57dfd2cd2a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -26,10 +26,12 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" +#include "kfd_pm4_headers_aldebaran.h" #include "cwsr_trap_handler.h" #include "kfd_iommu.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" +#include "kfd_migrate.h" #define MQD_SIZE_ALIGNED 768 @@ -80,6 +82,8 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = { [CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd, [CHIP_VANGOGH] = &gfx_v10_3_kfd2kgd, [CHIP_DIMGREY_CAVEFISH] = &gfx_v10_3_kfd2kgd, + [CHIP_BEIGE_GOBY] = &gfx_v10_3_kfd2kgd, + [CHIP_YELLOW_CARP] = &gfx_v10_3_kfd2kgd, }; #ifdef KFD_SUPPORT_IOMMU_V2 @@ -556,6 +560,41 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = { .num_sdma_queues_per_engine = 8, }; +static const struct kfd_device_info beige_goby_device_info = { + .asic_family = CHIP_BEIGE_GOBY, + .asic_name = "beige_goby", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = true, + .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info yellow_carp_device_info = { + .asic_family = CHIP_YELLOW_CARP, + .asic_name = "yellow_carp", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * 
sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; /* For each entry, [0] is regular and [1] is virtualisation device. */ static const struct kfd_device_info *kfd_supported_devices[][2] = { @@ -576,7 +615,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = { [CHIP_VEGA20] = {&vega20_device_info, NULL}, [CHIP_RENOIR] = {&renoir_device_info, NULL}, [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info}, - [CHIP_ALDEBARAN] = {&aldebaran_device_info, NULL}, + [CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info}, [CHIP_NAVI10] = {&navi10_device_info, NULL}, [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info}, [CHIP_NAVI14] = {&navi14_device_info, NULL}, @@ -584,6 +623,8 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = { [CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info}, [CHIP_VANGOGH] = {&vangogh_device_info, NULL}, [CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info}, + [CHIP_BEIGE_GOBY] = {&beige_goby_device_info, &beige_goby_device_info}, + [CHIP_YELLOW_CARP] = {&yellow_carp_device_info, NULL}, }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -697,7 +738,9 @@ static int kfd_gws_init(struct kfd_dev *kfd) && kfd->device_info->asic_family <= CHIP_RAVEN && kfd->mec2_fw_version >= 0x1b3) || (kfd->device_info->asic_family == CHIP_ARCTURUS - && kfd->mec2_fw_version >= 0x30)) + && kfd->mec2_fw_version >= 0x30) + || (kfd->device_info->asic_family == CHIP_ALDEBARAN + && kfd->mec2_fw_version >= 0x28)) ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); @@ -713,7 +756,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { - unsigned int size; + unsigned int size, map_process_packet_size; kfd->ddev = ddev; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, @@ -748,7 +791,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * calculate max size of runlist packet. * There can be only 2 packets at once */ - size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) + + map_process_packet_size = + kfd->device_info->asic_family == CHIP_ALDEBARAN ? 
+ sizeof(struct pm4_mes_map_process_aldebaran) : + sizeof(struct pm4_mes_map_process); + size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size + max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) + sizeof(struct pm4_mes_runlist)) * 2; @@ -814,6 +861,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd); + svm_migrate_init((struct amdgpu_device *)kfd->kgd); + if (kfd_resume(kfd)) goto kfd_resume_error; @@ -861,7 +910,7 @@ out: void kgd2kfd_device_exit(struct kfd_dev *kfd) { if (kfd->init_complete) { - kgd2kfd_suspend(kfd, false); + svm_migrate_fini((struct amdgpu_device *)kfd->kgd); device_queue_manager_uninit(kfd->dqm); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d3eaa1549bd7..16a1713808c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -248,7 +248,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd)); + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, @@ -284,7 +284,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, if (flush_texture_cache_nocpsch(q->device, qpd)) pr_err("Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd)); + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -486,9 +486,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, if (retval == -ETIME) qpd->reset_wavefronts = true; - - mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - list_del(&q->list); if (list_empty(&qpd->queues_list)) { if (qpd->reset_wavefronts) { @@ -523,6 +520,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, int retval; uint64_t sdma_val = 0; struct kfd_process_device *pdd = qpd_to_pdd(qpd); + struct mqd_manager *mqd_mgr = + dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; /* Get the SDMA queue stats */ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || @@ -540,6 +539,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, pdd->sdma_past_activity_counter += sdma_val; dqm_unlock(dqm); + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; } @@ -738,7 +739,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -760,7 +761,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->kgd, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd); + kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); } /* Take a safe reference to the mm_struct, which may otherwise @@ -821,7 +822,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ 
-873,7 +874,7 @@ static int register_process(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); dqm_lock(dqm); list_add(&n->list, &dqm->queues); @@ -1629,7 +1630,7 @@ out: static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - struct queue *q, *next; + struct queue *q; struct device_process_node *cur, *next_dpn; int retval = 0; bool found = false; @@ -1637,12 +1638,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, dqm_lock(dqm); /* Clear all user mode queues */ - list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + while (!list_empty(&qpd->queues_list)) { + struct mqd_manager *mqd_mgr; int ret; + q = list_first_entry(&qpd->queues_list, struct queue, list); + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; ret = destroy_queue_nocpsch_locked(dqm, qpd, q); if (ret) retval = ret; + dqm_unlock(dqm); + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_lock(dqm); } /* Unregister process */ @@ -1674,36 +1682,34 @@ static int get_wave_state(struct device_queue_manager *dqm, u32 *save_area_used_size) { struct mqd_manager *mqd_mgr; - int r; dqm_lock(dqm); - if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || - q->properties.is_active || !q->device->cwsr_enabled) { - r = -EINVAL; - goto dqm_unlock; - } - mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; - if (!mqd_mgr->get_wave_state) { - r = -EINVAL; - goto dqm_unlock; + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || + q->properties.is_active || !q->device->cwsr_enabled || + !mqd_mgr->get_wave_state) { + dqm_unlock(dqm); + return -EINVAL; } - r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, - ctl_stack_used_size, save_area_used_size); - -dqm_unlock: dqm_unlock(dqm); - return r; + + /* + * get_wave_state is outside the dqm lock to prevent circular locking + * and the queue should be protected against destruction by the process + * lock. + */ + return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, + ctl_stack_used_size, save_area_used_size); } static int process_termination_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { int retval; - struct queue *q, *next; + struct queue *q; struct kernel_queue *kq, *kq_next; struct mqd_manager *mqd_mgr; struct device_process_node *cur, *next_dpn; @@ -1760,24 +1766,26 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - dqm_unlock(dqm); - - /* Outside the DQM lock because under the DQM lock we can't do - * reclaim or take other locks that others hold while reclaiming. - */ - if (found) - kfd_dec_compute_active(dqm->dev); - /* Lastly, free mqd resources. * Do free_mqd() after dqm_unlock to avoid circular locking. */ - list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + while (!list_empty(&qpd->queues_list)) { + q = list_first_entry(&qpd->queues_list, struct queue, list); mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; list_del(&q->list); qpd->queue_count--; + dqm_unlock(dqm); mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_lock(dqm); } + dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. 
+ */ + if (found) + kfd_dec_compute_active(dqm->dev); return retval; } @@ -1936,6 +1944,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: device_queue_manager_init_v10_navi10(&dqm->asic_ops); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index eca6331efa94..b5c3d13643f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (dqm->dev->noretry && - !dqm->dev->use_iommu_v2) + + if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) { + /* Aldebaran can safely support different XNACK modes + * per process + */ + if (!pdd->process->xnack_enabled) + qpd->sh_mem_config |= + 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + } else if (dqm->dev->noretry && + !dqm->dev->use_iommu_v2) { qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + } qpd->sh_mem_ape1_limit = 0; qpd->sh_mem_ape1_base = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index ba2c2ce0c55a..3eea4edee355 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -1050,3 +1050,44 @@ void kfd_signal_reset_event(struct kfd_dev *dev) } srcu_read_unlock(&kfd_processes_srcu, idx); } + +void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid) +{ + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct kfd_hsa_memory_exception_data memory_exception_data; + struct kfd_hsa_hw_exception_data hw_exception_data; + struct kfd_event *ev; + uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID; + + if (!p) + return; /* Presumably process exited. 
*/ + + memset(&hw_exception_data, 0, sizeof(hw_exception_data)); + hw_exception_data.gpu_id = dev->id; + hw_exception_data.memory_lost = 1; + hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC; + + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); + memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED; + memory_exception_data.gpu_id = dev->id; + memory_exception_data.failure.imprecise = true; + + mutex_lock(&p->event_mutex); + idr_for_each_entry_continue(&p->event_idr, ev, id) { + if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + ev->hw_exception_data = hw_exception_data; + set_event(ev); + } + + if (ev->type == KFD_EVENT_TYPE_MEMORY) { + ev->memory_exception_data = memory_exception_data; + set_event(ev); + } + } + mutex_unlock(&p->event_mutex); + + /* user application will handle SIGBUS signal */ + send_sig(SIGBUS, p->lead_thread, 0); + + kfd_unref_process(p); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index a2c9063076cc..a9b329f0f862 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -420,6 +420,8 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: kfd_init_apertures_v9(pdd, id); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 696944fa0177..12d91e53556c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -25,7 +25,6 @@ #include "soc15_int.h" #include "kfd_device_queue_manager.h" #include "kfd_smi_events.h" -#include "amdgpu.h" enum SQ_INTERRUPT_WORD_ENCODING { SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, @@ -231,7 +230,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - kfd_signal_hw_exception_event(pasid); + kfd_signal_poison_consumed_event(dev, pasid); amdgpu_amdkfd_gpu_reset(dev->kgd); return; } @@ -250,8 +249,13 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, client_id == SOC15_IH_CLIENTID_SDMA5 || client_id == SOC15_IH_CLIENTID_SDMA6 || client_id == SOC15_IH_CLIENTID_SDMA7) { - if (source_id == SOC15_INTSRC_SDMA_TRAP) + if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); + } else if (source_id == SOC15_INTSRC_SDMA_ECC) { + kfd_signal_poison_consumed_event(dev, pasid); + amdgpu_amdkfd_gpu_reset(dev->kgd); + return; + } } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || client_id == SOC15_IH_CLIENTID_UTCL2) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c new file mode 100644 index 000000000000..2660f03e63a7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -0,0 +1,889 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/types.h> +#include <linux/hmm.h> +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include "amdgpu_sync.h" +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_mn.h" +#include "amdgpu_res_cursor.h" +#include "kfd_priv.h" +#include "kfd_svm.h" +#include "kfd_migrate.h" + +static uint64_t +svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) +{ + return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM); +} + +static int +svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, + dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + unsigned int num_dw, num_bytes; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + uint64_t pte_flags; + void *cpu_addr; + int r; + + /* use gart window 0 */ + *gart_addr = adev->gmc.gart_start; + + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + num_bytes = npages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_DELAYED, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes, false); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE; + pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED; + if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO)) + pte_flags |= AMDGPU_PTE_WRITEABLE; + pte_flags |= adev->gart.gart_pte_flags; + + cpu_addr = &job->ibs[0].ptr[num_dw]; + + r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + +/** + * svm_migrate_copy_memory_gart - sdma copy data between ram and vram + * + * @adev: amdgpu device the sdma ring running + * @src: source page address array + * @dst: destination page address array + * @npages: number of pages to copy + * @direction: enum MIGRATION_COPY_DIR + * @mfence: output, sdma fence to signal after sdma is done + * + * ram address uses GART table continuous entries mapping to ram pages, + * vram address uses direct mapping of vram pages, 
which must be npages of + * contiguous pages. + * The GART update and the copy use the same sdma buffer-copy ring; the copy is + * split into multiple transfers of at most GTT_MAX_PAGES each. All sdma + * operations are serialized, and the fence of the last transfer is returned so + * the caller can wait until the whole copy is done. + * + * Context: Process context, takes and releases gtt_window_lock + * + * Return: + * 0 - OK, otherwise error code + */ + +static int +svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, + uint64_t *vram, uint64_t npages, + enum MIGRATION_COPY_DIR direction, + struct dma_fence **mfence) +{ + const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + uint64_t gart_s, gart_d; + struct dma_fence *next; + uint64_t size; + int r; + + mutex_lock(&adev->mman.gtt_window_lock); + + while (npages) { + size = min(GTT_MAX_PAGES, npages); + + if (direction == FROM_VRAM_TO_RAM) { + gart_s = svm_migrate_direct_mapping_addr(adev, *vram); + r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0); + + } else if (direction == FROM_RAM_TO_VRAM) { + r = svm_migrate_gart_map(ring, size, sys, &gart_s, + KFD_IOCTL_SVM_FLAG_GPU_RO); + gart_d = svm_migrate_direct_mapping_addr(adev, *vram); + } + if (r) { + pr_debug("failed %d to create gart mapping\n", r); + goto out_unlock; + } + + r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, + NULL, &next, false, true, false); + if (r) { + pr_debug("failed %d to copy memory\n", r); + goto out_unlock; + } + + dma_fence_put(*mfence); + *mfence = next; + npages -= size; + if (npages) { + sys += size; + vram += size; + } + } + +out_unlock: + mutex_unlock(&adev->mman.gtt_window_lock); + + return r; +} + +/** + * svm_migrate_copy_done - wait for the sdma memory copy to finish + * + * @adev: amdgpu device the sdma memory copy is executing on + * @mfence: migrate fence + * + * Wait for the dma fence to be signaled. If the copy was split into multiple + * sdma operations, this is the fence of the last one.
+ * + * Context: called after svm_migrate_copy_memory + * + * Return: + * 0 - success + * otherwise - error code from dma fence signal + */ +static int +svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) +{ + int r = 0; + + if (mfence) { + r = dma_fence_wait(mfence, false); + dma_fence_put(mfence); + pr_debug("sdma copy memory fence done\n"); + } + + return r; +} + +unsigned long +svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr) +{ + return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT; +} + +static void +svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) +{ + struct page *page; + + page = pfn_to_page(pfn); + page->zone_device_data = prange; + get_page(page); + lock_page(page); +} + +static void +svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr)); + unlock_page(page); + put_page(page); +} + +static unsigned long +svm_migrate_addr(struct amdgpu_device *adev, struct page *page) +{ + unsigned long addr; + + addr = page_to_pfn(page) << PAGE_SHIFT; + return (addr - adev->kfd.dev->pgmap.range.start); +} + +static struct page * +svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct page *page; + + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (page) + lock_page(page); + + return page; +} + +static void svm_migrate_put_sys_page(unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(addr >> PAGE_SHIFT); + unlock_page(page); + put_page(page); +} + +static int +svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, + dma_addr_t *scratch) +{ + uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + struct amdgpu_res_cursor cursor; + dma_addr_t *src; + uint64_t *dst; + uint64_t i, j; + int r; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + src = scratch; + dst = (uint64_t *)(scratch + npages); + + r = svm_range_vram_node_new(adev, prange, true); + if (r) { + pr_debug("failed %d get 0x%llx pages from vram\n", r, npages); + goto out; + } + + amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT, + npages << PAGE_SHIFT, &cursor); + for (i = j = 0; i < npages; i++) { + struct page *spage; + + dst[i] = cursor.start + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, migrate->dst[i]); + + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; + + if (migrate->src[i] & MIGRATE_PFN_VALID) { + spage = migrate_pfn_to_page(migrate->src[i]); + src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, + DMA_TO_DEVICE); + r = dma_mapping_error(dev, src[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_free_vram_pages; + } + } else { + if (j) { + r = svm_migrate_copy_memory_gart( + adev, src + i - j, + dst + i - j, j, + FROM_RAM_TO_VRAM, + mfence); + if (r) + goto out_free_vram_pages; + amdgpu_res_next(&cursor, j << PAGE_SHIFT); + j = 0; + } else { + amdgpu_res_next(&cursor, PAGE_SIZE); + } + continue; + } + + pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", + src[i] >> PAGE_SHIFT, page_to_pfn(spage)); + + if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) { + r = svm_migrate_copy_memory_gart(adev, src + i - j, + dst + i - j, j + 1, + FROM_RAM_TO_VRAM, + mfence); + if (r) + goto out_free_vram_pages; + 
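/* the batch filled the current contiguous vram chunk: advance + * the cursor to the next chunk and start a new batch there + */ +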
amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE); + j = 0; + } else { + j++; + } + } + + r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j, + FROM_RAM_TO_VRAM, mfence); + +out_free_vram_pages: + if (r) { + pr_debug("failed %d to copy memory to vram\n", r); + while (i--) { + svm_migrate_put_vram_page(adev, dst[i]); + migrate->dst[i] = 0; + } + } + +out: + return r; +} + +static int +svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct vm_area_struct *vma, uint64_t start, + uint64_t end) +{ + uint64_t npages = (end - start) >> PAGE_SHIFT; + struct dma_fence *mfence = NULL; + struct migrate_vma migrate; + dma_addr_t *scratch; + size_t size; + void *buf; + int r = -ENOMEM; + int retry = 0; + + memset(&migrate, 0, sizeof(migrate)); + migrate.vma = vma; + migrate.start = start; + migrate.end = end; + migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; + migrate.pgmap_owner = adev; + + size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); + size *= npages; + buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + if (!buf) + goto out; + + migrate.src = buf; + migrate.dst = migrate.src + npages; + scratch = (dma_addr_t *)(migrate.dst + npages); + +retry: + r = migrate_vma_setup(&migrate); + if (r) { + pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + if (migrate.cpages != npages) { + pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages, + npages); + migrate_vma_finalize(&migrate); + if (retry++ >= 3) { + r = -ENOMEM; + pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + + goto retry; + } + + if (migrate.cpages) { + r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, + scratch); + migrate_vma_pages(&migrate); + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); + } + + svm_range_dma_unmap(adev->dev, scratch, 0, npages); + svm_range_free_dma_mappings(prange); + +out_free: + kvfree(buf); +out: + return r; +} + +/** + * svm_migrate_ram_to_vram - migrate svm range from system to device + * @prange: range structure + * @best_loc: the device to migrate to + * @mm: the process mm structure + * + * Context: Process context, caller holds the mmap read lock, svms lock and + * prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + unsigned long addr, start, end; + struct vm_area_struct *vma; + struct amdgpu_device *adev; + int r = 0; + + if (prange->actual_loc == best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", + prange->svms, prange->start, prange->last, best_loc); + return 0; + } + + adev = svm_range_get_adev_by_id(prange, best_loc); + if (!adev) { + pr_debug("failed to get device by id 0x%x\n", best_loc); + return -ENODEV; + } + + pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, + prange->start, prange->last, best_loc); + + /* FIXME: workaround for page locking bug with invalid pages */ + svm_range_prefault(prange, mm); + + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + + for (addr = start; addr < end;) { + unsigned long next; + + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start) + break; + + next = min(vma->vm_end, end); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); + if (r) { + pr_debug("failed to migrate\n"); + break; + } + addr = next; + } + + 
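/* publish the new location only if no per-VMA migration failed */ +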
if (!r) + prange->actual_loc = best_loc; + + return r; +} + +static void svm_migrate_page_free(struct page *page) +{ + /* Keep this function to avoid warning */ +} + +static int +svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, + dma_addr_t *scratch) +{ + uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + uint64_t *src; + dma_addr_t *dst; + struct page *dpage; + uint64_t i = 0, j; + uint64_t addr; + int r = 0; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + addr = prange->start << PAGE_SHIFT; + + src = (uint64_t *)(scratch + npages); + dst = scratch; + + for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) { + struct page *spage; + + spage = migrate_pfn_to_page(migrate->src[i]); + if (!spage) { + pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + src[i] = svm_migrate_addr(adev, spage); + if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { + r = svm_migrate_copy_memory_gart(adev, dst + i - j, + src + i - j, j, + FROM_VRAM_TO_RAM, + mfence); + if (r) + goto out_oom; + j = 0; + } + + dpage = svm_migrate_get_sys_page(migrate->vma, addr); + if (!dpage) { + pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + r = dma_mapping_error(dev, dst[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_oom; + } + + pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n", + dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); + + migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; + } + + r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j, + FROM_VRAM_TO_RAM, mfence); + +out_oom: + if (r) { + pr_debug("failed %d copy to ram\n", r); + while (i--) { + svm_migrate_put_sys_page(dst[i]); + migrate->dst[i] = 0; + } + } + + return r; +} + +static int +svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, + struct vm_area_struct *vma, uint64_t start, uint64_t end) +{ + uint64_t npages = (end - start) >> PAGE_SHIFT; + struct dma_fence *mfence = NULL; + struct migrate_vma migrate; + dma_addr_t *scratch; + size_t size; + void *buf; + int r = -ENOMEM; + + memset(&migrate, 0, sizeof(migrate)); + migrate.vma = vma; + migrate.start = start; + migrate.end = end; + migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + migrate.pgmap_owner = adev; + + size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); + size *= npages; + buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + if (!buf) + goto out; + + migrate.src = buf; + migrate.dst = migrate.src + npages; + scratch = (dma_addr_t *)(migrate.dst + npages); + + r = migrate_vma_setup(&migrate); + if (r) { + pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + + pr_debug("cpages %ld\n", migrate.cpages); + + if (migrate.cpages) { + r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, + scratch); + migrate_vma_pages(&migrate); + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); + } else { + pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n", + prange->start, prange->last); + } + + svm_range_dma_unmap(adev->dev, scratch, 0, npages); + +out_free: + kvfree(buf); +out: + 
return r; +} + +/** + * svm_migrate_vram_to_ram - migrate svm range from device to system + * @prange: range structure + * @mm: process mm, use current->mm if NULL + * + * Context: Process context, caller holds the mmap read lock, svms lock and + * prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) +{ + struct amdgpu_device *adev; + struct vm_area_struct *vma; + unsigned long addr; + unsigned long start; + unsigned long end; + int r = 0; + + if (!prange->actual_loc) { + pr_debug("[0x%lx 0x%lx] already migrated to ram\n", + prange->start, prange->last); + return 0; + } + + adev = svm_range_get_adev_by_id(prange, prange->actual_loc); + if (!adev) { + pr_debug("failed to get device by id 0x%x\n", + prange->actual_loc); + return -ENODEV; + } + + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n", + prange->svms, prange, prange->start, prange->last, + prange->actual_loc); + + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + + for (addr = start; addr < end;) { + unsigned long next; + + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start) + break; + + next = min(vma->vm_end, end); + r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); + if (r) { + pr_debug("failed %d to migrate\n", r); + break; + } + addr = next; + } + + if (!r) { + svm_range_vram_node_free(prange); + prange->actual_loc = 0; + } + return r; +} + +/** + * svm_migrate_vram_to_vram - migrate svm range from device to device + * @prange: range structure + * @best_loc: the device to migrate to + * @mm: process mm, use current->mm if NULL + * + * Context: Process context, caller holds the mmap read lock, svms lock and + * prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + int r; + + /* + * TODO: if both devices have a large PCIe BAR or are on the same xgmi + * hive, skip using system memory as the migration bridge + */ + + pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); + + r = svm_migrate_vram_to_ram(prange, mm); + if (r) + return r; + + return svm_migrate_ram_to_vram(prange, best_loc, mm); +} + +int +svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + if (!prange->actual_loc) + return svm_migrate_ram_to_vram(prange, best_loc, mm); + else + return svm_migrate_vram_to_vram(prange, best_loc, mm); +} + +/** + * svm_migrate_to_ram - CPU page fault handler + * @vmf: CPU vm fault information (vma and address) + * + * Context: vm fault handler, caller holds the mmap read lock + * + * Return: + * 0 - OK + * VM_FAULT_SIGBUS - notify the application with a SIGBUS page fault + */ +static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) +{ + unsigned long addr = vmf->address; + struct vm_area_struct *vma; + enum svm_work_list_ops op; + struct svm_range *parent; + struct svm_range *prange; + struct kfd_process *p; + struct mm_struct *mm; + int r = 0; + + vma = vmf->vma; + mm = vma->vm_mm; + + p = kfd_lookup_process_by_mm(vma->vm_mm); + if (!p) { + pr_debug("failed to find process at fault address 0x%lx\n", addr); + return VM_FAULT_SIGBUS; + } + addr >>= PAGE_SHIFT; + pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr); + + mutex_lock(&p->svms.lock); + + prange = svm_range_from_addr(&p->svms, addr, &parent); + if (!prange) { + pr_debug("cannot find svm range at 0x%lx\n", addr); + r = -EFAULT; + goto out; + } + + mutex_lock(&parent->migrate_mutex); + 
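/* take the parent's migrate_mutex first, then the child's with lockdep + * nesting class 1, so migrate_mutex is always acquired in the same order + */ +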
if (prange != parent) + mutex_lock_nested(&prange->migrate_mutex, 1); + + if (!prange->actual_loc) + goto out_unlock_prange; + + svm_range_lock(parent); + if (prange != parent) + mutex_lock_nested(&prange->lock, 1); + r = svm_range_split_by_granularity(p, mm, addr, parent, prange); + if (prange != parent) + mutex_unlock(&prange->lock); + svm_range_unlock(parent); + if (r) { + pr_debug("failed %d to split range by granularity\n", r); + goto out_unlock_prange; + } + + r = svm_migrate_vram_to_ram(prange, mm); + if (r) + pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, + prange, prange->start, prange->last); + + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && parent == prange) + op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; + else + op = SVM_OP_UPDATE_RANGE_NOTIFIER; + svm_range_add_list_work(&p->svms, parent, mm, op); + schedule_deferred_list_work(&p->svms); + +out_unlock_prange: + if (prange != parent) + mutex_unlock(&prange->migrate_mutex); + mutex_unlock(&parent->migrate_mutex); +out: + mutex_unlock(&p->svms.lock); + kfd_unref_process(p); + + pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); + + return r ? VM_FAULT_SIGBUS : 0; +} + +static const struct dev_pagemap_ops svm_migrate_pgmap_ops = { + .page_free = svm_migrate_page_free, + .migrate_to_ram = svm_migrate_to_ram, +}; + +/* Each VRAM page uses sizeof(struct page) on system memory */ +#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page)) + +int svm_migrate_init(struct amdgpu_device *adev) +{ + struct kfd_dev *kfddev = adev->kfd.dev; + struct dev_pagemap *pgmap; + struct resource *res; + unsigned long size; + void *r; + + /* Page migration works on Vega10 or newer */ + if (kfddev->device_info->asic_family < CHIP_VEGA10) + return -EINVAL; + + pgmap = &kfddev->pgmap; + memset(pgmap, 0, sizeof(*pgmap)); + + /* TODO: register all vram to HMM for now. + * should remove reserved size + */ + size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20); + res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); + if (IS_ERR(res)) + return -ENOMEM; + + pgmap->type = MEMORY_DEVICE_PRIVATE; + pgmap->nr_range = 1; + pgmap->range.start = res->start; + pgmap->range.end = res->end; + pgmap->ops = &svm_migrate_pgmap_ops; + pgmap->owner = adev; + pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + r = devm_memremap_pages(adev->dev, pgmap); + if (IS_ERR(r)) { + pr_err("failed to register HMM device memory\n"); + devm_release_mem_region(adev->dev, res->start, + res->end - res->start + 1); + return PTR_ERR(r); + } + + pr_debug("reserve %ldMB system memory for VRAM pages struct\n", + SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20); + + amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size)); + + pr_info("HMM registered %ldMB device memory\n", size >> 20); + + return 0; +} + +void svm_migrate_fini(struct amdgpu_device *adev) +{ + struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap; + + devm_memunmap_pages(adev->dev, pgmap); + devm_release_mem_region(adev->dev, pgmap->range.start, + pgmap->range.end - pgmap->range.start + 1); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h new file mode 100644 index 000000000000..0de76b5d4973 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_MIGRATE_H_ +#define KFD_MIGRATE_H_ + +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + +#include <linux/rwsem.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/sched/mm.h> +#include <linux/hmm.h> +#include "kfd_priv.h" +#include "kfd_svm.h" + +enum MIGRATION_COPY_DIR { + FROM_RAM_TO_VRAM = 0, + FROM_VRAM_TO_RAM +}; + +int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm); +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); +unsigned long +svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); + +int svm_migrate_init(struct amdgpu_device *adev); +void svm_migrate_fini(struct amdgpu_device *adev); + +#else + +static inline int svm_migrate_init(struct amdgpu_device *adev) +{ + return 0; +} +static inline void svm_migrate_fini(struct amdgpu_device *adev) +{ + /* empty */ +} + +#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ + +#endif /* KFD_MIGRATE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e840dd581719..d8e940f03102 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -124,14 +124,14 @@ static int pm_create_runlist_ib(struct packet_manager *pm, { unsigned int alloc_size_bytes; unsigned int *rl_buffer, rl_wptr, i; - int retval, proccesses_mapped; + int retval, processes_mapped; struct device_process_node *cur; struct qcm_process_device *qpd; struct queue *q; struct kernel_queue *kq; bool is_over_subscription; - rl_wptr = retval = proccesses_mapped = 0; + rl_wptr = retval = processes_mapped = 0; retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, &alloc_size_bytes, &is_over_subscription); @@ -148,7 +148,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, list_for_each_entry(cur, queues, list) { qpd = cur->qpd; /* build map process packet */ - if (proccesses_mapped >= pm->dqm->processes_count) { + if (processes_mapped >= pm->dqm->processes_count) { pr_debug("Not enough space left in runlist IB\n"); pm_release_ib(pm); return -ENOMEM; @@ -158,7 +158,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, if (retval) return retval; - proccesses_mapped++; + processes_mapped++; inc_wptr(&rl_wptr, pm->pmf->map_process_size, alloc_size_bytes); @@ -242,7 +242,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case 
CHIP_RAVEN: case CHIP_RENOIR: case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_NAVI14: @@ -250,8 +249,13 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: pm->pmf = &kfd_v9_pm_funcs; break; + case CHIP_ALDEBARAN: + pm->pmf = &kfd_aldebaran_pm_funcs; + break; default: WARN(1, "Unexpected ASIC family %u", dqm->dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index e3ba0cd3b6fa..7ea3f671b325 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -24,6 +24,7 @@ #include "kfd_kernel_queue.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_ai.h" +#include "kfd_pm4_headers_aldebaran.h" #include "kfd_pm4_opcodes.h" #include "gc/gc_10_1_0_sh_mask.h" @@ -35,7 +36,6 @@ static int pm_map_process_v9(struct packet_manager *pm, packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; @@ -73,6 +73,45 @@ static int pm_map_process_v9(struct packet_manager *pm, return 0; } +static int pm_map_process_aldebaran(struct packet_manager *pm, + uint32_t *buffer, struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process_aldebaran *packet; + uint64_t vm_page_table_base_addr = qpd->page_table_base; + + packet = (struct pm4_mes_map_process_aldebaran *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process_aldebaran)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 10; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields14.gds_size = qpd->gds_size & 0x3F; + packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF; + packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0; + packet->bitfields14.num_oac = qpd->num_oac; + packet->bitfields14.sdma_enable = 1; + packet->bitfields14.num_queues = (qpd->is_debug) ? 
0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + if (qpd->tba_addr) { + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); + } + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + packet->vm_context_page_table_base_addr_lo32 = + lower_32_bits(vm_page_table_base_addr); + packet->vm_context_page_table_base_addr_hi32 = + upper_32_bits(vm_page_table_base_addr); + + return 0; +} + static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { @@ -324,3 +363,20 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; + +const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { + .map_process = pm_map_process_aldebaran, + .runlist = pm_runlist_v9, + .set_resources = pm_set_resources_v9, + .map_queues = pm_map_queues_v9, + .unmap_queues = pm_unmap_queues_v9, + .query_status = pm_query_status_v9, + .release_mem = NULL, + .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = 0, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h new file mode 100644 index 000000000000..f795ec815e2a --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h @@ -0,0 +1,93 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED +#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED + +struct pm4_mes_map_process_aldebaran { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; /* 0 - 15 */ + uint32_t single_memops:1; /* 16 */ + uint32_t reserved1:1; /* 17 */ + uint32_t debug_vmid:4; /* 18 - 21 */ + uint32_t new_debug:1; /* 22 */ + uint32_t tmz:1; /* 23 */ + uint32_t diq_enable:1; /* 24 */ + uint32_t process_quantum:7; /* 25 - 31 */ + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t vm_context_page_table_base_addr_lo32; + + uint32_t vm_context_page_table_base_addr_hi32; + + uint32_t sh_mem_bases; + + uint32_t sh_mem_config; + + uint32_t sq_shader_tba_lo; + + uint32_t sq_shader_tba_hi; + + uint32_t sq_shader_tma_lo; + + uint32_t sq_shader_tma_hi; + + uint32_t reserved6; + + uint32_t gds_addr_lo; + + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:7; + uint32_t sdma_enable:1; + uint32_t num_oac:4; + uint32_t gds_size_hi:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields14; + uint32_t ordinal14; + }; + + uint32_t spi_gdbg_per_vmid_cntl; + + uint32_t tcp_watch_cntl[4]; + + uint32_t completion_signal_lo; + + uint32_t completion_signal_hi; + +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0b6595f7acda..6dc22fa1e555 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -322,6 +322,9 @@ struct kfd_dev { unsigned int max_doorbell_slices; int noretry; + + /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ + struct dev_pagemap pgmap; }; enum kfd_mempool { @@ -669,7 +672,7 @@ struct kfd_process_device { /* VM context for GPUVM allocations */ struct file *drm_file; - void *vm; + void *drm_priv; /* GPUVM allocations storage */ struct idr alloc_idr; @@ -731,6 +734,18 @@ struct kfd_process_device { #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) +struct svm_range_list { + struct mutex lock; + struct rb_root_cached objects; + struct list_head list; + struct work_struct deferred_list_work; + struct list_head deferred_range_list; + spinlock_t deferred_list_lock; + atomic_t evicted_ranges; + struct delayed_work restore_work; + DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE); +}; + /* Process data */ struct kfd_process { /* @@ -809,6 +824,11 @@ struct kfd_process { struct kobject *kobj; struct kobject *kobj_queues; struct attribute attr_pasid; + + /* shared virtual memory registered by this process */ + struct svm_range_list svms; + + bool xnack_enabled; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -842,6 +862,20 @@ struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); + +int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); +int kfd_process_gpuid_from_kgd(struct kfd_process *p, + struct amdgpu_device *adev, uint32_t *gpuid, + uint32_t *gpuidx); +static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, + uint32_t gpuidx, uint32_t *gpuid) { + return gpuidx < p->n_pdds ? 
p->pdds[gpuidx]->dev->id : -EINVAL; +} +static inline struct kfd_process_device *kfd_process_device_from_gpuidx( + struct kfd_process *p, uint32_t gpuidx) { + return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL; +} + void kfd_unref_process(struct kfd_process *p); int kfd_process_evict_queues(struct kfd_process *p); int kfd_process_restore_queues(struct kfd_process *p); @@ -857,6 +891,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); +bool kfd_process_xnack_mode(struct kfd_process *p, bool supported); + int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); @@ -1052,6 +1088,7 @@ struct packet_manager_funcs { extern const struct packet_manager_funcs kfd_vi_pm_funcs; extern const struct packet_manager_funcs kfd_v9_pm_funcs; +extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs; int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); void pm_uninit(struct packet_manager *pm, bool hanging); @@ -1107,7 +1144,9 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, void kfd_signal_reset_event(struct kfd_dev *dev); -void kfd_flush_tlb(struct kfd_process_device *pdd); +void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid); + +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d97e330a5022..09b98a83f670 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -42,6 +42,7 @@ struct mm_struct; #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "kfd_iommu.h" +#include "kfd_svm.h" /* * List of struct kfd_process (field kfd_process). @@ -108,8 +109,6 @@ static void kfd_sdma_activity_worker(struct work_struct *work) workarea = container_of(work, struct kfd_sdma_activity_handler_workarea, sdma_activity_work); - if (!workarea) - return; pdd = workarea->pdd; if (!pdd) @@ -250,7 +249,7 @@ cleanup: } /** - * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device + * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device * by current process. Translates acquired wave count into number of compute units * that are occupied. 
* @@ -647,8 +646,9 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, { struct kfd_dev *dev = pdd->dev; - amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL); + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv, + NULL); } /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process @@ -667,11 +667,12 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, int err; err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, - pdd->vm, &mem, NULL, flags); + pdd->drm_priv, &mem, NULL, flags); if (err) goto err_alloc_mem; - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm); + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, + pdd->drm_priv, NULL); if (err) goto err_map_mem; @@ -712,7 +713,8 @@ sync_memory_failed: return err; err_map_mem: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv, + NULL); err_alloc_mem: *kptr = NULL; return err; @@ -901,13 +903,14 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *peer_pdd = p->pdds[i]; - if (!peer_pdd->vm) + if (!peer_pdd->drm_priv) continue; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer_pdd->dev->kgd, mem, peer_pdd->vm); + peer_pdd->dev->kgd, mem, peer_pdd->drm_priv); } - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, + pdd->drm_priv, NULL); kfd_process_device_remove_obj_handle(pdd, id); } } @@ -932,7 +935,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->drm_file) { amdgpu_amdkfd_gpuvm_release_process_vm( - pdd->dev->kgd, pdd->vm); + pdd->dev->kgd, pdd->drm_priv); fput(pdd->drm_file); } @@ -1000,6 +1003,7 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_iommu_unbind_process(p); kfd_process_free_outstanding_kfd_bos(p); + svm_range_list_fini(p); kfd_process_destroy_pdds(p); dma_fence_put(p->ef); @@ -1058,6 +1062,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); + cancel_delayed_work_sync(&p->svms.restore_work); mutex_lock(&p->mutex); @@ -1186,6 +1191,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, } } +bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) +{ + int i; + + /* On most GFXv9 GPUs, the retry mode in the SQ must match the + * boot time retry setting. Mixing processes with different + * XNACK/retry settings can hang the GPU. + * + * Different GPUs can have different noretry settings depending + * on HW bugs or limitations. We need to find at least one + * XNACK mode for this process that's compatible with all GPUs. + * Fortunately GPUs with retry enabled (noretry=0) can run code + * built for XNACK-off. On GFXv9 it may perform slower. + * + * Therefore applications built for XNACK-off can always be + * supported and will be our fallback if any GPU does not + * support retry. + */ + for (i = 0; i < p->n_pdds; i++) { + struct kfd_dev *dev = p->pdds[i]->dev; + + /* Only consider GFXv9 and higher GPUs. Older GPUs don't + * support the SVM APIs and don't need to be considered + * for the XNACK mode selection. 
+ */ + if (dev->device_info->asic_family < CHIP_VEGA10) + continue; + /* Aldebaran can always support XNACK because it can support + * per-process XNACK mode selection. But let the dev->noretry + * setting still influence the default XNACK mode. + */ + if (supported && + dev->device_info->asic_family == CHIP_ALDEBARAN) + continue; + + /* GFXv10 and later GPUs do not support shader preemption + * during page faults. This can lead to poor QoS for queue + * management and memory-manager-related preemptions or + * even deadlocks. + */ + if (dev->device_info->asic_family >= CHIP_NAVI10) + return false; + + if (dev->noretry) + return false; + } + + return true; +} + /* * On return the kfd_process is fully operational and will be freed when the * mm is released @@ -1224,6 +1279,13 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_init_apertures; + /* Check XNACK support after PDDs are created in kfd_init_apertures */ + process->xnack_enabled = kfd_process_xnack_mode(process, false); + + err = svm_range_list_init(process); + if (err) + goto err_init_svm_range_list; + /* alloc_notifier needs to find the process in the hash table */ hash_add_rcu(kfd_processes_table, &process->kfd_processes, (uintptr_t)process->mm); @@ -1246,6 +1308,8 @@ static struct kfd_process *create_process(const struct task_struct *thread) err_register_notifier: hash_del_rcu(&process->kfd_processes); + svm_range_list_fini(process); +err_init_svm_range_list: kfd_process_free_outstanding_kfd_bos(process); kfd_process_destroy_pdds(process); err_init_apertures: @@ -1375,7 +1439,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, if (!drm_file) return -EINVAL; - if (pdd->vm) + if (pdd->drm_priv) return -EBUSY; p = pdd->process; @@ -1383,13 +1447,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( dev->kgd, drm_file, p->pasid, - &pdd->vm, &p->kgd_process_info, &p->ef); + &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; } - - amdgpu_vm_set_task_info(pdd->vm); + pdd->drm_priv = drm_file->private_data; ret = kfd_process_device_reserve_ib_mem(pdd); if (ret) @@ -1405,7 +1468,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, err_init_cwsr: err_reserve_ib_mem: kfd_process_device_free_bos(pdd); - pdd->vm = NULL; + pdd->drm_priv = NULL; return ret; } @@ -1429,7 +1492,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } - if (!pdd->vm) + if (!pdd->drm_priv) return ERR_PTR(-ENODEV); /* @@ -1600,6 +1663,32 @@ int kfd_process_restore_queues(struct kfd_process *p) return ret; } +int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) +{ + int i; + + for (i = 0; i < p->n_pdds; i++) + if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id) + return i; + return -EINVAL; +} + +int +kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, + uint32_t *gpuid, uint32_t *gpuidx) +{ + struct kgd_dev *kgd = (struct kgd_dev *)adev; + int i; + + for (i = 0; i < p->n_pdds; i++) + if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) { + *gpuid = p->pdds[i]->dev->id; + *gpuidx = i; + return 0; + } + return -EINVAL; +} + static void evict_process_worker(struct work_struct *work) { int ret; @@ -1748,7 +1837,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); } -void kfd_flush_tlb(struct kfd_process_device *pdd) +void 
kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) { struct kfd_dev *dev = pdd->dev; @@ -1761,7 +1850,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd) pdd->qpd.vmid); } else { amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, - pdd->process->pasid); + pdd->process->pasid, type); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c new file mode 100644 index 000000000000..dff1011dd7ee --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -0,0 +1,3102 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/types.h> +#include <linux/sched/task.h> +#include "amdgpu_sync.h" +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_mn.h" +#include "amdgpu.h" +#include "amdgpu_xgmi.h" +#include "kfd_priv.h" +#include "kfd_svm.h" +#include "kfd_migrate.h" + +#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 + +/* Long enough to ensure no retry fault comes after svm range is restored and + * page table is updated. + */ +#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000 + +static void svm_range_evict_svm_bo_worker(struct work_struct *work); +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); + +static const struct mmu_interval_notifier_ops svm_range_mn_ops = { + .invalidate = svm_range_cpu_invalidate_pagetables, +}; + +/** + * svm_range_unlink - unlink svm_range from lists and interval tree + * @prange: svm range structure to be removed + * + * Remove the svm_range from the svms and svm_bo lists and the svms + * interval tree. 
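+ * If it_node was never set up (start and last still zero), the interval + * tree removal is skipped.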
+ * + * Context: The caller must hold svms->lock + */ +static void svm_range_unlink(struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange, prange->start, prange->last); + + if (prange->svm_bo) { + spin_lock(&prange->svm_bo->list_lock); + list_del(&prange->svm_bo_list); + spin_unlock(&prange->svm_bo->list_lock); + } + + list_del(&prange->list); + if (prange->it_node.start != 0 && prange->it_node.last != 0) + interval_tree_remove(&prange->it_node, &prange->svms->objects); +} + +static void +svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange, prange->start, prange->last); + + mmu_interval_notifier_insert_locked(&prange->notifier, mm, + prange->start << PAGE_SHIFT, + prange->npages << PAGE_SHIFT, + &svm_range_mn_ops); +} + +/** + * svm_range_add_to_svms - add svm range to svms + * @prange: svm range structure to be added + * + * Add the svm range to svms interval tree and link list + * + * Context: The caller must hold svms->lock + */ +static void svm_range_add_to_svms(struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange, prange->start, prange->last); + + list_add_tail(&prange->list, &prange->svms->list); + prange->it_node.start = prange->start; + prange->it_node.last = prange->last; + interval_tree_insert(&prange->it_node, &prange->svms->objects); +} + +static void svm_range_remove_notifier(struct svm_range *prange) +{ + pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange, + prange->notifier.interval_tree.start >> PAGE_SHIFT, + prange->notifier.interval_tree.last >> PAGE_SHIFT); + + if (prange->notifier.interval_tree.start != 0 && + prange->notifier.interval_tree.last != 0) + mmu_interval_notifier_remove(&prange->notifier); +} + +static int +svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr, + unsigned long *hmm_pfns, uint64_t npages) +{ + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + dma_addr_t *addr = *dma_addr; + struct page *page; + int i, r; + + if (!addr) { + addr = kvmalloc_array(npages, sizeof(*addr), + GFP_KERNEL | __GFP_ZERO); + if (!addr) + return -ENOMEM; + *dma_addr = addr; + } + + for (i = 0; i < npages; i++) { + if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), + "leaking dma mapping\n")) + dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); + + page = hmm_pfn_to_page(hmm_pfns[i]); + addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); + r = dma_mapping_error(dev, addr[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + return r; + } + pr_debug("dma mapping 0x%llx for page addr 0x%lx\n", + addr[i] >> PAGE_SHIFT, page_to_pfn(page)); + } + return 0; +} + +static int +svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, + unsigned long *hmm_pfns) +{ + struct kfd_process *p; + uint32_t gpuidx; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + + pr_debug("mapping to gpu idx 0x%x\n", gpuidx); + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx], + hmm_pfns, prange->npages); + if (r) + break; + } + + return r; +} + +void svm_range_dma_unmap(struct device *dev, 
dma_addr_t *dma_addr, + unsigned long offset, unsigned long npages) +{ + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + int i; + + if (!dma_addr) + return; + + for (i = offset; i < offset + npages; i++) { + if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) + continue; + pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); + dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); + dma_addr[i] = 0; + } +} + +void svm_range_free_dma_mappings(struct svm_range *prange) +{ + struct kfd_process_device *pdd; + dma_addr_t *dma_addr; + struct device *dev; + struct kfd_process *p; + uint32_t gpuidx; + + p = container_of(prange->svms, struct kfd_process, svms); + + for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { + dma_addr = prange->dma_addr[gpuidx]; + if (!dma_addr) + continue; + + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + continue; + } + dev = &pdd->dev->pdev->dev; + svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); + kvfree(dma_addr); + prange->dma_addr[gpuidx] = NULL; + } +} + +static void svm_range_free(struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, + prange->start, prange->last); + + svm_range_vram_node_free(prange); + svm_range_free_dma_mappings(prange); + mutex_destroy(&prange->lock); + mutex_destroy(&prange->migrate_mutex); + kfree(prange); +} + +static void +svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc, + uint8_t *granularity, uint32_t *flags) +{ + *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + *granularity = 9; + *flags = + KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT; +} + +static struct +svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, + uint64_t last) +{ + uint64_t size = last - start + 1; + struct svm_range *prange; + struct kfd_process *p; + + prange = kzalloc(sizeof(*prange), GFP_KERNEL); + if (!prange) + return NULL; + prange->npages = size; + prange->svms = svms; + prange->start = start; + prange->last = last; + INIT_LIST_HEAD(&prange->list); + INIT_LIST_HEAD(&prange->update_list); + INIT_LIST_HEAD(&prange->remove_list); + INIT_LIST_HEAD(&prange->insert_list); + INIT_LIST_HEAD(&prange->svm_bo_list); + INIT_LIST_HEAD(&prange->deferred_list); + INIT_LIST_HEAD(&prange->child_list); + atomic_set(&prange->invalid, 0); + prange->validate_timestamp = 0; + mutex_init(&prange->migrate_mutex); + mutex_init(&prange->lock); + + p = container_of(svms, struct kfd_process, svms); + if (p->xnack_enabled) + bitmap_copy(prange->bitmap_access, svms->bitmap_supported, + MAX_GPU_INSTANCE); + + svm_range_set_default_attributes(&prange->preferred_loc, + &prange->prefetch_loc, + &prange->granularity, &prange->flags); + + pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last); + + return prange; +} + +static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo) +{ + if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref)) + return false; + + return true; +} + +static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) +{ + if (svm_bo) + kref_get(&svm_bo->kref); + + return svm_bo; +} + +static void svm_range_bo_release(struct kref *kref) +{ + struct svm_range_bo *svm_bo; + + svm_bo = container_of(kref, struct svm_range_bo, kref); + spin_lock(&svm_bo->list_lock); + while (!list_empty(&svm_bo->range_list)) { + struct svm_range *prange = + list_first_entry(&svm_bo->range_list, + struct svm_range, svm_bo_list); + /* 
list_del_init tells a concurrent svm_range_vram_node_new when + * it's safe to reuse the svm_bo pointer and svm_bo_list head. + */ + list_del_init(&prange->svm_bo_list); + spin_unlock(&svm_bo->list_lock); + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange->start, prange->last); + mutex_lock(&prange->lock); + prange->svm_bo = NULL; + mutex_unlock(&prange->lock); + + spin_lock(&svm_bo->list_lock); + } + spin_unlock(&svm_bo->list_lock); + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { + /* We're not in the eviction worker. + * Signal the fence and synchronize with any + * pending eviction work. + */ + dma_fence_signal(&svm_bo->eviction_fence->base); + cancel_work_sync(&svm_bo->eviction_work); + } + dma_fence_put(&svm_bo->eviction_fence->base); + amdgpu_bo_unref(&svm_bo->bo); + kfree(svm_bo); +} + +static void svm_range_bo_unref(struct svm_range_bo *svm_bo) +{ + if (!svm_bo) + return; + + kref_put(&svm_bo->kref, svm_range_bo_release); +} + +static bool +svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) +{ + struct amdgpu_device *bo_adev; + + mutex_lock(&prange->lock); + if (!prange->svm_bo) { + mutex_unlock(&prange->lock); + return false; + } + if (prange->ttm_res) { + /* We still have a reference, all is well */ + mutex_unlock(&prange->lock); + return true; + } + if (svm_bo_ref_unless_zero(prange->svm_bo)) { + /* + * Migrate from GPU to GPU, remove range from source bo_adev + * svm_bo range list, and return false to allocate svm_bo from + * destination adev. + */ + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + if (bo_adev != adev) { + mutex_unlock(&prange->lock); + + spin_lock(&prange->svm_bo->list_lock); + list_del_init(&prange->svm_bo_list); + spin_unlock(&prange->svm_bo->list_lock); + + svm_range_bo_unref(prange->svm_bo); + return false; + } + if (READ_ONCE(prange->svm_bo->evicting)) { + struct dma_fence *f; + struct svm_range_bo *svm_bo; + /* The BO is getting evicted, + * we need to get a new one + */ + mutex_unlock(&prange->lock); + svm_bo = prange->svm_bo; + f = dma_fence_get(&svm_bo->eviction_fence->base); + svm_range_bo_unref(prange->svm_bo); + /* wait for the fence to avoid long spin-loop + * at list_empty_careful + */ + dma_fence_wait(f, false); + dma_fence_put(f); + } else { + /* The BO was still around and we got + * a new reference to it + */ + mutex_unlock(&prange->lock); + pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + + prange->ttm_res = prange->svm_bo->bo->tbo.resource; + return true; + } + + } else { + mutex_unlock(&prange->lock); + } + + /* We need a new svm_bo. Spin-loop to wait for concurrent + * svm_range_bo_release to finish removing this range from + * its range list. After this, it is safe to reuse the + * svm_bo pointer and svm_bo_list head. 
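+ * The wait should be short: svm_range_bo_release clears one range's svm_bo + * pointer per iteration, holding the list_lock only briefly each time.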
+ */ + while (!list_empty_careful(&prange->svm_bo_list)) + ; + + return false; +} + +static struct svm_range_bo *svm_range_bo_new(void) +{ + struct svm_range_bo *svm_bo; + + svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL); + if (!svm_bo) + return NULL; + + kref_init(&svm_bo->kref); + INIT_LIST_HEAD(&svm_bo->range_list); + spin_lock_init(&svm_bo->list_lock); + + return svm_bo; +} + +int +svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, + bool clear) +{ + struct amdgpu_bo_param bp; + struct svm_range_bo *svm_bo; + struct amdgpu_bo_user *ubo; + struct amdgpu_bo *bo; + struct kfd_process *p; + struct mm_struct *mm; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, + prange->start, prange->last); + + if (svm_range_validate_svm_bo(adev, prange)) + return 0; + + svm_bo = svm_range_bo_new(); + if (!svm_bo) { + pr_debug("failed to alloc svm bo\n"); + return -ENOMEM; + } + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("failed to get mm\n"); + kfree(svm_bo); + return -ESRCH; + } + svm_bo->svms = prange->svms; + svm_bo->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + mm, + svm_bo); + mmput(mm); + INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker); + svm_bo->evicting = 0; + memset(&bp, 0, sizeof(bp)); + bp.size = prange->npages * PAGE_SIZE; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0; + bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + + r = amdgpu_bo_create_user(adev, &bp, &ubo); + if (r) { + pr_debug("failed %d to create bo\n", r); + goto create_bo_failed; + } + bo = &ubo->bo; + r = amdgpu_bo_reserve(bo, true); + if (r) { + pr_debug("failed %d to reserve bo\n", r); + goto reserve_bo_failed; + } + + r = dma_resv_reserve_shared(bo->tbo.base.resv, 1); + if (r) { + pr_debug("failed %d to reserve bo\n", r); + amdgpu_bo_unreserve(bo); + goto reserve_bo_failed; + } + amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true); + + amdgpu_bo_unreserve(bo); + + svm_bo->bo = bo; + prange->svm_bo = svm_bo; + prange->ttm_res = bo->tbo.resource; + prange->offset = 0; + + spin_lock(&svm_bo->list_lock); + list_add(&prange->svm_bo_list, &svm_bo->range_list); + spin_unlock(&svm_bo->list_lock); + + return 0; + +reserve_bo_failed: + amdgpu_bo_unref(&bo); +create_bo_failed: + dma_fence_put(&svm_bo->eviction_fence->base); + kfree(svm_bo); + prange->ttm_res = NULL; + + return r; +} + +void svm_range_vram_node_free(struct svm_range *prange) +{ + svm_range_bo_unref(prange->svm_bo); + prange->ttm_res = NULL; +} + +struct amdgpu_device * +svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + int32_t gpu_idx; + + p = container_of(prange->svms, struct kfd_process, svms); + + gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id); + if (gpu_idx < 0) { + pr_debug("failed to get device by id 0x%x\n", gpu_id); + return NULL; + } + pdd = kfd_process_device_from_gpuidx(p, gpu_idx); + if (!pdd) { + pr_debug("failed to get device by idx 0x%x\n", gpu_idx); + return NULL; + } + + return (struct amdgpu_device *)pdd->dev->kgd; +} + +static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + + return 
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + +static int +svm_range_check_attr(struct kfd_process *p, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + uint32_t i; + + for (i = 0; i < nattr; i++) { + uint32_t val = attrs[i].value; + int gpuidx = MAX_GPU_INSTANCE; + + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM && + val != KFD_IOCTL_SVM_LOCATION_UNDEFINED) + gpuidx = kfd_process_gpuidx_from_gpuid(p, val); + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM) + gpuidx = kfd_process_gpuidx_from_gpuid(p, val); + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + gpuidx = kfd_process_gpuidx_from_gpuid(p, val); + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + break; + default: + pr_debug("unknown attr type 0x%x\n", attrs[i].type); + return -EINVAL; + } + + if (gpuidx < 0) { + pr_debug("no GPU 0x%x found\n", val); + return -EINVAL; + } else if (gpuidx < MAX_GPU_INSTANCE && + !test_bit(gpuidx, p->svms.bitmap_supported)) { + pr_debug("GPU 0x%x not supported\n", val); + return -EINVAL; + } + } + + return 0; +} + +static void +svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + uint32_t i; + int gpuidx; + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + prange->preferred_loc = attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + prange->prefetch_loc = attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + gpuidx = kfd_process_gpuidx_from_gpuid(p, + attrs[i].value); + if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { + bitmap_clear(prange->bitmap_access, gpuidx, 1); + bitmap_clear(prange->bitmap_aip, gpuidx, 1); + } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) { + bitmap_set(prange->bitmap_access, gpuidx, 1); + bitmap_clear(prange->bitmap_aip, gpuidx, 1); + } else { + bitmap_clear(prange->bitmap_access, gpuidx, 1); + bitmap_set(prange->bitmap_aip, gpuidx, 1); + } + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + prange->flags |= attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + prange->flags &= ~attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + prange->granularity = attrs[i].value; + break; + default: + WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); + } + } +} + +/** + * svm_range_debug_dump - print all range information from svms + * @svms: svm range list header + * + * debug output svm range start, end, prefetch location from svms + * interval tree and link list + * + * Context: The caller must hold svms->lock + */ +static void svm_range_debug_dump(struct svm_range_list *svms) +{ + struct interval_tree_node *node; + struct svm_range *prange; + + pr_debug("dump svms 0x%p list\n", svms); + pr_debug("range\tstart\tpage\tend\t\tlocation\n"); + + list_for_each_entry(prange, &svms->list, list) { + pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n", + prange, prange->start, prange->npages, + prange->start + prange->npages - 1, + prange->actual_loc); + } + + pr_debug("dump svms 0x%p interval tree\n", svms); + pr_debug("range\tstart\tpage\tend\t\tlocation\n"); + node = interval_tree_iter_first(&svms->objects, 0, ~0ULL); + while (node) { + prange = 
container_of(node, struct svm_range, it_node); + pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n", + prange, prange->start, prange->npages, + prange->start + prange->npages - 1, + prange->actual_loc); + node = interval_tree_iter_next(node, 0, ~0ULL); + } +} + +static bool +svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) +{ + return (old->prefetch_loc == new->prefetch_loc && + old->flags == new->flags && + old->granularity == new->granularity); +} + +static int +svm_range_split_array(void *ppnew, void *ppold, size_t size, + uint64_t old_start, uint64_t old_n, + uint64_t new_start, uint64_t new_n) +{ + unsigned char *new, *old, *pold; + uint64_t d; + + if (!ppold) + return 0; + pold = *(unsigned char **)ppold; + if (!pold) + return 0; + + new = kvmalloc_array(new_n, size, GFP_KERNEL); + if (!new) + return -ENOMEM; + + d = (new_start - old_start) * size; + memcpy(new, pold + d, new_n * size); + + old = kvmalloc_array(old_n, size, GFP_KERNEL); + if (!old) { + kvfree(new); + return -ENOMEM; + } + + d = (new_start == old_start) ? new_n * size : 0; + memcpy(old, pold + d, old_n * size); + + kvfree(pold); + *(void **)ppold = old; + *(void **)ppnew = new; + + return 0; +} + +static int +svm_range_split_pages(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + uint64_t npages = last - start + 1; + int i, r; + + for (i = 0; i < MAX_GPU_INSTANCE; i++) { + r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i], + sizeof(*old->dma_addr[i]), old->start, + npages, new->start, new->npages); + if (r) + return r; + } + + return 0; +} + +static int +svm_range_split_nodes(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + uint64_t npages = last - start + 1; + + pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n", + new->svms, new, new->start, start, last); + + if (new->start == old->start) { + new->offset = old->offset; + old->offset += new->npages; + } else { + new->offset = old->offset + npages; + } + + new->svm_bo = svm_range_bo_ref(old->svm_bo); + new->ttm_res = old->ttm_res; + + spin_lock(&new->svm_bo->list_lock); + list_add(&new->svm_bo_list, &new->svm_bo->range_list); + spin_unlock(&new->svm_bo->list_lock); + + return 0; +} + +/** + * svm_range_split_adjust - split range and adjust + * + * @new: new range + * @old: the old range + * @start: the old range adjust to start address in pages + * @last: the old range adjust to last address in pages + * + * Copy system memory dma_addr or vram ttm_res in old range to new + * range from new_start up to size new->npages, the remaining old range is from + * start to last + * + * Return: + * 0 - OK, -ENOMEM - out of memory + */ +static int +svm_range_split_adjust(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + int r; + + pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n", + new->svms, new->start, old->start, old->last, start, last); + + if (new->start < old->start || + new->last > old->last) { + WARN_ONCE(1, "invalid new range start or last\n"); + return -EINVAL; + } + + r = svm_range_split_pages(new, old, start, last); + if (r) + return r; + + if (old->actual_loc && old->ttm_res) { + r = svm_range_split_nodes(new, old, start, last); + if (r) + return r; + } + + old->npages = last - start + 1; + old->start = start; + old->last = last; + new->flags = old->flags; + new->preferred_loc = old->preferred_loc; + new->prefetch_loc = old->prefetch_loc; + new->actual_loc = old->actual_loc; + new->granularity = 
old->granularity; + bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); + bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + + return 0; +} + +/** + * svm_range_split - split a range in 2 ranges + * + * @prange: the svm range to split + * @start: the remaining range start address in pages + * @last: the remaining range last address in pages + * @new: the result new range generated + * + * Two cases only: + * case 1: if start == prange->start + * prange ==> prange[start, last] + * new range [last + 1, prange->last] + * + * case 2: if last == prange->last + * prange ==> prange[start, last] + * new range [prange->start, start - 1] + * + * Return: + * 0 - OK, -ENOMEM - out of memory, -EINVAL - invalid start, last + */ +static int +svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, + struct svm_range **new) +{ + uint64_t old_start = prange->start; + uint64_t old_last = prange->last; + struct svm_range_list *svms; + int r = 0; + + pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms, + old_start, old_last, start, last); + + if (old_start != start && old_last != last) + return -EINVAL; + if (start < old_start || last > old_last) + return -EINVAL; + + svms = prange->svms; + if (old_start == start) + *new = svm_range_new(svms, last + 1, old_last); + else + *new = svm_range_new(svms, old_start, start - 1); + if (!*new) + return -ENOMEM; + + r = svm_range_split_adjust(*new, prange, start, last); + if (r) { + pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", + r, old_start, old_last, start, last); + svm_range_free(*new); + *new = NULL; + } + + return r; +} + +static int +svm_range_split_tail(struct svm_range *prange, struct svm_range *new, + uint64_t new_last, struct list_head *insert_list) +{ + struct svm_range *tail; + int r = svm_range_split(prange, prange->start, new_last, &tail); + + if (!r) + list_add(&tail->insert_list, insert_list); + return r; +} + +static int +svm_range_split_head(struct svm_range *prange, struct svm_range *new, + uint64_t new_start, struct list_head *insert_list) +{ + struct svm_range *head; + int r = svm_range_split(prange, new_start, prange->last, &head); + + if (!r) + list_add(&head->insert_list, insert_list); + return r; +} + +static void +svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, + struct svm_range *pchild, enum svm_work_list_ops op) +{ + pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", + pchild, pchild->start, pchild->last, prange, op); + + pchild->work_item.mm = mm; + pchild->work_item.op = op; + list_add_tail(&pchild->child_list, &prange->child_list); +} + +/** + * svm_range_split_by_granularity - collect ranges within granularity boundary + * + * @p: the process with svms list + * @mm: mm structure + * @addr: the vm fault address in pages, to split the prange + * @parent: parent range if prange is from child list + * @prange: prange to split + * + * Trims @prange to be a single aligned block of prange->granularity if + * possible. The head and tail are added to the child_list in @parent. 
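+ * + * A worked example, with assumed values: for granularity 9, size = 1UL << 9 + * = 0x200 pages (2MB with 4KB pages). A fault at addr 0x12345 gives + * start = ALIGN_DOWN(0x12345, 0x200) = 0x12200 and + * last = ALIGN(0x12346, 0x200) - 1 = 0x123ff, trimming prange to the single + * 512-page block that contains the faulting page.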
+ * + * Context: caller must hold mmap_read_lock and prange->lock + * + * Return: + * 0 - OK, otherwise error code + */ +int +svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, + unsigned long addr, struct svm_range *parent, + struct svm_range *prange) +{ + struct svm_range *head, *tail; + unsigned long start, last, size; + int r; + + /* Align the split range start and size to the granularity size, so that + * a single PTE covers the whole range; this reduces the number of PTEs + * updated and the L1 TLB space used for translation. + */ + size = 1UL << prange->granularity; + start = ALIGN_DOWN(addr, size); + last = ALIGN(addr + 1, size) - 1; + + pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", + prange->svms, prange->start, prange->last, start, last, size); + + if (start > prange->start) { + r = svm_range_split(prange, start, prange->last, &head); + if (r) + return r; + svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); + } + + if (last < prange->last) { + r = svm_range_split(prange, prange->start, last, &tail); + if (r) + return r; + svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + } + + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) { + prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; + pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", + prange, prange->start, prange->last, + SVM_OP_ADD_RANGE_AND_MAP); + } + return 0; +} + +static uint64_t +svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) +{ + struct amdgpu_device *bo_adev; + uint32_t flags = prange->flags; + uint32_t mapping_flags = 0; + uint64_t pte_flags; + bool snoop = !prange->ttm_res; + bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; + + if (prange->svm_bo && prange->ttm_res) + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + if (prange->svm_bo && prange->ttm_res) { + if (bo_adev == adev) { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (amdgpu_xgmi_same_hive(adev, bo_adev)) + snoop = true; + } + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + case CHIP_ALDEBARAN: + if (prange->svm_bo && prange->ttm_res) { + if (bo_adev == adev) { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + if (adev->gmc.xgmi.connected_to_cpu) + snoop = true; + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (amdgpu_xgmi_same_hive(adev, bo_adev)) + snoop = true; + } + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + default: + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + + mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; + + if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO) + mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE; + if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + + pte_flags = AMDGPU_PTE_VALID; + pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM; + pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; + + pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); + + pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n", + prange->svms, prange->start, prange->last, + prange->ttm_res ?
1:0, pte_flags, mapping_flags); + + return pte_flags; +} + +static int +svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, + uint64_t start, uint64_t last, + struct dma_fence **fence) +{ + uint64_t init_pte_value = 0; + + pr_debug("[0x%llx 0x%llx]\n", start, last); + + return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL, + start, last, init_pte_value, 0, + NULL, NULL, fence, NULL); +} + +static int +svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, + unsigned long last) +{ + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); + struct kfd_process_device *pdd; + struct dma_fence *fence = NULL; + struct amdgpu_device *adev; + struct kfd_process *p; + uint32_t gpuidx; + int r = 0; + + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, + MAX_GPU_INSTANCE); + p = container_of(prange->svms, struct kfd_process, svms); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pr_debug("unmap from gpu idx 0x%x\n", gpuidx); + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + start, last, &fence); + if (r) + break; + + if (fence) { + r = dma_fence_wait(fence, false); + dma_fence_put(fence); + fence = NULL; + if (r) + break; + } + amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, + p->pasid, TLB_FLUSH_HEAVYWEIGHT); + } + + return r; +} + +static int +svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct svm_range *prange, dma_addr_t *dma_addr, + struct amdgpu_device *bo_adev, struct dma_fence **fence) +{ + struct amdgpu_bo_va bo_va; + bool table_freed = false; + uint64_t pte_flags; + int r = 0; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + if (prange->svm_bo && prange->ttm_res) { + bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); + prange->mapping.bo_va = &bo_va; + } + + prange->mapping.start = prange->start; + prange->mapping.last = prange->last; + prange->mapping.offset = prange->ttm_res ? 
prange->offset : 0; + pte_flags = svm_range_get_pte_flags(adev, prange); + + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, + prange->mapping.start, + prange->mapping.last, pte_flags, + prange->mapping.offset, + prange->ttm_res, + dma_addr, &vm->last_update, + &table_freed); + if (r) { + pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); + goto out; + } + + r = amdgpu_vm_update_pdes(adev, vm, false); + if (r) { + pr_debug("failed %d to update directories 0x%lx\n", r, + prange->start); + goto out; + } + + if (fence) + *fence = dma_fence_get(vm->last_update); + + if (table_freed) { + struct kfd_process *p; + + p = container_of(prange->svms, struct kfd_process, svms); + amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, + p->pasid, TLB_FLUSH_LEGACY); + } +out: + prange->mapping.bo_va = NULL; + return r; +} + +static int svm_range_map_to_gpus(struct svm_range *prange, + unsigned long *bitmap, bool wait) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *bo_adev; + struct amdgpu_device *adev; + struct kfd_process *p; + struct dma_fence *fence = NULL; + uint32_t gpuidx; + int r = 0; + + if (prange->svm_bo && prange->ttm_res) + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + else + bo_adev = NULL; + + p = container_of(prange->svms, struct kfd_process, svms); + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pr_debug("mapping to gpu idx 0x%x\n", gpuidx); + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + pdd = kfd_bind_process_to_device(pdd->dev, p); + if (IS_ERR(pdd)) + return -EINVAL; + + if (bo_adev && adev != bo_adev && + !amdgpu_xgmi_same_hive(adev, bo_adev)) { + pr_debug("cannot map to device idx %d\n", gpuidx); + continue; + } + + r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + prange, prange->dma_addr[gpuidx], + bo_adev, wait ? 
&fence : NULL); + if (r) + break; + + if (fence) { + r = dma_fence_wait(fence, false); + dma_fence_put(fence); + fence = NULL; + if (r) { + pr_debug("failed %d to dma fence wait\n", r); + break; + } + } + } + + return r; +} + +struct svm_validate_context { + struct kfd_process *process; + struct svm_range *prange; + bool intr; + unsigned long bitmap[MAX_GPU_INSTANCE]; + struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1]; + struct list_head validate_list; + struct ww_acquire_ctx ticket; +}; + +static int svm_range_reserve_bos(struct svm_validate_context *ctx) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct amdgpu_vm *vm; + uint32_t gpuidx; + int r; + + INIT_LIST_HEAD(&ctx->validate_list); + for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + vm = drm_priv_to_vm(pdd->drm_priv); + + ctx->tv[gpuidx].bo = &vm->root.bo->tbo; + ctx->tv[gpuidx].num_shared = 4; + list_add(&ctx->tv[gpuidx].head, &ctx->validate_list); + } + if (ctx->prange->svm_bo && ctx->prange->ttm_res) { + ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo; + ctx->tv[MAX_GPU_INSTANCE].num_shared = 1; + list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list); + } + + r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list, + ctx->intr, NULL); + if (r) { + pr_debug("failed %d to reserve bo\n", r); + return r; + } + + for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + r = -EINVAL; + goto unreserve_out; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv), + svm_range_bo_validate, NULL); + if (r) { + pr_debug("failed %d validate pt bos\n", r); + goto unreserve_out; + } + } + + return 0; + +unreserve_out: + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); + return r; +} + +static void svm_range_unreserve_bos(struct svm_validate_context *ctx) +{ + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); +} + +/* + * Validation+GPU mapping with concurrent invalidation (MMU notifiers) + * + * To prevent concurrent destruction or change of range attributes, the + * svm_read_lock must be held. The caller must not hold the svm_write_lock + * because that would block concurrent evictions and lead to deadlocks. To + * serialize concurrent migrations or validations of the same range, the + * prange->migrate_mutex must be held. + * + * For VRAM ranges, the SVM BO must be allocated and valid (protected by its + * eviction fence. + * + * The following sequence ensures race-free validation and GPU mapping: + * + * 1. Reserve page table (and SVM BO if range is in VRAM) + * 2. hmm_range_fault to get page addresses (if system memory) + * 3. DMA-map pages (if system memory) + * 4-a. Take notifier lock + * 4-b. Check that pages still valid (mmu_interval_read_retry) + * 4-c. Check that the range was not split or otherwise invalidated + * 4-d. Update GPU page table + * 4.e. Release notifier lock + * 5. 
Release page table (and SVM BO) reservation + */ +static int svm_range_validate_and_map(struct mm_struct *mm, + struct svm_range *prange, + int32_t gpuidx, bool intr, bool wait) +{ + struct svm_validate_context ctx; + struct hmm_range *hmm_range; + int r = 0; + + ctx.process = container_of(prange->svms, struct kfd_process, svms); + ctx.prange = prange; + ctx.intr = intr; + + if (gpuidx < MAX_GPU_INSTANCE) { + bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE); + bitmap_set(ctx.bitmap, gpuidx, 1); + } else if (ctx.process->xnack_enabled) { + bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); + + /* If prefetch range to GPU, or GPU retry fault migrate range to + * GPU, which has ACCESS attribute to the range, create mapping + * on that GPU. + */ + if (prange->actual_loc) { + gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process, + prange->actual_loc); + if (gpuidx < 0) { + WARN_ONCE(1, "failed get device by id 0x%x\n", + prange->actual_loc); + return -EINVAL; + } + if (test_bit(gpuidx, prange->bitmap_access)) + bitmap_set(ctx.bitmap, gpuidx, 1); + } + } else { + bitmap_or(ctx.bitmap, prange->bitmap_access, + prange->bitmap_aip, MAX_GPU_INSTANCE); + } + + if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) + return 0; + + if (prange->actual_loc && !prange->ttm_res) { + /* This should never happen. actual_loc gets set by + * svm_migrate_ram_to_vram after allocating a BO. + */ + WARN(1, "VRAM BO missing during validation\n"); + return -EINVAL; + } + + svm_range_reserve_bos(&ctx); + + if (!prange->actual_loc) { + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, &hmm_range, + false, true); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + goto unreserve_out; + } + + r = svm_range_dma_map(prange, ctx.bitmap, + hmm_range->hmm_pfns); + if (r) { + pr_debug("failed %d to dma map range\n", r); + goto unreserve_out; + } + + prange->validated_once = true; + } + + svm_range_lock(prange); + if (!prange->actual_loc) { + if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + pr_debug("hmm update the range, need validate again\n"); + r = -EAGAIN; + goto unlock_out; + } + } + if (!list_empty(&prange->child_list)) { + pr_debug("range split by unmap in parallel, validate again\n"); + r = -EAGAIN; + goto unlock_out; + } + + r = svm_range_map_to_gpus(prange, ctx.bitmap, wait); + +unlock_out: + svm_range_unlock(prange); +unreserve_out: + svm_range_unreserve_bos(&ctx); + + if (!r) + prange->validate_timestamp = ktime_to_us(ktime_get()); + + return r; +} + +/** + * svm_range_list_lock_and_flush_work - flush pending deferred work + * + * @svms: the svm range list + * @mm: the mm structure + * + * Context: Returns with mmap write lock held, pending deferred work flushed + * + */ +static void +svm_range_list_lock_and_flush_work(struct svm_range_list *svms, + struct mm_struct *mm) +{ +retry_flush_work: + flush_work(&svms->deferred_list_work); + mmap_write_lock(mm); + + if (list_empty(&svms->deferred_range_list)) + return; + mmap_write_unlock(mm); + pr_debug("retry flush\n"); + goto retry_flush_work; +} + +static void svm_range_restore_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info; + struct svm_range_list *svms; + struct svm_range *prange; + struct kfd_process *p; + struct mm_struct *mm; + int evicted_ranges; + int invalid; + int r; + + svms = container_of(dwork, struct svm_range_list, restore_work); + evicted_ranges = atomic_read(&svms->evicted_ranges); + if 
(!evicted_ranges) + return; + + pr_debug("restore svm ranges\n"); + + /* kfd_process_notifier_release destroys this worker thread. So during + * the lifetime of this thread, kfd_process and mm will be valid. + */ + p = container_of(svms, struct kfd_process, svms); + process_info = p->kgd_process_info; + mm = p->mm; + if (!mm) + return; + + mutex_lock(&process_info->lock); + svm_range_list_lock_and_flush_work(svms, mm); + mutex_lock(&svms->lock); + + evicted_ranges = atomic_read(&svms->evicted_ranges); + + list_for_each_entry(prange, &svms->list, list) { + invalid = atomic_read(&prange->invalid); + if (!invalid) + continue; + + pr_debug("restoring svms 0x%p prange 0x%p [0x%lx 0x%lx] inv %d\n", + prange->svms, prange, prange->start, prange->last, + invalid); + + /* + * If the range is migrating, wait for the migration to finish. + */ + mutex_lock(&prange->migrate_mutex); + + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + false, true); + if (r) + pr_debug("failed %d to map 0x%lx to gpus\n", r, + prange->start); + + mutex_unlock(&prange->migrate_mutex); + if (r) + goto out_reschedule; + + if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid) + goto out_reschedule; + } + + if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) != + evicted_ranges) + goto out_reschedule; + + evicted_ranges = 0; + + r = kgd2kfd_resume_mm(mm); + if (r) { + /* No recovery from this failure. Probably the CP is + * hanging. No point trying again. + */ + pr_debug("failed %d to resume KFD\n", r); + } + + pr_debug("restored svm ranges successfully\n"); + +out_reschedule: + mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + mutex_unlock(&process_info->lock); + + /* If validation failed, reschedule another attempt */ + if (evicted_ranges) { + pr_debug("reschedule to restore svm range\n"); + schedule_delayed_work(&svms->restore_work, + msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + } +} + +/** + * svm_range_evict - evict svm range + * + * Stop all queues of the process to ensure the GPU doesn't access the memory, + * then return to let the CPU evict the buffer and proceed with the CPU page + * table update. + * + * No lock is needed to synchronize CPU page table invalidation with GPU + * execution. If invalidation happens while the restore work is running, the + * restore work will restart to ensure it maps the latest CPU pages to the + * GPU before starting the queues.
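+ * + * In short (see the two branches below): with xnack off, the first eviction + * quiesces the process queues and schedules svm_range_restore_work to + * revalidate and resume them; with xnack on, the range is only unmapped from + * the GPUs and the next GPU access recovers it through the retry fault path.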
+ */ +static int +svm_range_evict(struct svm_range *prange, struct mm_struct *mm, + unsigned long start, unsigned long last) +{ + struct svm_range_list *svms = prange->svms; + struct kfd_process *p; + int r = 0; + + p = container_of(svms, struct kfd_process, svms); + + pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n", + svms, prange->start, prange->last, start, last); + + if (!p->xnack_enabled) { + int evicted_ranges; + + atomic_inc(&prange->invalid); + evicted_ranges = atomic_inc_return(&svms->evicted_ranges); + if (evicted_ranges != 1) + return r; + + pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + + /* First eviction, stop the queues */ + r = kgd2kfd_quiesce_mm(mm); + if (r) + pr_debug("failed to quiesce KFD\n"); + + pr_debug("schedule to restore svm %p ranges\n", svms); + schedule_delayed_work(&svms->restore_work, + msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + } else { + struct svm_range *pchild; + unsigned long s, l; + + pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", + prange->svms, start, last); + list_for_each_entry(pchild, &prange->child_list, child_list) { + mutex_lock_nested(&pchild->lock, 1); + s = max(start, pchild->start); + l = min(last, pchild->last); + if (l >= s) + svm_range_unmap_from_gpus(pchild, s, l); + mutex_unlock(&pchild->lock); + } + s = max(start, prange->start); + l = min(last, prange->last); + if (l >= s) + svm_range_unmap_from_gpus(prange, s, l); + } + + return r; +} + +static struct svm_range *svm_range_clone(struct svm_range *old) +{ + struct svm_range *new; + + new = svm_range_new(old->svms, old->start, old->last); + if (!new) + return NULL; + + if (old->svm_bo) { + new->ttm_res = old->ttm_res; + new->offset = old->offset; + new->svm_bo = svm_range_bo_ref(old->svm_bo); + spin_lock(&new->svm_bo->list_lock); + list_add(&new->svm_bo_list, &new->svm_bo->range_list); + spin_unlock(&new->svm_bo->list_lock); + } + new->flags = old->flags; + new->preferred_loc = old->preferred_loc; + new->prefetch_loc = old->prefetch_loc; + new->actual_loc = old->actual_loc; + new->granularity = old->granularity; + bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); + bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + + return new; +} + +/** + * svm_range_handle_overlap - split overlapping ranges + * @svms: svm range list header + * @new: the range being added, carrying the new attributes + * @start: start address of the added range, in pages + * @last: last address of the added range, in pages + * @update_list: output, the ranges whose attributes are updated. For set_attr, + * these will be validated and mapped to GPUs. For unmap, these + * will be removed and unmapped from GPUs + * @insert_list: output, the ranges to be inserted into svms; attributes are + * not changed. For set_attr, these will be added to svms. + * @remove_list: output, the ranges to be removed from svms + * @left: the remaining range after overlap; for set_attr, this will be added + * as a new range. + * + * There are five overlap cases in total. + * + * This function handles overlap of an address interval with existing + * struct svm_ranges for applying new attributes. This may require + * splitting existing struct svm_ranges. All changes should be applied to + * the range_list and interval tree transactionally. If any split operation + * fails, the entire update fails. Therefore the existing overlapping + * svm_ranges are cloned and the original svm_ranges are left unchanged.
If the + * transaction succeeds, the modified clones are added and the originals + * freed. Otherwise the clones are removed and the old svm_ranges remain. + * + * Context: The caller must hold svms->lock + */ +static int +svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, + unsigned long start, unsigned long last, + struct list_head *update_list, + struct list_head *insert_list, + struct list_head *remove_list, + unsigned long *left) +{ + struct interval_tree_node *node; + struct svm_range *prange; + struct svm_range *tmp; + int r = 0; + + INIT_LIST_HEAD(update_list); + INIT_LIST_HEAD(insert_list); + INIT_LIST_HEAD(remove_list); + + node = interval_tree_iter_first(&svms->objects, start, last); + while (node) { + struct interval_tree_node *next; + struct svm_range *old; + unsigned long next_start; + + pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start, + node->last); + + old = container_of(node, struct svm_range, it_node); + next = interval_tree_iter_next(node, start, last); + next_start = min(node->last, last) + 1; + + if (node->start < start || node->last > last) { + /* node intersects the updated range, clone+split it */ + prange = svm_range_clone(old); + if (!prange) { + r = -ENOMEM; + goto out; + } + + list_add(&old->remove_list, remove_list); + list_add(&prange->insert_list, insert_list); + + if (node->start < start) { + pr_debug("change old range start\n"); + r = svm_range_split_head(prange, new, start, + insert_list); + if (r) + goto out; + } + if (node->last > last) { + pr_debug("change old range last\n"); + r = svm_range_split_tail(prange, new, last, + insert_list); + if (r) + goto out; + } + } else { + /* The node is contained within start..last, + * just update it + */ + prange = old; + } + + if (!svm_range_is_same_attrs(prange, new)) + list_add(&prange->update_list, update_list); + + /* insert a new node if needed */ + if (node->start > start) { + prange = svm_range_new(prange->svms, start, + node->start - 1); + if (!prange) { + r = -ENOMEM; + goto out; + } + + list_add(&prange->insert_list, insert_list); + list_add(&prange->update_list, update_list); + } + + node = next; + start = next_start; + } + + if (left && start <= last) + *left = last - start + 1; + +out: + if (r) + list_for_each_entry_safe(prange, tmp, insert_list, insert_list) + svm_range_free(prange); + + return r; +} + +static void +svm_range_update_notifier_and_interval_tree(struct mm_struct *mm, + struct svm_range *prange) +{ + unsigned long start; + unsigned long last; + + start = prange->notifier.interval_tree.start >> PAGE_SHIFT; + last = prange->notifier.interval_tree.last >> PAGE_SHIFT; + + if (prange->start == start && prange->last == last) + return; + + pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", + prange->svms, prange, start, last, prange->start, + prange->last); + + if (start != 0 && last != 0) { + interval_tree_remove(&prange->it_node, &prange->svms->objects); + svm_range_remove_notifier(prange); + } + prange->it_node.start = prange->start; + prange->it_node.last = prange->last; + + interval_tree_insert(&prange->it_node, &prange->svms->objects); + svm_range_add_notifier_locked(mm, prange); +} + +static void +svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) +{ + struct mm_struct *mm = prange->work_item.mm; + + switch (prange->work_item.op) { + case SVM_OP_NULL: + pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + break; + case SVM_OP_UNMAP_RANGE: + pr_debug("remove 
0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_unlink(prange); + svm_range_remove_notifier(prange); + svm_range_free(prange); + break; + case SVM_OP_UPDATE_RANGE_NOTIFIER: + pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_update_notifier_and_interval_tree(mm, prange); + break; + case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP: + pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_update_notifier_and_interval_tree(mm, prange); + /* TODO: implement deferred validation and mapping */ + break; + case SVM_OP_ADD_RANGE: + pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, + prange->start, prange->last); + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + break; + case SVM_OP_ADD_RANGE_AND_MAP: + pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, + prange, prange->start, prange->last); + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + /* TODO: implement deferred validation and mapping */ + break; + default: + WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange, + prange->work_item.op); + } +} + +static void svm_range_drain_retry_fault(struct svm_range_list *svms) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct kfd_process *p; + uint32_t i; + + p = container_of(svms, struct kfd_process, svms); + + for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { + pdd = p->pdds[i]; + if (!pdd) + continue; + + pr_debug("drain retry fault gpu %d svms %p\n", i, svms); + adev = (struct amdgpu_device *)pdd->dev->kgd; + + amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1); + pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); + } +} + +static void svm_range_deferred_list_work(struct work_struct *work) +{ + struct svm_range_list *svms; + struct svm_range *prange; + struct mm_struct *mm; + + svms = container_of(work, struct svm_range_list, deferred_list_work); + pr_debug("enter svms 0x%p\n", svms); + + spin_lock(&svms->deferred_list_lock); + while (!list_empty(&svms->deferred_range_list)) { + prange = list_first_entry(&svms->deferred_range_list, + struct svm_range, deferred_list); + spin_unlock(&svms->deferred_list_lock); + pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange, + prange->start, prange->last, prange->work_item.op); + + /* Make sure no stale retry fault coming after range is freed */ + if (prange->work_item.op == SVM_OP_UNMAP_RANGE) + svm_range_drain_retry_fault(prange->svms); + + mm = prange->work_item.mm; + mmap_write_lock(mm); + mutex_lock(&svms->lock); + + /* Remove from deferred_list must be inside mmap write lock, + * otherwise, svm_range_list_lock_and_flush_work may hold mmap + * write lock, and continue because deferred_list is empty, then + * deferred_list handle is blocked by mmap write lock. 
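+ * + * Note the resulting lock order in this worker: mmap write lock, then + * svms->lock, then the deferred_list_lock spinlock for the list_del below.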
+ */ + spin_lock(&svms->deferred_list_lock); + list_del_init(&prange->deferred_list); + spin_unlock(&svms->deferred_list_lock); + + mutex_lock(&prange->migrate_mutex); + while (!list_empty(&prange->child_list)) { + struct svm_range *pchild; + + pchild = list_first_entry(&prange->child_list, + struct svm_range, child_list); + pr_debug("child prange 0x%p op %d\n", pchild, + pchild->work_item.op); + list_del_init(&pchild->child_list); + svm_range_handle_list_op(svms, pchild); + } + mutex_unlock(&prange->migrate_mutex); + + svm_range_handle_list_op(svms, prange); + mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + + spin_lock(&svms->deferred_list_lock); + } + spin_unlock(&svms->deferred_list_lock); + + pr_debug("exit svms 0x%p\n", svms); +} + +void +svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, + struct mm_struct *mm, enum svm_work_list_ops op) +{ + spin_lock(&svms->deferred_list_lock); + /* if prange is on the deferred list */ + if (!list_empty(&prange->deferred_list)) { + pr_debug("update exist prange 0x%p work op %d\n", prange, op); + WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n"); + if (op != SVM_OP_NULL && + prange->work_item.op != SVM_OP_UNMAP_RANGE) + prange->work_item.op = op; + } else { + prange->work_item.op = op; + prange->work_item.mm = mm; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } + spin_unlock(&svms->deferred_list_lock); +} + +void schedule_deferred_list_work(struct svm_range_list *svms) +{ + spin_lock(&svms->deferred_list_lock); + if (!list_empty(&svms->deferred_range_list)) + schedule_work(&svms->deferred_list_work); + spin_unlock(&svms->deferred_list_lock); +} + +static void +svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, + struct svm_range *prange, unsigned long start, + unsigned long last) +{ + struct svm_range *head; + struct svm_range *tail; + + if (prange->work_item.op == SVM_OP_UNMAP_RANGE) { + pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange, + prange->start, prange->last); + return; + } + if (start > prange->last || last < prange->start) + return; + + head = tail = prange; + if (start > prange->start) + svm_range_split(prange, prange->start, start - 1, &tail); + if (last < tail->last) + svm_range_split(tail, last + 1, tail->last, &head); + + if (head != prange && tail != prange) { + svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + } else if (tail != prange) { + svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + } else if (head != prange) { + svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + } else if (parent != prange) { + prange->work_item.op = SVM_OP_UNMAP_RANGE; + } +} + +static void +svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, + unsigned long start, unsigned long last) +{ + struct svm_range_list *svms; + struct svm_range *pchild; + struct kfd_process *p; + unsigned long s, l; + bool unmap_parent; + + p = kfd_lookup_process_by_mm(mm); + if (!p) + return; + svms = &p->svms; + + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, + prange, prange->start, prange->last, start, last); + + unmap_parent = start <= prange->start && last >= prange->last; + + list_for_each_entry(pchild, &prange->child_list, child_list) { + mutex_lock_nested(&pchild->lock, 1); + s = max(start, pchild->start); + l = min(last, 
pchild->last); + if (l >= s) + svm_range_unmap_from_gpus(pchild, s, l); + svm_range_unmap_split(mm, prange, pchild, start, last); + mutex_unlock(&pchild->lock); + } + s = max(start, prange->start); + l = min(last, prange->last); + if (l >= s) + svm_range_unmap_from_gpus(prange, s, l); + svm_range_unmap_split(mm, prange, prange, start, last); + + if (unmap_parent) + svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); + else + svm_range_add_list_work(svms, prange, mm, + SVM_OP_UPDATE_RANGE_NOTIFIER); + schedule_deferred_list_work(svms); + + kfd_unref_process(p); +} + +/** + * svm_range_cpu_invalidate_pagetables - interval notifier callback + * + * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it + * is from migration, or CPU page invalidation callback. + * + * For unmap event, unmap range from GPUs, remove prange from svms in a delayed + * work thread, and split prange if only part of prange is unmapped. + * + * For invalidation event, if GPU retry fault is not enabled, evict the queues, + * then schedule svm_range_restore_work to update GPU mapping and resume queues. + * If GPU retry fault is enabled, unmap the svm range from GPU, retry fault will + * update GPU mapping to recover. + * + * Context: mmap lock, notifier_invalidate_start lock are held + * for invalidate event, prange lock is held if this is from migration + */ +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct svm_range *prange; + unsigned long start; + unsigned long last; + + if (range->event == MMU_NOTIFY_RELEASE) + return true; + + start = mni->interval_tree.start; + last = mni->interval_tree.last; + start = (start > range->start ? start : range->start) >> PAGE_SHIFT; + last = (last < (range->end - 1) ? 
last : range->end - 1) >> PAGE_SHIFT; + pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n", + start, last, range->start >> PAGE_SHIFT, + (range->end - 1) >> PAGE_SHIFT, + mni->interval_tree.start >> PAGE_SHIFT, + mni->interval_tree.last >> PAGE_SHIFT, range->event); + + prange = container_of(mni, struct svm_range, notifier); + + svm_range_lock(prange); + mmu_interval_set_seq(mni, cur_seq); + + switch (range->event) { + case MMU_NOTIFY_UNMAP: + svm_range_unmap_from_cpu(mni->mm, prange, start, last); + break; + default: + svm_range_evict(prange, mni->mm, start, last); + break; + } + + svm_range_unlock(prange); + + return true; +} + +/** + * svm_range_from_addr - find svm range from fault address + * @svms: svm range list header + * @addr: address to search range interval tree, in pages + * @parent: parent range if range is on child list + * + * Context: The caller must hold svms->lock + * + * Return: the svm_range found or NULL + */ +struct svm_range * +svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, + struct svm_range **parent) +{ + struct interval_tree_node *node; + struct svm_range *prange; + struct svm_range *pchild; + + node = interval_tree_iter_first(&svms->objects, addr, addr); + if (!node) + return NULL; + + prange = container_of(node, struct svm_range, it_node); + pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n", + addr, prange->start, prange->last, node->start, node->last); + + if (addr >= prange->start && addr <= prange->last) { + if (parent) + *parent = prange; + return prange; + } + list_for_each_entry(pchild, &prange->child_list, child_list) + if (addr >= pchild->start && addr <= pchild->last) { + pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n", + addr, pchild->start, pchild->last); + if (parent) + *parent = prange; + return pchild; + } + + return NULL; +} + +/* svm_range_best_restore_location - decide the best fault restore location + * @prange: svm range structure + * @adev: the GPU on which vm fault happened + * + * This is only called when xnack is on, to decide the best location to restore + * the range mapping after GPU vm fault. Caller uses the best location to do + * migration if actual loc is not best location, then update GPU page table + * mapping to the best location. + * + * If vm fault gpu is range preferred loc, the best_loc is preferred loc. + * If vm fault gpu idx is on range ACCESSIBLE bitmap, best_loc is vm fault gpu + * If vm fault gpu idx is on range ACCESSIBLE_IN_PLACE bitmap, then + * if range actual loc is cpu, best_loc is cpu + * if vm fault gpu is on xgmi same hive of range actual loc gpu, best_loc is + * range actual loc. + * Otherwise, GPU no access, best_loc is -1. 
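+ * + * A hypothetical example: a retry fault from GPU A on a range whose + * actual_loc is GPU B, where A is set only in the ACCESSIBLE_IN_PLACE + * bitmap: if A and B are in the same XGMI hive, best_loc is B and A is + * mapped to B's vram; otherwise best_loc is 0 and the range is migrated + * back to system memory.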
+ * + * Return: + * -1 means vm fault GPU no access + * 0 for CPU or GPU id + */ +static int32_t +svm_range_best_restore_location(struct svm_range *prange, + struct amdgpu_device *adev, + int32_t *gpuidx) +{ + struct amdgpu_device *bo_adev; + struct kfd_process *p; + uint32_t gpuid; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx); + if (r < 0) { + pr_debug("failed to get gpuid from kgd\n"); + return -1; + } + + if (prange->preferred_loc == gpuid) + return prange->preferred_loc; + + if (test_bit(*gpuidx, prange->bitmap_access)) + return gpuid; + + if (test_bit(*gpuidx, prange->bitmap_aip)) { + if (!prange->actual_loc) + return 0; + + bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc); + if (amdgpu_xgmi_same_hive(adev, bo_adev)) + return prange->actual_loc; + else + return 0; + } + + return -1; +} +static int +svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, + unsigned long *start, unsigned long *last) +{ + struct vm_area_struct *vma; + struct interval_tree_node *node; + unsigned long start_limit, end_limit; + + vma = find_vma(p->mm, addr << PAGE_SHIFT); + if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + pr_debug("VMA does not exist in address [0x%llx]\n", addr); + return -EFAULT; + } + start_limit = max(vma->vm_start >> PAGE_SHIFT, + (unsigned long)ALIGN_DOWN(addr, 2UL << 8)); + end_limit = min(vma->vm_end >> PAGE_SHIFT, + (unsigned long)ALIGN(addr + 1, 2UL << 8)); + /* First range that starts after the fault address */ + node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX); + if (node) { + end_limit = min(end_limit, node->start); + /* Last range that ends before the fault address */ + node = container_of(rb_prev(&node->rb), + struct interval_tree_node, rb); + } else { + /* Last range must end before addr because + * there was no range after addr + */ + node = container_of(rb_last(&p->svms.objects.rb_root), + struct interval_tree_node, rb); + } + if (node) { + if (node->last >= addr) { + WARN(1, "Overlap with prev node and page fault addr\n"); + return -EFAULT; + } + start_limit = max(start_limit, node->last + 1); + } + + *start = start_limit; + *last = end_limit - 1; + + pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n", + vma->vm_start >> PAGE_SHIFT, *start, + vma->vm_end >> PAGE_SHIFT, *last); + + return 0; + +} +static struct +svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, + struct kfd_process *p, + struct mm_struct *mm, + int64_t addr) +{ + struct svm_range *prange = NULL; + unsigned long start, last; + uint32_t gpuid, gpuidx; + + if (svm_range_get_range_boundaries(p, addr, &start, &last)) + return NULL; + + prange = svm_range_new(&p->svms, start, last); + if (!prange) { + pr_debug("Failed to create prange in address [0x%llx]\n", addr); + return NULL; + } + if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) { + pr_debug("failed to get gpuid from kgd\n"); + svm_range_free(prange); + return NULL; + } + + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + + return prange; +} + +/* svm_range_skip_recover - decide if prange can be recovered + * @prange: svm range structure + * + * GPU vm retry fault handle skip recover the range for cases: + * 1. prange is on deferred list to be removed after unmap, it is stale fault, + * deferred list work will drain the stale fault before free the prange. + * 2. prange is on deferred list to add interval notifier after split, or + * 3. 
prange is child range, it is split from parent prange, recover later + * after interval notifier is added. + * + * Return: true to skip recover, false to recover + */ +static bool svm_range_skip_recover(struct svm_range *prange) +{ + struct svm_range_list *svms = prange->svms; + + spin_lock(&svms->deferred_list_lock); + if (list_empty(&prange->deferred_list) && + list_empty(&prange->child_list)) { + spin_unlock(&svms->deferred_list_lock); + return false; + } + spin_unlock(&svms->deferred_list_lock); + + if (prange->work_item.op == SVM_OP_UNMAP_RANGE) { + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n", + svms, prange, prange->start, prange->last); + return true; + } + if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP || + prange->work_item.op == SVM_OP_ADD_RANGE) { + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n", + svms, prange, prange->start, prange->last); + return true; + } + return false; +} + +int +svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr) +{ + struct mm_struct *mm = NULL; + struct svm_range_list *svms; + struct svm_range *prange; + struct kfd_process *p; + uint64_t timestamp; + int32_t best_loc, gpuidx; + bool write_locked = false; + int r = 0; + + if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { + pr_debug("device does not support SVM\n"); + return -EFAULT; + } + + p = kfd_lookup_process_by_pasid(pasid); + if (!p) { + pr_debug("kfd process not founded pasid 0x%x\n", pasid); + return -ESRCH; + } + if (!p->xnack_enabled) { + pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); + return -EFAULT; + } + svms = &p->svms; + + pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); + + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("svms 0x%p failed to get mm\n", svms); + r = -ESRCH; + goto out; + } + + mmap_read_lock(mm); +retry_write_locked: + mutex_lock(&svms->lock); + prange = svm_range_from_addr(svms, addr, NULL); + if (!prange) { + pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", + svms, addr); + if (!write_locked) { + /* Need the write lock to create new range with MMU notifier. 
+ * Also flush pending deferred work to make sure the interval + * tree is up to date before we add a new range + */ + mutex_unlock(&svms->lock); + mmap_read_unlock(mm); + mmap_write_lock(mm); + write_locked = true; + goto retry_write_locked; + } + prange = svm_range_create_unregistered_range(adev, p, mm, addr); + if (!prange) { + pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n", + svms, addr); + mmap_write_downgrade(mm); + r = -EFAULT; + goto out_unlock_svms; + } + } + if (write_locked) + mmap_write_downgrade(mm); + + mutex_lock(&prange->migrate_mutex); + + if (svm_range_skip_recover(prange)) { + amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + goto out_unlock_range; + } + + timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; + /* skip duplicate vm fault on different pages of same range */ + if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { + pr_debug("svms 0x%p [0x%lx %lx] already restored\n", + svms, prange->start, prange->last); + goto out_unlock_range; + } + + best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); + if (best_loc == -1) { + pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n", + svms, prange->start, prange->last); + r = -EACCES; + goto out_unlock_range; + } + + pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n", + svms, prange->start, prange->last, best_loc, + prange->actual_loc); + + if (prange->actual_loc != best_loc) { + if (best_loc) { + r = svm_migrate_to_vram(prange, best_loc, mm); + if (r) { + pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", + r, addr); + /* Fallback to system memory if migration to + * VRAM failed + */ + if (prange->actual_loc) + r = svm_migrate_vram_to_ram(prange, mm); + else + r = 0; + } + } else { + r = svm_migrate_vram_to_ram(prange, mm); + } + if (r) { + pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", + r, svms, prange->start, prange->last); + goto out_unlock_range; + } + } + + r = svm_range_validate_and_map(mm, prange, gpuidx, false, false); + if (r) + pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", + r, svms, prange->start, prange->last); + +out_unlock_range: + mutex_unlock(&prange->migrate_mutex); +out_unlock_svms: + mutex_unlock(&svms->lock); + mmap_read_unlock(mm); + mmput(mm); +out: + kfd_unref_process(p); + + if (r == -EAGAIN) { + pr_debug("recover vm fault later\n"); + amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + r = 0; + } + return r; +} + +void svm_range_list_fini(struct kfd_process *p) +{ + struct svm_range *prange; + struct svm_range *next; + + pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms); + + /* Ensure list work is finished before process is destroyed */ + flush_work(&p->svms.deferred_list_work); + + list_for_each_entry_safe(prange, next, &p->svms.list, list) { + svm_range_unlink(prange); + svm_range_remove_notifier(prange); + svm_range_free(prange); + } + + mutex_destroy(&p->svms.lock); + + pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms); +} + +int svm_range_list_init(struct kfd_process *p) +{ + struct svm_range_list *svms = &p->svms; + int i; + + svms->objects = RB_ROOT_CACHED; + mutex_init(&svms->lock); + INIT_LIST_HEAD(&svms->list); + atomic_set(&svms->evicted_ranges, 0); + INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work); + INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work); + INIT_LIST_HEAD(&svms->deferred_range_list); + spin_lock_init(&svms->deferred_list_lock); + + for (i = 0; i < p->n_pdds; i++) + if 
(KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev)) + bitmap_set(svms->bitmap_supported, i, 1); + + return 0; +} + +/** + * svm_range_is_valid - check if virtual address range is valid + * @mm: current process mm_struct + * @start: range start address, in pages + * @size: range size, in pages + * + * A virtual address range is valid if it is fully covered by one or more + * VMAs, none of which are device VMAs (VM_IO, VM_PFNMAP or VM_MIXEDMAP) + * + * Context: Process context + * + * Return: + * true - valid svm range + * false - invalid svm range + */ +static bool +svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size) +{ + const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP; + struct vm_area_struct *vma; + unsigned long end; + + start <<= PAGE_SHIFT; + end = start + (size << PAGE_SHIFT); + + do { + vma = find_vma(mm, start); + if (!vma || start < vma->vm_start || + (vma->vm_flags & device_vma)) + return false; + start = min(end, vma->vm_end); + } while (start < end); + + return true; +} + +/** + * svm_range_add - add svm range and handle overlap + * @p: the process to add this range to + * @start: range start address, in pages, page size aligned + * @size: range size, in pages, page size aligned + * @nattr: number of attributes + * @attrs: array of attributes + * @update_list: output, the ranges that need to be validated and have their + * GPU mapping updated + * @insert_list: output, the ranges that need to be inserted into svms + * @remove_list: output, the ranges that are replaced and need to be removed + * from svms + * + * Check if the virtual address range overlaps registered ranges; split the + * overlapping ranges and copy and adjust the page addresses and vram nodes + * in the old and new ranges. + * + * Context: Process context, caller must hold svms->lock + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, + struct list_head *update_list, struct list_head *insert_list, + struct list_head *remove_list) +{ + uint64_t last = start + size - 1UL; + struct svm_range_list *svms; + struct svm_range new = {0}; + struct svm_range *prange; + unsigned long left = 0; + int r = 0; + + pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last); + + svm_range_apply_attrs(p, &new, nattr, attrs); + + svms = &p->svms; + + r = svm_range_handle_overlap(svms, &new, start, last, update_list, + insert_list, remove_list, &left); + if (r) + return r; + + if (left) { + prange = svm_range_new(svms, last - left + 1, last); + if (!prange) + return -ENOMEM; + list_add(&prange->insert_list, insert_list); + list_add(&prange->update_list, update_list); + } + + return 0; +} + +/* svm_range_best_prefetch_location - decide the best prefetch location + * @prange: svm range structure + * + * For xnack off: + * If the range maps to a single GPU, the best actual location is the prefetch + * loc, which can be CPU or GPU. + * + * If the range maps to multiple GPUs, the best actual location can be the + * prefetch_loc GPU only if the GPUs are connected in the same XGMI hive. If + * they are connected over PCIe, the best actual location is always the CPU, + * because a GPU cannot access the vram of other GPUs, assuming PCIe small BAR + * (large BAR support is not upstream). + * + * For xnack on: + * The best actual location is the prefetch location. If the GPUs are in the + * same XGMI hive, the range maps to multiple GPUs. Otherwise, the range maps + * only to the actual-location GPU; access from another GPU triggers a vm + * fault and migration.
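+ * + * A hypothetical example: with xnack off, prefetch_loc is GPU1 and the range + * is also accessible by GPU2. If GPU1 and GPU2 share an XGMI hive, best_loc + * is GPU1; if they are only connected over PCIe, best_loc falls back to 0 + * (CPU).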
+ * + * Context: Process context + * + * Return: + * 0 for CPU or GPU id + */ +static uint32_t +svm_range_best_prefetch_location(struct svm_range *prange) +{ + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); + uint32_t best_loc = prange->prefetch_loc; + struct kfd_process_device *pdd; + struct amdgpu_device *bo_adev; + struct amdgpu_device *adev; + struct kfd_process *p; + uint32_t gpuidx; + + p = container_of(prange->svms, struct kfd_process, svms); + + /* xnack on */ + if (p->xnack_enabled) + goto out; + + /* xnack off */ + if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) + goto out; + + bo_adev = svm_range_get_adev_by_id(prange, best_loc); + if (!bo_adev) { + WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc); + best_loc = 0; + goto out; + } + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, + MAX_GPU_INSTANCE); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to get device by idx 0x%x\n", gpuidx); + continue; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + if (adev == bo_adev) + continue; + + if (!amdgpu_xgmi_same_hive(adev, bo_adev)) { + best_loc = 0; + break; + } + } + +out: + pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n", + p->xnack_enabled, &p->svms, prange->start, prange->last, + best_loc); + + return best_loc; +} + +/* FIXME: This is a workaround for a page locking bug when some pages are + * invalid during migration to VRAM + */ +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) +{ + struct hmm_range *hmm_range; + int r; + + if (prange->validated_once) + return; + + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, &hmm_range, + false, true); + if (!r) { + amdgpu_hmm_range_get_pages_done(hmm_range); + prange->validated_once = true; + } +} + +/* svm_range_trigger_migration - start page migration if prefetch loc changed + * @mm: current process mm_struct + * @prange: svm range structure + * @migrated: output, true if migration is triggered + * + * If the range prefetch_loc is a GPU and the actual loc is cpu (0), migrate + * the range from ram to vram. + * If the range prefetch_loc is cpu (0) and the actual loc is a GPU, migrate + * the range from vram to ram. + * + * If GPU vm fault retry is not enabled, migration interacts with the MMU + * notifier and the restore work: + * 1. migrate_vma_setup invalidates pages; the MMU notifier callback + * svm_range_evict stops all queues and schedules the restore work + * 2. svm_range_restore_work waits for the migration to finish by + * a. svm_range_validate_vram taking prange->migrate_mutex + * b. svm_range_validate_ram HMM get pages waiting until the CPU fault + * handler returns + * 3. the restore work updates the GPU mappings and resumes all queues. + * + * Context: Process context + * + * Return: + * 0 - OK, otherwise - error code of migration + */ +static int +svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, + bool *migrated) +{ + uint32_t best_loc; + int r = 0; + + *migrated = false; + best_loc = svm_range_best_prefetch_location(prange); + + if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || + best_loc == prange->actual_loc) + return 0; + + /* + * Prefetch to GPU without the host access flag: set actual_loc to the + * gpu; validating on the gpu and mapping to the gpus is handled + * afterwards.
+ */ + if (best_loc && !prange->actual_loc && + !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) { + prange->actual_loc = best_loc; + return 0; + } + + if (!best_loc) { + r = svm_migrate_vram_to_ram(prange, mm); + *migrated = !r; + return r; + } + + r = svm_migrate_to_vram(prange, best_loc, mm); + *migrated = !r; + + return r; +} + +int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) +{ + if (!fence) + return -EINVAL; + + if (dma_fence_is_signaled(&fence->base)) + return 0; + + if (fence->svm_bo) { + WRITE_ONCE(fence->svm_bo->evicting, 1); + schedule_work(&fence->svm_bo->eviction_work); + } + + return 0; +} + +static void svm_range_evict_svm_bo_worker(struct work_struct *work) +{ + struct svm_range_bo *svm_bo; + struct kfd_process *p; + struct mm_struct *mm; + + svm_bo = container_of(work, struct svm_range_bo, eviction_work); + if (!svm_bo_ref_unless_zero(svm_bo)) + return; /* svm_bo was freed while eviction was pending */ + + /* svm_range_bo_release destroys this worker thread. So during + * the lifetime of this thread, kfd_process and mm will be valid. + */ + p = container_of(svm_bo->svms, struct kfd_process, svms); + mm = p->mm; + if (!mm) + return; + + mmap_read_lock(mm); + spin_lock(&svm_bo->list_lock); + while (!list_empty(&svm_bo->range_list)) { + struct svm_range *prange = + list_first_entry(&svm_bo->range_list, + struct svm_range, svm_bo_list); + list_del_init(&prange->svm_bo_list); + spin_unlock(&svm_bo->list_lock); + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange->start, prange->last); + + mutex_lock(&prange->migrate_mutex); + svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm); + + mutex_lock(&prange->lock); + prange->svm_bo = NULL; + mutex_unlock(&prange->lock); + + mutex_unlock(&prange->migrate_mutex); + + spin_lock(&svm_bo->list_lock); + } + spin_unlock(&svm_bo->list_lock); + mmap_read_unlock(mm); + + dma_fence_signal(&svm_bo->eviction_fence->base); + /* This is the last reference to svm_bo, after svm_range_vram_node_free + * has been called in svm_migrate_vram_to_ram + */ + WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n"); + svm_range_bo_unref(svm_bo); +} + +static int +svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + struct amdkfd_process_info *process_info = p->kgd_process_info; + struct mm_struct *mm = current->mm; + struct list_head update_list; + struct list_head insert_list; + struct list_head remove_list; + struct svm_range_list *svms; + struct svm_range *prange; + struct svm_range *next; + int r = 0; + + pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", + p->pasid, &p->svms, start, start + size - 1, size); + + r = svm_range_check_attr(p, nattr, attrs); + if (r) + return r; + + svms = &p->svms; + + mutex_lock(&process_info->lock); + + svm_range_list_lock_and_flush_work(svms, mm); + + if (!svm_range_is_valid(mm, start, size)) { + pr_debug("invalid range\n"); + r = -EFAULT; + mmap_write_unlock(mm); + goto out; + } + + mutex_lock(&svms->lock); + + /* Add new range and split existing ranges as needed */ + r = svm_range_add(p, start, size, nattr, attrs, &update_list, + &insert_list, &remove_list); + if (r) { + mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + goto out; + } + /* Apply changes as a transaction */ + list_for_each_entry_safe(prange, next, &insert_list, insert_list) { + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + } + list_for_each_entry(prange, 
&update_list, update_list) { + svm_range_apply_attrs(p, prange, nattr, attrs); + /* TODO: unmap ranges from GPUs that lost access */ + } + list_for_each_entry_safe(prange, next, &remove_list, + remove_list) { + pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange, prange->start, + prange->last); + svm_range_unlink(prange); + svm_range_remove_notifier(prange); + svm_range_free(prange); + } + + mmap_write_downgrade(mm); + /* Trigger migrations and revalidate and map to GPUs as needed. If + * this fails we may be left with partially completed actions. There + * is no clean way of rolling back to the previous state in such a + * case because the rollback wouldn't be guaranteed to work either. + */ + list_for_each_entry(prange, &update_list, update_list) { + bool migrated; + + mutex_lock(&prange->migrate_mutex); + + r = svm_range_trigger_migration(mm, prange, &migrated); + if (r) + goto out_unlock_range; + + if (migrated && !p->xnack_enabled) { + pr_debug("restore_work will update mappings of GPUs\n"); + mutex_unlock(&prange->migrate_mutex); + continue; + } + + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + true, true); + if (r) + pr_debug("failed %d to map svm range\n", r); + +out_unlock_range: + mutex_unlock(&prange->migrate_mutex); + if (r) + break; + } + + svm_range_debug_dump(svms); + + mutex_unlock(&svms->lock); + mmap_read_unlock(mm); +out: + mutex_unlock(&process_info->lock); + + pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, + &p->svms, start, start + size - 1, r); + + return r; +} + +static int +svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); + DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); + bool get_preferred_loc = false; + bool get_prefetch_loc = false; + bool get_granularity = false; + bool get_accessible = false; + bool get_flags = false; + uint64_t last = start + size - 1UL; + struct mm_struct *mm = current->mm; + uint8_t granularity = 0xff; + struct interval_tree_node *node; + struct svm_range_list *svms; + struct svm_range *prange; + uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + uint32_t flags = 0xffffffff; + int gpuidx; + uint32_t i; + + pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start, + start + size - 1, nattr); + + mmap_read_lock(mm); + if (!svm_range_is_valid(mm, start, size)) { + pr_debug("invalid range\n"); + mmap_read_unlock(mm); + return -EINVAL; + } + mmap_read_unlock(mm); + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + get_preferred_loc = true; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + get_prefetch_loc = true; + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + get_accessible = true; + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + get_flags = true; + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + get_granularity = true; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + fallthrough; + default: + pr_debug("get invalid attr type 0x%x\n", attrs[i].type); + return -EINVAL; + } + } + + svms = &p->svms; + + mutex_lock(&svms->lock); + + node = interval_tree_iter_first(&svms->objects, start, last); + if (!node) { + pr_debug("range attrs not found, return default values\n"); + svm_range_set_default_attributes(&location, &prefetch_loc, + &granularity, &flags);
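+ /* No range registered yet: report the defaults set above. With xnack + * on, unregistered svm memory is accessible on demand, so report all + * SVM-capable GPUs as accessible; with xnack off, none. + */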
+ if (p->xnack_enabled) + bitmap_copy(bitmap_access, svms->bitmap_supported, + MAX_GPU_INSTANCE); + else + bitmap_zero(bitmap_access, MAX_GPU_INSTANCE); + bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE); + goto fill_values; + } + bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); + bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE); + + while (node) { + struct interval_tree_node *next; + + prange = container_of(node, struct svm_range, it_node); + next = interval_tree_iter_next(node, start, last); + + if (get_preferred_loc) { + if (prange->preferred_loc == + KFD_IOCTL_SVM_LOCATION_UNDEFINED || + (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED && + location != prange->preferred_loc)) { + location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + get_preferred_loc = false; + } else { + location = prange->preferred_loc; + } + } + if (get_prefetch_loc) { + if (prange->prefetch_loc == + KFD_IOCTL_SVM_LOCATION_UNDEFINED || + (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED && + prefetch_loc != prange->prefetch_loc)) { + prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + get_prefetch_loc = false; + } else { + prefetch_loc = prange->prefetch_loc; + } + } + if (get_accessible) { + bitmap_and(bitmap_access, bitmap_access, + prange->bitmap_access, MAX_GPU_INSTANCE); + bitmap_and(bitmap_aip, bitmap_aip, + prange->bitmap_aip, MAX_GPU_INSTANCE); + } + if (get_flags) + flags &= prange->flags; + + if (get_granularity && prange->granularity < granularity) + granularity = prange->granularity; + + node = next; + } +fill_values: + mutex_unlock(&svms->lock); + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + attrs[i].value = location; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + attrs[i].value = prefetch_loc; + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + gpuidx = kfd_process_gpuidx_from_gpuid(p, + attrs[i].value); + if (gpuidx < 0) { + pr_debug("invalid gpuid %x\n", attrs[i].value); + return -EINVAL; + } + if (test_bit(gpuidx, bitmap_access)) + attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS; + else if (test_bit(gpuidx, bitmap_aip)) + attrs[i].type = + KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE; + else + attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS; + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + attrs[i].value = flags; + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + attrs[i].value = (uint32_t)granularity; + break; + } + } + + return 0; +} + +int +svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, + uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs) +{ + int r; + + start >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + + switch (op) { + case KFD_IOCTL_SVM_OP_SET_ATTR: + r = svm_range_set_attr(p, start, size, nattrs, attrs); + break; + case KFD_IOCTL_SVM_OP_GET_ATTR: + r = svm_range_get_attr(p, start, size, nattrs, attrs); + break; + default: + r = -EINVAL; + break; + } + + return r; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h new file mode 100644 index 000000000000..0c0fc399395e --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_SVM_H_ +#define KFD_SVM_H_ + +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + +#include <linux/rwsem.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/sched/mm.h> +#include <linux/hmm.h> +#include "amdgpu.h" +#include "kfd_priv.h" + +struct svm_range_bo { + struct amdgpu_bo *bo; + struct kref kref; + struct list_head range_list; /* all svm ranges sharing this bo */ + spinlock_t list_lock; + struct amdgpu_amdkfd_fence *eviction_fence; + struct work_struct eviction_work; + struct svm_range_list *svms; + uint32_t evicting; +}; + +enum svm_work_list_ops { + SVM_OP_NULL, + SVM_OP_UNMAP_RANGE, + SVM_OP_UPDATE_RANGE_NOTIFIER, + SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP, + SVM_OP_ADD_RANGE, + SVM_OP_ADD_RANGE_AND_MAP +}; + +struct svm_work_list_item { + enum svm_work_list_ops op; + struct mm_struct *mm; +}; + +/** + * struct svm_range - shared virtual memory range + * + * @svms: list of svm ranges, structure defined in kfd_process + * @migrate_mutex: to serialize range migration, validation and mapping update + * @start: range start address in pages + * @last: range last address in pages + * @it_node: node [start, last] stored in interval tree, start, last are page + * aligned, number of pages is (last - start + 1) + * @list: linked list node, used to scan all ranges of svms + * @update_list: linked list node used to add to update_list + * @remove_list: linked list node used to add to remove list + * @insert_list: linked list node used to add to insert list + * @mapping: bo_va mapping structure to create and update GPU page table + * @npages: number of pages + * @dma_addr: dma mapping address on each GPU for system memory physical page + * @ttm_res: vram ttm resource map + * @offset: range start offset within mm_nodes + * @svm_bo: struct to manage a split amdgpu_bo + * @svm_bo_list: linked list node, to scan all ranges which share the same + * svm_bo + * @lock: protect prange start, last, child_list, svm_bo_list + * @saved_flags: saved PF_MEMALLOC flags, restored when the range is unlocked + * @flags: flags defined as KFD_IOCTL_SVM_FLAG_* + * @preferred_loc: preferred location, 0 for CPU, or GPU id + * @prefetch_loc: last prefetch location, 0 for CPU, or GPU id + * @actual_loc: the actual location, 0 for CPU, or GPU id + * @granularity: migration granularity, log2 num pages + * @invalid: non-zero means the cpu page table is invalidated + * @validate_timestamp: system timestamp when range is validated + * @notifier: registered mmu interval notifier + *
@work_item: deferred work item information + * @deferred_list: list header used to add range to deferred list + * @child_list: list header for split ranges which are not added to svms yet + * @bitmap_access: index bitmap of GPUs which can access the range + * @bitmap_aip: index bitmap of GPUs which can access the range in place + * + * Data structure for a virtual memory range shared by CPU and GPUs. It can be + * backed by system memory ram or device vram, and can migrate from ram to vram + * or from vram to ram. + */ +struct svm_range { + struct svm_range_list *svms; + struct mutex migrate_mutex; + unsigned long start; + unsigned long last; + struct interval_tree_node it_node; + struct list_head list; + struct list_head update_list; + struct list_head remove_list; + struct list_head insert_list; + struct amdgpu_bo_va_mapping mapping; + uint64_t npages; + dma_addr_t *dma_addr[MAX_GPU_INSTANCE]; + struct ttm_resource *ttm_res; + uint64_t offset; + struct svm_range_bo *svm_bo; + struct list_head svm_bo_list; + struct mutex lock; + unsigned int saved_flags; + uint32_t flags; + uint32_t preferred_loc; + uint32_t prefetch_loc; + uint32_t actual_loc; + uint8_t granularity; + atomic_t invalid; + uint64_t validate_timestamp; + struct mmu_interval_notifier notifier; + struct svm_work_list_item work_item; + struct list_head deferred_list; + struct list_head child_list; + DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); + DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); + bool validated_once; +}; + +static inline void svm_range_lock(struct svm_range *prange) +{ + mutex_lock(&prange->lock); + prange->saved_flags = memalloc_noreclaim_save(); +} + +static inline void svm_range_unlock(struct svm_range *prange) +{ + memalloc_noreclaim_restore(prange->saved_flags); + mutex_unlock(&prange->lock); +} + +int svm_range_list_init(struct kfd_process *p); +void svm_range_list_fini(struct kfd_process *p); +int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, + uint64_t size, uint32_t nattrs, + struct kfd_ioctl_svm_attribute *attrs); +struct svm_range *svm_range_from_addr(struct svm_range_list *svms, + unsigned long addr, + struct svm_range **parent); +struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange, + uint32_t id); +int svm_range_vram_node_new(struct amdgpu_device *adev, + struct svm_range *prange, bool clear); +void svm_range_vram_node_free(struct svm_range *prange); +int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, + unsigned long addr, struct svm_range *parent, + struct svm_range *prange); +int svm_range_restore_pages(struct amdgpu_device *adev, + unsigned int pasid, uint64_t addr); +int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); +void svm_range_add_list_work(struct svm_range_list *svms, + struct svm_range *prange, struct mm_struct *mm, + enum svm_work_list_ops op); +void schedule_deferred_list_work(struct svm_range_list *svms); +void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, + unsigned long offset, unsigned long npages); +void svm_range_free_dma_mappings(struct svm_range *prange); +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); + +/* SVM API and HMM page migration work together; the device memory type + * is initialized to a non-zero value when page migration registers device + * memory.
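+ * (pgmap.type is expected to be MEMORY_DEVICE_PRIVATE once kfd_migrate.c has + * registered device memory; 0 means device memory was never registered.)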
+ */ +#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0) + +#else + +struct kfd_process; + +static inline int svm_range_list_init(struct kfd_process *p) +{ + return 0; +} +static inline void svm_range_list_fini(struct kfd_process *p) +{ + /* empty */ +} + +static inline int svm_range_restore_pages(struct amdgpu_device *adev, + unsigned int pasid, uint64_t addr) +{ + return -EFAULT; +} + +static inline int svm_range_schedule_evict_svm_bo( + struct amdgpu_amdkfd_fence *fence) +{ + WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled"); + return -EINVAL; +} + +#define KFD_IS_SVM_API_SUPPORTED(dev) false + +#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ + +#endif /* KFD_SVM_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index cdef608db4f4..b1ce072aa20b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -36,6 +36,7 @@ #include "kfd_topology.h" #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" +#include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" @@ -1192,40 +1193,83 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) mem->mem_clk_max = local_mem_info.mem_clk_max; } -static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) +static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, + struct kfd_topology_device *target_gpu_dev, + struct kfd_iolink_properties *link) { - struct kfd_iolink_properties *link, *cpu_link; - struct kfd_topology_device *cpu_dev; - uint32_t cap; - uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED; - uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED; - - if (!dev || !dev->gpu) + /* xgmi always supports atomics between links. */ + if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) return; - pcie_capability_read_dword(dev->gpu->pdev, - PCI_EXP_DEVCAP2, &cap); + /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */ + if (target_gpu_dev) { + uint32_t cap; - if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | - PCI_EXP_DEVCAP2_ATOMIC_COMP64))) - cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | - CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; + pcie_capability_read_dword(target_gpu_dev->gpu->pdev, + PCI_EXP_DEVCAP2, &cap); + + if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64))) + link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | + CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; + /* set gpu (dev) flags. 
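+ * When target_gpu_dev is NULL, dev itself is the GPU and the flags are + * decided by its own requested PCIe atomic support.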
*/ + } else { + if (!dev->gpu->pci_atomic_requested || + dev->gpu->device_info->asic_family == + CHIP_HAWAII) + link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | + CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; + } +} - if (!dev->gpu->pci_atomic_requested || - dev->gpu->device_info->asic_family == CHIP_HAWAII) - flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | - CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; +static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, + struct kfd_iolink_properties *outbound_link, + struct kfd_iolink_properties *inbound_link) +{ + /* CPU -> GPU with PCIe */ + if (!to_dev->gpu && + inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) + inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; + + if (to_dev->gpu) { + /* GPU <-> GPU with PCIe and + * Vega20 with XGMI + */ + if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || + (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) { + outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; + inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; + } + } +} + +static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) +{ + struct kfd_iolink_properties *link, *inbound_link; + struct kfd_topology_device *peer_dev; + + if (!dev || !dev->gpu) + return; /* GPU only creates direct links so apply flags setting to all */ list_for_each_entry(link, &dev->io_link_props, list) { - link->flags = flag; - cpu_dev = kfd_topology_device_by_proximity_domain( + link->flags = CRAT_IOLINK_FLAGS_ENABLED; + kfd_set_iolink_no_atomics(dev, NULL, link); + peer_dev = kfd_topology_device_by_proximity_domain( link->node_to); - if (cpu_dev) { - list_for_each_entry(cpu_link, - &cpu_dev->io_link_props, list) - if (cpu_link->node_to == link->node_from) - cpu_link->flags = cpu_flag; + + if (!peer_dev) + continue; + + list_for_each_entry(inbound_link, &peer_dev->io_link_props, + list) { + if (inbound_link->node_to != link->node_from) + continue; + + inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; + kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); + kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); } } } @@ -1378,6 +1422,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); @@ -1410,15 +1456,18 @@ int kfd_topology_add_device(struct kfd_dev *gpu) adev = (struct amdgpu_device *)(dev->gpu->kgd); /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ dev->node_props.capability |= - ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? + ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? HSA_CAP_SRAM_EDCSUPPORTED : 0; - dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? + dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? HSA_CAP_MEM_EDCSUPPORTED : 0; if (adev->asic_type != CHIP_VEGA10) - dev->node_props.capability |= (adev->ras_features != 0) ? + dev->node_props.capability |= (adev->ras_enabled != 0) ? 
HSA_CAP_RASEVENTNOTIFY : 0; + if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) + dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; + kfd_debug_print_topology(); if (!res) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index b8b68087bd7a..8b48c6692007 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -53,8 +53,9 @@ #define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 #define HSA_CAP_ASIC_REVISION_SHIFT 22 #define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 - -#define HSA_CAP_RESERVED 0xf80f8000 +#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 +#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 +#define HSA_CAP_RESERVED 0xe00f8000 struct kfd_node_properties { uint64_t hive_id; @@ -98,9 +99,9 @@ struct kfd_node_properties { #define HSA_MEM_HEAP_TYPE_GPU_LDS 4 #define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 -#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 -#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 -#define HSA_MEM_FLAGS_RESERVED 0xfffffffc +#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 +#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 +#define HSA_MEM_FLAGS_RESERVED 0xfffffffc struct kfd_mem_properties { struct list_head list; diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index 0bc0b25cb410..daf3c44547d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -30,6 +30,7 @@ #define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 #define SOC15_INTSRC_VMC_FAULT 0 #define SOC15_INTSRC_SDMA_TRAP 224 +#define SOC15_INTSRC_SDMA_ECC 220 #define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff) |
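The attribute aggregation in svm_range_get_attr() above is easy to model in isolation. Below is a minimal user-space sketch, not driver code: struct toy_range and its values are stand-ins for svm_range fields, and LOC_UNDEFINED is assumed to match KFD_IOCTL_SVM_LOCATION_UNDEFINED (0xffffffff). It demonstrates the three intersection rules applied over all ranges overlapping the query interval: a location is reported only if every range agrees on it, flags are AND-ed so only common flags survive, and the smallest granularity wins.

#include <stdint.h>
#include <stdio.h>

#define LOC_UNDEFINED 0xffffffffu /* stand-in for KFD_IOCTL_SVM_LOCATION_UNDEFINED */

struct toy_range {
	uint32_t prefetch_loc;  /* 0 = CPU, otherwise a GPU id */
	uint32_t flags;
	uint8_t granularity;    /* log2 number of pages */
};

int main(void)
{
	/* two ranges overlapping the queried interval */
	struct toy_range ranges[] = {
		{ .prefetch_loc = 1, .flags = 0x3, .granularity = 9 },
		{ .prefetch_loc = 1, .flags = 0x1, .granularity = 12 },
	};
	uint32_t prefetch_loc = LOC_UNDEFINED;
	uint32_t flags = 0xffffffffu;
	uint8_t granularity = 0xff;
	int want_loc = 1;
	unsigned int i;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
		const struct toy_range *r = &ranges[i];

		/* report a location only while every range defines the same one */
		if (want_loc) {
			if (r->prefetch_loc == LOC_UNDEFINED ||
			    (prefetch_loc != LOC_UNDEFINED &&
			     prefetch_loc != r->prefetch_loc)) {
				prefetch_loc = LOC_UNDEFINED;
				want_loc = 0;
			} else {
				prefetch_loc = r->prefetch_loc;
			}
		}
		flags &= r->flags;                /* only common flags survive */
		if (r->granularity < granularity) /* smallest granularity wins */
			granularity = r->granularity;
	}

	/* prints: prefetch_loc 0x1 flags 0x1 granularity 9 */
	printf("prefetch_loc 0x%x flags 0x%x granularity %u\n",
	       (unsigned int)prefetch_loc, (unsigned int)flags,
	       (unsigned int)granularity);
	return 0;
}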