Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Kconfig                        |   13
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile                       |    5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c                  |  120
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c                     |  954
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c                   |   59
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c     |   82
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c  |   13
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c                   |   41
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c              |    2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c           |   10
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c                  |  889
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h                  |   65
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c           |   14
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c        |   58
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h    |   93
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h                     |   43
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c                  |  129
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c                      | 3102
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.h                      |  213
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c                 |  105
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h                 |   11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/soc15_int.h                    |    1
22 files changed, 5847 insertions, 175 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index f02c938f75da..8cc0a76ddf9f 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -12,3 +12,16 @@ config HSA_AMD
select DRM_AMDGPU_USERPTR
help
Enable this if you want to use HSA features on AMD GPU devices.
+
+config HSA_AMD_SVM
+ bool "Enable HMM-based shared virtual memory manager"
+ depends on HSA_AMD && DEVICE_PRIVATE
+ default y
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ help
+ Enable this to use unified memory and managed memory in HIP. This
+ memory manager supports two modes of operation: one based on
+ preemptions and one based on page faults. To enable page fault
+ based memory management on most GFXv9 GPUs, set the module
+ parameter amdgpu.noretry=0.
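
Usage sketch (assuming the standard amdgpu module-parameter mechanism, as described
in the help text above): page-fault-based memory management on most GFXv9 GPUs is
enabled by booting with

    amdgpu.noretry=0

on the kernel command line, or by adding "options amdgpu noretry=0" to a modprobe
configuration file.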
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index e1e4115dcf78..c4f3aff11072 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -63,3 +63,8 @@ endif
ifneq ($(CONFIG_DEBUG_FS),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
endif
+
+ifneq ($(CONFIG_HSA_AMD_SVM),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
+ $(AMDKFD_PATH)/kfd_migrate.o
+endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 43de260b2230..67541c30327a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -38,6 +38,7 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
+#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
@@ -1297,7 +1298,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size,
- pdd->vm, (struct kgd_mem **) &mem, &offset,
+ pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
flags);
if (err)
@@ -1328,7 +1329,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
return err;
@@ -1365,7 +1367,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
}
ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
- (struct kgd_mem *)mem, &size);
+ (struct kgd_mem *)mem, pdd->drm_priv, &size);
/* If freeing the buffer failed, leave the handle in place for
* clean-up during process tear-down.
@@ -1391,6 +1393,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
long err = 0;
int i;
uint32_t *devices_arr = NULL;
+ bool table_freed = false;
dev = kfd_device_by_id(GET_GPU_ID(args->handle));
if (!dev)
@@ -1448,7 +1451,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+ peer->kgd, (struct kgd_mem *)mem,
+ peer_pdd->drm_priv, &table_freed);
if (err) {
pr_err("Failed to map to gpu %d/%d\n",
i, args->n_devices);
@@ -1466,16 +1470,17 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
/* Flush TLBs after waiting for the page table updates to complete */
- for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
- if (WARN_ON_ONCE(!peer_pdd))
- continue;
- kfd_flush_tlb(peer_pdd);
+ if (table_freed) {
+ for (i = 0; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+ }
}
-
kfree(devices_arr);
return err;
@@ -1555,7 +1560,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+ peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
pr_err("Failed to unmap from gpu %d/%d\n",
i, args->n_devices);
@@ -1563,10 +1568,27 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
args->n_success = i+1;
}
- kfree(devices_arr);
-
mutex_unlock(&p->mutex);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+ if (err) {
+ pr_debug("Sync memory failed, wait interrupted by user signal\n");
+ goto sync_memory_failed;
+ }
+
+ /* Flush TLBs after waiting for the page table updates to complete */
+ for (i = 0; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
+ }
+
+ kfree(devices_arr);
+
return 0;
bind_process_to_device_failed:
@@ -1574,6 +1596,7 @@ get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
+sync_memory_failed:
kfree(devices_arr);
return err;
}
@@ -1701,7 +1724,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
}
r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
- args->va_addr, pdd->vm,
+ args->va_addr, pdd->drm_priv,
(struct kgd_mem **)&mem, &size,
NULL);
if (r)
@@ -1721,7 +1744,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
dma_buf_put(dmabuf);
@@ -1742,6 +1766,61 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(dev, &args->anon_fd);
}
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_set_xnack_mode_args *args = data;
+ int r = 0;
+
+ mutex_lock(&p->mutex);
+ if (args->xnack_enabled >= 0) {
+ if (!list_empty(&p->pqm.queues)) {
+ pr_debug("Process has user queues running\n");
+ mutex_unlock(&p->mutex);
+ return -EBUSY;
+ }
+ if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
+ r = -EPERM;
+ else
+ p->xnack_enabled = args->xnack_enabled;
+ } else {
+ args->xnack_enabled = p->xnack_enabled;
+ }
+ mutex_unlock(&p->mutex);
+
+ return r;
+}
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_svm_args *args = data;
+ int r = 0;
+
+ pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
+ args->start_addr, args->size, args->op, args->nattr);
+
+ if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
+ return -EINVAL;
+ if (!args->start_addr || !args->size)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
+ args->attrs);
+
+ mutex_unlock(&p->mutex);
+
+ return r;
+}
+#else
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+ return -EPERM;
+}
+#endif
+
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1840,6 +1919,11 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
kfd_ioctl_smi_events, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
+ kfd_ioctl_set_xnack_mode, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
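
For illustration, a minimal userspace sketch of the new AMDKFD_IOC_SET_XNACK_MODE
interface (not part of this diff; it assumes the uapi definitions for
struct kfd_ioctl_set_xnack_mode_args and the ioctl number come from
include/uapi/linux/kfd_ioctl.h, and that a negative xnack_enabled queries the
current mode, as in the handler above):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
        struct kfd_ioctl_set_xnack_mode_args args = { .xnack_enabled = -1 };
        int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

        if (fd < 0)
                return 1;

        /* A negative value queries the current mode without changing it;
         * re-issuing with 0 or 1 changes the mode and fails (EBUSY) if the
         * process already has user queues.
         */
        if (ioctl(fd, AMDKFD_IOC_SET_XNACK_MODE, &args))
                return 1;
        printf("XNACK %s\n", args.xnack_enabled ? "enabled" : "disabled");

        return 0;
}

The AMDKFD_IOC_SVM ioctl added above follows the same open()/ioctl() pattern on
/dev/kfd; note that its handler rejects a start_addr or size of zero and any
value that is not page aligned.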
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index c60e82697385..c6b02aee4993 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -55,7 +55,7 @@ struct kfd_gpu_cache_info {
uint32_t cache_level;
uint32_t flags;
/* Indicates how many Compute Units share this cache
- * Value = 1 indicates the cache is not shared
+ * within a SA. Value = 1 indicates the cache is not shared
*/
uint32_t num_cu_shared;
};
@@ -69,7 +69,6 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
-
},
{
/* Scalar L1 Instruction Cache (in SQC module) per bank */
@@ -126,9 +125,6 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
/* TODO: Add L2 Cache information */
};
-/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
- * the following ASICs may need a separate table.
- */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info carrizo_cache_info
@@ -136,13 +132,667 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
-/* TODO - check & update Vega10 cache details */
-#define vega10_cache_info carrizo_cache_info
-#define raven_cache_info carrizo_cache_info
-#define renoir_cache_info carrizo_cache_info
-/* TODO - check & update Navi10 cache details */
-#define navi10_cache_info carrizo_cache_info
-#define vangogh_cache_info carrizo_cache_info
+
+/* NOTE: L1 cache information has been updated and L2/L3
+ * cache information has been added for Vega10 and
+ * newer ASICs. The unit for cache_size is KiB.
+ * Going forward, the cache details must be checked and
+ * updated for every new ASIC.
+ */
+
+static struct kfd_gpu_cache_info vega10_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 4096,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 16,
+ },
+};
+
+static struct kfd_gpu_cache_info raven_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 1024,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 11,
+ },
+};
+
+static struct kfd_gpu_cache_info renoir_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 1024,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+};
+
+static struct kfd_gpu_cache_info vega12_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 5,
+ },
+};
+
+static struct kfd_gpu_cache_info vega20_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 8192,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 16,
+ },
+};
+
+static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 8192,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 14,
+ },
+};
+
+static struct kfd_gpu_cache_info navi10_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 4096,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+};
+
+static struct kfd_gpu_cache_info vangogh_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 1024,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+};
+
+static struct kfd_gpu_cache_info navi14_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 12,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 12,
+ },
+};
+
+static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 4096,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L3 Data Cache per GPU */
+ .cache_size = 128*1024,
+ .cache_level = 3,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+};
+
+static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 3072,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L3 Data Cache per GPU */
+ .cache_size = 96*1024,
+ .cache_level = 3,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+};
+
+static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L3 Data Cache per GPU */
+ .cache_size = 32*1024,
+ .cache_level = 3,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+};
+
+static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 1024,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L3 Data Cache per GPU */
+ .cache_size = 16*1024,
+ .cache_level = 3,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+};
+
+static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 6,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 6,
+ },
+};
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
@@ -544,7 +1194,7 @@ err:
}
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_pcache(struct crat_subtype_cache *pcache,
+static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
int mem_available,
@@ -597,6 +1247,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
return 1;
}
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
+ struct kfd_gpu_cache_info *pcache_info,
+ struct kfd_cu_info *cu_info,
+ int mem_available,
+ int cache_type, unsigned int cu_processor_id)
+{
+ unsigned int cu_sibling_map_mask;
+ int first_active_cu;
+ int i, j, k;
+
+ /* First check if enough memory is available */
+ if (sizeof(struct crat_subtype_cache) > mem_available)
+ return -ENOMEM;
+
+ cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+ cu_sibling_map_mask &=
+ ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ first_active_cu = ffs(cu_sibling_map_mask);
+
+ /* A CU could be inactive. For a shared cache, find the first active
+ * CU; for a non-shared cache, check whether the CU is inactive and,
+ * if so, skip it.
+ */
+ if (first_active_cu) {
+ memset(pcache, 0, sizeof(struct crat_subtype_cache));
+ pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+ pcache->length = sizeof(struct crat_subtype_cache);
+ pcache->flags = pcache_info[cache_type].flags;
+ pcache->processor_id_low = cu_processor_id
+ + (first_active_cu - 1);
+ pcache->cache_level = pcache_info[cache_type].cache_level;
+ pcache->cache_size = pcache_info[cache_type].cache_size;
+
+ /* Sibling map is w.r.t processor_id_low, so shift out
+ * inactive CU
+ */
+ cu_sibling_map_mask =
+ cu_sibling_map_mask >> (first_active_cu - 1);
+ k = 0;
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine;
+ j++) {
+ pcache->sibling_map[k] =
+ (uint8_t)(cu_sibling_map_mask & 0xFF);
+ pcache->sibling_map[k+1] =
+ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[k+2] =
+ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[k+3] =
+ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ k += 4;
+ cu_sibling_map_mask =
+ cu_info->cu_bitmap[i % 4][j + i / 4];
+ cu_sibling_map_mask &= (
+ (1 << pcache_info[cache_type].num_cu_shared)
+ - 1);
+ }
+ }
+ return 0;
+ }
+ return 1;
+}
+
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
* tables
*
@@ -624,6 +1338,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int mem_available = available_size;
unsigned int cu_processor_id;
int ret;
+ unsigned int num_cu_shared;
switch (kdev->device_info->asic_family) {
case CHIP_KAVERI:
@@ -663,12 +1378,21 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
case CHIP_VEGA10:
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
case CHIP_VEGA12:
+ pcache_info = vega12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+ break;
case CHIP_VEGA20:
case CHIP_ARCTURUS:
+ pcache_info = vega20_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+ break;
case CHIP_ALDEBARAN:
- pcache_info = vega10_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ pcache_info = aldebaran_cache_info;
+ num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
case CHIP_RAVEN:
pcache_info = raven_cache_info;
@@ -680,17 +1404,37 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
break;
case CHIP_NAVI10:
case CHIP_NAVI12:
+ pcache_info = navi10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ break;
case CHIP_NAVI14:
+ pcache_info = navi14_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+ break;
case CHIP_SIENNA_CICHLID:
+ pcache_info = sienna_cichlid_cache_info;
+ num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+ break;
case CHIP_NAVY_FLOUNDER:
+ pcache_info = navy_flounder_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+ break;
case CHIP_DIMGREY_CAVEFISH:
- pcache_info = navi10_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ pcache_info = dimgrey_cavefish_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
break;
case CHIP_VANGOGH:
pcache_info = vangogh_cache_info;
num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
break;
+ case CHIP_BEIGE_GOBY:
+ pcache_info = beige_goby_cache_info;
+ num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+ break;
+ case CHIP_YELLOW_CARP:
+ pcache_info = yellow_carp_cache_info;
+ num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+ break;
default:
return -EINVAL;
}
@@ -709,40 +1453,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
*/
for (ct = 0; ct < num_of_cache_types; ct++) {
- cu_processor_id = gpu_processor_id;
- for (i = 0; i < cu_info->num_shader_engines; i++) {
- for (j = 0; j < cu_info->num_shader_arrays_per_engine;
- j++) {
- for (k = 0; k < cu_info->num_cu_per_sh;
- k += pcache_info[ct].num_cu_shared) {
-
- ret = fill_in_pcache(pcache,
- pcache_info,
- cu_info,
- mem_available,
- cu_info->cu_bitmap[i % 4][j + i / 4],
- ct,
- cu_processor_id,
- k);
-
- if (ret < 0)
- break;
-
- if (!ret) {
- pcache++;
- (*num_of_entries)++;
- mem_available -=
- sizeof(*pcache);
- (*size_filled) +=
- sizeof(*pcache);
- }
-
- /* Move to next CU block */
- cu_processor_id +=
- pcache_info[ct].num_cu_shared;
- }
- }
+ cu_processor_id = gpu_processor_id;
+ if (pcache_info[ct].cache_level == 1) {
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+ for (k = 0; k < cu_info->num_cu_per_sh;
+ k += pcache_info[ct].num_cu_shared) {
+ ret = fill_in_l1_pcache(pcache,
+ pcache_info,
+ cu_info,
+ mem_available,
+ cu_info->cu_bitmap[i % 4][j + i / 4],
+ ct,
+ cu_processor_id,
+ k);
+
+ if (ret < 0)
+ break;
+
+ if (!ret) {
+ pcache++;
+ (*num_of_entries)++;
+ mem_available -= sizeof(*pcache);
+ (*size_filled) += sizeof(*pcache);
+ }
+
+ /* Move to next CU block */
+ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+ cu_info->num_cu_per_sh) ?
+ pcache_info[ct].num_cu_shared :
+ (cu_info->num_cu_per_sh - k);
+ cu_processor_id += num_cu_shared;
}
+ }
+ }
+ } else {
+ ret = fill_in_l2_l3_pcache(pcache,
+ pcache_info,
+ cu_info,
+ mem_available,
+ ct,
+ cu_processor_id);
+
+ if (ret < 0)
+ break;
+
+ if (!ret) {
+ pcache++;
+ (*num_of_entries)++;
+ mem_available -= sizeof(*pcache);
+ (*size_filled) += sizeof(*pcache);
+ }
+ }
}
pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
@@ -1100,6 +1862,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
return 0;
}
+#ifdef CONFIG_ACPI_NUMA
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+{
+ struct acpi_table_header *table_header = NULL;
+ struct acpi_subtable_header *sub_header = NULL;
+ unsigned long table_end, subtable_len;
+ u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+ pci_dev_id(kdev->pdev);
+ u32 bdf;
+ acpi_status status;
+ struct acpi_srat_cpu_affinity *cpu;
+ struct acpi_srat_generic_affinity *gpu;
+ int pxm = 0, max_pxm = 0;
+ int numa_node = NUMA_NO_NODE;
+ bool found = false;
+
+ /* Fetch the SRAT table from ACPI */
+ status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+ if (status == AE_NOT_FOUND) {
+ pr_warn("SRAT table not found\n");
+ return;
+ } else if (ACPI_FAILURE(status)) {
+ const char *err = acpi_format_exception(status);
+ pr_err("SRAT table error: %s\n", err);
+ return;
+ }
+
+ table_end = (unsigned long)table_header + table_header->length;
+
+ /* Parse all entries looking for a match. */
+ sub_header = (struct acpi_subtable_header *)
+ ((unsigned long)table_header +
+ sizeof(struct acpi_table_srat));
+ subtable_len = sub_header->length;
+
+ while (((unsigned long)sub_header) + subtable_len < table_end) {
+ /*
+ * If the length is 0, break out of this loop to avoid an
+ * infinite loop.
+ */
+ if (subtable_len == 0) {
+ pr_err("SRAT invalid zero length\n");
+ break;
+ }
+
+ switch (sub_header->type) {
+ case ACPI_SRAT_TYPE_CPU_AFFINITY:
+ cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+ pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+ cpu->proximity_domain_lo;
+ if (pxm > max_pxm)
+ max_pxm = pxm;
+ break;
+ case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+ gpu = (struct acpi_srat_generic_affinity *)sub_header;
+ bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+ *((u16 *)(&gpu->device_handle[2]));
+ if (bdf == pci_id) {
+ found = true;
+ numa_node = pxm_to_node(gpu->proximity_domain);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (found)
+ break;
+
+ sub_header = (struct acpi_subtable_header *)
+ ((unsigned long)sub_header + subtable_len);
+ subtable_len = sub_header->length;
+ }
+
+ acpi_put_table(table_header);
+
+ /* Workaround bad cpu-gpu binding case */
+ if (found && (numa_node < 0 ||
+ numa_node > pxm_to_node(max_pxm)))
+ numa_node = 0;
+
+ if (numa_node != NUMA_NO_NODE)
+ set_dev_node(&kdev->pdev->dev, numa_node);
+}
+#endif
+
/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
* to its NUMA node
* @avail_size: Available size in the memory
@@ -1140,11 +1988,17 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
*/
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+ sub_type_hdr->num_hops_xgmi = 1;
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
}
sub_type_hdr->proximity_domain_from = proximity_domain;
+
+#ifdef CONFIG_ACPI_NUMA
+ if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+ kfd_find_numa_node_in_srat(kdev);
+#endif
#ifdef CONFIG_NUMA
if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
sub_type_hdr->proximity_domain_to = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 357b9bf62a1c..6b57dfd2cd2a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,10 +26,12 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
+#include "kfd_migrate.h"
#define MQD_SIZE_ALIGNED 768
@@ -80,6 +82,8 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
[CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd,
[CHIP_VANGOGH] = &gfx_v10_3_kfd2kgd,
[CHIP_DIMGREY_CAVEFISH] = &gfx_v10_3_kfd2kgd,
+ [CHIP_BEIGE_GOBY] = &gfx_v10_3_kfd2kgd,
+ [CHIP_YELLOW_CARP] = &gfx_v10_3_kfd2kgd,
};
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -556,6 +560,41 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = {
.num_sdma_queues_per_engine = 8,
};
+static const struct kfd_device_info beige_goby_device_info = {
+ .asic_family = CHIP_BEIGE_GOBY,
+ .asic_name = "beige_goby",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 1,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
+};
+
+static const struct kfd_device_info yellow_carp_device_info = {
+ .asic_family = CHIP_YELLOW_CARP,
+ .asic_name = "yellow_carp",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 1,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+};
/* For each entry, [0] is regular and [1] is virtualisation device. */
static const struct kfd_device_info *kfd_supported_devices[][2] = {
@@ -576,7 +615,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
[CHIP_VEGA20] = {&vega20_device_info, NULL},
[CHIP_RENOIR] = {&renoir_device_info, NULL},
[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
- [CHIP_ALDEBARAN] = {&aldebaran_device_info, NULL},
+ [CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info},
[CHIP_NAVI10] = {&navi10_device_info, NULL},
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
[CHIP_NAVI14] = {&navi14_device_info, NULL},
@@ -584,6 +623,8 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
[CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info},
[CHIP_VANGOGH] = {&vangogh_device_info, NULL},
[CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info},
+ [CHIP_BEIGE_GOBY] = {&beige_goby_device_info, &beige_goby_device_info},
+ [CHIP_YELLOW_CARP] = {&yellow_carp_device_info, NULL},
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -697,7 +738,9 @@ static int kfd_gws_init(struct kfd_dev *kfd)
&& kfd->device_info->asic_family <= CHIP_RAVEN
&& kfd->mec2_fw_version >= 0x1b3)
|| (kfd->device_info->asic_family == CHIP_ARCTURUS
- && kfd->mec2_fw_version >= 0x30))
+ && kfd->mec2_fw_version >= 0x30)
+ || (kfd->device_info->asic_family == CHIP_ALDEBARAN
+ && kfd->mec2_fw_version >= 0x28))
ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
@@ -713,7 +756,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
{
- unsigned int size;
+ unsigned int size, map_process_packet_size;
kfd->ddev = ddev;
kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
@@ -748,7 +791,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
* calculate max size of runlist packet.
* There can be only 2 packets at once
*/
- size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
+ map_process_packet_size =
+ kfd->device_info->asic_family == CHIP_ALDEBARAN ?
+ sizeof(struct pm4_mes_map_process_aldebaran) :
+ sizeof(struct pm4_mes_map_process);
+ size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+ sizeof(struct pm4_mes_runlist)) * 2;
@@ -814,6 +861,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_cwsr_init(kfd);
+ svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+
if (kfd_resume(kfd))
goto kfd_resume_error;
@@ -861,7 +910,7 @@ out:
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
if (kfd->init_complete) {
- kgd2kfd_suspend(kfd, false);
+ svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
device_queue_manager_uninit(kfd->dqm);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d3eaa1549bd7..16a1713808c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -248,7 +248,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
- kfd_flush_tlb(qpd_to_pdd(qpd));
+ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
@@ -284,7 +284,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
if (flush_texture_cache_nocpsch(q->device, qpd))
pr_err("Failed to flush TC\n");
- kfd_flush_tlb(qpd_to_pdd(qpd));
+ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -486,9 +486,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
if (retval == -ETIME)
qpd->reset_wavefronts = true;
-
- mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
list_del(&q->list);
if (list_empty(&qpd->queues_list)) {
if (qpd->reset_wavefronts) {
@@ -523,6 +520,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
int retval;
uint64_t sdma_val = 0;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct mqd_manager *mqd_mgr =
+ dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
@@ -540,6 +539,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val;
dqm_unlock(dqm);
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
return retval;
}
@@ -738,7 +739,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
- pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
+ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
dqm_lock(dqm);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@@ -760,7 +761,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
dqm->dev->kgd,
qpd->vmid,
qpd->page_table_base);
- kfd_flush_tlb(pdd);
+ kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
}
/* Take a safe reference to the mm_struct, which may otherwise
@@ -821,7 +822,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
- pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
+ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
dqm_lock(dqm);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@@ -873,7 +874,7 @@ static int register_process(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
- pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
+ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
dqm_lock(dqm);
list_add(&n->list, &dqm->queues);
@@ -1629,7 +1630,7 @@ out:
static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- struct queue *q, *next;
+ struct queue *q;
struct device_process_node *cur, *next_dpn;
int retval = 0;
bool found = false;
@@ -1637,12 +1638,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
dqm_lock(dqm);
/* Clear all user mode queues */
- list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+ while (!list_empty(&qpd->queues_list)) {
+ struct mqd_manager *mqd_mgr;
int ret;
+ q = list_first_entry(&qpd->queues_list, struct queue, list);
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
if (ret)
retval = ret;
+ dqm_unlock(dqm);
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+ dqm_lock(dqm);
}
/* Unregister process */
@@ -1674,36 +1682,34 @@ static int get_wave_state(struct device_queue_manager *dqm,
u32 *save_area_used_size)
{
struct mqd_manager *mqd_mgr;
- int r;
dqm_lock(dqm);
- if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
- q->properties.is_active || !q->device->cwsr_enabled) {
- r = -EINVAL;
- goto dqm_unlock;
- }
-
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
- if (!mqd_mgr->get_wave_state) {
- r = -EINVAL;
- goto dqm_unlock;
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.is_active || !q->device->cwsr_enabled ||
+ !mqd_mgr->get_wave_state) {
+ dqm_unlock(dqm);
+ return -EINVAL;
}
- r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
- ctl_stack_used_size, save_area_used_size);
-
-dqm_unlock:
dqm_unlock(dqm);
- return r;
+
+ /*
+ * get_wave_state() is called outside the dqm lock to prevent circular
+ * locking, and the queue is protected against destruction by the
+ * process lock.
+ */
+ return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
+ ctl_stack_used_size, save_area_used_size);
}
static int process_termination_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
int retval;
- struct queue *q, *next;
+ struct queue *q;
struct kernel_queue *kq, *kq_next;
struct mqd_manager *mqd_mgr;
struct device_process_node *cur, *next_dpn;
@@ -1760,24 +1766,26 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = false;
}
- dqm_unlock(dqm);
-
- /* Outside the DQM lock because under the DQM lock we can't do
- * reclaim or take other locks that others hold while reclaiming.
- */
- if (found)
- kfd_dec_compute_active(dqm->dev);
-
/* Lastly, free mqd resources.
* Do free_mqd() after dqm_unlock to avoid circular locking.
*/
- list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+ while (!list_empty(&qpd->queues_list)) {
+ q = list_first_entry(&qpd->queues_list, struct queue, list);
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
list_del(&q->list);
qpd->queue_count--;
+ dqm_unlock(dqm);
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+ dqm_lock(dqm);
}
+ dqm_unlock(dqm);
+
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ if (found)
+ kfd_dec_compute_active(dqm->dev);
return retval;
}
@@ -1936,6 +1944,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_NAVY_FLOUNDER:
case CHIP_VANGOGH:
case CHIP_DIMGREY_CAVEFISH:
+ case CHIP_BEIGE_GOBY:
+ case CHIP_YELLOW_CARP:
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index eca6331efa94..b5c3d13643f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
- if (dqm->dev->noretry &&
- !dqm->dev->use_iommu_v2)
+
+ if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
+ /* Aldebaran can safely support different XNACK modes
+ * per process
+ */
+ if (!pdd->process->xnack_enabled)
+ qpd->sh_mem_config |=
+ 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+ } else if (dqm->dev->noretry &&
+ !dqm->dev->use_iommu_v2) {
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+ }
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ba2c2ce0c55a..3eea4edee355 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1050,3 +1050,44 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
+
+void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
+{
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_hsa_memory_exception_data memory_exception_data;
+ struct kfd_hsa_hw_exception_data hw_exception_data;
+ struct kfd_event *ev;
+ uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+
+ if (!p)
+ return; /* Presumably process exited. */
+
+ memset(&hw_exception_data, 0, sizeof(hw_exception_data));
+ hw_exception_data.gpu_id = dev->id;
+ hw_exception_data.memory_lost = 1;
+ hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
+
+ memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+ memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
+ memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.failure.imprecise = true;
+
+ mutex_lock(&p->event_mutex);
+ idr_for_each_entry_continue(&p->event_idr, ev, id) {
+ if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ ev->hw_exception_data = hw_exception_data;
+ set_event(ev);
+ }
+
+ if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+ ev->memory_exception_data = memory_exception_data;
+ set_event(ev);
+ }
+ }
+ mutex_unlock(&p->event_mutex);
+
+ /* user application will handle SIGBUS signal */
+ send_sig(SIGBUS, p->lead_thread, 0);
+
+ kfd_unref_process(p);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index a2c9063076cc..a9b329f0f862 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -420,6 +420,8 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_NAVY_FLOUNDER:
case CHIP_VANGOGH:
case CHIP_DIMGREY_CAVEFISH:
+ case CHIP_BEIGE_GOBY:
+ case CHIP_YELLOW_CARP:
kfd_init_apertures_v9(pdd, id);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 696944fa0177..12d91e53556c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -25,7 +25,6 @@
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
-#include "amdgpu.h"
enum SQ_INTERRUPT_WORD_ENCODING {
SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
@@ -231,7 +230,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
sq_intr_err);
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
- kfd_signal_hw_exception_event(pasid);
+ kfd_signal_poison_consumed_event(dev, pasid);
amdgpu_amdkfd_gpu_reset(dev->kgd);
return;
}
@@ -250,8 +249,13 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
client_id == SOC15_IH_CLIENTID_SDMA5 ||
client_id == SOC15_IH_CLIENTID_SDMA6 ||
client_id == SOC15_IH_CLIENTID_SDMA7) {
- if (source_id == SOC15_INTSRC_SDMA_TRAP)
+ if (source_id == SOC15_INTSRC_SDMA_TRAP) {
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+ } else if (source_id == SOC15_INTSRC_SDMA_ECC) {
+ kfd_signal_poison_consumed_event(dev, pasid);
+ amdgpu_amdkfd_gpu_reset(dev->kgd);
+ return;
+ }
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
client_id == SOC15_IH_CLIENTID_UTCL2) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
new file mode 100644
index 000000000000..2660f03e63a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -0,0 +1,889 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mn.h"
+#include "amdgpu_res_cursor.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+ return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+ dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ unsigned int num_dw, num_bytes;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ uint64_t pte_flags;
+ void *cpu_addr;
+ int r;
+
+ /* use gart window 0 */
+ *gart_addr = adev->gmc.gart_start;
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ num_bytes = npages * 8;
+
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job);
+ if (r)
+ return r;
+
+ src_addr = num_dw * 4;
+ src_addr += job->ibs[0].gpu_addr;
+
+ dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+ dst_addr, num_bytes, false);
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+ pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+ if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+ pte_flags |= AMDGPU_PTE_WRITEABLE;
+ pte_flags |= adev->gart.gart_pte_flags;
+
+ cpu_addr = &job->ibs[0].ptr[num_dw];
+
+ r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+ if (r)
+ goto error_free;
+
+ r = amdgpu_job_submit(job, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ if (r)
+ goto error_free;
+
+ dma_fence_put(fence);
+
+ return r;
+
+error_free:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - use SDMA to copy data between RAM and VRAM
+ *
+ * @adev: amdgpu device the SDMA ring is running on
+ * @src: source page address array
+ * @dst: destination page address array
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence to signal after sdma is done
+ *
+ * The RAM address uses contiguous GART table entries mapped to the RAM pages,
+ * while the VRAM address is a direct mapping of the VRAM pages, which must be
+ * npages contiguous pages.
+ * The GART update and the SDMA copy share the same buffer-copy ring. The copy
+ * is split into chunks of at most GTT_MAX_PAGES; all SDMA operations are
+ * serialized, and the fence of the last SDMA operation is returned so the
+ * caller can wait for the whole copy to complete.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+ uint64_t *vram, uint64_t npages,
+ enum MIGRATION_COPY_DIR direction,
+ struct dma_fence **mfence)
+{
+ const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ uint64_t gart_s, gart_d;
+ struct dma_fence *next;
+ uint64_t size;
+ int r;
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+
+ while (npages) {
+ size = min(GTT_MAX_PAGES, npages);
+
+ if (direction == FROM_VRAM_TO_RAM) {
+ gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+ r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
+
+ } else if (direction == FROM_RAM_TO_VRAM) {
+ r = svm_migrate_gart_map(ring, size, sys, &gart_s,
+ KFD_IOCTL_SVM_FLAG_GPU_RO);
+ gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
+ }
+ if (r) {
+ pr_debug("failed %d to create gart mapping\n", r);
+ goto out_unlock;
+ }
+
+ r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+ NULL, &next, false, true, false);
+ if (r) {
+ pr_debug("failed %d to copy memory\n", r);
+ goto out_unlock;
+ }
+
+ dma_fence_put(*mfence);
+ *mfence = next;
+ npages -= size;
+ if (npages) {
+ sys += size;
+ vram += size;
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
+
+/**
+ * svm_migrate_copy_done - wait for the sdma memory copy to finish
+ *
+ * @adev: amdgpu device the sdma memory copy is executing on
+ * @mfence: migrate fence
+ *
+ * Wait for the dma fence to be signaled. If the copy was split into multiple
+ * sdma operations, this is the fence of the last sdma operation.
+ *
+ * Context: called after svm_migrate_copy_memory_gart
+ *
+ * Return:
+ * 0 - success
+ * otherwise - error code from dma fence signal
+ */
+static int
+svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
+{
+ int r = 0;
+
+ if (mfence) {
+ r = dma_fence_wait(mfence, false);
+ dma_fence_put(mfence);
+ pr_debug("sdma copy memory fence done\n");
+ }
+
+ return r;
+}
+
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
+{
+ return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
+}
+
+static void
+svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
+{
+ struct page *page;
+
+ page = pfn_to_page(pfn);
+ page->zone_device_data = prange;
+ get_page(page);
+ lock_page(page);
+}
+
+static void
+svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
+{
+ struct page *page;
+
+ page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
+ unlock_page(page);
+ put_page(page);
+}
+
+static unsigned long
+svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
+{
+ unsigned long addr;
+
+ addr = page_to_pfn(page) << PAGE_SHIFT;
+ return (addr - adev->kfd.dev->pgmap.range.start);
+}
+
+static struct page *
+svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ struct page *page;
+
+ page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ if (page)
+ lock_page(page);
+
+ return page;
+}
+
+static void svm_migrate_put_sys_page(unsigned long addr)
+{
+ struct page *page;
+
+ page = pfn_to_page(addr >> PAGE_SHIFT);
+ unlock_page(page);
+ put_page(page);
+}
+
+static int
+svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+ struct migrate_vma *migrate, struct dma_fence **mfence,
+ dma_addr_t *scratch)
+{
+ uint64_t npages = migrate->cpages;
+ struct device *dev = adev->dev;
+ struct amdgpu_res_cursor cursor;
+ dma_addr_t *src;
+ uint64_t *dst;
+ uint64_t i, j;
+ int r;
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+ prange->last);
+
+ src = scratch;
+ dst = (uint64_t *)(scratch + npages);
+
+ r = svm_range_vram_node_new(adev, prange, true);
+ if (r) {
+ pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
+ goto out;
+ }
+
+ amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+ npages << PAGE_SHIFT, &cursor);
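+ /* Batch copies: j counts source pages that are contiguous within the
+ * current vram cursor segment. A hole in migrate->src or the end of a
+ * vram segment flushes the pending pages with one sdma transfer.
+ */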
+ for (i = j = 0; i < npages; i++) {
+ struct page *spage;
+
+ dst[i] = cursor.start + (j << PAGE_SHIFT);
+ migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+ svm_migrate_get_vram_page(prange, migrate->dst[i]);
+
+ migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+ migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+
+ if (migrate->src[i] & MIGRATE_PFN_VALID) {
+ spage = migrate_pfn_to_page(migrate->src[i]);
+ src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ r = dma_mapping_error(dev, src[i]);
+ if (r) {
+ pr_debug("failed %d dma_map_page\n", r);
+ goto out_free_vram_pages;
+ }
+ } else {
+ if (j) {
+ r = svm_migrate_copy_memory_gart(
+ adev, src + i - j,
+ dst + i - j, j,
+ FROM_RAM_TO_VRAM,
+ mfence);
+ if (r)
+ goto out_free_vram_pages;
+ amdgpu_res_next(&cursor, j << PAGE_SHIFT);
+ j = 0;
+ } else {
+ amdgpu_res_next(&cursor, PAGE_SIZE);
+ }
+ continue;
+ }
+
+ pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
+ src[i] >> PAGE_SHIFT, page_to_pfn(spage));
+
+ if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
+ r = svm_migrate_copy_memory_gart(adev, src + i - j,
+ dst + i - j, j + 1,
+ FROM_RAM_TO_VRAM,
+ mfence);
+ if (r)
+ goto out_free_vram_pages;
+ amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
+ j = 0;
+ } else {
+ j++;
+ }
+ }
+
+ r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
+ FROM_RAM_TO_VRAM, mfence);
+
+out_free_vram_pages:
+ if (r) {
+ pr_debug("failed %d to copy memory to vram\n", r);
+ while (i--) {
+ svm_migrate_put_vram_page(adev, dst[i]);
+ migrate->dst[i] = 0;
+ }
+ }
+
+out:
+ return r;
+}
+
+static int
+svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+ struct vm_area_struct *vma, uint64_t start,
+ uint64_t end)
+{
+ uint64_t npages = (end - start) >> PAGE_SHIFT;
+ struct dma_fence *mfence = NULL;
+ struct migrate_vma migrate;
+ dma_addr_t *scratch;
+ size_t size;
+ void *buf;
+ int r = -ENOMEM;
+ int retry = 0;
+
+ memset(&migrate, 0, sizeof(migrate));
+ migrate.vma = vma;
+ migrate.start = start;
+ migrate.end = end;
+ migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
+ migrate.pgmap_owner = adev;
+
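+ /* One allocation backs migrate.src, migrate.dst and the scratch area
+ * used by svm_migrate_copy_to_vram for dma and vram addresses.
+ */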
+ size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
+ size *= npages;
+ buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ if (!buf)
+ goto out;
+
+ migrate.src = buf;
+ migrate.dst = migrate.src + npages;
+ scratch = (dma_addr_t *)(migrate.dst + npages);
+
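+ /* migrate_vma_setup() may collect fewer pages than requested when some
+ * pages cannot be isolated for migration; retry a few times before
+ * giving up on this VMA range.
+ */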
+retry:
+ r = migrate_vma_setup(&migrate);
+ if (r) {
+ pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
+ r, prange->svms, prange->start, prange->last);
+ goto out_free;
+ }
+ if (migrate.cpages != npages) {
+ pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
+ npages);
+ migrate_vma_finalize(&migrate);
+ if (retry++ >= 3) {
+ r = -ENOMEM;
+ pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
+ r, prange->svms, prange->start, prange->last);
+ goto out_free;
+ }
+
+ goto retry;
+ }
+
+ if (migrate.cpages) {
+ r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
+ scratch);
+ migrate_vma_pages(&migrate);
+ svm_migrate_copy_done(adev, mfence);
+ migrate_vma_finalize(&migrate);
+ }
+
+ svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+ svm_range_free_dma_mappings(prange);
+
+out_free:
+ kvfree(buf);
+out:
+ return r;
+}
+
+/**
+ * svm_migrate_ram_to_vram - migrate svm range from system to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: the process mm structure
+ *
+ * Context: Process context, caller holds mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ struct mm_struct *mm)
+{
+ unsigned long addr, start, end;
+ struct vm_area_struct *vma;
+ struct amdgpu_device *adev;
+ int r = 0;
+
+ if (prange->actual_loc == best_loc) {
+ pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
+ prange->svms, prange->start, prange->last, best_loc);
+ return 0;
+ }
+
+ adev = svm_range_get_adev_by_id(prange, best_loc);
+ if (!adev) {
+ pr_debug("failed to get device by id 0x%x\n", best_loc);
+ return -ENODEV;
+ }
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
+ prange->start, prange->last, best_loc);
+
+ /* FIXME: workaround for page locking bug with invalid pages */
+ svm_range_prefault(prange, mm);
+
+ start = prange->start << PAGE_SHIFT;
+ end = (prange->last + 1) << PAGE_SHIFT;
+
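+ /* The range may span several VMAs; migrate it one VMA at a time. */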
+ for (addr = start; addr < end;) {
+ unsigned long next;
+
+ vma = find_vma(mm, addr);
+ if (!vma || addr < vma->vm_start)
+ break;
+
+ next = min(vma->vm_end, end);
+ r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
+ if (r) {
+ pr_debug("failed to migrate\n");
+ break;
+ }
+ addr = next;
+ }
+
+ if (!r)
+ prange->actual_loc = best_loc;
+
+ return r;
+}
+
+static void svm_migrate_page_free(struct page *page)
+{
+ /* Keep this empty stub: device-private pgmap requires a page_free callback */
+}
+
+static int
+svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+ struct migrate_vma *migrate, struct dma_fence **mfence,
+ dma_addr_t *scratch)
+{
+ uint64_t npages = migrate->cpages;
+ struct device *dev = adev->dev;
+ uint64_t *src;
+ dma_addr_t *dst;
+ struct page *dpage;
+ uint64_t i = 0, j;
+ uint64_t addr;
+ int r = 0;
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+ prange->last);
+
+ addr = prange->start << PAGE_SHIFT;
+
+ src = (uint64_t *)(scratch + npages);
+ dst = scratch;
+
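+ /* Batch copies: j counts pages whose vram source addresses are
+ * contiguous; a discontinuity flushes the pending pages with one sdma
+ * transfer before a new batch is started.
+ */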
+ for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+ struct page *spage;
+
+ spage = migrate_pfn_to_page(migrate->src[i]);
+ if (!spage) {
+ pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+ prange->svms, prange->start, prange->last);
+ r = -ENOMEM;
+ goto out_oom;
+ }
+ src[i] = svm_migrate_addr(adev, spage);
+ if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+ r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+ src + i - j, j,
+ FROM_VRAM_TO_RAM,
+ mfence);
+ if (r)
+ goto out_oom;
+ j = 0;
+ }
+
+ dpage = svm_migrate_get_sys_page(migrate->vma, addr);
+ if (!dpage) {
+ pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
+ prange->svms, prange->start, prange->last);
+ r = -ENOMEM;
+ goto out_oom;
+ }
+
+ dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ r = dma_mapping_error(dev, dst[i]);
+ if (r) {
+ pr_debug("failed %d dma_map_page\n", r);
+ goto out_oom;
+ }
+
+ pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
+ dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
+
+ migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
+ migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+ }
+
+ r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
+ FROM_VRAM_TO_RAM, mfence);
+
+out_oom:
+ if (r) {
+ pr_debug("failed %d copy to ram\n", r);
+ while (i--) {
+ svm_migrate_put_sys_page(dst[i]);
+ migrate->dst[i] = 0;
+ }
+ }
+
+ return r;
+}
+
+static int
+svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+ struct vm_area_struct *vma, uint64_t start, uint64_t end)
+{
+ uint64_t npages = (end - start) >> PAGE_SHIFT;
+ struct dma_fence *mfence = NULL;
+ struct migrate_vma migrate;
+ dma_addr_t *scratch;
+ size_t size;
+ void *buf;
+ int r = -ENOMEM;
+
+ memset(&migrate, 0, sizeof(migrate));
+ migrate.vma = vma;
+ migrate.start = start;
+ migrate.end = end;
+ migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ migrate.pgmap_owner = adev;
+
+ size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
+ size *= npages;
+ buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ if (!buf)
+ goto out;
+
+ migrate.src = buf;
+ migrate.dst = migrate.src + npages;
+ scratch = (dma_addr_t *)(migrate.dst + npages);
+
+ r = migrate_vma_setup(&migrate);
+ if (r) {
+ pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
+ r, prange->svms, prange->start, prange->last);
+ goto out_free;
+ }
+
+ pr_debug("cpages %ld\n", migrate.cpages);
+
+ if (migrate.cpages) {
+ r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
+ scratch);
+ migrate_vma_pages(&migrate);
+ svm_migrate_copy_done(adev, mfence);
+ migrate_vma_finalize(&migrate);
+ } else {
+ pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
+ prange->start, prange->last);
+ }
+
+ svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+
+out_free:
+ kvfree(buf);
+out:
+ return r;
+}
+
+/**
+ * svm_migrate_vram_to_ram - migrate svm range from device to system
+ * @prange: range structure
+ * @mm: process mm, use current->mm if NULL
+ *
+ * Context: Process context, caller holds mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev;
+ struct vm_area_struct *vma;
+ unsigned long addr;
+ unsigned long start;
+ unsigned long end;
+ int r = 0;
+
+ if (!prange->actual_loc) {
+ pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
+ prange->start, prange->last);
+ return 0;
+ }
+
+ adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
+ if (!adev) {
+ pr_debug("failed to get device by id 0x%x\n",
+ prange->actual_loc);
+ return -ENODEV;
+ }
+
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
+ prange->svms, prange, prange->start, prange->last,
+ prange->actual_loc);
+
+ start = prange->start << PAGE_SHIFT;
+ end = (prange->last + 1) << PAGE_SHIFT;
+
+ for (addr = start; addr < end;) {
+ unsigned long next;
+
+ vma = find_vma(mm, addr);
+ if (!vma || addr < vma->vm_start)
+ break;
+
+ next = min(vma->vm_end, end);
+ r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
+ if (r) {
+ pr_debug("failed %d to migrate\n", r);
+ break;
+ }
+ addr = next;
+ }
+
+ if (!r) {
+ svm_range_vram_node_free(prange);
+ prange->actual_loc = 0;
+ }
+ return r;
+}
+
+/**
+ * svm_migrate_vram_to_vram - migrate svm range from device to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: process mm, use current->mm if NULL
+ *
+ * Context: Process context, caller holds mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ struct mm_struct *mm)
+{
+ int r;
+
+ /*
+ * TODO: for both devices with PCIe large bar or on same xgmi hive, skip
+ * system memory as migration bridge
+ */
+
+ pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
+
+ r = svm_migrate_vram_to_ram(prange, mm);
+ if (r)
+ return r;
+
+ return svm_migrate_ram_to_vram(prange, best_loc, mm);
+}
+
+int
+svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+ struct mm_struct *mm)
+{
+ if (!prange->actual_loc)
+ return svm_migrate_ram_to_vram(prange, best_loc, mm);
+ else
+ return svm_migrate_vram_to_vram(prange, best_loc, mm);
+}
+
+/**
+ * svm_migrate_to_ram - CPU page fault handler
+ * @vmf: CPU vm fault vma, address
+ *
+ * Context: vm fault handler, caller holds the mmap read lock
+ *
+ * Return:
+ * 0 - OK
+ * VM_FAULT_SIGBUS - notify the application of a SIGBUS page fault
+ */
+static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+{
+ unsigned long addr = vmf->address;
+ struct vm_area_struct *vma;
+ enum svm_work_list_ops op;
+ struct svm_range *parent;
+ struct svm_range *prange;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+ int r = 0;
+
+ vma = vmf->vma;
+ mm = vma->vm_mm;
+
+ p = kfd_lookup_process_by_mm(vma->vm_mm);
+ if (!p) {
+ pr_debug("failed find process at fault address 0x%lx\n", addr);
+ return VM_FAULT_SIGBUS;
+ }
+ addr >>= PAGE_SHIFT;
+ pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
+
+ mutex_lock(&p->svms.lock);
+
+ prange = svm_range_from_addr(&p->svms, addr, &parent);
+ if (!prange) {
+ pr_debug("cannot find svm range at 0x%lx\n", addr);
+ r = -EFAULT;
+ goto out;
+ }
+
+ mutex_lock(&parent->migrate_mutex);
+ if (prange != parent)
+ mutex_lock_nested(&prange->migrate_mutex, 1);
+
+ if (!prange->actual_loc)
+ goto out_unlock_prange;
+
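+ /* Split the range at migration granularity around the faulting address
+ * so only the chunk that faulted is migrated back to system memory.
+ */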
+ svm_range_lock(parent);
+ if (prange != parent)
+ mutex_lock_nested(&prange->lock, 1);
+ r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
+ if (prange != parent)
+ mutex_unlock(&prange->lock);
+ svm_range_unlock(parent);
+ if (r) {
+ pr_debug("failed %d to split range by granularity\n", r);
+ goto out_unlock_prange;
+ }
+
+ r = svm_migrate_vram_to_ram(prange, mm);
+ if (r)
+ pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
+ prange, prange->start, prange->last);
+
+ /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+ if (p->xnack_enabled && parent == prange)
+ op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
+ else
+ op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+ svm_range_add_list_work(&p->svms, parent, mm, op);
+ schedule_deferred_list_work(&p->svms);
+
+out_unlock_prange:
+ if (prange != parent)
+ mutex_unlock(&prange->migrate_mutex);
+ mutex_unlock(&parent->migrate_mutex);
+out:
+ mutex_unlock(&p->svms.lock);
+ kfd_unref_process(p);
+
+ pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
+
+ return r ? VM_FAULT_SIGBUS : 0;
+}
+
+static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
+ .page_free = svm_migrate_page_free,
+ .migrate_to_ram = svm_migrate_to_ram,
+};
+
+/* Each VRAM page uses sizeof(struct page) on system memory */
+#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
+
+int svm_migrate_init(struct amdgpu_device *adev)
+{
+ struct kfd_dev *kfddev = adev->kfd.dev;
+ struct dev_pagemap *pgmap;
+ struct resource *res;
+ unsigned long size;
+ void *r;
+
+ /* Page migration works on Vega10 or newer */
+ if (kfddev->device_info->asic_family < CHIP_VEGA10)
+ return -EINVAL;
+
+ pgmap = &kfddev->pgmap;
+ memset(pgmap, 0, sizeof(*pgmap));
+
+ /* TODO: register all vram with HMM for now.
+ * The reserved size should be excluded.
+ */
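+ /* Reserve a range of unused physical address space and back it with
+ * MEMORY_DEVICE_PRIVATE struct pages, one per VRAM page. CPU access to
+ * these pages faults into svm_migrate_to_ram().
+ */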
+ size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
+ res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+ if (IS_ERR(res))
+ return -ENOMEM;
+
+ pgmap->type = MEMORY_DEVICE_PRIVATE;
+ pgmap->nr_range = 1;
+ pgmap->range.start = res->start;
+ pgmap->range.end = res->end;
+ pgmap->ops = &svm_migrate_pgmap_ops;
+ pgmap->owner = adev;
+ pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ r = devm_memremap_pages(adev->dev, pgmap);
+ if (IS_ERR(r)) {
+ pr_err("failed to register HMM device memory\n");
+ devm_release_mem_region(adev->dev, res->start,
+ res->end - res->start + 1);
+ return PTR_ERR(r);
+ }
+
+ pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
+ SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);
+
+ amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
+
+ pr_info("HMM registered %ldMB device memory\n", size >> 20);
+
+ return 0;
+}
+
+void svm_migrate_fini(struct amdgpu_device *adev)
+{
+ struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;
+
+ devm_memunmap_pages(adev->dev, pgmap);
+ devm_release_mem_region(adev->dev, pgmap->range.start,
+ pgmap->range.end - pgmap->range.start + 1);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
new file mode 100644
index 000000000000..0de76b5d4973
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_MIGRATE_H_
+#define KFD_MIGRATE_H_
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+
+enum MIGRATION_COPY_DIR {
+ FROM_RAM_TO_VRAM = 0,
+ FROM_VRAM_TO_RAM
+};
+
+int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+ struct mm_struct *mm);
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
+
+int svm_migrate_init(struct amdgpu_device *adev);
+void svm_migrate_fini(struct amdgpu_device *adev);
+
+#else
+
+static inline int svm_migrate_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+static inline void svm_migrate_fini(struct amdgpu_device *adev)
+{
+ /* empty */
+}
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
+#endif /* KFD_MIGRATE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e840dd581719..d8e940f03102 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -124,14 +124,14 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
unsigned int alloc_size_bytes;
unsigned int *rl_buffer, rl_wptr, i;
- int retval, proccesses_mapped;
+ int retval, processes_mapped;
struct device_process_node *cur;
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
bool is_over_subscription;
- rl_wptr = retval = proccesses_mapped = 0;
+ rl_wptr = retval = processes_mapped = 0;
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
&alloc_size_bytes, &is_over_subscription);
@@ -148,7 +148,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
list_for_each_entry(cur, queues, list) {
qpd = cur->qpd;
/* build map process packet */
- if (proccesses_mapped >= pm->dqm->processes_count) {
+ if (processes_mapped >= pm->dqm->processes_count) {
pr_debug("Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
@@ -158,7 +158,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (retval)
return retval;
- proccesses_mapped++;
+ processes_mapped++;
inc_wptr(&rl_wptr, pm->pmf->map_process_size,
alloc_size_bytes);
@@ -242,7 +242,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
@@ -250,8 +249,13 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_NAVY_FLOUNDER:
case CHIP_VANGOGH:
case CHIP_DIMGREY_CAVEFISH:
+ case CHIP_BEIGE_GOBY:
+ case CHIP_YELLOW_CARP:
pm->pmf = &kfd_v9_pm_funcs;
break;
+ case CHIP_ALDEBARAN:
+ pm->pmf = &kfd_aldebaran_pm_funcs;
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dqm->dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index e3ba0cd3b6fa..7ea3f671b325 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -24,6 +24,7 @@
#include "kfd_kernel_queue.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_headers_aldebaran.h"
#include "kfd_pm4_opcodes.h"
#include "gc/gc_10_1_0_sh_mask.h"
@@ -35,7 +36,6 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet = (struct pm4_mes_map_process *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
@@ -73,6 +73,45 @@ static int pm_map_process_v9(struct packet_manager *pm,
return 0;
}
+static int pm_map_process_aldebaran(struct packet_manager *pm,
+ uint32_t *buffer, struct qcm_process_device *qpd)
+{
+ struct pm4_mes_map_process_aldebaran *packet;
+ uint64_t vm_page_table_base_addr = qpd->page_table_base;
+
+ packet = (struct pm4_mes_map_process_aldebaran *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process_aldebaran));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.process_quantum = 10;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
+ packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
+ packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
+ packet->bitfields14.num_oac = qpd->num_oac;
+ packet->bitfields14.sdma_enable = 1;
+ packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ if (qpd->tba_addr) {
+ packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+ packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+ }
+
+ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+ packet->vm_context_page_table_base_addr_lo32 =
+ lower_32_bits(vm_page_table_base_addr);
+ packet->vm_context_page_table_base_addr_hi32 =
+ upper_32_bits(vm_page_table_base_addr);
+
+ return 0;
+}
+
static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
@@ -324,3 +363,20 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
+
+const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
+ .map_process = pm_map_process_aldebaran,
+ .runlist = pm_runlist_v9,
+ .set_resources = pm_set_resources_v9,
+ .map_queues = pm_map_queues_v9,
+ .unmap_queues = pm_unmap_queues_v9,
+ .query_status = pm_query_status_v9,
+ .release_mem = NULL,
+ .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
+ .runlist_size = sizeof(struct pm4_mes_runlist),
+ .set_resources_size = sizeof(struct pm4_mes_set_resources),
+ .map_queues_size = sizeof(struct pm4_mes_map_queues),
+ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .query_status_size = sizeof(struct pm4_mes_query_status),
+ .release_mem_size = 0,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
new file mode 100644
index 000000000000..f795ec815e2a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
+#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
+
+struct pm4_mes_map_process_aldebaran {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16; /* 0 - 15 */
+ uint32_t single_memops:1; /* 16 */
+ uint32_t reserved1:1; /* 17 */
+ uint32_t debug_vmid:4; /* 18 - 21 */
+ uint32_t new_debug:1; /* 22 */
+ uint32_t tmz:1; /* 23 */
+ uint32_t diq_enable:1; /* 24 */
+ uint32_t process_quantum:7; /* 25 - 31 */
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t vm_context_page_table_base_addr_lo32;
+
+ uint32_t vm_context_page_table_base_addr_hi32;
+
+ uint32_t sh_mem_bases;
+
+ uint32_t sh_mem_config;
+
+ uint32_t sq_shader_tba_lo;
+
+ uint32_t sq_shader_tba_hi;
+
+ uint32_t sq_shader_tma_lo;
+
+ uint32_t sq_shader_tma_hi;
+
+ uint32_t reserved6;
+
+ uint32_t gds_addr_lo;
+
+ uint32_t gds_addr_hi;
+
+ union {
+ struct {
+ uint32_t num_gws:7;
+ uint32_t sdma_enable:1;
+ uint32_t num_oac:4;
+ uint32_t gds_size_hi:4;
+ uint32_t gds_size:6;
+ uint32_t num_queues:10;
+ } bitfields14;
+ uint32_t ordinal14;
+ };
+
+ uint32_t spi_gdbg_per_vmid_cntl;
+
+ uint32_t tcp_watch_cntl[4];
+
+ uint32_t completion_signal_lo;
+
+ uint32_t completion_signal_hi;
+
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0b6595f7acda..6dc22fa1e555 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -322,6 +322,9 @@ struct kfd_dev {
unsigned int max_doorbell_slices;
int noretry;
+
+ /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
+ struct dev_pagemap pgmap;
};
enum kfd_mempool {
@@ -669,7 +672,7 @@ struct kfd_process_device {
/* VM context for GPUVM allocations */
struct file *drm_file;
- void *vm;
+ void *drm_priv;
/* GPUVM allocations storage */
struct idr alloc_idr;
@@ -731,6 +734,18 @@ struct kfd_process_device {
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
+struct svm_range_list {
+ struct mutex lock;
+ struct rb_root_cached objects;
+ struct list_head list;
+ struct work_struct deferred_list_work;
+ struct list_head deferred_range_list;
+ spinlock_t deferred_list_lock;
+ atomic_t evicted_ranges;
+ struct delayed_work restore_work;
+ DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
+};
+
/* Process data */
struct kfd_process {
/*
@@ -809,6 +824,11 @@ struct kfd_process {
struct kobject *kobj;
struct kobject *kobj_queues;
struct attribute attr_pasid;
+
+ /* shared virtual memory registered by this process */
+ struct svm_range_list svms;
+
+ bool xnack_enabled;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -842,6 +862,20 @@ struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
+
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+ struct amdgpu_device *adev, uint32_t *gpuid,
+ uint32_t *gpuidx);
+static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
+ uint32_t gpuidx, uint32_t *gpuid) {
+ return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
+}
+static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
+ struct kfd_process *p, uint32_t gpuidx) {
+ return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
+}
+
void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_restore_queues(struct kfd_process *p);
@@ -857,6 +891,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
+
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma);
@@ -1052,6 +1088,7 @@ struct packet_manager_funcs {
extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
+extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm, bool hanging);
@@ -1107,7 +1144,9 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
void kfd_signal_reset_event(struct kfd_dev *dev);
-void kfd_flush_tlb(struct kfd_process_device *pdd);
+void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
+
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d97e330a5022..09b98a83f670 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -42,6 +42,7 @@ struct mm_struct;
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
+#include "kfd_svm.h"
/*
* List of struct kfd_process (field kfd_process).
@@ -108,8 +109,6 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
sdma_activity_work);
- if (!workarea)
- return;
pdd = workarea->pdd;
if (!pdd)
@@ -250,7 +249,7 @@ cleanup:
}
/**
- * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
+ * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
* by current process. Translates acquired wave count into number of compute units
* that are occupied.
*
@@ -647,8 +646,9 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
{
struct kfd_dev *dev = pdd->dev;
- amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
+ NULL);
}
/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
@@ -667,11 +667,12 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
int err;
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
- pdd->vm, &mem, NULL, flags);
+ pdd->drm_priv, &mem, NULL, flags);
if (err)
goto err_alloc_mem;
- err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
+ err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
+ pdd->drm_priv, NULL);
if (err)
goto err_map_mem;
@@ -712,7 +713,8 @@ sync_memory_failed:
return err;
err_map_mem:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
+ NULL);
err_alloc_mem:
*kptr = NULL;
return err;
@@ -901,13 +903,14 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *peer_pdd = p->pdds[i];
- if (!peer_pdd->vm)
+ if (!peer_pdd->drm_priv)
continue;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer_pdd->dev->kgd, mem, peer_pdd->vm);
+ peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
}
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
+ pdd->drm_priv, NULL);
kfd_process_device_remove_obj_handle(pdd, id);
}
}
@@ -932,7 +935,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
if (pdd->drm_file) {
amdgpu_amdkfd_gpuvm_release_process_vm(
- pdd->dev->kgd, pdd->vm);
+ pdd->dev->kgd, pdd->drm_priv);
fput(pdd->drm_file);
}
@@ -1000,6 +1003,7 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_iommu_unbind_process(p);
kfd_process_free_outstanding_kfd_bos(p);
+ svm_range_list_fini(p);
kfd_process_destroy_pdds(p);
dma_fence_put(p->ef);
@@ -1058,6 +1062,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
+ cancel_delayed_work_sync(&p->svms.restore_work);
mutex_lock(&p->mutex);
@@ -1186,6 +1191,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
}
}
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
+{
+ int i;
+
+ /* On most GFXv9 GPUs, the retry mode in the SQ must match the
+ * boot time retry setting. Mixing processes with different
+ * XNACK/retry settings can hang the GPU.
+ *
+ * Different GPUs can have different noretry settings depending
+ * on HW bugs or limitations. We need to find at least one
+ * XNACK mode for this process that's compatible with all GPUs.
+ * Fortunately GPUs with retry enabled (noretry=0) can run code
+ * built for XNACK-off. On GFXv9 it may perform slower.
+ *
+ * Therefore applications built for XNACK-off can always be
+ * supported and will be our fallback if any GPU does not
+ * support retry.
+ */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_dev *dev = p->pdds[i]->dev;
+
+ /* Only consider GFXv9 and higher GPUs. Older GPUs don't
+ * support the SVM APIs and don't need to be considered
+ * for the XNACK mode selection.
+ */
+ if (dev->device_info->asic_family < CHIP_VEGA10)
+ continue;
+ /* Aldebaran can always support XNACK because it can support
+ * per-process XNACK mode selection. But let the dev->noretry
+ * setting still influence the default XNACK mode.
+ */
+ if (supported &&
+ dev->device_info->asic_family == CHIP_ALDEBARAN)
+ continue;
+
+ /* GFXv10 and later GPUs do not support shader preemption
+ * during page faults. This can lead to poor QoS for queue
+ * management and memory-manager-related preemptions or
+ * even deadlocks.
+ */
+ if (dev->device_info->asic_family >= CHIP_NAVI10)
+ return false;
+
+ if (dev->noretry)
+ return false;
+ }
+
+ return true;
+}
+
/*
* On return the kfd_process is fully operational and will be freed when the
* mm is released
@@ -1224,6 +1279,13 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err != 0)
goto err_init_apertures;
+ /* Check XNACK support after PDDs are created in kfd_init_apertures */
+ process->xnack_enabled = kfd_process_xnack_mode(process, false);
+
+ err = svm_range_list_init(process);
+ if (err)
+ goto err_init_svm_range_list;
+
/* alloc_notifier needs to find the process in the hash table */
hash_add_rcu(kfd_processes_table, &process->kfd_processes,
(uintptr_t)process->mm);
@@ -1246,6 +1308,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
err_register_notifier:
hash_del_rcu(&process->kfd_processes);
+ svm_range_list_fini(process);
+err_init_svm_range_list:
kfd_process_free_outstanding_kfd_bos(process);
kfd_process_destroy_pdds(process);
err_init_apertures:
@@ -1375,7 +1439,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (!drm_file)
return -EINVAL;
- if (pdd->vm)
+ if (pdd->drm_priv)
return -EBUSY;
p = pdd->process;
@@ -1383,13 +1447,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
dev->kgd, drm_file, p->pasid,
- &pdd->vm, &p->kgd_process_info, &p->ef);
+ &p->kgd_process_info, &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
return ret;
}
-
- amdgpu_vm_set_task_info(pdd->vm);
+ pdd->drm_priv = drm_file->private_data;
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
@@ -1405,7 +1468,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
err_init_cwsr:
err_reserve_ib_mem:
kfd_process_device_free_bos(pdd);
- pdd->vm = NULL;
+ pdd->drm_priv = NULL;
return ret;
}
@@ -1429,7 +1492,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
return ERR_PTR(-ENOMEM);
}
- if (!pdd->vm)
+ if (!pdd->drm_priv)
return ERR_PTR(-ENODEV);
/*
@@ -1600,6 +1663,32 @@ int kfd_process_restore_queues(struct kfd_process *p)
return ret;
}
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
+{
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++)
+ if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+ return i;
+ return -EINVAL;
+}
+
+int
+kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+ uint32_t *gpuid, uint32_t *gpuidx)
+{
+ struct kgd_dev *kgd = (struct kgd_dev *)adev;
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++)
+ if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
+ *gpuid = p->pdds[i]->dev->id;
+ *gpuidx = i;
+ return 0;
+ }
+ return -EINVAL;
+}
+
static void evict_process_worker(struct work_struct *work)
{
int ret;
@@ -1748,7 +1837,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
-void kfd_flush_tlb(struct kfd_process_device *pdd)
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
{
struct kfd_dev *dev = pdd->dev;
@@ -1761,7 +1850,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
pdd->qpd.vmid);
} else {
amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
- pdd->process->pasid);
+ pdd->process->pasid, type);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
new file mode 100644
index 000000000000..dff1011dd7ee
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -0,0 +1,3102 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/sched/task.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mn.h"
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
+/* Long enough to ensure no retry fault comes after svm range is restored and
+ * page table is updated.
+ */
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
+
+static void svm_range_evict_svm_bo_worker(struct work_struct *work);
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
+
+static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
+ .invalidate = svm_range_cpu_invalidate_pagetables,
+};
+
+/**
+ * svm_range_unlink - unlink svm_range from lists and interval tree
+ * @prange: svm range structure to be removed
+ *
+ * Remove the svm_range from the svms and svm_bo lists and the svms
+ * interval tree.
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_unlink(struct svm_range *prange)
+{
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange, prange->start, prange->last);
+
+ if (prange->svm_bo) {
+ spin_lock(&prange->svm_bo->list_lock);
+ list_del(&prange->svm_bo_list);
+ spin_unlock(&prange->svm_bo->list_lock);
+ }
+
+ list_del(&prange->list);
+ if (prange->it_node.start != 0 && prange->it_node.last != 0)
+ interval_tree_remove(&prange->it_node, &prange->svms->objects);
+}
+
+static void
+svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
+{
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange, prange->start, prange->last);
+
+ mmu_interval_notifier_insert_locked(&prange->notifier, mm,
+ prange->start << PAGE_SHIFT,
+ prange->npages << PAGE_SHIFT,
+ &svm_range_mn_ops);
+}
+
+/**
+ * svm_range_add_to_svms - add svm range to svms
+ * @prange: svm range structure to be added
+ *
+ * Add the svm range to svms interval tree and link list
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_add_to_svms(struct svm_range *prange)
+{
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange, prange->start, prange->last);
+
+ list_add_tail(&prange->list, &prange->svms->list);
+ prange->it_node.start = prange->start;
+ prange->it_node.last = prange->last;
+ interval_tree_insert(&prange->it_node, &prange->svms->objects);
+}
+
+static void svm_range_remove_notifier(struct svm_range *prange)
+{
+ pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ prange->svms, prange,
+ prange->notifier.interval_tree.start >> PAGE_SHIFT,
+ prange->notifier.interval_tree.last >> PAGE_SHIFT);
+
+ if (prange->notifier.interval_tree.start != 0 &&
+ prange->notifier.interval_tree.last != 0)
+ mmu_interval_notifier_remove(&prange->notifier);
+}
+
+static int
+svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
+ unsigned long *hmm_pfns, uint64_t npages)
+{
+ enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+ dma_addr_t *addr = *dma_addr;
+ struct page *page;
+ int i, r;
+
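+ /* Allocate the dma address array on first use and return it through
+ * @dma_addr so the caller can reuse it for later mappings.
+ */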
+ if (!addr) {
+ addr = kvmalloc_array(npages, sizeof(*addr),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!addr)
+ return -ENOMEM;
+ *dma_addr = addr;
+ }
+
+ for (i = 0; i < npages; i++) {
+ if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
+ "leaking dma mapping\n"))
+ dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
+
+ page = hmm_pfn_to_page(hmm_pfns[i]);
+ addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+ r = dma_mapping_error(dev, addr[i]);
+ if (r) {
+ pr_debug("failed %d dma_map_page\n", r);
+ return r;
+ }
+ pr_debug("dma mapping 0x%llx for page addr 0x%lx\n",
+ addr[i] >> PAGE_SHIFT, page_to_pfn(page));
+ }
+ return 0;
+}
+
+static int
+svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+ unsigned long *hmm_pfns)
+{
+ struct kfd_process *p;
+ uint32_t gpuidx;
+ int r;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+
+ pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ return -EINVAL;
+ }
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+ r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx],
+ hmm_pfns, prange->npages);
+ if (r)
+ break;
+ }
+
+ return r;
+}
+
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+ unsigned long offset, unsigned long npages)
+{
+ enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+ int i;
+
+ if (!dma_addr)
+ return;
+
+ for (i = offset; i < offset + npages; i++) {
+ if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
+ continue;
+ pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
+ dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
+ dma_addr[i] = 0;
+ }
+}
+
+void svm_range_free_dma_mappings(struct svm_range *prange)
+{
+ struct kfd_process_device *pdd;
+ dma_addr_t *dma_addr;
+ struct device *dev;
+ struct kfd_process *p;
+ uint32_t gpuidx;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+ dma_addr = prange->dma_addr[gpuidx];
+ if (!dma_addr)
+ continue;
+
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ continue;
+ }
+ dev = &pdd->dev->pdev->dev;
+ svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+ kvfree(dma_addr);
+ prange->dma_addr[gpuidx] = NULL;
+ }
+}
+
+static void svm_range_free(struct svm_range *prange)
+{
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
+ prange->start, prange->last);
+
+ svm_range_vram_node_free(prange);
+ svm_range_free_dma_mappings(prange);
+ mutex_destroy(&prange->lock);
+ mutex_destroy(&prange->migrate_mutex);
+ kfree(prange);
+}
+
+static void
+svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
+ uint8_t *granularity, uint32_t *flags)
+{
+ *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+ *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+ *granularity = 9;
+ *flags =
+ KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
+}
+
+static struct
+svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
+ uint64_t last)
+{
+ uint64_t size = last - start + 1;
+ struct svm_range *prange;
+ struct kfd_process *p;
+
+ prange = kzalloc(sizeof(*prange), GFP_KERNEL);
+ if (!prange)
+ return NULL;
+ prange->npages = size;
+ prange->svms = svms;
+ prange->start = start;
+ prange->last = last;
+ INIT_LIST_HEAD(&prange->list);
+ INIT_LIST_HEAD(&prange->update_list);
+ INIT_LIST_HEAD(&prange->remove_list);
+ INIT_LIST_HEAD(&prange->insert_list);
+ INIT_LIST_HEAD(&prange->svm_bo_list);
+ INIT_LIST_HEAD(&prange->deferred_list);
+ INIT_LIST_HEAD(&prange->child_list);
+ atomic_set(&prange->invalid, 0);
+ prange->validate_timestamp = 0;
+ mutex_init(&prange->migrate_mutex);
+ mutex_init(&prange->lock);
+
+ p = container_of(svms, struct kfd_process, svms);
+ if (p->xnack_enabled)
+ bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
+ MAX_GPU_INSTANCE);
+
+ svm_range_set_default_attributes(&prange->preferred_loc,
+ &prange->prefetch_loc,
+ &prange->granularity, &prange->flags);
+
+ pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
+
+ return prange;
+}
+
+static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
+{
+ if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref))
+ return false;
+
+ return true;
+}
+
+static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+{
+ if (svm_bo)
+ kref_get(&svm_bo->kref);
+
+ return svm_bo;
+}
+
+static void svm_range_bo_release(struct kref *kref)
+{
+ struct svm_range_bo *svm_bo;
+
+ svm_bo = container_of(kref, struct svm_range_bo, kref);
+ spin_lock(&svm_bo->list_lock);
+ while (!list_empty(&svm_bo->range_list)) {
+ struct svm_range *prange =
+ list_first_entry(&svm_bo->range_list,
+ struct svm_range, svm_bo_list);
+ /* list_del_init tells a concurrent svm_range_vram_node_new when
+ * it's safe to reuse the svm_bo pointer and svm_bo_list head.
+ */
+ list_del_init(&prange->svm_bo_list);
+ spin_unlock(&svm_bo->list_lock);
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange->start, prange->last);
+ mutex_lock(&prange->lock);
+ prange->svm_bo = NULL;
+ mutex_unlock(&prange->lock);
+
+ spin_lock(&svm_bo->list_lock);
+ }
+ spin_unlock(&svm_bo->list_lock);
+ if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
+ /* We're not in the eviction worker.
+ * Signal the fence and synchronize with any
+ * pending eviction work.
+ */
+ dma_fence_signal(&svm_bo->eviction_fence->base);
+ cancel_work_sync(&svm_bo->eviction_work);
+ }
+ dma_fence_put(&svm_bo->eviction_fence->base);
+ amdgpu_bo_unref(&svm_bo->bo);
+ kfree(svm_bo);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+ if (!svm_bo)
+ return;
+
+ kref_put(&svm_bo->kref, svm_range_bo_release);
+}
+
+static bool
+svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
+{
+ struct amdgpu_device *bo_adev;
+
+ mutex_lock(&prange->lock);
+ if (!prange->svm_bo) {
+ mutex_unlock(&prange->lock);
+ return false;
+ }
+ if (prange->ttm_res) {
+ /* We still have a reference, all is well */
+ mutex_unlock(&prange->lock);
+ return true;
+ }
+ if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+ /*
+ * Migrate from GPU to GPU, remove range from source bo_adev
+ * svm_bo range list, and return false to allocate svm_bo from
+ * destination adev.
+ */
+ bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ if (bo_adev != adev) {
+ mutex_unlock(&prange->lock);
+
+ spin_lock(&prange->svm_bo->list_lock);
+ list_del_init(&prange->svm_bo_list);
+ spin_unlock(&prange->svm_bo->list_lock);
+
+ svm_range_bo_unref(prange->svm_bo);
+ return false;
+ }
+ if (READ_ONCE(prange->svm_bo->evicting)) {
+ struct dma_fence *f;
+ struct svm_range_bo *svm_bo;
+ /* The BO is getting evicted,
+ * we need to get a new one
+ */
+ mutex_unlock(&prange->lock);
+ svm_bo = prange->svm_bo;
+ f = dma_fence_get(&svm_bo->eviction_fence->base);
+ svm_range_bo_unref(prange->svm_bo);
+ /* wait for the fence to avoid long spin-loop
+ * at list_empty_careful
+ */
+ dma_fence_wait(f, false);
+ dma_fence_put(f);
+ } else {
+ /* The BO was still around and we got
+ * a new reference to it
+ */
+ mutex_unlock(&prange->lock);
+ pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
+ prange->svms, prange->start, prange->last);
+
+ prange->ttm_res = prange->svm_bo->bo->tbo.resource;
+ return true;
+ }
+
+ } else {
+ mutex_unlock(&prange->lock);
+ }
+
+ /* We need a new svm_bo. Spin-loop to wait for concurrent
+ * svm_range_bo_release to finish removing this range from
+ * its range list. After this, it is safe to reuse the
+ * svm_bo pointer and svm_bo_list head.
+ */
+ while (!list_empty_careful(&prange->svm_bo_list))
+ ;
+
+ return false;
+}
+
+static struct svm_range_bo *svm_range_bo_new(void)
+{
+ struct svm_range_bo *svm_bo;
+
+ svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL);
+ if (!svm_bo)
+ return NULL;
+
+ kref_init(&svm_bo->kref);
+ INIT_LIST_HEAD(&svm_bo->range_list);
+ spin_lock_init(&svm_bo->list_lock);
+
+ return svm_bo;
+}
+
+int
+svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
+ bool clear)
+{
+ struct amdgpu_bo_param bp;
+ struct svm_range_bo *svm_bo;
+ struct amdgpu_bo_user *ubo;
+ struct amdgpu_bo *bo;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+ int r;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+ pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+ prange->start, prange->last);
+
+ if (svm_range_validate_svm_bo(adev, prange))
+ return 0;
+
+ svm_bo = svm_range_bo_new();
+ if (!svm_bo) {
+ pr_debug("failed to alloc svm bo\n");
+ return -ENOMEM;
+ }
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_debug("failed to get mm\n");
+ kfree(svm_bo);
+ return -ESRCH;
+ }
+ svm_bo->svms = prange->svms;
+ svm_bo->eviction_fence =
+ amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+ mm,
+ svm_bo);
+ mmput(mm);
+ INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker);
+ svm_bo->evicting = 0;
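+ /* Back the whole range with one VRAM BO and attach the KFD eviction
+ * fence, so eviction of the BO can be detected and handled by
+ * svm_range_evict_svm_bo_worker.
+ */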
+ memset(&bp, 0, sizeof(bp));
+ bp.size = prange->npages * PAGE_SIZE;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
+ bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
+ bp.type = ttm_bo_type_device;
+ bp.resv = NULL;
+
+ r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ if (r) {
+ pr_debug("failed %d to create bo\n", r);
+ goto create_bo_failed;
+ }
+ bo = &ubo->bo;
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ pr_debug("failed %d to reserve bo\n", r);
+ goto reserve_bo_failed;
+ }
+
+ r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+ if (r) {
+ pr_debug("failed %d to reserve bo\n", r);
+ amdgpu_bo_unreserve(bo);
+ goto reserve_bo_failed;
+ }
+ amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true);
+
+ amdgpu_bo_unreserve(bo);
+
+ svm_bo->bo = bo;
+ prange->svm_bo = svm_bo;
+ prange->ttm_res = bo->tbo.resource;
+ prange->offset = 0;
+
+ spin_lock(&svm_bo->list_lock);
+ list_add(&prange->svm_bo_list, &svm_bo->range_list);
+ spin_unlock(&svm_bo->list_lock);
+
+ return 0;
+
+reserve_bo_failed:
+ amdgpu_bo_unref(&bo);
+create_bo_failed:
+ dma_fence_put(&svm_bo->eviction_fence->base);
+ kfree(svm_bo);
+ prange->ttm_res = NULL;
+
+ return r;
+}
+
+void svm_range_vram_node_free(struct svm_range *prange)
+{
+ svm_range_bo_unref(prange->svm_bo);
+ prange->ttm_res = NULL;
+}
+
+struct amdgpu_device *
+svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
+{
+ struct kfd_process_device *pdd;
+ struct kfd_process *p;
+ int32_t gpu_idx;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id);
+ if (gpu_idx < 0) {
+ pr_debug("failed to get device by id 0x%x\n", gpu_id);
+ return NULL;
+ }
+ pdd = kfd_process_device_from_gpuidx(p, gpu_idx);
+ if (!pdd) {
+ pr_debug("failed to get device by idx 0x%x\n", gpu_idx);
+ return NULL;
+ }
+
+ return (struct amdgpu_device *)pdd->dev->kgd;
+}
+
+static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+static int
+svm_range_check_attr(struct kfd_process *p,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+ uint32_t i;
+
+ for (i = 0; i < nattr; i++) {
+ uint32_t val = attrs[i].value;
+ int gpuidx = MAX_GPU_INSTANCE;
+
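+		/* gpuidx == MAX_GPU_INSTANCE means the attribute value is not
+		 * a GPU id; anything smaller must resolve to a supported
+		 * device below.
+		 */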
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
+ val != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
+ gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM)
+ gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
+ break;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ break;
+ case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ break;
+ default:
+ pr_debug("unknown attr type 0x%x\n", attrs[i].type);
+ return -EINVAL;
+ }
+
+ if (gpuidx < 0) {
+ pr_debug("no GPU 0x%x found\n", val);
+ return -EINVAL;
+ } else if (gpuidx < MAX_GPU_INSTANCE &&
+ !test_bit(gpuidx, p->svms.bitmap_supported)) {
+ pr_debug("GPU 0x%x not supported\n", val);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void
+svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+ uint32_t i;
+ int gpuidx;
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ prange->preferred_loc = attrs[i].value;
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ prange->prefetch_loc = attrs[i].value;
+ break;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ gpuidx = kfd_process_gpuidx_from_gpuid(p,
+ attrs[i].value);
+ if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+ bitmap_clear(prange->bitmap_access, gpuidx, 1);
+ bitmap_clear(prange->bitmap_aip, gpuidx, 1);
+ } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+ bitmap_set(prange->bitmap_access, gpuidx, 1);
+ bitmap_clear(prange->bitmap_aip, gpuidx, 1);
+ } else {
+ bitmap_clear(prange->bitmap_access, gpuidx, 1);
+ bitmap_set(prange->bitmap_aip, gpuidx, 1);
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ prange->flags |= attrs[i].value;
+ break;
+ case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ prange->flags &= ~attrs[i].value;
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ prange->granularity = attrs[i].value;
+ break;
+ default:
+ WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+ }
+ }
+}
+
+/**
+ * svm_range_debug_dump - print all range information from svms
+ * @svms: svm range list header
+ *
+ * debug output svm range start, end, prefetch location from svms
+ * interval tree and link list
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_debug_dump(struct svm_range_list *svms)
+{
+ struct interval_tree_node *node;
+ struct svm_range *prange;
+
+ pr_debug("dump svms 0x%p list\n", svms);
+ pr_debug("range\tstart\tpage\tend\t\tlocation\n");
+
+ list_for_each_entry(prange, &svms->list, list) {
+ pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1,
+ prange->actual_loc);
+ }
+
+ pr_debug("dump svms 0x%p interval tree\n", svms);
+ pr_debug("range\tstart\tpage\tend\t\tlocation\n");
+ node = interval_tree_iter_first(&svms->objects, 0, ~0ULL);
+ while (node) {
+ prange = container_of(node, struct svm_range, it_node);
+ pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1,
+ prange->actual_loc);
+ node = interval_tree_iter_next(node, 0, ~0ULL);
+ }
+}
+
+static bool
+svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new)
+{
+ return (old->prefetch_loc == new->prefetch_loc &&
+ old->flags == new->flags &&
+ old->granularity == new->granularity);
+}
+
+static int
+svm_range_split_array(void *ppnew, void *ppold, size_t size,
+ uint64_t old_start, uint64_t old_n,
+ uint64_t new_start, uint64_t new_n)
+{
+ unsigned char *new, *old, *pold;
+ uint64_t d;
+
+ if (!ppold)
+ return 0;
+ pold = *(unsigned char **)ppold;
+ if (!pold)
+ return 0;
+
+ new = kvmalloc_array(new_n, size, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ d = (new_start - old_start) * size;
+ memcpy(new, pold + d, new_n * size);
+
+ old = kvmalloc_array(old_n, size, GFP_KERNEL);
+ if (!old) {
+ kvfree(new);
+ return -ENOMEM;
+ }
+
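+	/* If the new range kept the old start address, the surviving old
+	 * entries begin right after the part copied to the new array;
+	 * otherwise they stay at the front of the original array.
+	 */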
+ d = (new_start == old_start) ? new_n * size : 0;
+ memcpy(old, pold + d, old_n * size);
+
+ kvfree(pold);
+ *(void **)ppold = old;
+ *(void **)ppnew = new;
+
+ return 0;
+}
+
+static int
+svm_range_split_pages(struct svm_range *new, struct svm_range *old,
+ uint64_t start, uint64_t last)
+{
+ uint64_t npages = last - start + 1;
+ int i, r;
+
+ for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+ r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
+ sizeof(*old->dma_addr[i]), old->start,
+ npages, new->start, new->npages);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int
+svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
+ uint64_t start, uint64_t last)
+{
+ uint64_t npages = last - start + 1;
+
+ pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n",
+ new->svms, new, new->start, start, last);
+
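+	/* Both halves keep sharing the same VRAM BO; only the page offset
+	 * into the BO of the half that no longer starts at the old offset
+	 * needs to be adjusted.
+	 */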
+ if (new->start == old->start) {
+ new->offset = old->offset;
+ old->offset += new->npages;
+ } else {
+ new->offset = old->offset + npages;
+ }
+
+ new->svm_bo = svm_range_bo_ref(old->svm_bo);
+ new->ttm_res = old->ttm_res;
+
+ spin_lock(&new->svm_bo->list_lock);
+ list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+ spin_unlock(&new->svm_bo->list_lock);
+
+ return 0;
+}
+
+/**
+ * svm_range_split_adjust - split range and adjust
+ *
+ * @new: new range
+ * @old: the old range
+ * @start: the old range adjust to start address in pages
+ * @last: the old range adjust to last address in pages
+ *
+ * Copy the system memory dma_addr array or the vram ttm_res from the old
+ * range to the new range for new->npages pages starting at new->start. The
+ * remaining old range is adjusted to [start, last].
+ *
+ * Return:
+ * 0 - OK, -ENOMEM - out of memory
+ */
+static int
+svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
+ uint64_t start, uint64_t last)
+{
+ int r;
+
+ pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n",
+ new->svms, new->start, old->start, old->last, start, last);
+
+ if (new->start < old->start ||
+ new->last > old->last) {
+ WARN_ONCE(1, "invalid new range start or last\n");
+ return -EINVAL;
+ }
+
+ r = svm_range_split_pages(new, old, start, last);
+ if (r)
+ return r;
+
+ if (old->actual_loc && old->ttm_res) {
+ r = svm_range_split_nodes(new, old, start, last);
+ if (r)
+ return r;
+ }
+
+ old->npages = last - start + 1;
+ old->start = start;
+ old->last = last;
+ new->flags = old->flags;
+ new->preferred_loc = old->preferred_loc;
+ new->prefetch_loc = old->prefetch_loc;
+ new->actual_loc = old->actual_loc;
+ new->granularity = old->granularity;
+ bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
+ bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+
+ return 0;
+}
+
+/**
+ * svm_range_split - split a range in 2 ranges
+ *
+ * @prange: the svm range to split
+ * @start: the remaining range start address in pages
+ * @last: the remaining range last address in pages
+ * @new: the result new range generated
+ *
+ * Two cases only:
+ * case 1: if start == prange->start
+ * prange ==> prange[start, last]
+ * new range [last + 1, prange->last]
+ *
+ * case 2: if last == prange->last
+ * prange ==> prange[start, last]
+ * new range [prange->start, start - 1]
+ *
+ * Return:
+ * 0 - OK, -ENOMEM - out of memory, -EINVAL - invalid start, last
+ */
+static int
+svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
+ struct svm_range **new)
+{
+ uint64_t old_start = prange->start;
+ uint64_t old_last = prange->last;
+ struct svm_range_list *svms;
+ int r = 0;
+
+ pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms,
+ old_start, old_last, start, last);
+
+ if (old_start != start && old_last != last)
+ return -EINVAL;
+ if (start < old_start || last > old_last)
+ return -EINVAL;
+
+ svms = prange->svms;
+ if (old_start == start)
+ *new = svm_range_new(svms, last + 1, old_last);
+ else
+ *new = svm_range_new(svms, old_start, start - 1);
+ if (!*new)
+ return -ENOMEM;
+
+ r = svm_range_split_adjust(*new, prange, start, last);
+ if (r) {
+ pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
+ r, old_start, old_last, start, last);
+ svm_range_free(*new);
+ *new = NULL;
+ }
+
+ return r;
+}
+
+static int
+svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
+ uint64_t new_last, struct list_head *insert_list)
+{
+ struct svm_range *tail;
+ int r = svm_range_split(prange, prange->start, new_last, &tail);
+
+ if (!r)
+ list_add(&tail->insert_list, insert_list);
+ return r;
+}
+
+static int
+svm_range_split_head(struct svm_range *prange, struct svm_range *new,
+ uint64_t new_start, struct list_head *insert_list)
+{
+ struct svm_range *head;
+ int r = svm_range_split(prange, new_start, prange->last, &head);
+
+ if (!r)
+ list_add(&head->insert_list, insert_list);
+ return r;
+}
+
+static void
+svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+ struct svm_range *pchild, enum svm_work_list_ops op)
+{
+ pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+ pchild, pchild->start, pchild->last, prange, op);
+
+ pchild->work_item.mm = mm;
+ pchild->work_item.op = op;
+ list_add_tail(&pchild->child_list, &prange->child_list);
+}
+
+/**
+ * svm_range_split_by_granularity - collect ranges within granularity boundary
+ *
+ * @p: the process with svms list
+ * @mm: mm structure
+ * @addr: the vm fault address in pages, to split the prange
+ * @parent: parent range if prange is from child list
+ * @prange: prange to split
+ *
+ * Trims @prange to be a single aligned block of prange->granularity if
+ * possible. The head and tail are added to the child_list in @parent.
+ *
+ * Context: caller must hold mmap_read_lock and prange->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int
+svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
+ unsigned long addr, struct svm_range *parent,
+ struct svm_range *prange)
+{
+ struct svm_range *head, *tail;
+ unsigned long start, last, size;
+ int r;
+
+	/* Align the split range start and size to the granularity size, so a
+	 * single PTE can be used for the whole range. This reduces the number
+	 * of PTEs updated and the L1 TLB space used for translation.
+ */
+ size = 1UL << prange->granularity;
+ start = ALIGN_DOWN(addr, size);
+ last = ALIGN(addr + 1, size) - 1;
+
+ pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
+ prange->svms, prange->start, prange->last, start, last, size);
+
+ if (start > prange->start) {
+ r = svm_range_split(prange, start, prange->last, &head);
+ if (r)
+ return r;
+ svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
+ }
+
+ if (last < prange->last) {
+ r = svm_range_split(prange, prange->start, last, &tail);
+ if (r)
+ return r;
+ svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ }
+
+ /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+ if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
+ prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
+ pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
+ prange, prange->start, prange->last,
+ SVM_OP_ADD_RANGE_AND_MAP);
+ }
+ return 0;
+}
+
+static uint64_t
+svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
+{
+ struct amdgpu_device *bo_adev;
+ uint32_t flags = prange->flags;
+ uint32_t mapping_flags = 0;
+ uint64_t pte_flags;
+ bool snoop = !prange->ttm_res;
+ bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+
+ if (prange->svm_bo && prange->ttm_res)
+ bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+
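+	/* Pick the MTYPE: cache-coherent (CC) or RW only for BOs in local
+	 * VRAM, uncached or non-coherent otherwise. Snooping is enabled for
+	 * system memory, for peer VRAM reached over XGMI and, on Aldebaran,
+	 * for local VRAM when the GPU is XGMI-connected to the CPU.
+	 */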
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ if (prange->svm_bo && prange->ttm_res) {
+ if (bo_adev == adev) {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+ } else {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ snoop = true;
+ }
+ } else {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+ break;
+ case CHIP_ALDEBARAN:
+ if (prange->svm_bo && prange->ttm_res) {
+ if (bo_adev == adev) {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+ if (adev->gmc.xgmi.connected_to_cpu)
+ snoop = true;
+ } else {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ snoop = true;
+ }
+ } else {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+ break;
+ default:
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+
+ mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+
+ if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
+ mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
+ if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+ pte_flags = AMDGPU_PTE_VALID;
+ pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM;
+ pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+
+ pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
+ prange->svms, prange->start, prange->last,
+ prange->ttm_res ? 1:0, pte_flags, mapping_flags);
+
+ return pte_flags;
+}
+
+static int
+svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ uint64_t start, uint64_t last,
+ struct dma_fence **fence)
+{
+ uint64_t init_pte_value = 0;
+
+ pr_debug("[0x%llx 0x%llx]\n", start, last);
+
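+	/* Writing PTE value 0 over [start, last] removes the GPU mapping in
+	 * a single page-table update.
+	 */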
+ return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
+ start, last, init_pte_value, 0,
+ NULL, NULL, fence, NULL);
+}
+
+static int
+svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
+ unsigned long last)
+{
+ DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+ struct kfd_process_device *pdd;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_device *adev;
+ struct kfd_process *p;
+ uint32_t gpuidx;
+ int r = 0;
+
+ bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+ MAX_GPU_INSTANCE);
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+ pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ return -EINVAL;
+ }
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+ r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+ start, last, &fence);
+ if (r)
+ break;
+
+ if (fence) {
+ r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ fence = NULL;
+ if (r)
+ break;
+ }
+ amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+ p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+ }
+
+ return r;
+}
+
+static int
+svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct svm_range *prange, dma_addr_t *dma_addr,
+ struct amdgpu_device *bo_adev, struct dma_fence **fence)
+{
+ struct amdgpu_bo_va bo_va;
+ bool table_freed = false;
+ uint64_t pte_flags;
+ int r = 0;
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+ prange->last);
+
+ if (prange->svm_bo && prange->ttm_res) {
+ bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
+ prange->mapping.bo_va = &bo_va;
+ }
+
+ prange->mapping.start = prange->start;
+ prange->mapping.last = prange->last;
+ prange->mapping.offset = prange->ttm_res ? prange->offset : 0;
+ pte_flags = svm_range_get_pte_flags(adev, prange);
+
+ r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
+ prange->mapping.start,
+ prange->mapping.last, pte_flags,
+ prange->mapping.offset,
+ prange->ttm_res,
+ dma_addr, &vm->last_update,
+ &table_freed);
+ if (r) {
+ pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
+ goto out;
+ }
+
+ r = amdgpu_vm_update_pdes(adev, vm, false);
+ if (r) {
+ pr_debug("failed %d to update directories 0x%lx\n", r,
+ prange->start);
+ goto out;
+ }
+
+ if (fence)
+ *fence = dma_fence_get(vm->last_update);
+
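+	/* If page-table BOs were freed during the update, stale translations
+	 * may remain in the GPU TLB, so flush it for this process.
+	 */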
+ if (table_freed) {
+ struct kfd_process *p;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+ amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+ p->pasid, TLB_FLUSH_LEGACY);
+ }
+out:
+ prange->mapping.bo_va = NULL;
+ return r;
+}
+
+static int svm_range_map_to_gpus(struct svm_range *prange,
+ unsigned long *bitmap, bool wait)
+{
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *bo_adev;
+ struct amdgpu_device *adev;
+ struct kfd_process *p;
+ struct dma_fence *fence = NULL;
+ uint32_t gpuidx;
+ int r = 0;
+
+ if (prange->svm_bo && prange->ttm_res)
+ bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ else
+ bo_adev = NULL;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+ for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+ pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ return -EINVAL;
+ }
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
+ if (IS_ERR(pdd))
+ return -EINVAL;
+
+ if (bo_adev && adev != bo_adev &&
+ !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ pr_debug("cannot map to device idx %d\n", gpuidx);
+ continue;
+ }
+
+ r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+ prange, prange->dma_addr[gpuidx],
+ bo_adev, wait ? &fence : NULL);
+ if (r)
+ break;
+
+ if (fence) {
+ r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ fence = NULL;
+ if (r) {
+ pr_debug("failed %d to dma fence wait\n", r);
+ break;
+ }
+ }
+ }
+
+ return r;
+}
+
+struct svm_validate_context {
+ struct kfd_process *process;
+ struct svm_range *prange;
+ bool intr;
+ unsigned long bitmap[MAX_GPU_INSTANCE];
+ struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1];
+ struct list_head validate_list;
+ struct ww_acquire_ctx ticket;
+};
+
+static int svm_range_reserve_bos(struct svm_validate_context *ctx)
+{
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+ struct amdgpu_vm *vm;
+ uint32_t gpuidx;
+ int r;
+
+ INIT_LIST_HEAD(&ctx->validate_list);
+ for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ return -EINVAL;
+ }
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+ vm = drm_priv_to_vm(pdd->drm_priv);
+
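+		/* Add each GPU's page-directory root to the validate list so
+		 * its page tables can be validated and updated safely.
+		 */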
+ ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
+ ctx->tv[gpuidx].num_shared = 4;
+ list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
+ }
+ if (ctx->prange->svm_bo && ctx->prange->ttm_res) {
+ ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo;
+ ctx->tv[MAX_GPU_INSTANCE].num_shared = 1;
+ list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list);
+ }
+
+ r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
+ ctx->intr, NULL);
+ if (r) {
+ pr_debug("failed %d to reserve bo\n", r);
+ return r;
+ }
+
+ for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ r = -EINVAL;
+ goto unreserve_out;
+ }
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+ r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv),
+ svm_range_bo_validate, NULL);
+ if (r) {
+ pr_debug("failed %d validate pt bos\n", r);
+ goto unreserve_out;
+ }
+ }
+
+ return 0;
+
+unreserve_out:
+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ return r;
+}
+
+static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
+{
+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+}
+
+/*
+ * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
+ *
+ * To prevent concurrent destruction or change of range attributes, the
+ * svm_read_lock must be held. The caller must not hold the svm_write_lock
+ * because that would block concurrent evictions and lead to deadlocks. To
+ * serialize concurrent migrations or validations of the same range, the
+ * prange->migrate_mutex must be held.
+ *
 * For VRAM ranges, the SVM BO must be allocated and valid (protected by its
 * eviction fence).
+ *
+ * The following sequence ensures race-free validation and GPU mapping:
+ *
+ * 1. Reserve page table (and SVM BO if range is in VRAM)
+ * 2. hmm_range_fault to get page addresses (if system memory)
+ * 3. DMA-map pages (if system memory)
+ * 4-a. Take notifier lock
+ * 4-b. Check that pages still valid (mmu_interval_read_retry)
+ * 4-c. Check that the range was not split or otherwise invalidated
+ * 4-d. Update GPU page table
+ * 4-e. Release notifier lock
+ * 5. Release page table (and SVM BO) reservation
+ */
+static int svm_range_validate_and_map(struct mm_struct *mm,
+ struct svm_range *prange,
+ int32_t gpuidx, bool intr, bool wait)
+{
+ struct svm_validate_context ctx;
+ struct hmm_range *hmm_range;
+ int r = 0;
+
+ ctx.process = container_of(prange->svms, struct kfd_process, svms);
+ ctx.prange = prange;
+ ctx.intr = intr;
+
+ if (gpuidx < MAX_GPU_INSTANCE) {
+ bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
+ bitmap_set(ctx.bitmap, gpuidx, 1);
+ } else if (ctx.process->xnack_enabled) {
+ bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+
+		/* If the range was prefetched to a GPU, or a GPU retry fault
+		 * migrated it to a GPU that has the ACCESS attribute for the
+		 * range, create the mapping on that GPU.
+		 */
+ if (prange->actual_loc) {
+ gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process,
+ prange->actual_loc);
+ if (gpuidx < 0) {
+ WARN_ONCE(1, "failed get device by id 0x%x\n",
+ prange->actual_loc);
+ return -EINVAL;
+ }
+ if (test_bit(gpuidx, prange->bitmap_access))
+ bitmap_set(ctx.bitmap, gpuidx, 1);
+ }
+ } else {
+ bitmap_or(ctx.bitmap, prange->bitmap_access,
+ prange->bitmap_aip, MAX_GPU_INSTANCE);
+ }
+
+ if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
+ return 0;
+
+ if (prange->actual_loc && !prange->ttm_res) {
+ /* This should never happen. actual_loc gets set by
+ * svm_migrate_ram_to_vram after allocating a BO.
+ */
+ WARN(1, "VRAM BO missing during validation\n");
+ return -EINVAL;
+ }
+
+	r = svm_range_reserve_bos(&ctx);
+	if (r)
+		return r;
+
+ if (!prange->actual_loc) {
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+ prange->start << PAGE_SHIFT,
+ prange->npages, &hmm_range,
+ false, true);
+ if (r) {
+ pr_debug("failed %d to get svm range pages\n", r);
+ goto unreserve_out;
+ }
+
+ r = svm_range_dma_map(prange, ctx.bitmap,
+ hmm_range->hmm_pfns);
+ if (r) {
+ pr_debug("failed %d to dma map range\n", r);
+ goto unreserve_out;
+ }
+
+ prange->validated_once = true;
+ }
+
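+	/* prange->lock serializes against the CPU invalidation callback
+	 * (step 4-a above); amdgpu_hmm_range_get_pages_done then tells us
+	 * whether the pages collected above are still valid.
+	 */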
+ svm_range_lock(prange);
+ if (!prange->actual_loc) {
+ if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+ pr_debug("hmm update the range, need validate again\n");
+ r = -EAGAIN;
+ goto unlock_out;
+ }
+ }
+ if (!list_empty(&prange->child_list)) {
+ pr_debug("range split by unmap in parallel, validate again\n");
+ r = -EAGAIN;
+ goto unlock_out;
+ }
+
+ r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
+
+unlock_out:
+ svm_range_unlock(prange);
+unreserve_out:
+ svm_range_unreserve_bos(&ctx);
+
+ if (!r)
+ prange->validate_timestamp = ktime_to_us(ktime_get());
+
+ return r;
+}
+
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with mmap write lock held, pending deferred work flushed
+ *
+ */
+static void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+ struct mm_struct *mm)
+{
+retry_flush_work:
+ flush_work(&svms->deferred_list_work);
+ mmap_write_lock(mm);
+
+ if (list_empty(&svms->deferred_range_list))
+ return;
+ mmap_write_unlock(mm);
+ pr_debug("retry flush\n");
+ goto retry_flush_work;
+}
+
+static void svm_range_restore_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+ int evicted_ranges;
+ int invalid;
+ int r;
+
+ svms = container_of(dwork, struct svm_range_list, restore_work);
+ evicted_ranges = atomic_read(&svms->evicted_ranges);
+ if (!evicted_ranges)
+ return;
+
+ pr_debug("restore svm ranges\n");
+
+ /* kfd_process_notifier_release destroys this worker thread. So during
+ * the lifetime of this thread, kfd_process and mm will be valid.
+ */
+ p = container_of(svms, struct kfd_process, svms);
+ process_info = p->kgd_process_info;
+ mm = p->mm;
+ if (!mm)
+ return;
+
+ mutex_lock(&process_info->lock);
+ svm_range_list_lock_and_flush_work(svms, mm);
+ mutex_lock(&svms->lock);
+
+ evicted_ranges = atomic_read(&svms->evicted_ranges);
+
+ list_for_each_entry(prange, &svms->list, list) {
+ invalid = atomic_read(&prange->invalid);
+ if (!invalid)
+ continue;
+
+ pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+ prange->svms, prange, prange->start, prange->last,
+ invalid);
+
+ /*
+		 * If the range is migrating, wait for the migration to finish.
+ */
+ mutex_lock(&prange->migrate_mutex);
+
+ r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+ false, true);
+ if (r)
+ pr_debug("failed %d to map 0x%lx to gpus\n", r,
+ prange->start);
+
+ mutex_unlock(&prange->migrate_mutex);
+ if (r)
+ goto out_reschedule;
+
+ if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+ goto out_reschedule;
+ }
+
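+	/* New evictions may have raced in while the ranges were being
+	 * validated; only resume the queues if the eviction count is still
+	 * the one sampled above, otherwise reschedule.
+	 */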
+ if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+ evicted_ranges)
+ goto out_reschedule;
+
+ evicted_ranges = 0;
+
+ r = kgd2kfd_resume_mm(mm);
+ if (r) {
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ pr_debug("failed %d to resume KFD\n", r);
+ }
+
+ pr_debug("restore svm ranges successfully\n");
+
+out_reschedule:
+ mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+ mutex_unlock(&process_info->lock);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_ranges) {
+ pr_debug("reschedule to restore svm range\n");
+ schedule_delayed_work(&svms->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+ }
+}
+
+/**
+ * svm_range_evict - evict svm range
+ *
 * Stop all queues of the process to ensure the GPU doesn't access the memory,
 * then return to let the CPU evict the buffer and proceed with the CPU page
 * table update.
 *
 * No lock is needed to synchronize CPU page table invalidation with GPU
 * execution. If an invalidation happens while the restore work is running, the
 * restore work restarts to pick up the latest CPU page mapping for the GPU
 * before starting the queues again.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start, unsigned long last)
+{
+ struct svm_range_list *svms = prange->svms;
+ struct kfd_process *p;
+ int r = 0;
+
+ p = container_of(svms, struct kfd_process, svms);
+
+ pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+ svms, prange->start, prange->last, start, last);
+
+ if (!p->xnack_enabled) {
+ int evicted_ranges;
+
+ atomic_inc(&prange->invalid);
+ evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+ if (evicted_ranges != 1)
+ return r;
+
+ pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+ prange->svms, prange->start, prange->last);
+
+ /* First eviction, stop the queues */
+ r = kgd2kfd_quiesce_mm(mm);
+ if (r)
+ pr_debug("failed to quiesce KFD\n");
+
+ pr_debug("schedule to restore svm %p ranges\n", svms);
+ schedule_delayed_work(&svms->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+ } else {
+ struct svm_range *pchild;
+ unsigned long s, l;
+
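+		/* With XNACK enabled only the affected pages are unmapped from
+		 * the GPUs; the queues keep running and retry faults repopulate
+		 * the mapping on demand.
+		 */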
+ pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
+ prange->svms, start, last);
+ list_for_each_entry(pchild, &prange->child_list, child_list) {
+ mutex_lock_nested(&pchild->lock, 1);
+ s = max(start, pchild->start);
+ l = min(last, pchild->last);
+ if (l >= s)
+ svm_range_unmap_from_gpus(pchild, s, l);
+ mutex_unlock(&pchild->lock);
+ }
+ s = max(start, prange->start);
+ l = min(last, prange->last);
+ if (l >= s)
+ svm_range_unmap_from_gpus(prange, s, l);
+ }
+
+ return r;
+}
+
+static struct svm_range *svm_range_clone(struct svm_range *old)
+{
+ struct svm_range *new;
+
+ new = svm_range_new(old->svms, old->start, old->last);
+ if (!new)
+ return NULL;
+
+ if (old->svm_bo) {
+ new->ttm_res = old->ttm_res;
+ new->offset = old->offset;
+ new->svm_bo = svm_range_bo_ref(old->svm_bo);
+ spin_lock(&new->svm_bo->list_lock);
+ list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+ spin_unlock(&new->svm_bo->list_lock);
+ }
+ new->flags = old->flags;
+ new->preferred_loc = old->preferred_loc;
+ new->prefetch_loc = old->prefetch_loc;
+ new->actual_loc = old->actual_loc;
+ new->granularity = old->granularity;
+ bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
+ bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+
+ return new;
+}
+
+/**
+ * svm_range_handle_overlap - split overlap ranges
+ * @svms: svm range list header
+ * @new: range added with these attributes
+ * @start: range added start address, in pages
+ * @last: range last address, in pages
 * @update_list: output, the ranges whose attributes are updated. For set_attr,
 *               these will be validated and mapped to GPUs. For unmap, these
 *               will be removed and unmapped from GPUs
 * @insert_list: output, the ranges that will be inserted into svms; attributes
 *               are not changed. For set_attr, these will be added into svms.
 * @remove_list: output, the ranges that will be removed from svms
 * @left: the remaining range after overlap, for set_attr this will be added
 *        as a new range.
+ *
+ * There are 5 overlap cases in total.
+ *
+ * This function handles overlap of an address interval with existing
+ * struct svm_ranges for applying new attributes. This may require
+ * splitting existing struct svm_ranges. All changes should be applied to
+ * the range_list and interval tree transactionally. If any split operation
+ * fails, the entire update fails. Therefore the existing overlapping
+ * svm_ranges are cloned and the original svm_ranges left unchanged. If the
+ * transaction succeeds, the modified clones are added and the originals
+ * freed. Otherwise the clones are removed and the old svm_ranges remain.
+ *
+ * Context: The caller must hold svms->lock
+ */
+static int
+svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
+ unsigned long start, unsigned long last,
+ struct list_head *update_list,
+ struct list_head *insert_list,
+ struct list_head *remove_list,
+ unsigned long *left)
+{
+ struct interval_tree_node *node;
+ struct svm_range *prange;
+ struct svm_range *tmp;
+ int r = 0;
+
+ INIT_LIST_HEAD(update_list);
+ INIT_LIST_HEAD(insert_list);
+ INIT_LIST_HEAD(remove_list);
+
+ node = interval_tree_iter_first(&svms->objects, start, last);
+ while (node) {
+ struct interval_tree_node *next;
+ struct svm_range *old;
+ unsigned long next_start;
+
+ pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
+ node->last);
+
+ old = container_of(node, struct svm_range, it_node);
+ next = interval_tree_iter_next(node, start, last);
+ next_start = min(node->last, last) + 1;
+
+ if (node->start < start || node->last > last) {
+ /* node intersects the updated range, clone+split it */
+ prange = svm_range_clone(old);
+ if (!prange) {
+ r = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&old->remove_list, remove_list);
+ list_add(&prange->insert_list, insert_list);
+
+ if (node->start < start) {
+ pr_debug("change old range start\n");
+ r = svm_range_split_head(prange, new, start,
+ insert_list);
+ if (r)
+ goto out;
+ }
+ if (node->last > last) {
+ pr_debug("change old range last\n");
+ r = svm_range_split_tail(prange, new, last,
+ insert_list);
+ if (r)
+ goto out;
+ }
+ } else {
+ /* The node is contained within start..last,
+ * just update it
+ */
+ prange = old;
+ }
+
+ if (!svm_range_is_same_attrs(prange, new))
+ list_add(&prange->update_list, update_list);
+
+ /* insert a new node if needed */
+ if (node->start > start) {
+ prange = svm_range_new(prange->svms, start,
+ node->start - 1);
+ if (!prange) {
+ r = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&prange->insert_list, insert_list);
+ list_add(&prange->update_list, update_list);
+ }
+
+ node = next;
+ start = next_start;
+ }
+
+ if (left && start <= last)
+ *left = last - start + 1;
+
+out:
+ if (r)
+ list_for_each_entry_safe(prange, tmp, insert_list, insert_list)
+ svm_range_free(prange);
+
+ return r;
+}
+
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+ struct svm_range *prange)
+{
+ unsigned long start;
+ unsigned long last;
+
+ start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+ last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+ if (prange->start == start && prange->last == last)
+ return;
+
+ pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+ prange->svms, prange, start, last, prange->start,
+ prange->last);
+
+ if (start != 0 && last != 0) {
+ interval_tree_remove(&prange->it_node, &prange->svms->objects);
+ svm_range_remove_notifier(prange);
+ }
+ prange->it_node.start = prange->start;
+ prange->it_node.last = prange->last;
+
+ interval_tree_insert(&prange->it_node, &prange->svms->objects);
+ svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+{
+ struct mm_struct *mm = prange->work_item.mm;
+
+ switch (prange->work_item.op) {
+ case SVM_OP_NULL:
+ pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ svms, prange, prange->start, prange->last);
+ break;
+ case SVM_OP_UNMAP_RANGE:
+ pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ svms, prange, prange->start, prange->last);
+ svm_range_unlink(prange);
+ svm_range_remove_notifier(prange);
+ svm_range_free(prange);
+ break;
+ case SVM_OP_UPDATE_RANGE_NOTIFIER:
+ pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ svms, prange, prange->start, prange->last);
+ svm_range_update_notifier_and_interval_tree(mm, prange);
+ break;
+ case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
+ pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ svms, prange, prange->start, prange->last);
+ svm_range_update_notifier_and_interval_tree(mm, prange);
+ /* TODO: implement deferred validation and mapping */
+ break;
+ case SVM_OP_ADD_RANGE:
+ pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
+ prange->start, prange->last);
+ svm_range_add_to_svms(prange);
+ svm_range_add_notifier_locked(mm, prange);
+ break;
+ case SVM_OP_ADD_RANGE_AND_MAP:
+ pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
+ prange, prange->start, prange->last);
+ svm_range_add_to_svms(prange);
+ svm_range_add_notifier_locked(mm, prange);
+ /* TODO: implement deferred validation and mapping */
+ break;
+ default:
+ WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange,
+ prange->work_item.op);
+ }
+}
+
+static void svm_range_drain_retry_fault(struct svm_range_list *svms)
+{
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+ struct kfd_process *p;
+ uint32_t i;
+
+ p = container_of(svms, struct kfd_process, svms);
+
+ for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+ pdd = p->pdds[i];
+ if (!pdd)
+ continue;
+
+ pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+ amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
+ pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
+ }
+}
+
+static void svm_range_deferred_list_work(struct work_struct *work)
+{
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct mm_struct *mm;
+
+ svms = container_of(work, struct svm_range_list, deferred_list_work);
+ pr_debug("enter svms 0x%p\n", svms);
+
+ spin_lock(&svms->deferred_list_lock);
+ while (!list_empty(&svms->deferred_range_list)) {
+ prange = list_first_entry(&svms->deferred_range_list,
+ struct svm_range, deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+ pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
+ prange->start, prange->last, prange->work_item.op);
+
+ /* Make sure no stale retry fault coming after range is freed */
+ if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
+ svm_range_drain_retry_fault(prange->svms);
+
+ mm = prange->work_item.mm;
+ mmap_write_lock(mm);
+ mutex_lock(&svms->lock);
+
+ /* Remove from deferred_list must be inside mmap write lock,
+ * otherwise, svm_range_list_lock_and_flush_work may hold mmap
+ * write lock, and continue because deferred_list is empty, then
+ * deferred_list handle is blocked by mmap write lock.
+ */
+ spin_lock(&svms->deferred_list_lock);
+ list_del_init(&prange->deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+
+ mutex_lock(&prange->migrate_mutex);
+ while (!list_empty(&prange->child_list)) {
+ struct svm_range *pchild;
+
+ pchild = list_first_entry(&prange->child_list,
+ struct svm_range, child_list);
+ pr_debug("child prange 0x%p op %d\n", pchild,
+ pchild->work_item.op);
+ list_del_init(&pchild->child_list);
+ svm_range_handle_list_op(svms, pchild);
+ }
+ mutex_unlock(&prange->migrate_mutex);
+
+ svm_range_handle_list_op(svms, prange);
+ mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+
+ spin_lock(&svms->deferred_list_lock);
+ }
+ spin_unlock(&svms->deferred_list_lock);
+
+ pr_debug("exit svms 0x%p\n", svms);
+}
+
+void
+svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
+ struct mm_struct *mm, enum svm_work_list_ops op)
+{
+ spin_lock(&svms->deferred_list_lock);
+ /* if prange is on the deferred list */
+ if (!list_empty(&prange->deferred_list)) {
+ pr_debug("update exist prange 0x%p work op %d\n", prange, op);
+ WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n");
+ if (op != SVM_OP_NULL &&
+ prange->work_item.op != SVM_OP_UNMAP_RANGE)
+ prange->work_item.op = op;
+ } else {
+ prange->work_item.op = op;
+ prange->work_item.mm = mm;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
+ spin_unlock(&svms->deferred_list_lock);
+}
+
+void schedule_deferred_list_work(struct svm_range_list *svms)
+{
+ spin_lock(&svms->deferred_list_lock);
+ if (!list_empty(&svms->deferred_range_list))
+ schedule_work(&svms->deferred_list_work);
+ spin_unlock(&svms->deferred_list_lock);
+}
+
+static void
+svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
+ struct svm_range *prange, unsigned long start,
+ unsigned long last)
+{
+ struct svm_range *head;
+ struct svm_range *tail;
+
+ if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+ pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange,
+ prange->start, prange->last);
+ return;
+ }
+ if (start > prange->last || last < prange->start)
+ return;
+
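+	/* Carve the unmapped interval out of prange: the piece overlapping
+	 * [start, last] is queued for SVM_OP_UNMAP_RANGE, any surviving
+	 * piece either stays as prange or is re-added as a child with
+	 * SVM_OP_ADD_RANGE.
+	 */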
+ head = tail = prange;
+ if (start > prange->start)
+ svm_range_split(prange, prange->start, start - 1, &tail);
+ if (last < tail->last)
+ svm_range_split(tail, last + 1, tail->last, &head);
+
+ if (head != prange && tail != prange) {
+ svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ } else if (tail != prange) {
+ svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ } else if (head != prange) {
+ svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ } else if (parent != prange) {
+ prange->work_item.op = SVM_OP_UNMAP_RANGE;
+ }
+}
+
+static void
+svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
+ unsigned long start, unsigned long last)
+{
+ struct svm_range_list *svms;
+ struct svm_range *pchild;
+ struct kfd_process *p;
+ unsigned long s, l;
+ bool unmap_parent;
+
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return;
+ svms = &p->svms;
+
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
+ prange, prange->start, prange->last, start, last);
+
+ unmap_parent = start <= prange->start && last >= prange->last;
+
+ list_for_each_entry(pchild, &prange->child_list, child_list) {
+ mutex_lock_nested(&pchild->lock, 1);
+ s = max(start, pchild->start);
+ l = min(last, pchild->last);
+ if (l >= s)
+ svm_range_unmap_from_gpus(pchild, s, l);
+ svm_range_unmap_split(mm, prange, pchild, start, last);
+ mutex_unlock(&pchild->lock);
+ }
+ s = max(start, prange->start);
+ l = min(last, prange->last);
+ if (l >= s)
+ svm_range_unmap_from_gpus(prange, s, l);
+ svm_range_unmap_split(mm, prange, prange, start, last);
+
+ if (unmap_parent)
+ svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
+ else
+ svm_range_add_list_work(svms, prange, mm,
+ SVM_OP_UPDATE_RANGE_NOTIFIER);
+ schedule_deferred_list_work(svms);
+
+ kfd_unref_process(p);
+}
+
+/**
+ * svm_range_cpu_invalidate_pagetables - interval notifier callback
+ *
 * If the event is MMU_NOTIFY_UNMAP, it comes from a CPU unmap of the range;
 * otherwise it comes from migration or a CPU page invalidation callback.
 *
 * For an unmap event, unmap the range from the GPUs, remove the prange from
 * svms in a delayed work thread, and split the prange if only part of it is
 * unmapped.
 *
 * For an invalidation event, if GPU retry faults are not enabled, evict the
 * queues, then schedule svm_range_restore_work to update the GPU mapping and
 * resume the queues. If GPU retry faults are enabled, unmap the svm range from
 * the GPU; retry faults will update the GPU mapping to recover.
+ *
+ * Context: mmap lock, notifier_invalidate_start lock are held
+ * for invalidate event, prange lock is held if this is from migration
+ */
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct svm_range *prange;
+ unsigned long start;
+ unsigned long last;
+
+ if (range->event == MMU_NOTIFY_RELEASE)
+ return true;
+
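+	/* Clamp the invalidated interval to the part covered by this
+	 * notifier and convert it to page numbers.
+	 */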
+ start = mni->interval_tree.start;
+ last = mni->interval_tree.last;
+ start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
+ last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
+ pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
+ start, last, range->start >> PAGE_SHIFT,
+ (range->end - 1) >> PAGE_SHIFT,
+ mni->interval_tree.start >> PAGE_SHIFT,
+ mni->interval_tree.last >> PAGE_SHIFT, range->event);
+
+ prange = container_of(mni, struct svm_range, notifier);
+
+ svm_range_lock(prange);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ switch (range->event) {
+ case MMU_NOTIFY_UNMAP:
+ svm_range_unmap_from_cpu(mni->mm, prange, start, last);
+ break;
+ default:
+ svm_range_evict(prange, mni->mm, start, last);
+ break;
+ }
+
+ svm_range_unlock(prange);
+
+ return true;
+}
+
+/**
+ * svm_range_from_addr - find svm range from fault address
+ * @svms: svm range list header
+ * @addr: address to search range interval tree, in pages
+ * @parent: parent range if range is on child list
+ *
+ * Context: The caller must hold svms->lock
+ *
+ * Return: the svm_range found or NULL
+ */
+struct svm_range *
+svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
+ struct svm_range **parent)
+{
+ struct interval_tree_node *node;
+ struct svm_range *prange;
+ struct svm_range *pchild;
+
+ node = interval_tree_iter_first(&svms->objects, addr, addr);
+ if (!node)
+ return NULL;
+
+ prange = container_of(node, struct svm_range, it_node);
+ pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n",
+ addr, prange->start, prange->last, node->start, node->last);
+
+ if (addr >= prange->start && addr <= prange->last) {
+ if (parent)
+ *parent = prange;
+ return prange;
+ }
+ list_for_each_entry(pchild, &prange->child_list, child_list)
+ if (addr >= pchild->start && addr <= pchild->last) {
+ pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n",
+ addr, pchild->start, pchild->last);
+ if (parent)
+ *parent = prange;
+ return pchild;
+ }
+
+ return NULL;
+}
+
+/* svm_range_best_restore_location - decide the best fault restore location
+ * @prange: svm range structure
+ * @adev: the GPU on which vm fault happened
+ *
 * This is only called when xnack is on, to decide the best location to restore
 * the range mapping after a GPU vm fault. The caller uses the best location to
 * migrate the range if the actual location is not the best location, and then
 * updates the GPU page table mapping to the best location.
 *
 * If the faulting GPU is the range's preferred location, best_loc is the
 * preferred location.
 * If the faulting GPU is in the range's ACCESSIBLE bitmap, best_loc is the
 * faulting GPU.
 * If the faulting GPU is in the range's ACCESSIBLE_IN_PLACE bitmap, then
 *   if the range's actual location is the CPU, best_loc is the CPU;
 *   if the faulting GPU is on the same XGMI hive as the actual location GPU,
 *   best_loc is the actual location.
 * Otherwise the faulting GPU has no access and best_loc is -1.
 *
 * Return:
 * -1 means the faulting GPU has no access
+ * 0 for CPU or GPU id
+ */
+static int32_t
+svm_range_best_restore_location(struct svm_range *prange,
+ struct amdgpu_device *adev,
+ int32_t *gpuidx)
+{
+ struct amdgpu_device *bo_adev;
+ struct kfd_process *p;
+ uint32_t gpuid;
+ int r;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx);
+ if (r < 0) {
+ pr_debug("failed to get gpuid from kgd\n");
+ return -1;
+ }
+
+ if (prange->preferred_loc == gpuid)
+ return prange->preferred_loc;
+
+ if (test_bit(*gpuidx, prange->bitmap_access))
+ return gpuid;
+
+ if (test_bit(*gpuidx, prange->bitmap_aip)) {
+ if (!prange->actual_loc)
+ return 0;
+
+ bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ return prange->actual_loc;
+ else
+ return 0;
+ }
+
+ return -1;
+}
+
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+ unsigned long *start, unsigned long *last)
+{
+ struct vm_area_struct *vma;
+ struct interval_tree_node *node;
+ unsigned long start_limit, end_limit;
+
+ vma = find_vma(p->mm, addr << PAGE_SHIFT);
+ if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+ return -EFAULT;
+ }
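+
+	/* Start from the VMA limits clamped to a 512-page (2MB) aligned
+	 * window around the fault address, then shrink the window so it
+	 * does not overlap any already-registered range.
+	 */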
+ start_limit = max(vma->vm_start >> PAGE_SHIFT,
+ (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+ end_limit = min(vma->vm_end >> PAGE_SHIFT,
+ (unsigned long)ALIGN(addr + 1, 2UL << 8));
+ /* First range that starts after the fault address */
+ node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
+ if (node) {
+ end_limit = min(end_limit, node->start);
+ /* Last range that ends before the fault address */
+ node = container_of(rb_prev(&node->rb),
+ struct interval_tree_node, rb);
+ } else {
+ /* Last range must end before addr because
+ * there was no range after addr
+ */
+ node = container_of(rb_last(&p->svms.objects.rb_root),
+ struct interval_tree_node, rb);
+ }
+ if (node) {
+ if (node->last >= addr) {
+ WARN(1, "Overlap with prev node and page fault addr\n");
+ return -EFAULT;
+ }
+ start_limit = max(start_limit, node->last + 1);
+ }
+
+ *start = start_limit;
+ *last = end_limit - 1;
+
+ pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n",
+ vma->vm_start >> PAGE_SHIFT, *start,
+ vma->vm_end >> PAGE_SHIFT, *last);
+
+ return 0;
+}
+
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+ struct kfd_process *p,
+ struct mm_struct *mm,
+ int64_t addr)
+{
+ struct svm_range *prange = NULL;
+ unsigned long start, last;
+ uint32_t gpuid, gpuidx;
+
+ if (svm_range_get_range_boundaries(p, addr, &start, &last))
+ return NULL;
+
+ prange = svm_range_new(&p->svms, start, last);
+ if (!prange) {
+ pr_debug("Failed to create prange in address [0x%llx]\n", addr);
+ return NULL;
+ }
+ if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+ pr_debug("failed to get gpuid from kgd\n");
+ svm_range_free(prange);
+ return NULL;
+ }
+
+ svm_range_add_to_svms(prange);
+ svm_range_add_notifier_locked(mm, prange);
+
+ return prange;
+}
+
+/* svm_range_skip_recover - decide if prange can be recovered
+ * @prange: svm range structure
+ *
 * The GPU vm retry fault handler skips recovering the range in these cases:
 * 1. prange is on the deferred list to be removed after unmap; it is a stale
 *    fault, and the deferred list work will drain it before freeing the
 *    prange.
 * 2. prange is on the deferred list to add an interval notifier after a split,
 *    or
 * 3. prange is a child range split from a parent prange; recover it later
 *    after the interval notifier is added.
+ *
+ * Return: true to skip recover, false to recover
+ */
+static bool svm_range_skip_recover(struct svm_range *prange)
+{
+ struct svm_range_list *svms = prange->svms;
+
+ spin_lock(&svms->deferred_list_lock);
+ if (list_empty(&prange->deferred_list) &&
+ list_empty(&prange->child_list)) {
+ spin_unlock(&svms->deferred_list_lock);
+ return false;
+ }
+ spin_unlock(&svms->deferred_list_lock);
+
+ if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
+ svms, prange, prange->start, prange->last);
+ return true;
+ }
+ if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
+ prange->work_item.op == SVM_OP_ADD_RANGE) {
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
+ svms, prange, prange->start, prange->last);
+ return true;
+ }
+ return false;
+}
+
+int
+svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+ uint64_t addr)
+{
+ struct mm_struct *mm = NULL;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct kfd_process *p;
+ uint64_t timestamp;
+ int32_t best_loc, gpuidx;
+ bool write_locked = false;
+ int r = 0;
+
+ if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
+ pr_debug("device does not support SVM\n");
+ return -EFAULT;
+ }
+
+ p = kfd_lookup_process_by_pasid(pasid);
+ if (!p) {
+		pr_debug("kfd process not found, pasid 0x%x\n", pasid);
+ return -ESRCH;
+ }
+	if (!p->xnack_enabled) {
+		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+		r = -EFAULT;
+		goto out;
+	}
+ svms = &p->svms;
+
+ pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_debug("svms 0x%p failed to get mm\n", svms);
+ r = -ESRCH;
+ goto out;
+ }
+
+ mmap_read_lock(mm);
+retry_write_locked:
+ mutex_lock(&svms->lock);
+ prange = svm_range_from_addr(svms, addr, NULL);
+ if (!prange) {
+ pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
+ svms, addr);
+ if (!write_locked) {
+ /* Need the write lock to create new range with MMU notifier.
+ * Also flush pending deferred work to make sure the interval
+ * tree is up to date before we add a new range
+ */
+ mutex_unlock(&svms->lock);
+ mmap_read_unlock(mm);
+ mmap_write_lock(mm);
+ write_locked = true;
+ goto retry_write_locked;
+ }
+ prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+ if (!prange) {
+ pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
+ svms, addr);
+ mmap_write_downgrade(mm);
+ r = -EFAULT;
+ goto out_unlock_svms;
+ }
+ }
+ if (write_locked)
+ mmap_write_downgrade(mm);
+
+ mutex_lock(&prange->migrate_mutex);
+
+ if (svm_range_skip_recover(prange)) {
+ amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ goto out_unlock_range;
+ }
+
+ timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
+ /* skip duplicate vm fault on different pages of same range */
+ if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+ pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
+ svms, prange->start, prange->last);
+ goto out_unlock_range;
+ }
+
+ best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
+ if (best_loc == -1) {
+ pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
+ svms, prange->start, prange->last);
+ r = -EACCES;
+ goto out_unlock_range;
+ }
+
+ pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
+ svms, prange->start, prange->last, best_loc,
+ prange->actual_loc);
+
+ if (prange->actual_loc != best_loc) {
+ if (best_loc) {
+ r = svm_migrate_to_vram(prange, best_loc, mm);
+ if (r) {
+ pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
+ r, addr);
+ /* Fallback to system memory if migration to
+ * VRAM failed
+ */
+ if (prange->actual_loc)
+ r = svm_migrate_vram_to_ram(prange, mm);
+ else
+ r = 0;
+ }
+ } else {
+ r = svm_migrate_vram_to_ram(prange, mm);
+ }
+ if (r) {
+ pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
+ r, svms, prange->start, prange->last);
+ goto out_unlock_range;
+ }
+ }
+
+ r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+ if (r)
+ pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
+ r, svms, prange->start, prange->last);
+
+out_unlock_range:
+ mutex_unlock(&prange->migrate_mutex);
+out_unlock_svms:
+ mutex_unlock(&svms->lock);
+ mmap_read_unlock(mm);
+ mmput(mm);
+out:
+ kfd_unref_process(p);
+
+ if (r == -EAGAIN) {
+ pr_debug("recover vm fault later\n");
+ amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ r = 0;
+ }
+ return r;
+}
+
+void svm_range_list_fini(struct kfd_process *p)
+{
+ struct svm_range *prange;
+ struct svm_range *next;
+
+ pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+
+ /* Ensure list work is finished before process is destroyed */
+ flush_work(&p->svms.deferred_list_work);
+
+ list_for_each_entry_safe(prange, next, &p->svms.list, list) {
+ svm_range_unlink(prange);
+ svm_range_remove_notifier(prange);
+ svm_range_free(prange);
+ }
+
+ mutex_destroy(&p->svms.lock);
+
+ pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
+}
+
+int svm_range_list_init(struct kfd_process *p)
+{
+ struct svm_range_list *svms = &p->svms;
+ int i;
+
+ svms->objects = RB_ROOT_CACHED;
+ mutex_init(&svms->lock);
+ INIT_LIST_HEAD(&svms->list);
+ atomic_set(&svms->evicted_ranges, 0);
+ INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
+ INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
+ INIT_LIST_HEAD(&svms->deferred_range_list);
+ spin_lock_init(&svms->deferred_list_lock);
+
+ for (i = 0; i < p->n_pdds; i++)
+ if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev))
+ bitmap_set(svms->bitmap_supported, i, 1);
+
+ return 0;
+}
+
+/**
+ * svm_range_is_valid - check if virtual address range is valid
+ * @mm: current process mm_struct
+ * @start: range start address, in pages
+ * @size: range size, in pages
+ *
 * A virtual address range is valid if it is fully covered by VMAs, none of
 * which is a device VMA (VM_IO, VM_PFNMAP or VM_MIXEDMAP)
+ *
+ * Context: Process context
+ *
+ * Return:
+ * true - valid svm range
+ * false - invalid svm range
+ */
+static bool
+svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size)
+{
+ const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
+ struct vm_area_struct *vma;
+ unsigned long end;
+
+ start <<= PAGE_SHIFT;
+ end = start + (size << PAGE_SHIFT);
+
+ do {
+ vma = find_vma(mm, start);
+ if (!vma || start < vma->vm_start ||
+ (vma->vm_flags & device_vma))
+ return false;
+ start = min(end, vma->vm_end);
+ } while (start < end);
+
+ return true;
+}
+
+/**
+ * svm_range_add - add svm range and handle overlap
+ * @p: the range add to this process svms
+ * @start: page size aligned
+ * @size: page size aligned
+ * @nattr: number of attributes
+ * @attrs: array of attributes
+ * @update_list: output, the ranges need validate and update GPU mapping
+ * @insert_list: output, the ranges need insert to svms
+ * @remove_list: output, the ranges are replaced and need remove from svms
+ *
 * Check if the virtual address range overlaps with registered ranges; split
 * the overlapping ranges, and copy and adjust the page addresses and vram
 * nodes in the old and new ranges.
+ *
+ * Context: Process context, caller must hold svms->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ struct list_head *update_list, struct list_head *insert_list,
+ struct list_head *remove_list)
+{
+ uint64_t last = start + size - 1UL;
+ struct svm_range_list *svms;
+ struct svm_range new = {0};
+ struct svm_range *prange;
+ unsigned long left = 0;
+ int r = 0;
+
+ pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
+
+ svm_range_apply_attrs(p, &new, nattr, attrs);
+
+ svms = &p->svms;
+
+ r = svm_range_handle_overlap(svms, &new, start, last, update_list,
+ insert_list, remove_list, &left);
+ if (r)
+ return r;
+
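+	/* Pages at the tail of the new range that overlapped no existing range
+	 * still need a new prange of their own.
+	 */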
+ if (left) {
+ prange = svm_range_new(svms, last - left + 1, last);
+ list_add(&prange->insert_list, insert_list);
+ list_add(&prange->update_list, update_list);
+ }
+
+ return 0;
+}
+
+/* svm_range_best_prefetch_location - decide the best prefetch location
+ * @prange: svm range structure
+ *
+ * For xnack off:
+ * If the range maps to a single GPU, the best actual location is the prefetch
+ * location, which can be CPU or GPU.
+ *
+ * If the range maps to multiple GPUs, the best actual location can only be
+ * the prefetch_loc GPU if all GPUs that need access are connected to it over
+ * XGMI in the same hive. If they are connected over PCIe, the best actual
+ * location is always CPU, because a GPU cannot access the VRAM of other GPUs,
+ * assuming PCIe small BAR (large BAR support is not upstream).
+ *
+ * For xnack on:
+ * The best actual location is the prefetch location. If the GPUs are in the
+ * same XGMI hive, the range maps to multiple GPUs. Otherwise, the range only
+ * maps to the actual location GPU, and access from another GPU causes a VM
+ * fault that triggers migration.
+ *
+ * Context: Process context
+ *
+ * Return:
+ * 0 for CPU, or a GPU id
+ */
+static uint32_t
+svm_range_best_prefetch_location(struct svm_range *prange)
+{
+ DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+ uint32_t best_loc = prange->prefetch_loc;
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *bo_adev;
+ struct amdgpu_device *adev;
+ struct kfd_process *p;
+ uint32_t gpuidx;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ /* xnack on */
+ if (p->xnack_enabled)
+ goto out;
+
+ /* xnack off */
+ if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
+ goto out;
+
+ bo_adev = svm_range_get_adev_by_id(prange, best_loc);
+ if (!bo_adev) {
+ WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc);
+ best_loc = 0;
+ goto out;
+ }
+ bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+ MAX_GPU_INSTANCE);
+
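+	/* With XNACK off, every GPU that needs access must be able to reach
+	 * the prefetch GPU's VRAM over XGMI; otherwise fall back to system
+	 * memory (best_loc 0).
+	 */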
+ for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to get device by idx 0x%x\n", gpuidx);
+ continue;
+ }
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+ if (adev == bo_adev)
+ continue;
+
+ if (!amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ best_loc = 0;
+ break;
+ }
+ }
+
+out:
+ pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n",
+ p->xnack_enabled, &p->svms, prange->start, prange->last,
+ best_loc);
+
+ return best_loc;
+}
+
+/* FIXME: This is a workaround for a page locking bug when some pages are
+ * invalid during migration to VRAM
+ */
+void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm)
+{
+ struct hmm_range *hmm_range;
+ int r;
+
+ if (prange->validated_once)
+ return;
+
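+	/* Fault the whole range in once through HMM so that migration to VRAM
+	 * does not encounter invalid pages.
+	 */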
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+ prange->start << PAGE_SHIFT,
+ prange->npages, &hmm_range,
+ false, true);
+ if (!r) {
+ amdgpu_hmm_range_get_pages_done(hmm_range);
+ prange->validated_once = true;
+ }
+}
+
+/* svm_range_trigger_migration - start page migration if prefetch loc changed
+ * @mm: current process mm_struct
+ * @prange: svm range structure
+ * @migrated: output, true if migration is triggered
+ *
+ * If the range's prefetch_loc is a GPU and the actual loc is CPU (0), migrate
+ * the range from RAM to VRAM.
+ * If the range's prefetch_loc is CPU (0) and the actual loc is a GPU, migrate
+ * the range from VRAM to RAM.
+ *
+ * If GPU vm fault retry is not enabled, migration interacts with the MMU
+ * notifier and the restore work:
+ * 1. migrate_vma_setup invalidates pages, the MMU notifier callback
+ *    svm_range_evict stops all queues and schedules the restore work
+ * 2. svm_range_restore_work waits for the migration to finish by
+ *    a. svm_range_validate_vram taking prange->migrate_mutex
+ *    b. svm_range_validate_ram HMM get pages waiting for the CPU fault
+ *       handler to return
+ * 3. the restore work updates the GPU mappings and resumes all queues.
+ *
+ * Context: Process context
+ *
+ * Return:
+ * 0 - OK, otherwise - error code of migration
+ */
+static int
+svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
+ bool *migrated)
+{
+ uint32_t best_loc;
+ int r = 0;
+
+ *migrated = false;
+ best_loc = svm_range_best_prefetch_location(prange);
+
+ if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+ best_loc == prange->actual_loc)
+ return 0;
+
+ /*
+	 * Prefetch to a GPU without the host access flag: just set actual_loc
+	 * to that GPU; validation on the GPU and mapping to the GPUs will be
+	 * handled afterwards.
+ */
+ if (best_loc && !prange->actual_loc &&
+ !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) {
+ prange->actual_loc = best_loc;
+ return 0;
+ }
+
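+	/* best_loc 0 means system memory: migrate any VRAM pages back to RAM */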
+ if (!best_loc) {
+ r = svm_migrate_vram_to_ram(prange, mm);
+ *migrated = !r;
+ return r;
+ }
+
+ r = svm_migrate_to_vram(prange, best_loc, mm);
+ *migrated = !r;
+
+ return r;
+}
+
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
+{
+ if (!fence)
+ return -EINVAL;
+
+ if (dma_fence_is_signaled(&fence->base))
+ return 0;
+
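+	/* Defer the actual eviction to a worker; this may be called from a
+	 * context that cannot block on migration.
+	 */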
+ if (fence->svm_bo) {
+ WRITE_ONCE(fence->svm_bo->evicting, 1);
+ schedule_work(&fence->svm_bo->eviction_work);
+ }
+
+ return 0;
+}
+
+static void svm_range_evict_svm_bo_worker(struct work_struct *work)
+{
+ struct svm_range_bo *svm_bo;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+
+ svm_bo = container_of(work, struct svm_range_bo, eviction_work);
+ if (!svm_bo_ref_unless_zero(svm_bo))
+ return; /* svm_bo was freed while eviction was pending */
+
+	/* svm_range_bo_release destroys this work item. So during the
+	 * lifetime of this work item, kfd_process and mm will be valid.
+	 */
+ p = container_of(svm_bo->svms, struct kfd_process, svms);
+ mm = p->mm;
+ if (!mm)
+ return;
+
+ mmap_read_lock(mm);
+ spin_lock(&svm_bo->list_lock);
+ while (!list_empty(&svm_bo->range_list)) {
+ struct svm_range *prange =
+ list_first_entry(&svm_bo->range_list,
+ struct svm_range, svm_bo_list);
+ list_del_init(&prange->svm_bo_list);
+ spin_unlock(&svm_bo->list_lock);
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange->start, prange->last);
+
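+		/* Migrate with the list lock dropped; the range was already
+		 * unlinked from the BO's range_list above.
+		 */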
+ mutex_lock(&prange->migrate_mutex);
+ svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
+
+ mutex_lock(&prange->lock);
+ prange->svm_bo = NULL;
+ mutex_unlock(&prange->lock);
+
+ mutex_unlock(&prange->migrate_mutex);
+
+ spin_lock(&svm_bo->list_lock);
+ }
+ spin_unlock(&svm_bo->list_lock);
+ mmap_read_unlock(mm);
+
+ dma_fence_signal(&svm_bo->eviction_fence->base);
+ /* This is the last reference to svm_bo, after svm_range_vram_node_free
+ * has been called in svm_migrate_vram_to_ram
+ */
+ WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
+ svm_range_bo_unref(svm_bo);
+}
+
+static int
+svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+ struct amdkfd_process_info *process_info = p->kgd_process_info;
+ struct mm_struct *mm = current->mm;
+ struct list_head update_list;
+ struct list_head insert_list;
+ struct list_head remove_list;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct svm_range *next;
+ int r = 0;
+
+ pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
+ p->pasid, &p->svms, start, start + size - 1, size);
+
+ r = svm_range_check_attr(p, nattr, attrs);
+ if (r)
+ return r;
+
+ svms = &p->svms;
+
+ mutex_lock(&process_info->lock);
+
+ svm_range_list_lock_and_flush_work(svms, mm);
+
+ if (!svm_range_is_valid(mm, start, size)) {
+ pr_debug("invalid range\n");
+ r = -EFAULT;
+ mmap_write_unlock(mm);
+ goto out;
+ }
+
+ mutex_lock(&svms->lock);
+
+ /* Add new range and split existing ranges as needed */
+ r = svm_range_add(p, start, size, nattr, attrs, &update_list,
+ &insert_list, &remove_list);
+ if (r) {
+ mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+ goto out;
+ }
+ /* Apply changes as a transaction */
+ list_for_each_entry_safe(prange, next, &insert_list, insert_list) {
+ svm_range_add_to_svms(prange);
+ svm_range_add_notifier_locked(mm, prange);
+ }
+ list_for_each_entry(prange, &update_list, update_list) {
+ svm_range_apply_attrs(p, prange, nattr, attrs);
+ /* TODO: unmap ranges from GPU that lost access */
+ }
+ list_for_each_entry_safe(prange, next, &remove_list,
+ remove_list) {
+ pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ prange->svms, prange, prange->start,
+ prange->last);
+ svm_range_unlink(prange);
+ svm_range_remove_notifier(prange);
+ svm_range_free(prange);
+ }
+
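+	/* The structural changes above needed the mmap write lock; migration,
+	 * validation and mapping below only need it for read.
+	 */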
+ mmap_write_downgrade(mm);
+ /* Trigger migrations and revalidate and map to GPUs as needed. If
+ * this fails we may be left with partially completed actions. There
+ * is no clean way of rolling back to the previous state in such a
+ * case because the rollback wouldn't be guaranteed to work either.
+ */
+ list_for_each_entry(prange, &update_list, update_list) {
+ bool migrated;
+
+ mutex_lock(&prange->migrate_mutex);
+
+ r = svm_range_trigger_migration(mm, prange, &migrated);
+ if (r)
+ goto out_unlock_range;
+
+ if (migrated && !p->xnack_enabled) {
+ pr_debug("restore_work will update mappings of GPUs\n");
+ mutex_unlock(&prange->migrate_mutex);
+ continue;
+ }
+
+ r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+ true, true);
+ if (r)
+ pr_debug("failed %d to map svm range\n", r);
+
+out_unlock_range:
+ mutex_unlock(&prange->migrate_mutex);
+ if (r)
+ break;
+ }
+
+ svm_range_debug_dump(svms);
+
+ mutex_unlock(&svms->lock);
+ mmap_read_unlock(mm);
+out:
+ mutex_unlock(&process_info->lock);
+
+ pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
+ &p->svms, start, start + size - 1, r);
+
+ return r;
+}
+
+static int
+svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+ DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+ DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+ bool get_preferred_loc = false;
+ bool get_prefetch_loc = false;
+ bool get_granularity = false;
+ bool get_accessible = false;
+ bool get_flags = false;
+ uint64_t last = start + size - 1UL;
+ struct mm_struct *mm = current->mm;
+ uint8_t granularity = 0xff;
+ struct interval_tree_node *node;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+ uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+ uint32_t flags = 0xffffffff;
+ int gpuidx;
+ uint32_t i;
+
+ pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
+ start + size - 1, nattr);
+
+ mmap_read_lock(mm);
+ if (!svm_range_is_valid(mm, start, size)) {
+ pr_debug("invalid range\n");
+ mmap_read_unlock(mm);
+ return -EINVAL;
+ }
+ mmap_read_unlock(mm);
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ get_preferred_loc = true;
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ get_prefetch_loc = true;
+ break;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ get_accessible = true;
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ get_flags = true;
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ get_granularity = true;
+ break;
+ case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ fallthrough;
+ default:
+ pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
+ return -EINVAL;
+ }
+ }
+
+ svms = &p->svms;
+
+ mutex_lock(&svms->lock);
+
+ node = interval_tree_iter_first(&svms->objects, start, last);
+ if (!node) {
+ pr_debug("range attrs not found return default values\n");
+ svm_range_set_default_attributes(&location, &prefetch_loc,
+ &granularity, &flags);
+ if (p->xnack_enabled)
+ bitmap_copy(bitmap_access, svms->bitmap_supported,
+ MAX_GPU_INSTANCE);
+ else
+ bitmap_zero(bitmap_access, MAX_GPU_INSTANCE);
+ bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE);
+ goto fill_values;
+ }
+ bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE);
+ bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE);
+
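+	/* Walk all ranges overlapping [start, last] and merge their attributes:
+	 * locations must match across ranges to be reported, flags are ANDed,
+	 * and the smallest granularity wins.
+	 */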
+ while (node) {
+ struct interval_tree_node *next;
+
+ prange = container_of(node, struct svm_range, it_node);
+ next = interval_tree_iter_next(node, start, last);
+
+ if (get_preferred_loc) {
+ if (prange->preferred_loc ==
+ KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+ (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+ location != prange->preferred_loc)) {
+ location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+ get_preferred_loc = false;
+ } else {
+ location = prange->preferred_loc;
+ }
+ }
+ if (get_prefetch_loc) {
+ if (prange->prefetch_loc ==
+ KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+ (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+ prefetch_loc != prange->prefetch_loc)) {
+ prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+ get_prefetch_loc = false;
+ } else {
+ prefetch_loc = prange->prefetch_loc;
+ }
+ }
+ if (get_accessible) {
+ bitmap_and(bitmap_access, bitmap_access,
+ prange->bitmap_access, MAX_GPU_INSTANCE);
+ bitmap_and(bitmap_aip, bitmap_aip,
+ prange->bitmap_aip, MAX_GPU_INSTANCE);
+ }
+ if (get_flags)
+ flags &= prange->flags;
+
+ if (get_granularity && prange->granularity < granularity)
+ granularity = prange->granularity;
+
+ node = next;
+ }
+fill_values:
+ mutex_unlock(&svms->lock);
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ attrs[i].value = location;
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ attrs[i].value = prefetch_loc;
+ break;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ gpuidx = kfd_process_gpuidx_from_gpuid(p,
+ attrs[i].value);
+ if (gpuidx < 0) {
+ pr_debug("invalid gpuid %x\n", attrs[i].value);
+ return -EINVAL;
+ }
+ if (test_bit(gpuidx, bitmap_access))
+ attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS;
+ else if (test_bit(gpuidx, bitmap_aip))
+ attrs[i].type =
+ KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE;
+ else
+ attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS;
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ attrs[i].value = flags;
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ attrs[i].value = (uint32_t)granularity;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int
+svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
+ uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
+{
+ int r;
+
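+	/* The SVM code tracks ranges in page units; convert the byte-based
+	 * address and size from the ioctl.
+	 */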
+ start >>= PAGE_SHIFT;
+ size >>= PAGE_SHIFT;
+
+ switch (op) {
+ case KFD_IOCTL_SVM_OP_SET_ATTR:
+ r = svm_range_set_attr(p, start, size, nattrs, attrs);
+ break;
+ case KFD_IOCTL_SVM_OP_GET_ATTR:
+ r = svm_range_get_attr(p, start, size, nattrs, attrs);
+ break;
+ default:
+		r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
new file mode 100644
index 000000000000..0c0fc399395e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_SVM_H_
+#define KFD_SVM_H_
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include "amdgpu.h"
+#include "kfd_priv.h"
+
+struct svm_range_bo {
+ struct amdgpu_bo *bo;
+ struct kref kref;
+	struct list_head range_list; /* all svm ranges sharing this bo */
+ spinlock_t list_lock;
+ struct amdgpu_amdkfd_fence *eviction_fence;
+ struct work_struct eviction_work;
+ struct svm_range_list *svms;
+ uint32_t evicting;
+};
+
+enum svm_work_list_ops {
+ SVM_OP_NULL,
+ SVM_OP_UNMAP_RANGE,
+ SVM_OP_UPDATE_RANGE_NOTIFIER,
+ SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP,
+ SVM_OP_ADD_RANGE,
+ SVM_OP_ADD_RANGE_AND_MAP
+};
+
+struct svm_work_list_item {
+ enum svm_work_list_ops op;
+ struct mm_struct *mm;
+};
+
+/**
+ * struct svm_range - shared virtual memory range
+ *
+ * @svms: list of svm ranges, structure defined in kfd_process
+ * @migrate_mutex: to serialize range migration, validation and mapping update
+ * @start: range start address in pages
+ * @last: range last address in pages
+ * @it_node: node [start, last] stored in interval tree, start, last are page
+ *          aligned, range size in pages is (last - start + 1)
+ * @list: link list node, used to scan all ranges of svms
+ * @update_list: link list node used to add to update_list
+ * @remove_list: link list node used to add to remove list
+ * @insert_list: link list node used to add to insert list
+ * @mapping: bo_va mapping structure to create and update GPU page table
+ * @npages: number of pages
+ * @dma_addr: dma mapping address on each GPU for system memory physical page
+ * @ttm_res: vram ttm resource map
+ * @offset: range start offset within mm_nodes
+ * @svm_bo: struct to manage split amdgpu_bo
+ * @svm_bo_list: link list node, to scan all ranges which share same svm_bo
+ * @lock: protect prange start, last, child_list, svm_bo_list
+ * @saved_flags: save/restore current PF_MEMALLOC flags
+ * @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
+ * @preferred_loc: preferred location, 0 for CPU, or GPU id
+ * @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
+ * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @granularity: migration granularity, log2 num pages
+ * @invalid: not 0 means cpu page table is invalidated
+ * @validate_timestamp: system timestamp when range is validated
+ * @notifier: register mmu interval notifier
+ * @work_item: deferred work item information
+ * @deferred_list: list header used to add range to deferred list
+ * @child_list: list header for split ranges which are not added to svms yet
+ * @bitmap_access: index bitmap of GPUs which can access the range
+ * @bitmap_aip: index bitmap of GPUs which can access the range in place
+ *
+ * Data structure for a virtual memory range shared by CPU and GPUs. It can be
+ * allocated from system memory (RAM) or device VRAM, and migrated from RAM to
+ * VRAM or from VRAM to RAM.
+ */
+struct svm_range {
+ struct svm_range_list *svms;
+ struct mutex migrate_mutex;
+ unsigned long start;
+ unsigned long last;
+ struct interval_tree_node it_node;
+ struct list_head list;
+ struct list_head update_list;
+ struct list_head remove_list;
+ struct list_head insert_list;
+ struct amdgpu_bo_va_mapping mapping;
+ uint64_t npages;
+ dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
+ struct ttm_resource *ttm_res;
+ uint64_t offset;
+ struct svm_range_bo *svm_bo;
+ struct list_head svm_bo_list;
+ struct mutex lock;
+ unsigned int saved_flags;
+ uint32_t flags;
+ uint32_t preferred_loc;
+ uint32_t prefetch_loc;
+ uint32_t actual_loc;
+ uint8_t granularity;
+ atomic_t invalid;
+ uint64_t validate_timestamp;
+ struct mmu_interval_notifier notifier;
+ struct svm_work_list_item work_item;
+ struct list_head deferred_list;
+ struct list_head child_list;
+ DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+ DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+ bool validated_once;
+};
+
+static inline void svm_range_lock(struct svm_range *prange)
+{
+ mutex_lock(&prange->lock);
+ prange->saved_flags = memalloc_noreclaim_save();
+
+}
+static inline void svm_range_unlock(struct svm_range *prange)
+{
+ memalloc_noreclaim_restore(prange->saved_flags);
+ mutex_unlock(&prange->lock);
+}
+
+int svm_range_list_init(struct kfd_process *p);
+void svm_range_list_fini(struct kfd_process *p);
+int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
+ uint64_t size, uint32_t nattrs,
+ struct kfd_ioctl_svm_attribute *attrs);
+struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
+ unsigned long addr,
+ struct svm_range **parent);
+struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange,
+ uint32_t id);
+int svm_range_vram_node_new(struct amdgpu_device *adev,
+ struct svm_range *prange, bool clear);
+void svm_range_vram_node_free(struct svm_range *prange);
+int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
+ unsigned long addr, struct svm_range *parent,
+ struct svm_range *prange);
+int svm_range_restore_pages(struct amdgpu_device *adev,
+ unsigned int pasid, uint64_t addr);
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
+void svm_range_add_list_work(struct svm_range_list *svms,
+ struct svm_range *prange, struct mm_struct *mm,
+ enum svm_work_list_ops op);
+void schedule_deferred_list_work(struct svm_range_list *svms);
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+ unsigned long offset, unsigned long npages);
+void svm_range_free_dma_mappings(struct svm_range *prange);
+void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
+
+/* SVM API and HMM page migration work together; the device memory type
+ * is initialized to non-zero when page migration registers device memory.
+ */
+#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
+
+#else
+
+struct kfd_process;
+
+static inline int svm_range_list_init(struct kfd_process *p)
+{
+ return 0;
+}
+static inline void svm_range_list_fini(struct kfd_process *p)
+{
+ /* empty */
+}
+
+static inline int svm_range_restore_pages(struct amdgpu_device *adev,
+ unsigned int pasid, uint64_t addr)
+{
+ return -EFAULT;
+}
+
+static inline int svm_range_schedule_evict_svm_bo(
+ struct amdgpu_amdkfd_fence *fence)
+{
+ WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled");
+ return -EINVAL;
+}
+
+#define KFD_IS_SVM_API_SUPPORTED(dev) false
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
+#endif /* KFD_SVM_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index cdef608db4f4..b1ce072aa20b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -36,6 +36,7 @@
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
+#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
@@ -1192,40 +1193,83 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
mem->mem_clk_max = local_mem_info.mem_clk_max;
}
-static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
+ struct kfd_topology_device *target_gpu_dev,
+ struct kfd_iolink_properties *link)
{
- struct kfd_iolink_properties *link, *cpu_link;
- struct kfd_topology_device *cpu_dev;
- uint32_t cap;
- uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
- uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
-
- if (!dev || !dev->gpu)
+ /* xgmi always supports atomics between links. */
+ if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
return;
- pcie_capability_read_dword(dev->gpu->pdev,
- PCI_EXP_DEVCAP2, &cap);
+ /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
+ if (target_gpu_dev) {
+ uint32_t cap;
- if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
- cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
- CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+ pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
+ PCI_EXP_DEVCAP2, &cap);
+
+ if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
+ link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+ /* set gpu (dev) flags. */
+ } else {
+ if (!dev->gpu->pci_atomic_requested ||
+ dev->gpu->device_info->asic_family ==
+ CHIP_HAWAII)
+ link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+ }
+}
- if (!dev->gpu->pci_atomic_requested ||
- dev->gpu->device_info->asic_family == CHIP_HAWAII)
- flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
- CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
+ struct kfd_iolink_properties *outbound_link,
+ struct kfd_iolink_properties *inbound_link)
+{
+ /* CPU -> GPU with PCIe */
+ if (!to_dev->gpu &&
+ inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
+ inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
+
+ if (to_dev->gpu) {
+ /* GPU <-> GPU with PCIe and
+ * Vega20 with XGMI
+ */
+ if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
+ (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+ to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) {
+ outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
+ inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
+ }
+ }
+}
+
+static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+{
+ struct kfd_iolink_properties *link, *inbound_link;
+ struct kfd_topology_device *peer_dev;
+
+ if (!dev || !dev->gpu)
+ return;
/* GPU only creates direct links so apply flags setting to all */
list_for_each_entry(link, &dev->io_link_props, list) {
- link->flags = flag;
- cpu_dev = kfd_topology_device_by_proximity_domain(
+ link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(dev, NULL, link);
+ peer_dev = kfd_topology_device_by_proximity_domain(
link->node_to);
- if (cpu_dev) {
- list_for_each_entry(cpu_link,
- &cpu_dev->io_link_props, list)
- if (cpu_link->node_to == link->node_from)
- cpu_link->flags = cpu_flag;
+
+ if (!peer_dev)
+ continue;
+
+ list_for_each_entry(inbound_link, &peer_dev->io_link_props,
+ list) {
+ if (inbound_link->node_to != link->node_from)
+ continue;
+
+ inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+ kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
}
}
}
@@ -1378,6 +1422,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
case CHIP_NAVY_FLOUNDER:
case CHIP_VANGOGH:
case CHIP_DIMGREY_CAVEFISH:
+ case CHIP_BEIGE_GOBY:
+ case CHIP_YELLOW_CARP:
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
@@ -1410,15 +1456,18 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
adev = (struct amdgpu_device *)(dev->gpu->kgd);
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
- ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+ ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
HSA_CAP_SRAM_EDCSUPPORTED : 0;
- dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+ dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
HSA_CAP_MEM_EDCSUPPORTED : 0;
if (adev->asic_type != CHIP_VEGA10)
- dev->node_props.capability |= (adev->ras_features != 0) ?
+ dev->node_props.capability |= (adev->ras_enabled != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
+ if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev))
+ dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
+
kfd_debug_print_topology();
if (!res)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index b8b68087bd7a..8b48c6692007 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -53,8 +53,9 @@
#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
#define HSA_CAP_ASIC_REVISION_SHIFT 22
#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000
-
-#define HSA_CAP_RESERVED 0xf80f8000
+#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
+#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000
+#define HSA_CAP_RESERVED 0xe00f8000
struct kfd_node_properties {
uint64_t hive_id;
@@ -98,9 +99,9 @@ struct kfd_node_properties {
#define HSA_MEM_HEAP_TYPE_GPU_LDS 4
#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
-#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
-#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
+#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
+#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
struct kfd_mem_properties {
struct list_head list;
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
index 0bc0b25cb410..daf3c44547d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -30,6 +30,7 @@
#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
#define SOC15_INTSRC_VMC_FAULT 0
#define SOC15_INTSRC_SDMA_TRAP 224
+#define SOC15_INTSRC_SDMA_ECC 220
#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)