Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
25 files changed, 2538 insertions, 326 deletions
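Most of the 2538 insertions come from the new memory-management ioctls added to kfd_chardev.c below (AMDKFD_IOC_ACQUIRE_VM, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, AMDKFD_IOC_FREE_MEMORY_OF_GPU, and AMDKFD_IOC_MAP_MEMORY_TO_GPU / AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU). A minimal sketch of the user-space sequence they imply follows; it assumes the matching kfd_ioctl_*_args definitions from include/uapi/linux/kfd_ioctl.h (the uapi change is outside this diffstat's amdkfd scope), kfd_fd/drm_fd/gpu_id are placeholders obtained elsewhere, and error handling plus the additional access-permission flags real callers would OR into .flags are elided:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	/* Sketch: acquire the GPUVM of a DRM render node, allocate VRAM at a
	 * chosen virtual address, then map it on the allocating GPU.
	 */
	static int alloc_and_map_vram(int kfd_fd, int drm_fd, uint32_t gpu_id,
				      uint64_t va_addr, uint64_t size)
	{
		struct kfd_ioctl_acquire_vm_args acquire = {
			.drm_fd = drm_fd,
			.gpu_id = gpu_id,
		};
		struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
			.va_addr = va_addr,
			.size = size,
			.gpu_id = gpu_id,
			.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
		};
		struct kfd_ioctl_map_memory_to_gpu_args map = {0};

		/* Bind the process to the render node's VM before allocating */
		if (ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &acquire) < 0)
			return -1;

		/* The returned handle packs the gpu_id and an IDR handle */
		if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc) < 0)
			return -1;

		/* n_success lets an interrupted multi-GPU map call be retried
		 * without remapping devices that already succeeded
		 */
		map.handle = alloc.handle;
		map.device_ids_array_ptr = (uint64_t)(uintptr_t)&gpu_id;
		map.n_devices = 1;
		map.n_success = 0;
		return ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map);
	}

Unmapping and freeing mirror this sequence through AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU and AMDKFD_IOC_FREE_MEMORY_OF_GPU, passing back the same handle.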
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index bc5a2945bd2b..ed2f06c9f346 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,7 @@
 config HSA_AMD
 	tristate "HSA kernel driver for AMD GPU devices"
-	depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
+	depends on DRM_AMDGPU && X86_64
+	imply AMD_IOMMU_V2
 	help
 	  Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index a317e76ffb5e..0d0242240c47 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -37,6 +37,10 @@ amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
 		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
 		kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
 
+ifneq ($(CONFIG_AMD_IOMMU_V2),)
+amdkfd-y += kfd_iommu.o
+endif
+
 amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
 
 obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 62c3d9cd6ef1..cd679cf1fd30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -825,12 +826,155 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	return 0;
 }
 
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	struct kfd_process_device *pdd;
+	uint32_t nodes = 0;
+	int ret;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can allocate
+		 * sufficient memory
+		 */
+		mutex_lock(&p->mutex);
+
+		if (!kfd_has_process_device_data(p))
+			goto out_unlock;
+
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			args->num_of_nodes++;
+			pdd = kfd_get_next_process_device_data(p, pdd);
+		} while (pdd);
+
+		goto out_unlock;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user
+	 */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+			args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	mutex_lock(&p->mutex);
+
+	if (!kfd_has_process_device_data(p)) {
+		args->num_of_nodes = 0;
+		kfree(pa);
+		goto out_unlock;
+	}
+
+	/* Run over all pdd of the process */
+	pdd = kfd_get_first_process_device_data(p);
+	do {
+		pa[nodes].gpu_id = pdd->dev->id;
+		pa[nodes].lds_base = pdd->lds_base;
+		pa[nodes].lds_limit = pdd->lds_limit;
+		pa[nodes].gpuvm_base = pdd->gpuvm_base;
+		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+		pa[nodes].scratch_base = pdd->scratch_base;
+		pa[nodes].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+		nodes++;
+
+		pdd = 
kfd_get_next_process_device_data(p, pdd); + } while (pdd && (nodes < args->num_of_nodes)); + mutex_unlock(&p->mutex); + + args->num_of_nodes = nodes; + ret = copy_to_user( + (void __user *)args->kfd_process_device_apertures_ptr, + pa, + (nodes * sizeof(struct kfd_process_device_apertures))); + kfree(pa); + return ret ? -EFAULT : 0; + +out_unlock: + mutex_unlock(&p->mutex); + return 0; +} + static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, void *data) { struct kfd_ioctl_create_event_args *args = data; int err; + /* For dGPUs the event page is allocated in user mode. The + * handle is passed to KFD with the first call to this IOCTL + * through the event_page_offset field. + */ + if (args->event_page_offset) { + struct kfd_dev *kfd; + struct kfd_process_device *pdd; + void *mem, *kern_addr; + uint64_t size; + + if (p->signal_page) { + pr_err("Event page is already set\n"); + return -EINVAL; + } + + kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset)); + if (!kfd) { + pr_err("Getting device by id failed in %s\n", __func__); + return -EINVAL; + } + + mutex_lock(&p->mutex); + pdd = kfd_bind_process_to_device(kfd, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto out_unlock; + } + + mem = kfd_process_device_translate_handle(pdd, + GET_IDR_HANDLE(args->event_page_offset)); + if (!mem) { + pr_err("Can't find BO, offset is 0x%llx\n", + args->event_page_offset); + err = -EINVAL; + goto out_unlock; + } + mutex_unlock(&p->mutex); + + err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd, + mem, &kern_addr, &size); + if (err) { + pr_err("Failed to map event page to kernel\n"); + return err; + } + + err = kfd_event_page_set(p, kern_addr, size); + if (err) { + pr_err("Failed to set event page\n"); + return err; + } + } + err = kfd_event_create(filp, p, args->event_type, args->auto_reset != 0, args->node_id, &args->event_id, &args->event_trigger_data, @@ -838,6 +982,10 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, &args->event_slot_index); return err; + +out_unlock: + mutex_unlock(&p->mutex); + return err; } static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, @@ -901,7 +1049,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, mutex_unlock(&p->mutex); - if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && + pdd->qpd.vmid != 0) dev->kfd2kgd->set_scratch_backing_va( dev->kgd, args->va_addr, pdd->qpd.vmid); @@ -954,6 +1103,371 @@ static int kfd_ioctl_get_tile_config(struct file *filep, return 0; } +static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, + void *data) +{ + struct kfd_ioctl_acquire_vm_args *args = data; + struct kfd_process_device *pdd; + struct kfd_dev *dev; + struct file *drm_file; + int ret; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + drm_file = fget(args->drm_fd); + if (!drm_file) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + ret = -EINVAL; + goto err_unlock; + } + + if (pdd->drm_file) { + ret = pdd->drm_file == drm_file ? 
0 : -EBUSY; + goto err_unlock; + } + + ret = kfd_process_device_init_vm(pdd, drm_file); + if (ret) + goto err_unlock; + /* On success, the PDD keeps the drm_file reference */ + mutex_unlock(&p->mutex); + + return 0; + +err_unlock: + mutex_unlock(&p->mutex); + fput(drm_file); + return ret; +} + +bool kfd_dev_is_large_bar(struct kfd_dev *dev) +{ + struct kfd_local_mem_info mem_info; + + if (debug_largebar) { + pr_debug("Simulate large-bar allocation on non large-bar machine\n"); + return true; + } + + if (dev->device_info->needs_iommu_device) + return false; + + dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info); + if (mem_info.local_mem_size_private == 0 && + mem_info.local_mem_size_public > 0) + return true; + return false; +} + +static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; + struct kfd_process_device *pdd; + void *mem; + struct kfd_dev *dev; + int idr_handle; + long err; + uint64_t offset = args->mmap_offset; + uint32_t flags = args->flags; + + if (args->size == 0) + return -EINVAL; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) && + (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && + !kfd_dev_is_large_bar(dev)) { + pr_err("Alloc host visible vram on small bar is not allowed\n"); + return -EINVAL; + } + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto err_unlock; + } + + err = dev->kfd2kgd->alloc_memory_of_gpu( + dev->kgd, args->va_addr, args->size, + pdd->vm, (struct kgd_mem **) &mem, &offset, + flags); + + if (err) + goto err_unlock; + + idr_handle = kfd_process_device_create_obj_handle(pdd, mem); + if (idr_handle < 0) { + err = -EFAULT; + goto err_free; + } + + mutex_unlock(&p->mutex); + + args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); + args->mmap_offset = offset; + + return 0; + +err_free: + dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); +err_unlock: + mutex_unlock(&p->mutex); + return err; +} + +static int kfd_ioctl_free_memory_of_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_free_memory_of_gpu_args *args = data; + struct kfd_process_device *pdd; + void *mem; + struct kfd_dev *dev; + int ret; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + ret = -EINVAL; + goto err_unlock; + } + + mem = kfd_process_device_translate_handle( + pdd, GET_IDR_HANDLE(args->handle)); + if (!mem) { + ret = -EINVAL; + goto err_unlock; + } + + ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + + /* If freeing the buffer failed, leave the handle in place for + * clean-up during process tear-down. 
+ */ + if (!ret) + kfd_process_device_remove_obj_handle( + pdd, GET_IDR_HANDLE(args->handle)); + +err_unlock: + mutex_unlock(&p->mutex); + return ret; +} + +static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_map_memory_to_gpu_args *args = data; + struct kfd_process_device *pdd, *peer_pdd; + void *mem; + struct kfd_dev *dev, *peer; + long err = 0; + int i; + uint32_t *devices_arr = NULL; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + + if (!args->n_devices) { + pr_debug("Device IDs array empty\n"); + return -EINVAL; + } + if (args->n_success > args->n_devices) { + pr_debug("n_success exceeds n_devices\n"); + return -EINVAL; + } + + devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), + GFP_KERNEL); + if (!devices_arr) + return -ENOMEM; + + err = copy_from_user(devices_arr, + (void __user *)args->device_ids_array_ptr, + args->n_devices * sizeof(*devices_arr)); + if (err != 0) { + err = -EFAULT; + goto copy_from_user_failed; + } + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto bind_process_to_device_failed; + } + + mem = kfd_process_device_translate_handle(pdd, + GET_IDR_HANDLE(args->handle)); + if (!mem) { + err = -ENOMEM; + goto get_mem_obj_from_handle_failed; + } + + for (i = args->n_success; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (!peer) { + pr_debug("Getting device by id failed for 0x%x\n", + devices_arr[i]); + err = -EINVAL; + goto get_mem_obj_from_handle_failed; + } + + peer_pdd = kfd_bind_process_to_device(peer, p); + if (IS_ERR(peer_pdd)) { + err = PTR_ERR(peer_pdd); + goto get_mem_obj_from_handle_failed; + } + err = peer->kfd2kgd->map_memory_to_gpu( + peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + if (err) { + pr_err("Failed to map to gpu %d/%d\n", + i, args->n_devices); + goto map_memory_to_gpu_failed; + } + args->n_success = i+1; + } + + mutex_unlock(&p->mutex); + + err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true); + if (err) { + pr_debug("Sync memory failed, wait interrupted by user signal\n"); + goto sync_memory_failed; + } + + /* Flush TLBs after waiting for the page table updates to complete */ + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd); + } + + kfree(devices_arr); + + return err; + +bind_process_to_device_failed: +get_mem_obj_from_handle_failed: +map_memory_to_gpu_failed: + mutex_unlock(&p->mutex); +copy_from_user_failed: +sync_memory_failed: + kfree(devices_arr); + + return err; +} + +static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_unmap_memory_from_gpu_args *args = data; + struct kfd_process_device *pdd, *peer_pdd; + void *mem; + struct kfd_dev *dev, *peer; + long err = 0; + uint32_t *devices_arr = NULL, i; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + + if (!args->n_devices) { + pr_debug("Device IDs array empty\n"); + return -EINVAL; + } + if (args->n_success > args->n_devices) { + pr_debug("n_success exceeds n_devices\n"); + return -EINVAL; + } + + devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), + GFP_KERNEL); + if (!devices_arr) + return -ENOMEM; + + err = copy_from_user(devices_arr, + (void __user 
*)args->device_ids_array_ptr, + args->n_devices * sizeof(*devices_arr)); + if (err != 0) { + err = -EFAULT; + goto copy_from_user_failed; + } + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + err = PTR_ERR(pdd); + goto bind_process_to_device_failed; + } + + mem = kfd_process_device_translate_handle(pdd, + GET_IDR_HANDLE(args->handle)); + if (!mem) { + err = -ENOMEM; + goto get_mem_obj_from_handle_failed; + } + + for (i = args->n_success; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (!peer) { + err = -EINVAL; + goto get_mem_obj_from_handle_failed; + } + + peer_pdd = kfd_get_process_device_data(peer, p); + if (!peer_pdd) { + err = -ENODEV; + goto get_mem_obj_from_handle_failed; + } + err = dev->kfd2kgd->unmap_memory_to_gpu( + peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + if (err) { + pr_err("Failed to unmap from gpu %d/%d\n", + i, args->n_devices); + goto unmap_memory_from_gpu_failed; + } + args->n_success = i+1; + } + kfree(devices_arr); + + mutex_unlock(&p->mutex); + + return 0; + +bind_process_to_device_failed: +get_mem_obj_from_handle_failed: +unmap_memory_from_gpu_failed: + mutex_unlock(&p->mutex); +copy_from_user_failed: + kfree(devices_arr); + return err; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1016,6 +1530,25 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, kfd_ioctl_set_trap_handler, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, + kfd_ioctl_get_process_apertures_new, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, + kfd_ioctl_acquire_vm, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, + kfd_ioctl_alloc_memory_of_gpu, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU, + kfd_ioctl_free_memory_of_gpu, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU, + kfd_ioctl_map_memory_to_gpu, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, + kfd_ioctl_unmap_memory_from_gpu, 0), + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2bc2816767a7..4f126ef6139b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -22,10 +22,10 @@ #include <linux/pci.h> #include <linux/acpi.h> -#include <linux/amd-iommu.h> #include "kfd_crat.h" #include "kfd_priv.h" #include "kfd_topology.h" +#include "kfd_iommu.h" /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. 
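The chardev ioctls above refer to buffer objects through a packed 64-bit handle: kfd_ioctl_alloc_memory_of_gpu() returns MAKE_HANDLE(args->gpu_id, idr_handle), and the free/map/unmap paths decode it with GET_GPU_ID() and GET_IDR_HANDLE() (GET_GPU_ID() is also reused on event_page_offset in kfd_ioctl_create_event()). The macros themselves live in kfd_priv.h, which is not part of the hunks shown, so the following is only the packing a reader can plausibly infer from these call sites:

	#include <stdint.h>

	/* Assumed layout (sketch; see kfd_priv.h for the real
	 * MAKE_HANDLE/GET_GPU_ID/GET_IDR_HANDLE definitions): gpu_id in
	 * the upper 32 bits, the per-process IDR handle in the lower 32.
	 */
	static inline uint64_t make_handle(uint32_t gpu_id, uint32_t idr_handle)
	{
		return ((uint64_t)gpu_id << 32) | idr_handle;
	}

	static inline uint32_t get_gpu_id(uint64_t handle)
	{
		return (uint32_t)(handle >> 32);
	}

	static inline uint32_t get_idr_handle(uint64_t handle)
	{
		return (uint32_t)(handle & 0xFFFFFFFF);
	}

This packing is what lets kfd_ioctl_free_memory_of_gpu() recover both the device and the buffer object from args->handle alone.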
@@ -882,7 +882,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) crat_table->length = sizeof(struct crat_header); status = acpi_get_table("DSDT", 0, &acpi_table); - if (status == AE_NOT_FOUND) + if (status != AE_OK) pr_warn("DSDT table not found for OEM information\n"); else { crat_table->oem_revision = acpi_table->revision; @@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, struct crat_subtype_generic *sub_type_hdr; struct crat_subtype_computeunit *cu; struct kfd_cu_info cu_info; - struct amd_iommu_device_info iommu_info; int avail_size = *size; uint32_t total_num_of_cu; int num_of_cache_entries = 0; int cache_mem_filled = 0; int ret = 0; - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; struct kfd_local_mem_info local_mem_info; if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) @@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Check if this node supports IOMMU. During parsing this flag will * translate to HSA_CAP_ATS_PRESENT */ - iommu_info.flags = 0; - if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { - if ((iommu_info.flags & required_iommu_flags) == - required_iommu_flags) - cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; - } + if (!kfd_iommu_check_device(kdev)) + cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; crat_table->length += sub_type_hdr->length; crat_table->total_entries++; @@ -1125,6 +1117,9 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); + if (debug_largebar) + local_mem_info.local_mem_size_private = 0; + if (local_mem_info.local_mem_size_private == 0) ret = kfd_fill_gpu_memory_affinity(&avail_size, kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 3da25f7bda6b..9d4af961c5d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -33,6 +33,7 @@ #include "kfd_pm4_headers_diq.h" #include "kfd_dbgmgr.h" #include "kfd_dbgdev.h" +#include "kfd_device_queue_manager.h" static DEFINE_MUTEX(kfd_dbgmgr_mutex); @@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) } /* get actual type of DBGDevice cpsch or not */ - if (sched_policy == KFD_SCHED_POLICY_NO_HWS) + if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) type = DBGDEV_TYPE_NODIQ; kfd_dbgdev_init(new_buff->dbgdev, pdev, type); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a8fa33a08de3..3346699960dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) #include <linux/amd-iommu.h> +#endif #include <linux/bsearch.h> #include <linux/pci.h> #include <linux/slab.h> @@ -28,9 +30,12 @@ #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" #include "cwsr_trap_handler_gfx8.asm" +#include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 +static atomic_t kfd_device_suspended = ATOMIC_INIT(0); +#ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, @@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = true, + .needs_pci_atomics = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = true, + .needs_pci_atomics = false, }; +#endif + +static const struct kfd_device_info hawaii_device_info = { + .asic_family = CHIP_HAWAII, + .max_pasid_bits = 16, + /* max num of queues for KV.TODO should be a dynamic value */ + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info tonga_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info tonga_vf_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info fiji_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info fiji_vf_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + + +static const struct kfd_device_info polaris10_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static 
const struct kfd_device_info polaris10_vf_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info polaris11_device_info = { + .asic_family = CHIP_POLARIS11, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + struct kfd_deviceid { unsigned short did; const struct kfd_device_info *device_info; }; -/* Please keep this sorted by increasing device id. */ static const struct kfd_deviceid supported_devices[] = { +#ifdef KFD_SUPPORT_IOMMU_V2 { 0x1304, &kaveri_device_info }, /* Kaveri */ { 0x1305, &kaveri_device_info }, /* Kaveri */ { 0x1306, &kaveri_device_info }, /* Kaveri */ @@ -88,7 +205,51 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9874, &carrizo_device_info }, /* Carrizo */ { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ - { 0x9877, &carrizo_device_info } /* Carrizo */ + { 0x9877, &carrizo_device_info }, /* Carrizo */ +#endif + { 0x67A0, &hawaii_device_info }, /* Hawaii */ + { 0x67A1, &hawaii_device_info }, /* Hawaii */ + { 0x67A2, &hawaii_device_info }, /* Hawaii */ + { 0x67A8, &hawaii_device_info }, /* Hawaii */ + { 0x67A9, &hawaii_device_info }, /* Hawaii */ + { 0x67AA, &hawaii_device_info }, /* Hawaii */ + { 0x67B0, &hawaii_device_info }, /* Hawaii */ + { 0x67B1, &hawaii_device_info }, /* Hawaii */ + { 0x67B8, &hawaii_device_info }, /* Hawaii */ + { 0x67B9, &hawaii_device_info }, /* Hawaii */ + { 0x67BA, &hawaii_device_info }, /* Hawaii */ + { 0x67BE, &hawaii_device_info }, /* Hawaii */ + { 0x6920, &tonga_device_info }, /* Tonga */ + { 0x6921, &tonga_device_info }, /* Tonga */ + { 0x6928, &tonga_device_info }, /* Tonga */ + { 0x6929, &tonga_device_info }, /* Tonga */ + { 0x692B, &tonga_device_info }, /* Tonga */ + { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ + { 0x6938, &tonga_device_info }, /* Tonga */ + { 0x6939, &tonga_device_info }, /* Tonga */ + { 0x7300, &fiji_device_info }, /* Fiji */ + { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ + { 0x67C0, &polaris10_device_info }, /* Polaris10 */ + { 0x67C1, &polaris10_device_info }, /* Polaris10 */ + { 0x67C2, &polaris10_device_info }, /* Polaris10 */ + { 0x67C4, &polaris10_device_info }, /* Polaris10 */ + { 0x67C7, &polaris10_device_info }, /* Polaris10 */ + { 0x67C8, &polaris10_device_info }, /* Polaris10 */ + { 0x67C9, &polaris10_device_info }, /* Polaris10 */ + { 0x67CA, &polaris10_device_info }, /* Polaris10 */ + { 0x67CC, &polaris10_device_info }, /* Polaris10 */ + { 0x67CF, &polaris10_device_info }, /* Polaris10 */ + { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ + { 0x67DF, &polaris10_device_info }, /* Polaris10 */ + { 0x67E0, &polaris11_device_info }, /* Polaris11 */ + { 0x67E1, &polaris11_device_info }, /* Polaris11 */ + { 0x67E3, &polaris11_device_info }, /* Polaris11 */ + { 0x67E7, &polaris11_device_info }, /* Polaris11 */ + { 0x67E8, &polaris11_device_info }, /* Polaris11 */ + { 0x67E9, &polaris11_device_info }, /* Polaris11 */ + { 0x67EB, &polaris11_device_info 
}, /* Polaris11 */ + { 0x67EF, &polaris11_device_info }, /* Polaris11 */ + { 0x67FF, &polaris11_device_info }, /* Polaris11 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -127,6 +288,21 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } + if (device_info->needs_pci_atomics) { + /* Allow BIF to recode atomics to PCIe 3.0 + * AtomicOps. 32 and 64-bit requests are possible and + * must be supported. + */ + if (pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics", + pdev->vendor, pdev->device); + return NULL; + } + } + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); if (!kfd) return NULL; @@ -144,77 +320,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return kfd; } -static bool device_iommu_pasid_init(struct kfd_dev *kfd) -{ - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - - struct amd_iommu_device_info iommu_info; - unsigned int pasid_limit; - int err; - - err = amd_iommu_device_info(kfd->pdev, &iommu_info); - if (err < 0) { - dev_err(kfd_device, - "error getting iommu info. is the iommu enabled?\n"); - return false; - } - - if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { - dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) - != 0); - return false; - } - - pasid_limit = min_t(unsigned int, - (unsigned int)(1 << kfd->device_info->max_pasid_bits), - iommu_info.max_pasids); - - if (!kfd_set_pasid_limit(pasid_limit)) { - dev_err(kfd_device, "error setting pasid limit\n"); - return false; - } - - return true; -} - -static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) -{ - struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); - - if (dev) - kfd_process_iommu_unbind_callback(dev, pasid); -} - -/* - * This function called by IOMMU driver on PPR failure - */ -static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, - unsigned long address, u16 flags) -{ - struct kfd_dev *dev; - - dev_warn(kfd_device, - "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", - PCI_BUS_NUM(pdev->devfn), - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), - pasid, - address, - flags); - - dev = kfd_device_by_pci_dev(pdev); - if (!WARN_ON(!dev)) - kfd_signal_iommu_event(dev, pasid, address, - flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); - - return AMD_IOMMU_INV_PRI_RSP_INVALID; -} - static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { @@ -304,11 +409,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; } - if (!device_iommu_pasid_init(kfd)) { - dev_err(kfd_device, - "Error initializing iommuv2 for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); - goto device_iommu_pasid_error; + if (kfd_iommu_device_init(kfd)) { + dev_err(kfd_device, "Error initializing iommuv2\n"); + goto device_iommu_error; } kfd_cwsr_init(kfd); @@ -323,12 +426,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->pdev->device); pr_debug("Starting kfd with the following scheduling policy %d\n", - sched_policy); + kfd->dqm->sched_policy); goto out; kfd_resume_error: -device_iommu_pasid_error: +device_iommu_error: 
device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: kfd_interrupt_exit(kfd); @@ -367,40 +470,45 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) if (!kfd->init_complete) return; - kfd->dqm->ops.stop(kfd->dqm); + /* For first KFD device suspend all the KFD processes */ + if (atomic_inc_return(&kfd_device_suspended) == 1) + kfd_suspend_all_processes(); - kfd_unbind_processes_from_device(kfd); + kfd->dqm->ops.stop(kfd->dqm); - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + kfd_iommu_suspend(kfd); } int kgd2kfd_resume(struct kfd_dev *kfd) { + int ret, count; + if (!kfd->init_complete) return 0; - return kfd_resume(kfd); + ret = kfd_resume(kfd); + if (ret) + return ret; + + count = atomic_dec_return(&kfd_device_suspended); + WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); + if (count == 0) + ret = kfd_resume_all_processes(); + return ret; } static int kfd_resume(struct kfd_dev *kfd) { int err = 0; - unsigned int pasid_limit = kfd_get_pasid_limit(); - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err) - return -ENXIO; - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, - iommu_invalid_ppr_cb); - - err = kfd_bind_processes_to_device(kfd); - if (err) - goto processes_bind_error; + err = kfd_iommu_resume(kfd); + if (err) { + dev_err(kfd_device, + "Failed to resume IOMMU for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); + return err; + } err = kfd->dqm->ops.start(kfd->dqm); if (err) { @@ -413,9 +521,7 @@ static int kfd_resume(struct kfd_dev *kfd) return err; dqm_start_error: -processes_bind_error: - amd_iommu_free_device(kfd->pdev); - + kfd_iommu_suspend(kfd); return err; } @@ -435,6 +541,54 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock(&kfd->interrupt_lock); } +/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will + * prepare for safe eviction of KFD BOs that belong to the specified + * process. + * + * @mm: mm_struct that identifies the specified KFD process + * @fence: eviction fence attached to KFD process BOs + * + */ +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence) +{ + struct kfd_process *p; + unsigned long active_time; + unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS); + + if (!fence) + return -EINVAL; + + if (dma_fence_is_signaled(fence)) + return 0; + + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ENODEV; + + if (fence->seqno == p->last_eviction_seqno) + goto out; + + p->last_eviction_seqno = fence->seqno; + + /* Avoid KFD process starvation. 
Wait for at least + * PROCESS_ACTIVE_TIME_MS before evicting the process again + */ + active_time = get_jiffies_64() - p->last_restore_timestamp; + if (delay_jiffies > active_time) + delay_jiffies -= active_time; + else + delay_jiffies = 0; + + /* During process initialization eviction_work.dwork is initialized + * to kfd_evict_bo_worker + */ + schedule_delayed_work(&p->eviction_work, delay_jiffies); +out: + kfd_unref_process(p); + return 0; +} + static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1bd5f26b3f00..d55d29d31da4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -21,10 +21,11 @@ * */ +#include <linux/ratelimit.h> +#include <linux/printk.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/types.h> -#include <linux/printk.h> #include <linux/bitops.h> #include <linux/sched.h> #include "kfd_priv.h" @@ -118,9 +119,8 @@ static int allocate_vmid(struct device_queue_manager *dqm, if (dqm->vmid_bitmap == 0) return -ENOMEM; - bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, - dqm->dev->vm_info.vmid_num_kfd); - clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap); + bit = ffs(dqm->vmid_bitmap) - 1; + dqm->vmid_bitmap &= ~(1 << bit); allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; pr_debug("vmid allocation %d\n", allocated_vmid); @@ -130,19 +130,49 @@ static int allocate_vmid(struct device_queue_manager *dqm, set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); program_sh_mem_settings(dqm, qpd); + /* qpd->page_table_base is set earlier when register_process() + * is called, i.e. when the first queue is created. 
+ */ + dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, + qpd->vmid, + qpd->page_table_base); + /* invalidate the VM context after pasid and vmid mapping is set up */ + kfd_flush_tlb(qpd_to_pdd(qpd)); + return 0; } +static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, + struct qcm_process_device *qpd) +{ + uint32_t len; + + if (!qpd->ib_kaddr) + return -ENOMEM; + + len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + + return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, + qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); +} + static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; + /* On GFX v7, CP doesn't flush TC at dequeue */ + if (q->device->device_info->asic_family == CHIP_HAWAII) + if (flush_texture_cache_nocpsch(q->device, qpd)) + pr_err("Failed to flush TC\n"); + + kfd_flush_tlb(qpd_to_pdd(qpd)); + /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); - set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); + dqm->vmid_bitmap |= (1 << bit); qpd->vmid = 0; q->properties.vmid = 0; } @@ -170,6 +200,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, goto out_unlock; } q->properties.vmid = qpd->vmid; + /* + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ + if (qpd->evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; @@ -223,12 +261,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) continue; if (dqm->allocated_queues[pipe] != 0) { - bit = find_first_bit( - (unsigned long *)&dqm->allocated_queues[pipe], - get_queues_per_pipe(dqm)); - - clear_bit(bit, - (unsigned long *)&dqm->allocated_queues[pipe]); + bit = ffs(dqm->allocated_queues[pipe]) - 1; + dqm->allocated_queues[pipe] &= ~(1 << bit); q->pipe = pipe; q->queue = bit; set = true; @@ -249,7 +283,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) static inline void deallocate_hqd(struct device_queue_manager *dqm, struct queue *q) { - set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]); + dqm->allocated_queues[q->pipe] |= (1 << q->queue); } static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, @@ -371,21 +405,35 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; struct mqd_manager *mqd; + struct kfd_process_device *pdd; bool prev_active = false; mutex_lock(&dqm->lock); + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) { + retval = -ENODEV; + goto out_unlock; + } mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; goto out_unlock; } + /* + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ + if (pdd->qpd.evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); /* Save previous activity state for counters */ prev_active = q->properties.is_active; /* Make sure the queue is unmapped before updating the MQD */ - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) 
{ retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); if (retval) { @@ -417,7 +465,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) else if (!q->properties.is_active && prev_active) dqm->queue_count--; - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || @@ -451,10 +499,193 @@ static struct mqd_manager *get_mqd_manager( return mqd; } +static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct mqd_manager *mqd; + struct kfd_process_device *pdd; + int retval = 0; + + mutex_lock(&dqm->lock); + if (qpd->evicted++ > 0) /* already evicted, do nothing */ + goto out; + + pdd = qpd_to_pdd(qpd); + pr_info_ratelimited("Evicting PASID %u queues\n", + pdd->process->pasid); + + /* unactivate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_active) + continue; + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) { /* should not be here */ + pr_err("Cannot evict queue, mqd mgr is NULL\n"); + retval = -ENOMEM; + goto out; + } + q->properties.is_evicted = true; + q->properties.is_active = false; + retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); + if (retval) + goto out; + dqm->queue_count--; + } + +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int evict_process_queues_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct kfd_process_device *pdd; + int retval = 0; + + mutex_lock(&dqm->lock); + if (qpd->evicted++ > 0) /* already evicted, do nothing */ + goto out; + + pdd = qpd_to_pdd(qpd); + pr_info_ratelimited("Evicting PASID %u queues\n", + pdd->process->pasid); + + /* unactivate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_active) + continue; + q->properties.is_evicted = true; + q->properties.is_active = false; + dqm->queue_count--; + } + retval = execute_queues_cpsch(dqm, + qpd->is_debug ? 
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct mqd_manager *mqd; + struct kfd_process_device *pdd; + uint32_t pd_base; + int retval = 0; + + pdd = qpd_to_pdd(qpd); + /* Retrieve PD base */ + pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + + mutex_lock(&dqm->lock); + if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ + goto out; + if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ + qpd->evicted--; + goto out; + } + + pr_info_ratelimited("Restoring PASID %u queues\n", + pdd->process->pasid); + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; + pr_debug("Updated PD address to 0x%08x\n", pd_base); + + if (!list_empty(&qpd->queues_list)) { + dqm->dev->kfd2kgd->set_vm_context_page_table_base( + dqm->dev->kgd, + qpd->vmid, + qpd->page_table_base); + kfd_flush_tlb(pdd); + } + + /* activate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_evicted) + continue; + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) { /* should not be here */ + pr_err("Cannot restore queue, mqd mgr is NULL\n"); + retval = -ENOMEM; + goto out; + } + q->properties.is_evicted = false; + q->properties.is_active = true; + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, &q->properties, + q->process->mm); + if (retval) + goto out; + dqm->queue_count++; + } + qpd->evicted = 0; +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int restore_process_queues_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct kfd_process_device *pdd; + uint32_t pd_base; + int retval = 0; + + pdd = qpd_to_pdd(qpd); + /* Retrieve PD base */ + pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + + mutex_lock(&dqm->lock); + if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ + goto out; + if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ + qpd->evicted--; + goto out; + } + + pr_info_ratelimited("Restoring PASID %u queues\n", + pdd->process->pasid); + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; + pr_debug("Updated PD address to 0x%08x\n", pd_base); + + /* activate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_evicted) + continue; + q->properties.is_evicted = false; + q->properties.is_active = true; + dqm->queue_count++; + } + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (!retval) + qpd->evicted = 0; +out: + mutex_unlock(&dqm->lock); + return retval; +} + static int register_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; + struct kfd_process_device *pdd; + uint32_t pd_base; int retval; n = kzalloc(sizeof(*n), GFP_KERNEL); @@ -463,9 +694,16 @@ static int register_process(struct device_queue_manager *dqm, n->qpd = qpd; + pdd = qpd_to_pdd(qpd); + /* Retrieve PD base */ + pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + mutex_lock(&dqm->lock); list_add(&n->list, &dqm->queues); + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; + retval = dqm->asic_ops.update_qpd(dqm, qpd); dqm->processes_count++; @@ -573,11 +811,12 @@ static void 
uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { init_interrupts(dqm); - return 0; + return pm_init(&dqm->packets, dqm); } static int stop_nocpsch(struct device_queue_manager *dqm) { + pm_uninit(&dqm->packets); return 0; } @@ -589,10 +828,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, if (dqm->sdma_bitmap == 0) return -ENOMEM; - bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap, - CIK_SDMA_QUEUES); - - clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap); + bit = ffs(dqm->sdma_bitmap) - 1; + dqm->sdma_bitmap &= ~(1 << bit); *sdma_queue_id = bit; return 0; @@ -603,7 +840,7 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, { if (sdma_queue_id >= CIK_SDMA_QUEUES) return; - set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); + dqm->sdma_bitmap |= (1 << sdma_queue_id); } static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, @@ -840,6 +1077,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_sdma_queue; } + /* + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ + if (qpd->evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); dqm->asic_ops.init_sdma_vm(dqm, q, qpd); @@ -1103,7 +1348,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, alternate_aperture_base, alternate_aperture_size); - if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) + if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", @@ -1250,8 +1495,24 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) if (!dqm) return NULL; + switch (dev->device_info->asic_family) { + /* HWS is not available on Hawaii. */ + case CHIP_HAWAII: + /* HWS depends on CWSR for timely dequeue. CWSR is not + * available on Tonga. + * + * FIXME: This argument also applies to Kaveri. 
+ */ + case CHIP_TONGA: + dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; + break; + default: + dqm->sched_policy = sched_policy; + break; + } + dqm->dev = dev; - switch (sched_policy) { + switch (dqm->sched_policy) { case KFD_SCHED_POLICY_HWS: case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: /* initialize dqm for cp scheduling */ @@ -1270,6 +1531,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.set_cache_memory_policy = set_cache_memory_policy; dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_cpsch; + dqm->ops.evict_process_queues = evict_process_queues_cpsch; + dqm->ops.restore_process_queues = restore_process_queues_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1286,9 +1549,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.set_cache_memory_policy = set_cache_memory_policy; dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_nocpsch; + dqm->ops.evict_process_queues = evict_process_queues_nocpsch; + dqm->ops.restore_process_queues = + restore_process_queues_nocpsch; break; default: - pr_err("Invalid scheduling policy %d\n", sched_policy); + pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); goto out_free; } @@ -1300,6 +1566,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_KAVERI: device_queue_manager_init_cik(&dqm->asic_ops); break; + + case CHIP_HAWAII: + device_queue_manager_init_cik_hawaii(&dqm->asic_ops); + break; + + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + device_queue_manager_init_vi_tonga(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index c61b693bfa8c..412beff3281d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -79,6 +79,10 @@ struct device_process_node { * * @process_termination: Clears all process queues belongs to that device. 
 *
+ * @evict_process_queues: Evict all active queues of a process
+ *
+ * @restore_process_queues: Restore all evicted queues of a process
+ *
 */
 
 struct device_queue_manager_ops {
@@ -129,6 +133,11 @@ struct device_queue_manager_ops {
 
 	int	(*process_termination)(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd);
+
+	int	(*evict_process_queues)(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd);
+	int	(*restore_process_queues)(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd);
 };
 
 struct device_queue_manager_asic_ops {
@@ -180,12 +189,17 @@ struct device_queue_manager {
 	unsigned int		*fence_addr;
 	struct kfd_mem_obj	*fence_mem;
 	bool			active_runlist;
+	int			sched_policy;
 };
 
 void device_queue_manager_init_cik(
 		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_cik_hawaii(
+		struct device_queue_manager_asic_ops *asic_ops);
 void device_queue_manager_init_vi(
 		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_vi_tonga(
+		struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 28e48c90c596..aed4c21417bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 		uint64_t alternate_aperture_size);
 static int update_qpd_cik(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd);
+static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd);
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 			struct qcm_process_device *qpd);
+static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
+			struct queue *q,
+			struct qcm_process_device *qpd);
 
 void device_queue_manager_init_cik(
 		struct device_queue_manager_asic_ops *asic_ops)
@@ -45,6 +50,14 @@ void device_queue_manager_init_cik(
 	asic_ops->init_sdma_vm = init_sdma_vm;
 }
 
+void device_queue_manager_init_cik_hawaii(
+		struct device_queue_manager_asic_ops *asic_ops)
+{
+	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
+	asic_ops->update_qpd = update_qpd_cik_hawaii;
+	asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
+}
+
 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
 {
 	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm,
 	return 0;
 }
 
+static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+	unsigned int temp;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
+			DEFAULT_MTYPE(MTYPE_NONCACHED) |
+			APE1_MTYPE(MTYPE_NONCACHED);
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+	 * aperture addresses.
+ */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + + pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +} + static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { @@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } + +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 2fbce57a2f21..fd60a116be37 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd); +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops) @@ -46,6 +57,14 @@ void device_queue_manager_init_vi( asic_ops->init_sdma_vm = init_sdma_vm; } +void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; + asic_ops->update_qpd = update_qpd_vi_tonga; + asic_ops->init_sdma_vm = init_sdma_vm_tonga; +} + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) { /* In 64-bit mode, we can only control the top 3 bits of the LDS, @@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, return true; } +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + uint32_t default_mtype; + uint32_t ape1_mtype; + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_UC : + MTYPE_NC; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? 
+ MTYPE_UC : + MTYPE_NC; + + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; + + return true; +} + static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm, return 0; } +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + MTYPE_UC << + SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + MTYPE_UC << + SH_MEM_CONFIG__APE1_MTYPE__SHIFT; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + + pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", + temp, qpd->sh_mem_bases); + + return 0; +} + static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { @@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } + +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 93aae5c1e78b..4890a90f1e44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -30,6 +30,7 @@ #include <linux/memory.h> #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_iommu.h" #include <linux/device.h> /* @@ -51,6 +52,7 @@ struct kfd_event_waiter { struct kfd_signal_page { uint64_t *kernel_address; uint64_t __user *user_address; + bool need_to_free_pages; }; @@ -78,6 +80,7 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) KFD_SIGNAL_EVENT_LIMIT * 8); page->kernel_address = backing_store; + page->need_to_free_pages = true; pr_debug("Allocated new event signal page at %p, for process %p\n", page, p); @@ -268,8 +271,9 @@ static void shutdown_signal_page(struct kfd_process *p) struct kfd_signal_page *page = p->signal_page; if (page) { - free_pages((unsigned long)page->kernel_address, - get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); + if (page->need_to_free_pages) + free_pages((unsigned long)page->kernel_address, + get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); kfree(page); } } @@ -291,6 +295,30 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev) return ev->type == KFD_EVENT_TYPE_SIGNAL; } +int kfd_event_page_set(struct kfd_process *p, void *kernel_address, + uint64_t size) +{ + struct kfd_signal_page *page; + + if (p->signal_page) + return -EBUSY; + + page = kzalloc(sizeof(*page), GFP_KERNEL); + if (!page) + return -ENOMEM; + + /* Initialize all events to unsignaled */ + memset(kernel_address, 
(uint8_t) UNSIGNALED_EVENT_SLOT, + KFD_SIGNAL_EVENT_LIMIT * 8); + + page->kernel_address = kernel_address; + + p->signal_page = page; + p->signal_mapped_size = size; + + return 0; +} + int kfd_event_create(struct file *devkfd, struct kfd_process *p, uint32_t event_type, bool auto_reset, uint32_t node_id, uint32_t *event_id, uint32_t *event_trigger_data, @@ -837,6 +865,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, } } +#ifdef KFD_SUPPORT_IOMMU_V2 void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, unsigned long address, bool is_write_requested, bool is_execute_requested) @@ -905,6 +934,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, mutex_unlock(&p->event_mutex); kfd_unref_process(p); } +#endif /* KFD_SUPPORT_IOMMU_V2 */ void kfd_signal_hw_exception_event(unsigned int pasid) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 7377513050e6..66852de410c8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -278,21 +278,28 @@ #define MAKE_GPUVM_APP_BASE(gpu_num) \ (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) -#define MAKE_GPUVM_APP_LIMIT(base) \ - (((uint64_t)(base) & \ - 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL) +#define MAKE_GPUVM_APP_LIMIT(base, size) \ + (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1) -#define MAKE_SCRATCH_APP_BASE(gpu_num) \ - (((uint64_t)(gpu_num) << 61) + 0x100000000L) +#define MAKE_SCRATCH_APP_BASE() \ + (((uint64_t)(0x1UL) << 61) + 0x100000000L) #define MAKE_SCRATCH_APP_LIMIT(base) \ (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) -#define MAKE_LDS_APP_BASE(gpu_num) \ - (((uint64_t)(gpu_num) << 61) + 0x0) +#define MAKE_LDS_APP_BASE() \ + (((uint64_t)(0x1UL) << 61) + 0x0) #define MAKE_LDS_APP_LIMIT(base) \ (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) +/* User mode manages most of the SVM aperture address space. The low + * 16MB are reserved for kernel use (CWSR trap handler and kernel IB + * for now). + */ +#define SVM_USER_BASE 0x1000000ull +#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE) +#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE) + int kfd_init_apertures(struct kfd_process *process) { uint8_t id = 0; @@ -314,7 +321,7 @@ int kfd_init_apertures(struct kfd_process *process) return -1; } /* - * For 64 bit process aperture will be statically reserved in + * For 64 bit process apertures will be statically reserved in * the x86_64 non canonical process address space * amdkfd doesn't currently support apertures for 32 bit process */ @@ -323,23 +330,35 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - /* - * node id couldn't be 0 - the three MSB bits of - * aperture shoudn't be 0 + /* Same LDS and scratch apertures can be used + * on all GPUs. This allows using more dGPUs + * than placement options for apertures. 
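As a quick sanity check of the reserved region described above, the following standalone sketch evaluates the SVM layout macros on the host. KFD_CWSR_TBA_TMA_SIZE is assumed here to be two pages and PAGE_SIZE to be 4 KiB; both are illustrative stand-ins for the kernel's definitions, not values taken from this patch.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE             0x1000ull           /* assumed 4 KiB */
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)     /* assumed two pages */

#define SVM_USER_BASE 0x1000000ull                /* low 16 MB reserved */
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE   (SVM_CWSR_BASE - PAGE_SIZE)

int main(void)
{
	/* The CWSR trap handler sits directly below the user region,
	 * the kernel IB page directly below that.
	 */
	printf("SVM_USER_BASE 0x%llx\n", SVM_USER_BASE); /* 0x1000000 */
	printf("SVM_CWSR_BASE 0x%llx\n", SVM_CWSR_BASE); /* 0xffe000  */
	printf("SVM_IB_BASE   0x%llx\n", SVM_IB_BASE);   /* 0xffd000  */
	return 0;
}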
*/ - pdd->lds_base = MAKE_LDS_APP_BASE(id + 1); - + pdd->lds_base = MAKE_LDS_APP_BASE(); pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); - - pdd->gpuvm_limit = - MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base); - - pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1); - + pdd->scratch_base = MAKE_SCRATCH_APP_BASE(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + + if (dev->device_info->needs_iommu_device) { + /* APUs: GPUVM aperture in + * non-canonical address space + */ + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT( + pdd->gpuvm_base, + dev->shared_resources.gpuvm_size); + } else { + /* dGPUs: SVM aperture starting at 0 + * with small reserved space for kernel + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + dev->shared_resources.gpuvm_size - 1; + pdd->qpd.cwsr_base = SVM_CWSR_BASE; + pdd->qpd.ib_base = SVM_IB_BASE; + } } dev_dbg(kfd_device, "node id %u\n", id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c new file mode 100644 index 000000000000..c71817963eea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -0,0 +1,357 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <linux/printk.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/amd-iommu.h> +#include "kfd_priv.h" +#include "kfd_dbgmgr.h" +#include "kfd_topology.h" +#include "kfd_iommu.h" + +static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | + AMD_IOMMU_DEVICE_FLAG_PRI_SUP | + AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + +/** kfd_iommu_check_device - Check whether IOMMU is available for device + */ +int kfd_iommu_check_device(struct kfd_dev *kfd) +{ + struct amd_iommu_device_info iommu_info; + int err; + + if (!kfd->device_info->needs_iommu_device) + return -ENODEV; + + iommu_info.flags = 0; + err = amd_iommu_device_info(kfd->pdev, &iommu_info); + if (err) + return err; + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) + return -ENODEV; + + return 0; +} + +/** kfd_iommu_device_init - Initialize IOMMU for device + */ +int kfd_iommu_device_init(struct kfd_dev *kfd) +{ + struct amd_iommu_device_info iommu_info; + unsigned int pasid_limit; + int err; + + if (!kfd->device_info->needs_iommu_device) + return 0; + + iommu_info.flags = 0; + err = amd_iommu_device_info(kfd->pdev, &iommu_info); + if (err < 0) { + dev_err(kfd_device, + "error getting iommu info. is the iommu enabled?\n"); + return -ENODEV; + } + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { + dev_err(kfd_device, + "error required iommu flags ats %i, pri %i, pasid %i\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) + != 0); + return -ENODEV; + } + + pasid_limit = min_t(unsigned int, + (unsigned int)(1 << kfd->device_info->max_pasid_bits), + iommu_info.max_pasids); + + if (!kfd_set_pasid_limit(pasid_limit)) { + dev_err(kfd_device, "error setting pasid limit\n"); + return -EBUSY; + } + + return 0; +} + +/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process + * + * Binds the given process to the given device using its PASID. This + * enables IOMMUv2 address translation for the process on the device. + * + * This function assumes that the process mutex is held. + */ +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + struct kfd_process *p = pdd->process; + int err; + + if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND) + return 0; + + if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { + pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); + return -EINVAL; + } + + err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); + if (!err) + pdd->bound = PDD_BOUND; + + return err; +} + +/** kfd_iommu_unbind_process - Unbind process from all devices + * + * This removes all IOMMU device bindings of the process. To be used + * before process termination. + */ +void kfd_iommu_unbind_process(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + if (pdd->bound == PDD_BOUND) + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); +} + +/* Callback for process shutdown invoked by the IOMMU driver */ +static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) +{ + struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); + struct kfd_process *p; + struct kfd_process_device *pdd; + + if (!dev) + return; + + /* + * Look for the process that matches the pasid. 
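The capability test in kfd_iommu_check_device() and kfd_iommu_device_init() above is a plain subset check on the flags word. A minimal standalone sketch of that check follows; the flag values are illustrative, not the real linux/amd-iommu.h definitions.

#include <stdbool.h>
#include <stdint.h>

#define ATS_SUP   (1u << 0) /* illustrative values only */
#define PRI_SUP   (1u << 1)
#define PASID_SUP (1u << 2)

static const uint32_t required_flags = ATS_SUP | PRI_SUP | PASID_SUP;

/* A device qualifies only if every required capability bit is present. */
static bool iommu_usable(uint32_t flags)
{
	return (flags & required_flags) == required_flags;
}

/* iommu_usable(ATS_SUP | PASID_SUP) is false: PRI support is missing. */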
If there is no such + * process, we either released it in amdkfd's own notifier, or there + * is a bug. Unfortunately, there is no way to tell... + */ + p = kfd_lookup_process_by_pasid(pasid); + if (!p) + return; + + pr_debug("Unbinding process %d from IOMMU\n", pasid); + + mutex_lock(kfd_get_dbgmgr_mutex()); + + if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } + } + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (pdd) + /* For GPU relying on IOMMU, we need to dequeue here + * when PASID is still bound. + */ + kfd_process_dequeue_from_device(pdd); + + mutex_unlock(&p->mutex); + + kfd_unref_process(p); +} + +/* This function is called by the IOMMU driver on PPR failure */ +static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, + unsigned long address, u16 flags) +{ + struct kfd_dev *dev; + + dev_warn(kfd_device, + "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", + PCI_BUS_NUM(pdev->devfn), + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), + pasid, + address, + flags); + + dev = kfd_device_by_pci_dev(pdev); + if (!WARN_ON(!dev)) + kfd_signal_iommu_event(dev, pasid, address, + flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); + + return AMD_IOMMU_INV_PRI_RSP_INVALID; +} + +/* + * Bind processes to the device that have been temporarily unbound + * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. + */ +static int kfd_bind_processes_to_device(struct kfd_dev *kfd) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + int err = 0; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(kfd, p); + + if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { + mutex_unlock(&p->mutex); + continue; + } + + err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, + p->lead_thread); + if (err < 0) { + pr_err("Unexpected pasid %d binding failure\n", + p->pasid); + mutex_unlock(&p->mutex); + break; + } + + pdd->bound = PDD_BOUND; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return err; +} + +/* + * Mark currently bound processes as PDD_BOUND_SUSPENDED. These + * processes will be restored to PDD_BOUND state in + * kfd_bind_processes_to_device. + */ +static void kfd_unbind_processes_from_device(struct kfd_dev *kfd) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(kfd, p); + + if (WARN_ON(!pdd)) { + mutex_unlock(&p->mutex); + continue; + } + + if (pdd->bound == PDD_BOUND) + pdd->bound = PDD_BOUND_SUSPENDED; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); +} + +/** kfd_iommu_suspend - Prepare IOMMU for suspend + * + * This unbinds processes from the device and disables the IOMMU for + * the device.
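The suspend/resume pair here drives a small per-device binding state machine over the PDD_UNBOUND, PDD_BOUND and PDD_BOUND_SUSPENDED values declared in kfd_priv.h. A sketch of just the transitions, detached from the driver:

enum pdd_bound_state { PDD_UNBOUND, PDD_BOUND, PDD_BOUND_SUSPENDED };

/* Suspend demotes only bound devices; unbound ones are left alone. */
static enum pdd_bound_state on_suspend(enum pdd_bound_state s)
{
	return (s == PDD_BOUND) ? PDD_BOUND_SUSPENDED : s;
}

/* Resume promotes a suspended device back once re-binding the PASID
 * succeeds; a bind failure leaves it suspended.
 */
static enum pdd_bound_state on_resume(enum pdd_bound_state s, int bind_err)
{
	return (s == PDD_BOUND_SUSPENDED && !bind_err) ? PDD_BOUND : s;
}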
+ */ +void kfd_iommu_suspend(struct kfd_dev *kfd) +{ + if (!kfd->device_info->needs_iommu_device) + return; + + kfd_unbind_processes_from_device(kfd); + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); +} + +/** kfd_iommu_resume - Restore IOMMU after resume + * + * This reinitializes the IOMMU for the device and re-binds previously + * suspended processes to the device. + */ +int kfd_iommu_resume(struct kfd_dev *kfd) +{ + unsigned int pasid_limit; + int err; + + if (!kfd->device_info->needs_iommu_device) + return 0; + + pasid_limit = kfd_get_pasid_limit(); + + err = amd_iommu_init_device(kfd->pdev, pasid_limit); + if (err) + return -ENXIO; + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, + iommu_invalid_ppr_cb); + + err = kfd_bind_processes_to_device(kfd); + if (err) { + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); + return err; + } + + return 0; +} + +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); + +/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology + */ +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) +{ + struct kfd_perf_properties *props; + + if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT)) + return 0; + + if (!amd_iommu_pc_supported()) + return 0; + + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + strcpy(props->block_name, "iommu"); + props->max_concurrent = amd_iommu_pc_get_max_banks(0) * + amd_iommu_pc_get_max_counters(0); /* assume one iommu */ + list_add_tail(&props->list, &kdev->perf_props); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h new file mode 100644 index 000000000000..dd23d9fdf6a8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h @@ -0,0 +1,78 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __KFD_IOMMU_H__ +#define __KFD_IOMMU_H__ + +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + +#define KFD_SUPPORT_IOMMU_V2 + +int kfd_iommu_check_device(struct kfd_dev *kfd); +int kfd_iommu_device_init(struct kfd_dev *kfd); + +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd); +void kfd_iommu_unbind_process(struct kfd_process *p); + +void kfd_iommu_suspend(struct kfd_dev *kfd); +int kfd_iommu_resume(struct kfd_dev *kfd); + +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev); + +#else + +static inline int kfd_iommu_check_device(struct kfd_dev *kfd) +{ + return -ENODEV; +} +static inline int kfd_iommu_device_init(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kfd_iommu_bind_process_to_device( + struct kfd_process_device *pdd) +{ + return 0; +} +static inline void kfd_iommu_unbind_process(struct kfd_process *p) +{ + /* empty */ +} + +static inline void kfd_iommu_suspend(struct kfd_dev *kfd) +{ + /* empty */ +} +static inline int kfd_iommu_resume(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) +{ + return 0; +} + +#endif /* defined(CONFIG_AMD_IOMMU_V2) */ + +#endif /* __KFD_IOMMU_H__ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 5dc6567d4a13..69f496485331 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, switch (dev->device_info->asic_family) { case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kernel_queue_init_vi(&kq->ops_asic_specific); break; case CHIP_KAVERI: + case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 3ac72bed4f31..e0c07d24d251 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .interrupt = kgd2kfd_interrupt, .suspend = kgd2kfd_suspend, .resume = kgd2kfd_resume, + .schedule_evict_and_restore_process = + kgd2kfd_schedule_evict_and_restore_process, }; int sched_policy = KFD_SCHED_POLICY_HWS; @@ -69,6 +71,11 @@ module_param(send_sigterm, int, 0444); MODULE_PARM_DESC(send_sigterm, "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); +int debug_largebar; +module_param(debug_largebar, int, 0444); +MODULE_PARM_DESC(debug_largebar, + "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)"); + int ignore_crat; module_param(ignore_crat, int, 0444); MODULE_PARM_DESC(ignore_crat, @@ -126,7 +133,9 @@ static int __init kfd_module_init(void) if (err < 0) goto err_topology; - kfd_process_create_wq(); + err = kfd_process_create_wq(); + if (err < 0) + goto err_create_wq; kfd_debugfs_init(); @@ -136,6 +145,8 @@ static int __init kfd_module_init(void) return 0; +err_create_wq: + kfd_topology_shutdown(); err_topology: kfd_chardev_exit(); err_ioctl: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index dfd260ef81ff..ee7061e1c466 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum 
KFD_MQD_TYPE type, switch (dev->device_info->asic_family) { case CHIP_KAVERI: return mqd_manager_init_cik(type, dev); + case CHIP_HAWAII: + return mqd_manager_init_cik_hawaii(type, dev); case CHIP_CARRIZO: return mqd_manager_init_vi(type, dev); + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + return mqd_manager_init_vi_tonga(type, dev); default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index f8ef4a051e08..c00c325ed3c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, mms); } -static int update_mqd(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) +static int __update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, unsigned int atc_bit) { struct cik_mqd *m; m = get_mqd(mqd); m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | - DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; + DEFAULT_MIN_AVAIL_SIZE; + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; + if (atc_bit) { + m->cp_hqd_pq_control |= PQ_ATC_EN; + m->cp_hqd_ib_control |= IB_ATC_EN; + } /* * Calculating queue size which is log base 2 of actual queue size -1 @@ -197,11 +202,24 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0); + q->queue_percent > 0 && + !q->is_evicted); return 0; } +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, 1); +} + +static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, 0); +} + static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -228,7 +246,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0); + q->queue_percent > 0 && + !q->is_evicted); return 0; } @@ -360,7 +379,8 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0); + q->queue_percent > 0 && + !q->is_evicted); return 0; } @@ -441,3 +461,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, return mqd; } +struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + mqd = mqd_manager_init_cik(type, dev); + if (!mqd) + return NULL; + if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + mqd->update_mqd = update_mqd_hawaii; + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 971aec0637dc..89e4242e43e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); m->cp_hqd_pq_doorbell_control = q->doorbell_off << @@ -196,7 +198,8 @@ static int 
__update_mqd(struct mqd_manager *mm, void *mqd, q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0); + q->queue_percent > 0 && + !q->is_evicted); return 0; } @@ -208,6 +211,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, return __update_mqd(mm, mqd, q, MTYPE_CC, 1); } +static int update_mqd_tonga(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, MTYPE_UC, 0); +} + static int destroy_mqd(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout, uint32_t pipe_id, @@ -334,7 +343,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0); + q->queue_percent > 0 && + !q->is_evicted); return 0; } @@ -432,3 +442,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, return mqd; } + +struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + mqd = mqd_manager_init_vi(type, dev); + if (!mqd) + return NULL; + if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + mqd->update_mqd = update_mqd_tonga; + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 0c3bc00978f7..89ba4c670ec5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -355,6 +355,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; } +/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size + * of this packet + * @gpu_addr - GPU address of the packet. It's a virtual address. + * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer + * Return - length of the packet + */ +uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + WARN_ON(!buffer); + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(*packet)); + + packet->header.u32All = build_pm4_header(IT_RELEASE_MEM, + sizeof(*packet)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + packet->bitfields2.atc = 0; + + packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel___release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return sizeof(*packet) / sizeof(unsigned int); +} + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { pm->dqm = dqm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0bedcf9cc08c..96a9cc0f02c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -105,6 +105,12 @@ extern int cwsr_enable; extern int send_sigterm; /* + * This module parameter is used to simulate a large-BAR machine on machines + * without large-BAR support.
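One detail of pm_create_release_mem() above that is easy to misread: the low half of the fence address is stored pre-shifted by two bits, since the write target must be 4-byte aligned. A standalone worked example of that split (the address value is arbitrary):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t gpu_addr = 0x12345678abcdef00ull;  /* example GPU VA */
	uint32_t address_lo_32b = (uint32_t)(gpu_addr & 0xffffffff) >> 2;
	uint32_t address_hi = (uint32_t)(gpu_addr >> 32);

	/* prints: lo 0x2af37bc0 hi 0x12345678 */
	printf("lo 0x%x hi 0x%x\n", address_lo_32b, address_hi);
	return 0;
}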
+ */ +extern int debug_largebar; + +/* * Ignore CRAT table during KFD initialization, can be used to work around * broken CRAT tables on some AMD systems */ @@ -158,6 +164,8 @@ struct kfd_device_info { uint8_t num_of_watch_points; uint16_t mqd_size_aligned; bool supports_cwsr; + bool needs_iommu_device; + bool needs_pci_atomics; }; struct kfd_mem_obj { @@ -333,7 +341,11 @@ enum kfd_queue_format { * @is_interop: Defines if this is a interop queue. Interop queue means that * the queue can access both graphics and compute resources. * - * @is_active: Defines if the queue is active or not. + * @is_evicted: Defines if the queue is evicted. Only active queues + * are evicted, rendering them inactive. + * + * @is_active: Defines if the queue is active or not. @is_active and + * @is_evicted are protected by the DQM lock. * * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid * of the queue. @@ -355,6 +367,7 @@ struct queue_properties { uint32_t __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; + bool is_evicted; bool is_active; /* Not relevant for user mode queues in cp scheduling */ unsigned int vmid; @@ -458,6 +471,7 @@ struct qcm_process_device { unsigned int queue_count; unsigned int vmid; bool is_debug; + unsigned int evicted; /* eviction counter, 0=active */ /* This flag tells if we should reset all wavefronts on * process termination @@ -480,10 +494,34 @@ struct qcm_process_device { /* CWSR memory */ void *cwsr_kaddr; + uint64_t cwsr_base; uint64_t tba_addr; uint64_t tma_addr; + + /* IB memory */ + uint64_t ib_base; + void *ib_kaddr; }; +/* KFD Memory Eviction */ + +/* Approx. wait time before attempting to restore evicted BOs */ +#define PROCESS_RESTORE_TIME_MS 100 +/* Approx. back off time if restore fails due to lack of memory */ +#define PROCESS_BACK_OFF_TIME_MS 100 +/* Approx. time before evicting the process again */ +#define PROCESS_ACTIVE_TIME_MS 10 + +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence); + +/* 8 byte handle containing GPU ID in the most significant 4 bytes and + * idr_handle in the least significant 4 bytes + */ +#define MAKE_HANDLE(gpu_id, idr_handle) \ + (((uint64_t)(gpu_id) << 32) + idr_handle) +#define GET_GPU_ID(handle) (handle >> 32) +#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) enum kfd_pdd_bound { PDD_UNBOUND = 0, @@ -516,8 +554,12 @@ struct kfd_process_device { uint64_t scratch_base; uint64_t scratch_limit; - /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ - enum kfd_pdd_bound bound; + /* VM context for GPUVM allocations */ + struct file *drm_file; + void *vm; + + /* GPUVM allocations storage */ + struct idr alloc_idr; /* Flag used to tell the pdd has dequeued from the dqm. * This is used to prevent dev->dqm->ops.process_termination() from @@ -525,6 +567,9 @@ struct kfd_process_device { * function. */ bool already_dequeued; + + /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ + enum kfd_pdd_bound bound; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -587,8 +632,30 @@ struct kfd_process { size_t signal_mapped_size; size_t signal_event_count; bool signal_event_limit_reached; + + /* Information used for memory eviction */ + void *kgd_process_info; + /* Eviction fence that is attached to all the BOs of this process. 
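A round-trip check of the 64-bit handle packing defined above (MAKE_HANDLE and its accessors copied verbatim; the example IDs are arbitrary):

#include <stdio.h>
#include <stdint.h>

#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + idr_handle)
#define GET_GPU_ID(handle) (handle >> 32)
#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)

int main(void)
{
	uint64_t handle = MAKE_HANDLE(0xabcdu, 42u);

	/* handle 0xabcd0000002a: GPU ID in the top 4 bytes,
	 * idr handle in the bottom 4 bytes
	 */
	printf("handle 0x%llx gpu_id 0x%llx idr %llu\n",
	       (unsigned long long)handle,
	       (unsigned long long)GET_GPU_ID(handle),
	       (unsigned long long)GET_IDR_HANDLE(handle));
	return 0;
}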
The + * fence will be triggered during eviction and a new one will be created + * during restore + */ + struct dma_fence *ef; + + /* Work items for evicting and restoring BOs */ + struct delayed_work eviction_work; + struct delayed_work restore_work; + /* seqno of the last scheduled eviction */ + unsigned int last_eviction_seqno; + /* Approx. the last timestamp (in jiffies) when the process was + * restored after an eviction + */ + unsigned long last_restore_timestamp; }; +#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ +extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); +extern struct srcu_struct kfd_processes_srcu; + /** * Ioctl function type. * @@ -607,18 +674,20 @@ struct amdkfd_ioctl_desc { const char *name; }; -void kfd_process_create_wq(void); +int kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); +struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); +void kfd_suspend_all_processes(void); +int kfd_resume_all_processes(void); +int kfd_process_device_init_vm(struct kfd_process_device *pdd, + struct file *drm_file); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); -int kfd_bind_processes_to_device(struct kfd_dev *dev); -void kfd_unbind_processes_from_device(struct kfd_dev *dev); -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p); struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, @@ -627,6 +696,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma); +/* KFD process API for creating and translating handles */ +int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, + void *mem); +void *kfd_process_device_translate_handle(struct kfd_process_device *p, + int handle); +void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, + int handle); + /* Process device data iterator */ struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process *p); @@ -705,8 +782,12 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -768,6 +849,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm); +uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); + uint64_t kfd_get_number_elems(struct kfd_dev *kfd); /* Events */ @@ -789,12 +872,16 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, void kfd_signal_hw_exception_event(unsigned int pasid); int kfd_set_event(struct
kfd_process *p, uint32_t event_id); int kfd_reset_event(struct kfd_process *p, uint32_t event_id); +int kfd_event_page_set(struct kfd_process *p, void *kernel_address, + uint64_t size); int kfd_event_create(struct file *devkfd, struct kfd_process *p, uint32_t event_type, bool auto_reset, uint32_t node_id, uint32_t *event_id, uint32_t *event_trigger_data, uint64_t *event_page_offset, uint32_t *event_slot_index); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); +void kfd_flush_tlb(struct kfd_process_device *pdd); + int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); /* Debugfs */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 4ff5f0fe6db8..1711ad0642f7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -30,35 +30,57 @@ #include <linux/notifier.h> #include <linux/compat.h> #include <linux/mman.h> +#include <linux/file.h> struct mm_struct; #include "kfd_priv.h" +#include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" +#include "kfd_iommu.h" /* * List of struct kfd_process (field kfd_process). * Unique/indexed by mm_struct* */ -#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ -static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); +DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); static DEFINE_MUTEX(kfd_processes_mutex); -DEFINE_STATIC_SRCU(kfd_processes_srcu); +DEFINE_SRCU(kfd_processes_srcu); +/* For process termination handling */ static struct workqueue_struct *kfd_process_wq; +/* Ordered, single-threaded workqueue for restoring evicted + * processes. Restoring multiple processes concurrently under memory + * pressure can lead to processes blocking each other from validating + * their BOs and result in a live-lock situation where processes + * remain evicted indefinitely. + */ +static struct workqueue_struct *kfd_restore_wq; + static struct kfd_process *find_process(const struct task_struct *thread); static void kfd_process_ref_release(struct kref *ref); static struct kfd_process *create_process(const struct task_struct *thread, struct file *filep); -static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); + +static void evict_process_worker(struct work_struct *work); +static void restore_process_worker(struct work_struct *work); -void kfd_process_create_wq(void) +int kfd_process_create_wq(void) { if (!kfd_process_wq) kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); + if (!kfd_restore_wq) + kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0); + + if (!kfd_process_wq || !kfd_restore_wq) { + kfd_process_destroy_wq(); + return -ENOMEM; + } + + return 0; } void kfd_process_destroy_wq(void) @@ -67,6 +89,116 @@ void kfd_process_destroy_wq(void) destroy_workqueue(kfd_process_wq); kfd_process_wq = NULL; } + if (kfd_restore_wq) { + destroy_workqueue(kfd_restore_wq); + kfd_restore_wq = NULL; + } +} + +static void kfd_process_free_gpuvm(struct kgd_mem *mem, + struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + + dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm); + dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem); +} + +/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process + * This function should be only called right after the process + * is created and when kfd_processes_mutex is still being held + * to avoid concurrency. Because of that exclusiveness, we do + * not need to take p->mutex. 
+ */ +static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, + uint64_t gpu_va, uint32_t size, + uint32_t flags, void **kptr) +{ + struct kfd_dev *kdev = pdd->dev; + struct kgd_mem *mem = NULL; + int handle; + int err; + + err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, + pdd->vm, &mem, NULL, flags); + if (err) + goto err_alloc_mem; + + err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm); + if (err) + goto err_map_mem; + + err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true); + if (err) { + pr_debug("Sync memory failed, wait interrupted by user signal\n"); + goto sync_memory_failed; + } + + /* Create an obj handle so kfd_process_device_remove_obj_handle + * will take care of the bo removal when the process finishes. + * We do not need to take p->mutex, because the process is just + * created and the ioctls have not had the chance to run. + */ + handle = kfd_process_device_create_obj_handle(pdd, mem); + + if (handle < 0) { + err = handle; + goto free_gpuvm; + } + + if (kptr) { + err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd, + (struct kgd_mem *)mem, kptr, NULL); + if (err) { + pr_debug("Map GTT BO to kernel failed\n"); + goto free_obj_handle; + } + } + + return err; + +free_obj_handle: + kfd_process_device_remove_obj_handle(pdd, handle); +free_gpuvm: +sync_memory_failed: + kfd_process_free_gpuvm(mem, pdd); + return err; + +err_map_mem: + kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); +err_alloc_mem: + *kptr = NULL; + return err; +} + +/* kfd_process_device_reserve_ib_mem - Reserve memory inside the + * process for IB usage. The memory reserved is for KFD to submit + * an IB to AMDGPU from the kernel. If the memory is reserved + * successfully, ib_kaddr will have the CPU/kernel + * address. Check ib_kaddr before accessing the memory.
+ */ +static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) +{ + struct qcm_process_device *qpd = &pdd->qpd; + uint32_t flags = ALLOC_MEM_FLAGS_GTT | + ALLOC_MEM_FLAGS_NO_SUBSTITUTE | + ALLOC_MEM_FLAGS_WRITABLE | + ALLOC_MEM_FLAGS_EXECUTABLE; + void *kaddr; + int ret; + + if (qpd->ib_kaddr || !qpd->ib_base) + return 0; + + /* ib_base is only set for dGPU */ + ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags, + &kaddr); + if (ret) + return ret; + + qpd->ib_kaddr = kaddr; + + return 0; } struct kfd_process *kfd_create_process(struct file *filep) @@ -145,6 +277,40 @@ void kfd_unref_process(struct kfd_process *p) kref_put(&p->ref, kfd_process_ref_release); } +static void kfd_process_device_free_bos(struct kfd_process_device *pdd) +{ + struct kfd_process *p = pdd->process; + void *mem; + int id; + + /* + * Remove all handles from idr and release appropriate + * local memory object + */ + idr_for_each_entry(&pdd->alloc_idr, mem, id) { + struct kfd_process_device *peer_pdd; + + list_for_each_entry(peer_pdd, &p->per_device_data, + per_device_list) { + if (!peer_pdd->vm) + continue; + peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu( + peer_pdd->dev->kgd, mem, peer_pdd->vm); + } + + pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem); + kfd_process_device_remove_obj_handle(pdd, id); + } +} + +static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + kfd_process_device_free_bos(pdd); +} + static void kfd_process_destroy_pdds(struct kfd_process *p) { struct kfd_process_device *pdd, *temp; @@ -154,12 +320,20 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", pdd->dev->id, p->pasid); + if (pdd->drm_file) + fput(pdd->drm_file); + else if (pdd->vm) + pdd->dev->kfd2kgd->destroy_process_vm( + pdd->dev->kgd, pdd->vm); + list_del(&pdd->per_device_list); - if (pdd->qpd.cwsr_kaddr) + if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); + idr_destroy(&pdd->alloc_idr); + kfree(pdd); } } @@ -173,16 +347,13 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); - struct kfd_process_device *pdd; - pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); + kfd_iommu_unbind_process(p); - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - if (pdd->bound == PDD_BOUND) - amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); - } + kfd_process_free_outstanding_kfd_bos(p); kfd_process_destroy_pdds(p); + dma_fence_put(p->ef); kfd_event_free_process(p); @@ -230,6 +401,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_unlock(&kfd_processes_mutex); synchronize_srcu(&kfd_processes_srcu); + cancel_delayed_work_sync(&p->eviction_work); + cancel_delayed_work_sync(&p->restore_work); + mutex_lock(&p->mutex); /* Iterate over all process device data structures and if the @@ -265,18 +439,18 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, }; -static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) +static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; - struct kfd_process_device *pdd = NULL; - struct kfd_dev *dev = NULL; - struct qcm_process_device *qpd = 
NULL; + struct kfd_process_device *pdd; list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - dev = pdd->dev; - qpd = &pdd->qpd; - if (!dev->cwsr_enabled || qpd->cwsr_kaddr) + struct kfd_dev *dev = pdd->dev; + struct qcm_process_device *qpd = &pdd->qpd; + + if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) continue; + offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; qpd->tba_addr = (int64_t)vm_mmap(filep, 0, KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, @@ -301,6 +475,36 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) return 0; } +static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + struct qcm_process_device *qpd = &pdd->qpd; + uint32_t flags = ALLOC_MEM_FLAGS_GTT | + ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE; + void *kaddr; + int ret; + + if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) + return 0; + + /* cwsr_base is only set for dGPU */ + ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base, + KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr); + if (ret) + return ret; + + qpd->cwsr_kaddr = kaddr; + qpd->tba_addr = qpd->cwsr_base; + + memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; + pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", + qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); + + return 0; +} + static struct kfd_process *create_process(const struct task_struct *thread, struct file *filep) { @@ -351,13 +555,18 @@ static struct kfd_process *create_process(const struct task_struct *thread, if (err != 0) goto err_init_apertures; - err = kfd_process_init_cwsr(process, filep); + INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); + INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); + process->last_restore_timestamp = get_jiffies_64(); + + err = kfd_process_init_cwsr_apu(process, filep); if (err) goto err_init_cwsr; return process; err_init_cwsr: + kfd_process_free_outstanding_kfd_bos(process); kfd_process_destroy_pdds(process); err_init_apertures: pqm_uninit(&process->pqm); @@ -402,14 +611,78 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); pdd->qpd.dqm = dev->dqm; pdd->qpd.pqm = &p->pqm; + pdd->qpd.evicted = 0; pdd->process = p; pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; list_add(&pdd->per_device_list, &p->per_device_data); + /* Init idr used for memory handle translation */ + idr_init(&pdd->alloc_idr); + return pdd; } +/** + * kfd_process_device_init_vm - Initialize a VM for a process-device + * + * @pdd: The process-device + * @drm_file: Optional pointer to a DRM file descriptor + * + * If @drm_file is specified, it will be used to acquire the VM from + * that file descriptor. If successful, the @pdd takes ownership of + * the file descriptor. + * + * If @drm_file is NULL, a new VM is created. + * + * Returns 0 on success, -errno on failure. + */ +int kfd_process_device_init_vm(struct kfd_process_device *pdd, + struct file *drm_file) +{ + struct kfd_process *p; + struct kfd_dev *dev; + int ret; + + if (pdd->vm) + return drm_file ? 
-EBUSY : 0; + + p = pdd->process; + dev = pdd->dev; + + if (drm_file) + ret = dev->kfd2kgd->acquire_process_vm( + dev->kgd, drm_file, + &pdd->vm, &p->kgd_process_info, &p->ef); + else + ret = dev->kfd2kgd->create_process_vm( + dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef); + if (ret) { + pr_err("Failed to create process VM object\n"); + return ret; + } + + ret = kfd_process_device_reserve_ib_mem(pdd); + if (ret) + goto err_reserve_ib_mem; + ret = kfd_process_device_init_cwsr_dgpu(pdd); + if (ret) + goto err_init_cwsr; + + pdd->drm_file = drm_file; + + return 0; + +err_init_cwsr: +err_reserve_ib_mem: + kfd_process_device_free_bos(pdd); + if (!drm_file) + dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm); + pdd->vm = NULL; + + return ret; +} + /* * Direct the IOMMU to bind the process (specifically the pasid->mm) * to the device. @@ -429,174 +702,291 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } - if (pdd->bound == PDD_BOUND) { - return pdd; - } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { - pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); - return ERR_PTR(-EINVAL); - } - - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); - if (err < 0) + err = kfd_iommu_bind_process_to_device(pdd); + if (err) return ERR_PTR(err); - pdd->bound = PDD_BOUND; + err = kfd_process_device_init_vm(pdd, NULL); + if (err) + return ERR_PTR(err); return pdd; } -/* - * Bind processes do the device that have been temporarily unbound - * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. +struct kfd_process_device *kfd_get_first_process_device_data( + struct kfd_process *p) +{ + return list_first_entry(&p->per_device_data, + struct kfd_process_device, + per_device_list); +} + +struct kfd_process_device *kfd_get_next_process_device_data( + struct kfd_process *p, + struct kfd_process_device *pdd) +{ + if (list_is_last(&pdd->per_device_list, &p->per_device_data)) + return NULL; + return list_next_entry(pdd, per_device_list); +} + +bool kfd_has_process_device_data(struct kfd_process *p) +{ + return !(list_empty(&p->per_device_data)); +} + +/* Create specific handle mapped to mem from process local memory idr + * Assumes that the process lock is held. */ -int kfd_bind_processes_to_device(struct kfd_dev *dev) +int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, + void *mem) { - struct kfd_process_device *pdd; - struct kfd_process *p; + return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL); +} + +/* Translate specific handle from process local memory idr + * Assumes that the process lock is held. + */ +void *kfd_process_device_translate_handle(struct kfd_process_device *pdd, + int handle) +{ + if (handle < 0) + return NULL; + + return idr_find(&pdd->alloc_idr, handle); +} + +/* Remove specific handle from process local memory idr + * Assumes that the process lock is held. + */ +void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, + int handle) +{ + if (handle >= 0) + idr_remove(&pdd->alloc_idr, handle); +} + +/* This increments the process->ref counter. 
*/ +struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) +{ + struct kfd_process *p, *ret_p = NULL; unsigned int temp; - int err = 0; int idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(dev, p); - - if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { - mutex_unlock(&p->mutex); - continue; - } - - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, - p->lead_thread); - if (err < 0) { - pr_err("Unexpected pasid %d binding failure\n", - p->pasid); - mutex_unlock(&p->mutex); + if (p->pasid == pasid) { + kref_get(&p->ref); + ret_p = p; break; } - - pdd->bound = PDD_BOUND; - mutex_unlock(&p->mutex); } srcu_read_unlock(&kfd_processes_srcu, idx); - return err; + return ret_p; } -/* - * Mark currently bound processes as PDD_BOUND_SUSPENDED. These - * processes will be restored to PDD_BOUND state in - * kfd_bind_processes_to_device. - */ -void kfd_unbind_processes_from_device(struct kfd_dev *dev) +/* This increments the process->ref counter. */ +struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) { - struct kfd_process_device *pdd; struct kfd_process *p; - unsigned int temp; int idx = srcu_read_lock(&kfd_processes_srcu); - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(dev, p); - - if (WARN_ON(!pdd)) { - mutex_unlock(&p->mutex); - continue; - } - - if (pdd->bound == PDD_BOUND) - pdd->bound = PDD_BOUND_SUSPENDED; - mutex_unlock(&p->mutex); - } + p = find_process_by_mm(mm); + if (p) + kref_get(&p->ref); srcu_read_unlock(&kfd_processes_srcu, idx); + + return p; } -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) +/* process_evict_queues - Evict all user queues of a process + * + * Eviction is reference-counted per process-device. This means multiple + * evictions from different sources can be nested safely. + */ +static int process_evict_queues(struct kfd_process *p) { - struct kfd_process *p; struct kfd_process_device *pdd; + int r = 0; + unsigned int n_evicted = 0; - /* - * Look for the process that matches the pasid. If there is no such - * process, we either released it in amdkfd's own notifier, or there - * is a bug. Unfortunately, there is no way to tell... - */ - p = kfd_lookup_process_by_pasid(pasid); - if (!p) - return; + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, + &pdd->qpd); + if (r) { + pr_err("Failed to evict process queues\n"); + goto fail; + } + n_evicted++; + } - pr_debug("Unbinding process %d from IOMMU\n", pasid); + return r; - mutex_lock(kfd_get_dbgmgr_mutex()); +fail: + /* To keep state consistent, roll back partial eviction by + * restoring queues + */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + if (n_evicted == 0) + break; + if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, + &pdd->qpd)) + pr_err("Failed to restore queues\n"); - if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } + n_evicted--; } - mutex_unlock(kfd_get_dbgmgr_mutex()); - - mutex_lock(&p->mutex); + return r; +} - pdd = kfd_get_process_device_data(dev, p); - if (pdd) - /* For GPU relying on IOMMU, we need to dequeue here - * when PASID is still bound. 
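process_evict_queues() above counts successful per-device evictions and, on a later failure, restores exactly that many, keeping the per-device eviction counters balanced. The same shape in a generic standalone sketch; struct item, evict() and restore() are placeholders, not KFD symbols:

struct item { int id; };

static int evict(struct item *it)    { (void)it; return 0; } /* placeholder */
static void restore(struct item *it) { (void)it; }           /* placeholder */

static int evict_all(struct item *items, unsigned int count)
{
	unsigned int n_evicted = 0;
	int r = 0;

	while (n_evicted < count) {
		r = evict(&items[n_evicted]);
		if (r)
			goto fail;
		n_evicted++;
	}
	return 0;

fail:
	/* roll back exactly the entries already evicted */
	while (n_evicted--)
		restore(&items[n_evicted]);
	return r;
}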
- */ - kfd_process_dequeue_from_device(pdd); +/* process_restore_queues - Restore all user queues of a process */ +static int process_restore_queues(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + int r, ret = 0; - mutex_unlock(&p->mutex); + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, + &pdd->qpd); + if (r) { + pr_err("Failed to restore process queues\n"); + if (!ret) + ret = r; + } + } - kfd_unref_process(p); + return ret; } -struct kfd_process_device *kfd_get_first_process_device_data( - struct kfd_process *p) +static void evict_process_worker(struct work_struct *work) { - return list_first_entry(&p->per_device_data, - struct kfd_process_device, - per_device_list); + int ret; + struct kfd_process *p; + struct delayed_work *dwork; + + dwork = to_delayed_work(work); + + /* Process termination destroys this worker thread. So during the + * lifetime of this thread, kfd_process p will be valid + */ + p = container_of(dwork, struct kfd_process, eviction_work); + WARN_ONCE(p->last_eviction_seqno != p->ef->seqno, + "Eviction fence mismatch\n"); + + /* A narrow window of overlap between the restore and evict work + * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos + * unreserves KFD BOs, it is possible to be evicted again. But + * restore has a few more steps to finish. So let's wait for any + * previous restore work to complete + */ + flush_delayed_work(&p->restore_work); + + pr_debug("Started evicting pasid %d\n", p->pasid); + ret = process_evict_queues(p); + if (!ret) { + dma_fence_signal(p->ef); + dma_fence_put(p->ef); + p->ef = NULL; + queue_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); + + pr_debug("Finished evicting pasid %d\n", p->pasid); + } else + pr_err("Failed to evict queues of pasid %d\n", p->pasid); } -struct kfd_process_device *kfd_get_next_process_device_data( - struct kfd_process *p, - struct kfd_process_device *pdd) +static void restore_process_worker(struct work_struct *work) { - if (list_is_last(&pdd->per_device_list, &p->per_device_data)) - return NULL; - return list_next_entry(pdd, per_device_list); + struct delayed_work *dwork; + struct kfd_process *p; + struct kfd_process_device *pdd; + int ret = 0; + + dwork = to_delayed_work(work); + + /* Process termination destroys this worker thread. So during the + * lifetime of this thread, kfd_process p will be valid + */ + p = container_of(dwork, struct kfd_process, restore_work); + + /* Call restore_process_bos on the first KGD device. This function + * takes care of restoring the whole process including other devices. + * Restore can fail if enough memory is not available. If so, + * reschedule again. + */ + pdd = list_first_entry(&p->per_device_data, + struct kfd_process_device, + per_device_list); + + pr_debug("Started restoring pasid %d\n", p->pasid); + + /* Setting last_restore_timestamp before successful restoration. + * Otherwise this would have to be set by KGD (restore_process_bos) + * before KFD BOs are unreserved. If not, the process can be evicted + * again before the timestamp is set. + * If restore fails, the timestamp will be set again in the next + * attempt.
This would mean that the minimum GPU quanta would be + * PROCESS_ACTIVE_TIME_MS - (time to execute the following two + * functions) + */ + + p->last_restore_timestamp = get_jiffies_64(); + ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info, + &p->ef); + if (ret) { + pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", + p->pasid, PROCESS_BACK_OFF_TIME_MS); + ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); + WARN(!ret, "reschedule restore work failed\n"); + return; + } + + ret = process_restore_queues(p); + if (!ret) + pr_debug("Finished restoring pasid %d\n", p->pasid); + else + pr_err("Failed to restore queues of pasid %d\n", p->pasid); } -bool kfd_has_process_device_data(struct kfd_process *p) +void kfd_suspend_all_processes(void) { - return !(list_empty(&p->per_device_data)); + struct kfd_process *p; + unsigned int temp; + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + cancel_delayed_work_sync(&p->eviction_work); + cancel_delayed_work_sync(&p->restore_work); + + if (process_evict_queues(p)) + pr_err("Failed to suspend process %d\n", p->pasid); + dma_fence_signal(p->ef); + dma_fence_put(p->ef); + p->ef = NULL; + } + srcu_read_unlock(&kfd_processes_srcu, idx); } -/* This increments the process->ref counter. */ -struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) +int kfd_resume_all_processes(void) { - struct kfd_process *p, *ret_p = NULL; + struct kfd_process *p; unsigned int temp; - - int idx = srcu_read_lock(&kfd_processes_srcu); + int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - if (p->pasid == pasid) { - kref_get(&p->ref); - ret_p = p; - break; + if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { + pr_err("Restore process %d failed during resume\n", + p->pasid); + ret = -EFAULT; } } - srcu_read_unlock(&kfd_processes_srcu, idx); - - return ret_p; + return ret; } int kfd_reserved_mem_mmap(struct kfd_process *process, @@ -633,6 +1023,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); } +void kfd_flush_tlb(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + const struct kfd2kgd_calls *f2g = dev->kfd2kgd; + + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { + /* Nothing to flush until a VMID is assigned, which + * only happens when the first queue is created. 
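The pacing of the evict/restore cycle implemented above comes from the three constants defined in kfd_priv.h. A standalone illustration of one cycle in which the first restore attempt fails once (timing only; no kernel APIs involved):

#include <stdio.h>

#define PROCESS_RESTORE_TIME_MS 100
#define PROCESS_BACK_OFF_TIME_MS 100

int main(void)
{
	int t = 0;

	printf("t=%3d ms: queues evicted, eviction fence signaled\n", t);
	t += PROCESS_RESTORE_TIME_MS;
	printf("t=%3d ms: restore attempt 1 fails (not enough memory)\n", t);
	t += PROCESS_BACK_OFF_TIME_MS;
	printf("t=%3d ms: restore attempt 2 succeeds, queues run again\n", t);
	return 0;
}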
int kfd_reserved_mem_mmap(struct kfd_process *process,
@@ -633,6 +1023,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
KFD_CWSR_TBA_TMA_SIZE,
vma->vm_page_prot);
}
+void kfd_flush_tlb(struct kfd_process_device *pdd)
+{
+ struct kfd_dev *dev = pdd->dev;
+ const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
+
+ if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+ /* Nothing to flush until a VMID is assigned, which
+ * only happens when the first queue is created.
+ */
+ if (pdd->qpd.vmid)
+ f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+ } else {
+ f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+ }
+}
+
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 876380632668..7817e327ea6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
case KFD_QUEUE_TYPE_COMPUTE:
/* check if there is over-subscription */
- if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
+ if ((dev->dqm->sched_policy ==
+ KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
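For context, a hedged sketch of the kind of call site kfd_flush_tlb() above is written for: flushing stale GPU translations after the page tables of a process/device pair have been updated. The helper and its page-table update call are hypothetical; only kfd_flush_tlb() itself comes from this patch:

/* Hypothetical map path: once the page tables for this process/device
 * pair have been updated, drop stale TLB entries. kfd_flush_tlb()
 * picks VMID-based invalidation under NO_HWS scheduling and
 * PASID-based invalidation otherwise, as shown above.
 */
static int example_update_page_tables(struct kfd_process_device *pdd); /* hypothetical */

static int example_map_memory(struct kfd_process_device *pdd)
{
	int r = example_update_page_tables(pdd);

	if (!r)
		kfd_flush_tlb(pdd);
	return r;
}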
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c6a76090a725..ac28abc94e57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -35,6 +35,7 @@
#include "kfd_crat.h"
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
+#include "kfd_iommu.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
@@ -440,6 +441,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.device_id);
sysfs_show_32bit_prop(buffer, "location_id",
dev->node_props.location_id);
+ sysfs_show_32bit_prop(buffer, "drm_render_minor",
+ dev->node_props.drm_render_minor);
if (dev->gpu) {
log_max_watch_addr =
@@ -677,7 +680,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
}
/* All hardware blocks have the same number of attributes. */
- num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
+ num_attrs = ARRAY_SIZE(perf_attr_iommu);
list_for_each_entry(perf, &dev->perf_props, list) {
perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) *
num_attrs + sizeof(struct attribute_group),
@@ -875,19 +878,8 @@ static void find_system_memory(const struct dmi_header *dm,
*/
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
- struct kfd_perf_properties *props;
-
- if (amd_iommu_pc_supported()) {
- props = kfd_alloc_struct(props);
- if (!props)
- return -ENOMEM;
- strcpy(props->block_name, "iommu");
- props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
- amd_iommu_pc_get_max_counters(0); /* assume one iommu */
- list_add_tail(&props->list, &kdev->perf_props);
- }
-
- return 0;
+ /* These are the only counters supported so far */
+ return kfd_iommu_add_perf_counters(kdev);
}
/* kfd_add_non_crat_information - Add information that is not currently
@@ -1224,6 +1216,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
dev->node_props.max_engine_clk_ccompute =
cpufreq_quick_get_max(0) / 1000;
+ dev->node_props.drm_render_minor =
+ gpu->shared_resources.drm_render_minor;
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 53fca1f45401..eb54cfcaf039 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -25,7 +25,7 @@
#include <linux/types.h>
#include <linux/list.h>
-#include "kfd_priv.h"
+#include "kfd_crat.h"
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128
@@ -71,6 +71,7 @@ struct kfd_node_properties {
uint32_t location_id;
uint32_t max_engine_clk_fcompute;
uint32_t max_engine_clk_ccompute;
+ int32_t drm_render_minor;
uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
@@ -183,8 +184,4 @@ struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);
-extern bool amd_iommu_pc_supported(void);
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
-
#endif /* __KFD_TOPOLOGY_H__ */
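With drm_render_minor now exported through the topology, user space can match a KFD node to its DRM render device. A minimal hedged sketch of a reader; the sysfs path follows the conventional KFD topology layout and is an assumption, not something this diff establishes:

/* Hypothetical userspace reader: scan a node's properties file for
 * drm_render_minor, e.g.
 *   /sys/class/kfd/kfd/topology/nodes/0/properties  (path assumed)
 * A value of 128 would map to /dev/dri/renderD128.
 */
#include <stdio.h>
#include <string.h>

static int example_render_minor(const char *props_path)
{
	char name[64];
	long long val;
	int minor = -1;
	FILE *f = fopen(props_path, "r");

	if (!f)
		return -1;
	while (fscanf(f, "%63s %lld", name, &val) == 2) {
		if (!strcmp(name, "drm_render_minor"))
			minor = (int)val;
	}
	fclose(f);
	return minor;
}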