Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 426
1 file changed, 352 insertions(+), 74 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6b6d46e29e6e..2fcc6e079769 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -32,12 +32,19 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "kfd_smi_events.h"
 
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
 
+/*
+ * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_ALLOCATION_ALIGN (1 << 21)
+
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
@@ -108,7 +115,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
  * compromise that should work in most cases without reserving too
  * much memory for page tables unnecessarily (factor 16K, >> 14).
  */
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
 
 static size_t amdgpu_amdkfd_acc_size(uint64_t size)
 {
@@ -148,7 +155,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
-		vram_needed = size;
+
+		/*
+		 * Conservatively round up the allocation requirement to 2 MB
+		 * to avoid fragmentation caused by 4K allocations in the tail
+		 * 2M BO chunk.
+		 */
+		vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size;
@@ -173,7 +186,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 	     kfd_mem_limit.max_ttm_mem_limit) ||
 	    (adev->kfd.vram_used + vram_needed >
-	     adev->gmc.real_vram_size - reserved_for_pt)) {
+	     adev->gmc.real_vram_size -
+	     atomic64_read(&adev->vram_pin_size) -
+	     reserved_for_pt)) {
 		ret = -ENOMEM;
 		goto release;
 	}
@@ -205,7 +220,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-		adev->kfd.vram_used -= size;
+		adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= acc_size;
@@ -241,6 +256,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 	kfree(bo->kfd_bo);
 }
 
+/**
+ * create_dmamap_sg_bo() - Creates an amdgpu_bo object to reflect information
+ * about USERPTR or DOORBELL or MMIO BO.
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ *	 in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+		 struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+	struct drm_gem_object *gem_obj;
+	int ret, align;
+
+	ret = amdgpu_bo_reserve(mem->bo, false);
+	if (ret)
+		return ret;
+
+	align = 1;
+	ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
+			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
+			ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+
+	amdgpu_bo_unreserve(mem->bo);
+
+	if (ret) {
+		pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+		return -EINVAL;
+	}
+
+	*bo_out = gem_to_amdgpu_bo(gem_obj);
+	(*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+	return ret;
+}
+
 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  *  reservation object.
  *
@@ -446,6 +497,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 	return pte_flags;
 }
 
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+	if (!sg)
+		return NULL;
+	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+		kfree(sg);
+		return NULL;
+	}
+	sg_dma_address(sg->sgl) = addr;
+	sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+	sg->sgl->dma_length = size;
+#endif
+	return sg;
+}
+
 static int
 kfd_mem_dmamap_userptr(struct kgd_mem *mem,
 		       struct kfd_mem_attachment *attachment)
@@ -510,6 +593,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
 	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it instead
+ * loops back. The need to DMA map arises only when accessing peer device's DOORBELL.
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without regard to
+ * device ownership. This is because access requests for MMIO go through PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ *   - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ *	   in updating requesting device's page table
+ *   - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ *	   accessible. This allows an update of requesting device's page table
+ *	   with entries associated with DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *   - Mapping of DOORBELL or MMIO BO of same or peer device
+ *   - Validating an evicted DOORBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+		     struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	enum dma_data_direction dir;
+	dma_addr_t dma_addr;
+	bool mmio;
+	int ret;
+
+	/* Expect SG Table of dmamap BO to be NULL */
+	mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+	if (unlikely(ttm->sg)) {
+		pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+		return -EINVAL;
+	}
+
+	dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+			DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+	pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+	pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+	dma_addr = dma_map_resource(adev->dev, dma_addr,
+			mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	ret = dma_mapping_error(adev->dev, dma_addr);
+	if (unlikely(ret))
+		return ret;
+	pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+	ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+	if (unlikely(!ttm->sg)) {
+		ret = -ENOMEM;
+		goto unmap_sg;
+	}
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (unlikely(ret))
+		goto free_sg;
+
+	return ret;
+
+free_sg:
+	sg_free_table(ttm->sg);
+	kfree(ttm->sg);
+	ttm->sg = NULL;
+unmap_sg:
+	dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+			   dir, DMA_ATTR_SKIP_CPU_SYNC);
+	return ret;
+}
+
 static int
 kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 			  struct kfd_mem_attachment *attachment)
@@ -521,6 +685,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 		return kfd_mem_dmamap_userptr(mem, attachment);
 	case KFD_MEM_ATT_DMABUF:
 		return kfd_mem_dmamap_dmabuf(attachment);
+	case KFD_MEM_ATT_SG:
+		return kfd_mem_dmamap_sg_bo(mem, attachment);
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -561,6 +727,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
 	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs the following steps:
+ *   - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ *   - Free SG Table that is used to encapsulate DMA mapped memory of
+ *	    peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *   Unmapping of DOORBELL or MMIO BO on a device having access to its memory
+ *   Eviction of DOORBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+		       struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	enum dma_data_direction dir;
+
+	if (unlikely(!ttm->sg)) {
+		pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+		return;
+	}
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+	dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+				DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+			ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(ttm->sg);
+	kfree(ttm->sg);
+	ttm->sg = NULL;
+	bo->tbo.sg = NULL;
+}
+
 static void
 kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 			    struct kfd_mem_attachment *attachment)
@@ -574,39 +784,15 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	case KFD_MEM_ATT_DMABUF:
 		kfd_mem_dmaunmap_dmabuf(attachment);
 		break;
+	case KFD_MEM_ATT_SG:
+		kfd_mem_dmaunmap_sg_bo(mem, attachment);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
 }
 
 static int
-kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
-		       struct amdgpu_bo **bo)
-{
-	unsigned long bo_size = mem->bo->tbo.base.size;
-	struct drm_gem_object *gobj;
-	int ret;
-
-	ret = amdgpu_bo_reserve(mem->bo, false);
-	if (ret)
-		return ret;
-
-	ret = amdgpu_gem_object_create(adev, bo_size, 1,
-				       AMDGPU_GEM_DOMAIN_CPU,
-				       AMDGPU_GEM_CREATE_PREEMPTIBLE,
-				       ttm_bo_type_sg, mem->bo->tbo.base.resv,
-				       &gobj);
-	amdgpu_bo_unreserve(mem->bo);
-	if (ret)
-		return ret;
-
-	*bo = gem_to_amdgpu_bo(gobj);
-	(*bo)->parent = amdgpu_bo_ref(mem->bo);
-
-	return 0;
-}
-
-static int
 kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 		      struct amdgpu_bo **bo)
 {
@@ -656,6 +842,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	uint64_t va = mem->va;
 	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
 	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	bool same_hive = false;
 	int i, ret;
 
 	if (!va) {
@@ -663,6 +850,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		return -EINVAL;
 	}
 
+	/* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
+	 *
+	 * The access path of MMIO and DOORBELL BOs is always over PCIe.
+	 * In contrast the access path of VRAM BOs depends upon the type of
+	 * link that connects the peer device. Access over PCIe is allowed
+	 * if peer device has large BAR. In contrast, access over xGMI is
+	 * allowed for both small and large BAR configurations of peer device
+	 */
+	if ((adev != bo_adev) &&
+	    ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
+	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+		if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
+			same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+		if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
+			return -EINVAL;
+	}
+
 	for (i = 0; i <= is_aql; i++) {
 		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
 		if (unlikely(!attachment[i])) {
@@ -673,9 +878,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
 			 va + bo_size, vm);
 
-		if (adev == bo_adev ||
-		    (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
-		    (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) {
+		if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+		    (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
+		    same_hive) {
 			/* Mappings on the local GPU, or VRAM mappings in the
 			 * local hive, or userptr mapping IOMMU direct map mode
 			 * share the original BO
@@ -691,26 +896,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
 			/* Create an SG BO to DMA-map userptrs on other GPUs */
 			attachment[i]->type = KFD_MEM_ATT_USERPTR;
-			ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
+			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
 			if (ret)
 				goto unwind;
-		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
-			   mem->bo->tbo.type != ttm_bo_type_sg) {
-			/* GTT BOs use DMA-mapping ability of dynamic-attach
-			 * DMA bufs. TODO: The same should work for VRAM on
-			 * large-BAR GPUs.
-			 */
+		/* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
+		} else if (mem->bo->tbo.type == ttm_bo_type_sg) {
+			WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
+				    mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
+				  "Handling invalid SG BO in ATTACH request");
+			attachment[i]->type = KFD_MEM_ATT_SG;
+			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+			if (ret)
+				goto unwind;
+		/* Enable access to GTT and VRAM BOs of peer devices */
+		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
+			   mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
 			attachment[i]->type = KFD_MEM_ATT_DMABUF;
 			ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
 			if (ret)
 				goto unwind;
+			pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
 		} else {
-			/* FIXME: Need to DMA-map other BO types:
-			 * large-BAR VRAM, doorbells, MMIO remap
-			 */
-			attachment[i]->type = KFD_MEM_ATT_SHARED;
-			bo[i] = mem->bo;
-			drm_gem_object_get(&bo[i]->tbo.base);
+			WARN_ONCE(true, "Handling invalid ATTACH request");
+			ret = -EINVAL;
+			goto unwind;
 		}
 
 		/* Add BO to VM internal data structures */
@@ -1111,24 +1320,6 @@ update_gpuvm_pte_failed:
 	return ret;
 }
 
-static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
-{
-	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-
-	if (!sg)
-		return NULL;
-	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
-		kfree(sg);
-		return NULL;
-	}
-	sg->sgl->dma_address = addr;
-	sg->sgl->length = size;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-	sg->sgl->dma_length = size;
-#endif
-	return sg;
-}
-
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
 	struct amdgpu_vm *peer_vm;
@@ -1457,6 +1648,22 @@ out_unlock:
 	return ret;
 }
 
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+{
+	uint64_t reserved_for_pt =
+		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+	size_t available;
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	available = adev->gmc.real_vram_size
+		- adev->kfd.vram_used
+		- atomic64_read(&adev->vram_pin_size)
+		- reserved_for_pt;
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+	return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
+}
+
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
@@ -1497,7 +1704,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		bo_type = ttm_bo_type_sg;
 		if (size > UINT_MAX)
 			return -EINVAL;
-		sg = create_doorbell_sg(*offset, size);
+		sg = create_sg_table(*offset, size);
 		if (!sg)
 			return -ENOMEM;
 	} else {
@@ -1907,8 +2114,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
 	return ret;
 }
 
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
-		struct kgd_mem *mem, void **kptr, uint64_t *size)
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @adev: Device to which allocated BO belongs
+ * @bo: Buffer object to be mapped
+ *
+ * Before return, bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+	int ret;
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("Failed to reserve bo. ret %d\n", ret);
+		goto err_reserve_bo_failed;
+	}
+
+	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+	if (ret) {
+		pr_err("Failed to pin bo. ret %d\n", ret);
+		goto err_pin_bo_failed;
+	}
+
+	ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+	if (ret) {
ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, bo->kfd_bo->process_info->eviction_fence); + + amdgpu_bo_unreserve(bo); + + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unpin(bo); +err_pin_bo_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access + * + * @mem: Buffer object to be mapped for CPU access + * @kptr[out]: pointer in kernel CPU address space + * @size[out]: size of the buffer + * + * Pins the BO and maps it for kernel CPU access. The eviction fence is removed + * from the BO, since pinned BOs cannot be evicted. The bo must remain on the + * validate_list, so the GPU mapping can be restored after a page table was + * evicted. + * + * Return: 0 on success, error code on failure + */ +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, + void **kptr, uint64_t *size) { int ret; struct amdgpu_bo *bo = mem->bo; @@ -1959,8 +2227,15 @@ bo_reserve_failed: return ret; } -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem) +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access + * + * @mem: Buffer object to be unmapped for CPU access + * + * Removes the kernel CPU mapping and unpins the BO. It does not restore the + * eviction fence, so this function should only be used for cleanup before the + * BO is destroyed. + */ +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; @@ -2072,7 +2347,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->restore_userptr_work, @@ -2346,13 +2621,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) unlock_out: mutex_unlock(&process_info->lock); - mmput(mm); - put_task_struct(usertask); /* If validation failed, reschedule another attempt */ - if (evicted_bos) + if (evicted_bos) { schedule_delayed_work(&process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + + kfd_smi_event_queue_restore_rescheduled(mm); + } + mmput(mm); + put_task_struct(usertask); } /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given |