Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_vm.c | 1152
1 file changed, 957 insertions, 195 deletions
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 690330352d4c..ec04bef8ae40 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -8,6 +8,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/nospec.h>

+#include <drm/drm_drv.h>
 #include <drm/drm_exec.h>
 #include <drm/drm_print.h>
 #include <drm/ttm/ttm_tt.h>
@@ -33,7 +34,9 @@
 #include "xe_pm.h"
 #include "xe_preempt_fence.h"
 #include "xe_pt.h"
+#include "xe_pxp.h"
 #include "xe_res_cursor.h"
+#include "xe_svm.h"
 #include "xe_sync.h"
 #include "xe_trace_bo.h"
 #include "xe_wa.h"
@@ -269,6 +272,7 @@ out_up_write:

 	return err;
 }
+ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

 /**
  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
@@ -579,51 +583,26 @@ out_unlock_outer:
 	trace_xe_vm_rebind_worker_exit(vm);
 }

-static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
-				   const struct mmu_notifier_range *range,
-				   unsigned long cur_seq)
+static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
 {
-	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
-	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+	struct xe_userptr *userptr = &uvma->userptr;
 	struct xe_vma *vma = &uvma->vma;
-	struct xe_vm *vm = xe_vma_vm(vma);
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 	long err;

-	xe_assert(vm->xe, xe_vma_is_userptr(vma));
-	trace_xe_vma_userptr_invalidate(vma);
-
-	if (!mmu_notifier_range_blockable(range))
-		return false;
-
-	vm_dbg(&xe_vma_vm(vma)->xe->drm,
-	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
-	       xe_vma_start(vma), xe_vma_size(vma));
-
-	down_write(&vm->userptr.notifier_lock);
-	mmu_interval_set_seq(mni, cur_seq);
-
-	/* No need to stop gpu access if the userptr is not yet bound. */
-	if (!userptr->initial_bind) {
-		up_write(&vm->userptr.notifier_lock);
-		return true;
-	}
-
 	/*
 	 * Tell exec and rebind worker they need to repin and rebind this
 	 * userptr.
 	 */
 	if (!xe_vm_in_fault_mode(vm) &&
-	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
+	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
 		spin_lock(&vm->userptr.invalidated_lock);
 		list_move_tail(&userptr->invalidate_link,
 			       &vm->userptr.invalidated);
 		spin_unlock(&vm->userptr.invalidated_lock);
 	}

-	up_write(&vm->userptr.notifier_lock);
-
 	/*
 	 * Preempt fences turn into schedule disables, pipeline these.
 	 * Note that even in fault mode, we need to wait for binds and
@@ -641,11 +620,37 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 				    false, MAX_SCHEDULE_TIMEOUT);
 	XE_WARN_ON(err <= 0);

-	if (xe_vm_in_fault_mode(vm)) {
+	if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
 		err = xe_vm_invalidate_vma(vma);
 		XE_WARN_ON(err);
 	}
+
+	xe_hmm_userptr_unmap(uvma);
+}
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+
+	if (!mmu_notifier_range_blockable(range))
+		return false;
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+	       xe_vma_start(vma), xe_vma_size(vma));
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	__vma_userptr_invalidate(vm, uvma);
+	up_write(&vm->userptr.notifier_lock);

 	trace_xe_vma_userptr_invalidate_complete(vma);

 	return true;
@@ -655,31 +660,71 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
 	.invalidate = vma_userptr_invalidate,
 };

+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+/**
+ * xe_vma_userptr_force_invalidate() - force invalidate a userptr
+ * @uvma: The userptr vma to invalidate
+ *
+ * Perform a forced userptr invalidation for testing purposes.
+ */
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+	/* Protect against concurrent userptr pinning */
+	lockdep_assert_held(&vm->lock);
+	/* Protect against concurrent notifiers */
+	lockdep_assert_held(&vm->userptr.notifier_lock);
+	/*
+	 * Protect against concurrent instances of this function and
+	 * the critical exec sections
+	 */
+	xe_vm_assert_held(vm);
+
+	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
+				     uvma->userptr.notifier_seq))
+		uvma->userptr.notifier_seq -= 2;
+	__vma_userptr_invalidate(vm, uvma);
+}
+#endif
+
 int xe_vm_userptr_pin(struct xe_vm *vm)
 {
 	struct xe_userptr_vma *uvma, *next;
 	int err = 0;
-	LIST_HEAD(tmp_evict);

 	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
 	lockdep_assert_held_write(&vm->lock);

 	/* Collect invalidated userptrs */
 	spin_lock(&vm->userptr.invalidated_lock);
+	xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
 	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
 				 userptr.invalidate_link) {
 		list_del_init(&uvma->userptr.invalidate_link);
-		list_move_tail(&uvma->userptr.repin_link,
-			       &vm->userptr.repin_list);
+		list_add_tail(&uvma->userptr.repin_link,
+			      &vm->userptr.repin_list);
 	}
 	spin_unlock(&vm->userptr.invalidated_lock);

-	/* Pin and move to temporary list */
+	/* Pin and move to bind list */
 	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
 				 userptr.repin_link) {
 		err = xe_vma_userptr_pin_pages(uvma);
 		if (err == -EFAULT) {
 			list_del_init(&uvma->userptr.repin_link);
+			/*
+			 * We might have already done the pin once, but then
+			 * had to retry before the re-bind happened, due to
+			 * some other condition in the caller. In the
+			 * meantime the userptr got dinged by the notifier
+			 * such that we need to revalidate here, but this
+			 * time we hit the EFAULT. In such a case make sure
+			 * we remove ourselves from the rebind list to avoid
+			 * going down in flames.
+			 */
+			if (!list_empty(&uvma->vma.combined_links.rebind))
+				list_del_init(&uvma->vma.combined_links.rebind);

 			/* Wait for pending binds */
 			xe_vm_lock(vm, false);
@@ -687,13 +732,15 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 					      DMA_RESV_USAGE_BOOKKEEP,
 					      false, MAX_SCHEDULE_TIMEOUT);

+			down_read(&vm->userptr.notifier_lock);
 			err = xe_vm_invalidate_vma(&uvma->vma);
+			up_read(&vm->userptr.notifier_lock);
 			xe_vm_unlock(vm);
 			if (err)
-				return err;
+				break;
 		} else {
-			if (err < 0)
-				return err;
+			if (err)
+				break;

 			list_del_init(&uvma->userptr.repin_link);
 			list_move_tail(&uvma->vma.combined_links.rebind,
@@ -701,7 +748,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 		}
 	}

-	return 0;
+	if (err) {
+		down_write(&vm->userptr.notifier_lock);
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+					 userptr.repin_link) {
+			list_del_init(&uvma->userptr.repin_link);
+			list_move_tail(&uvma->userptr.invalidate_link,
+				       &vm->userptr.invalidated);
+		}
+		spin_unlock(&vm->userptr.invalidated_lock);
+		up_write(&vm->userptr.notifier_lock);
+	}
+	return err;
 }

 /**
@@ -741,21 +800,47 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
 }
 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);

+static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
+{
+	struct xe_vma *vma;
+
+	vma = gpuva_to_vma(op->base.prefetch.va);
+
+	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
+		xa_destroy(&op->prefetch_range.range);
+}
+
+static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *op;
+
+	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+		return;
+
+	list_for_each_entry(op, &vops->list, link)
+		xe_vma_svm_prefetch_op_fini(op);
+}
+
 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
 {
 	int i;

+	xe_vma_svm_prefetch_ops_fini(vops);
+
 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
 		kfree(vops->pt_update_ops[i].ops);
 }

-static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
+static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
 {
 	int i;

+	if (!inc_val)
+		return;
+
 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
 		if (BIT(i) & tile_mask)
-			++vops->pt_update_ops[i].num_ops;
+			vops->pt_update_ops[i].num_ops += inc_val;
 }

 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
@@ -785,7 +870,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,

 	xe_vm_populate_rebind(op, vma, tile_mask);
 	list_add_tail(&op->link, &vops->list);
-	xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
+	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

 	return 0;
 }
@@ -893,6 +978,179 @@ free_ops:
 	return fence;
 }

+static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
+					struct xe_vma *vma,
+					struct xe_svm_range *range,
+					u8 tile_mask)
+{
+	INIT_LIST_HEAD(&op->link);
+	op->tile_mask = tile_mask;
+	op->base.op = DRM_GPUVA_OP_DRIVER;
+	op->subop = XE_VMA_SUBOP_MAP_RANGE;
+	op->map_range.vma = vma;
+	op->map_range.range = range;
+}
+
+static int
+xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
+			   struct xe_vma *vma,
+			   struct xe_svm_range *range,
+			   u8 tile_mask)
+{
+	struct xe_vma_op *op;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
+	list_add_tail(&op->link, &vops->list);
+	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
+
+	return 0;
+}
+
+/**
+ * xe_vm_range_rebind() - VM range (re)bind
+ * @vm: The VM which the range belongs to.
+ * @vma: The VMA which the range belongs to.
+ * @range: SVM range to rebind.
+ * @tile_mask: Tile mask to bind the range to.
+ *
+ * (re)bind SVM range setting up GPU page tables for the range.
+ *
+ * Return: dma fence for rebind to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
+				     struct xe_vma *vma,
+				     struct xe_svm_range *range,
+				     u8 tile_mask)
+{
+	struct dma_fence *fence = NULL;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	struct xe_tile *tile;
+	u8 id;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
+
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+	for_each_tile(tile, vm->xe, id) {
+		vops.pt_update_ops[id].wait_vm_bookkeep = true;
+		vops.pt_update_ops[tile->id].q =
+			xe_tile_migrate_exec_queue(tile);
+	}
+
+	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
+	if (err)
+		return ERR_PTR(err);
+
+	err = xe_vma_ops_alloc(&vops, false);
+	if (err) {
+		fence = ERR_PTR(err);
+		goto free_ops;
+	}
+
+	fence = ops_execute(vm, &vops);
+
+free_ops:
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
+	}
+	xe_vma_ops_fini(&vops);
+
+	return fence;
+}
+
+static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
+					struct xe_svm_range *range)
+{
+	INIT_LIST_HEAD(&op->link);
+	op->tile_mask = range->tile_present;
+	op->base.op = DRM_GPUVA_OP_DRIVER;
+	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
+	op->unmap_range.range = range;
+}
+
+static int
+xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
+			   struct xe_svm_range *range)
+{
+	struct xe_vma_op *op;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	xe_vm_populate_range_unbind(op, range);
+	list_add_tail(&op->link, &vops->list);
+	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
+
+	return 0;
+}
+
+/**
+ * xe_vm_range_unbind() - VM range unbind
+ * @vm: The VM which the range belongs to.
+ * @range: SVM range to unbind.
+ *
+ * Unbind SVM range removing the GPU page tables for the range.
+ *
+ * Return: dma fence for unbind to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
+				     struct xe_svm_range *range)
+{
+	struct dma_fence *fence = NULL;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	struct xe_tile *tile;
+	u8 id;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+	if (!range->tile_present)
+		return dma_fence_get_stub();
+
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+	for_each_tile(tile, vm->xe, id) {
+		vops.pt_update_ops[id].wait_vm_bookkeep = true;
+		vops.pt_update_ops[tile->id].q =
+			xe_tile_migrate_exec_queue(tile);
+	}
+
+	err = xe_vm_ops_add_range_unbind(&vops, range);
+	if (err)
+		return ERR_PTR(err);
+
+	err = xe_vma_ops_alloc(&vops, false);
+	if (err) {
+		fence = ERR_PTR(err);
+		goto free_ops;
+	}
+
+	fence = ops_execute(vm, &vops);
+
+free_ops:
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
+	}
+	xe_vma_ops_fini(&vops);
+
+	return fence;
+}
+
 static void xe_vma_free(struct xe_vma *vma)
 {
 	if (xe_vma_is_userptr(vma))
@@ -901,9 +1159,10 @@ static void xe_vma_free(struct xe_vma *vma)
 		kfree(vma);
 }

-#define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
-#define VMA_CREATE_FLAG_IS_NULL		BIT(1)
-#define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
+#define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
+#define VMA_CREATE_FLAG_IS_NULL			BIT(1)
+#define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
+#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR	BIT(3)

 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    struct xe_bo *bo,
@@ -917,6 +1176,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
 	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
 	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
+	bool is_cpu_addr_mirror =
+		(flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);

 	xe_assert(vm->xe, start < end);
 	xe_assert(vm->xe, end < vm->size);

 	/*
 	 * Allocate and ensure that the xe_vma_is_userptr() return
 	 * matches what was allocated.
 	 */
-	if (!bo && !is_null) {
+	if (!bo && !is_null && !is_cpu_addr_mirror) {
 		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);

 		if (!uvma)
@@ -937,6 +1198,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 		if (!vma)
 			return ERR_PTR(-ENOMEM);

+		if (is_cpu_addr_mirror)
+			vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
 		if (is_null)
 			vma->gpuva.flags |= DRM_GPUVA_SPARSE;
 		if (bo)
@@ -979,7 +1242,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 		drm_gpuva_link(&vma->gpuva, vm_bo);
 		drm_gpuvm_bo_put(vm_bo);
 	} else /* userptr or null */ {
-		if (!is_null) {
+		if (!is_null && !is_cpu_addr_mirror) {
 			struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
 			u64 size = end - start + 1;
 			int err;
@@ -987,6 +1250,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 			INIT_LIST_HEAD(&userptr->invalidate_link);
 			INIT_LIST_HEAD(&userptr->repin_link);
 			vma->gpuva.gem.offset = bo_offset_or_userptr;
+			mutex_init(&userptr->unmap_mutex);

 			err = mmu_interval_notifier_insert(&userptr->notifier,
 							   current->mm,
@@ -1028,8 +1292,9 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 		 * them anymore
 		 */
 		mmu_interval_notifier_remove(&userptr->notifier);
+		mutex_destroy(&userptr->unmap_mutex);
 		xe_vm_put(vm);
-	} else if (xe_vma_is_null(vma)) {
+	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
 		xe_vm_put(vm);
 	} else {
 		xe_bo_put(xe_vma_bo(vma));
@@ -1066,9 +1331,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

 		spin_lock(&vm->userptr.invalidated_lock);
+		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
 		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
 		spin_unlock(&vm->userptr.invalidated_lock);
-	} else if (!xe_vma_is_null(vma)) {
+	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
 		xe_bo_assert_held(xe_vma_bo(vma));

 		drm_gpuva_unlink(&vma->gpuva);
@@ -1374,7 +1640,7 @@ static void xe_vm_free_scratch(struct xe_vm *vm)
 	}
 }

-struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
 {
 	struct drm_gem_object *vm_resv_obj;
 	struct xe_vm *vm;
@@ -1382,6 +1648,12 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	struct xe_tile *tile;
 	u8 id;

+	/*
+	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
+	 * ever be in faulting mode.
+	 */
+	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
+
 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
 	if (!vm)
 		return ERR_PTR(-ENOMEM);
@@ -1389,10 +1661,25 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	vm->xe = xe;

 	vm->size = 1ull << xe->info.va_bits;
-
 	vm->flags = flags;

-	init_rwsem(&vm->lock);
+	if (xef)
+		vm->xef = xe_file_get(xef);
+	/**
+	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
+	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
+	 * under a user-VM lock when the PXP session is started at exec_queue
+	 * creation time. Those are different VMs and therefore there is no risk
+	 * of deadlock, but we need to tell lockdep that this is the case or it
+	 * will print a warning.
+	 */
+	if (flags & XE_VM_FLAG_GSC) {
+		static struct lock_class_key gsc_vm_key;
+
+		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
+	} else {
+		init_rwsem(&vm->lock);
+	}
 	mutex_init(&vm->snap_mutex);

 	INIT_LIST_HEAD(&vm->rebind_list);
@@ -1420,13 +1707,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	 * scheduler drops all the references of it, hence protecting the VM
 	 * for this case is necessary.
 	 */
-	if (flags & XE_VM_FLAG_LR_MODE)
+	if (flags & XE_VM_FLAG_LR_MODE) {
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
 		xe_pm_runtime_get_noresume(xe);
+	}
+
+	if (flags & XE_VM_FLAG_FAULT_MODE) {
+		err = xe_svm_init(vm);
+		if (err)
+			goto err_no_resv;
+	}

 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
 	if (!vm_resv_obj) {
 		err = -ENOMEM;
-		goto err_no_resv;
+		goto err_svm_fini;
 	}

 	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
@@ -1466,10 +1761,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 			vm->batch_invalidate_tlb = true;
 	}

-	if (vm->flags & XE_VM_FLAG_LR_MODE) {
-		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+	if (vm->flags & XE_VM_FLAG_LR_MODE)
 		vm->batch_invalidate_tlb = false;
-	}

 	/* Fill pt_root after allocating scratch tables */
 	for_each_tile(tile, xe, id) {
@@ -1502,6 +1795,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	if (number_tiles > 1)
 		vm->composite_fence_ctx = dma_fence_context_alloc(1);

+	if (xef && xe->info.has_asid) {
+		u32 asid;
+
+		down_write(&xe->usm.lock);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+				      XA_LIMIT(1, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		up_write(&xe->usm.lock);
+		if (err < 0)
+			goto err_unlock_close;
+
+		vm->usm.asid = asid;
+	}
+
 	trace_xe_vm_create(vm);

 	return vm;
@@ -1512,11 +1819,18 @@ err_close:
 	xe_vm_close_and_put(vm);
 	return ERR_PTR(err);

+err_svm_fini:
+	if (flags & XE_VM_FLAG_FAULT_MODE) {
+		vm->size = 0; /* close the vm */
+		xe_svm_fini(vm);
+	}
 err_no_resv:
 	mutex_destroy(&vm->snap_mutex);
 	for_each_tile(tile, xe, id)
 		xe_range_fence_tree_fini(&vm->rftree[id]);
 	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
+	if (vm->xef)
+		xe_file_put(vm->xef);
 	kfree(vm);
 	if (flags & XE_VM_FLAG_LR_MODE)
 		xe_pm_runtime_put(xe);
@@ -1525,9 +1839,44 @@ err_no_resv:

 static void xe_vm_close(struct xe_vm *vm)
 {
+	struct xe_device *xe = vm->xe;
+	bool bound;
+	int idx;
+
+	bound = drm_dev_enter(&xe->drm, &idx);
+
 	down_write(&vm->lock);
+	if (xe_vm_in_fault_mode(vm))
+		xe_svm_notifier_lock(vm);
+
 	vm->size = 0;
+
+	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
+		struct xe_tile *tile;
+		struct xe_gt *gt;
+		u8 id;
+
+		/* Wait for pending binds */
+		dma_resv_wait_timeout(xe_vm_resv(vm),
+				      DMA_RESV_USAGE_BOOKKEEP,
+				      false, MAX_SCHEDULE_TIMEOUT);
+
+		if (bound) {
+			for_each_tile(tile, xe, id)
+				if (vm->pt_root[id])
+					xe_pt_clear(xe, vm->pt_root[id]);
+
+			for_each_gt(gt, xe, id)
+				xe_gt_tlb_invalidation_vm(gt, vm);
+		}
+	}
+
+	if (xe_vm_in_fault_mode(vm))
+		xe_svm_notifier_unlock(vm);
 	up_write(&vm->lock);
+
+	if (bound)
+		drm_dev_exit(idx);
 }

 void xe_vm_close_and_put(struct xe_vm *vm)
@@ -1544,6 +1893,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	xe_vm_close(vm);
 	if (xe_vm_in_preempt_fence_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
+	if (xe_vm_in_fault_mode(vm))
+		xe_svm_close(vm);

 	down_write(&vm->lock);
 	for_each_tile(tile, xe, id) {
@@ -1612,6 +1963,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		xe_vma_destroy_unlocked(vma);
 	}

+	if (xe_vm_in_fault_mode(vm))
+		xe_svm_fini(vm);
+
 	up_write(&vm->lock);

 	down_write(&xe->usm.lock);
@@ -1722,9 +2076,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = to_xe_file(file);
 	struct drm_xe_vm_create *args = data;
-	struct xe_tile *tile;
 	struct xe_vm *vm;
-	u32 id, asid;
+	u32 id;
 	int err;
 	u32 flags = 0;

@@ -1745,16 +2098,14 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;

 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
-			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 !xe->info.needs_scratch))
 		return -EINVAL;

 	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
 		return -EINVAL;

-	if (XE_IOCTL_DBG(xe, args->extensions))
-		return -EINVAL;
-
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
@@ -1762,29 +2113,10 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
 		flags |= XE_VM_FLAG_FAULT_MODE;

-	vm = xe_vm_create(xe, flags);
+	vm = xe_vm_create(xe, flags, xef);
 	if (IS_ERR(vm))
 		return PTR_ERR(vm);

-	if (xe->info.has_asid) {
-		down_write(&xe->usm.lock);
-		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
-				      XA_LIMIT(1, XE_MAX_ASID - 1),
-				      &xe->usm.next_asid, GFP_KERNEL);
-		up_write(&xe->usm.lock);
-		if (err < 0)
-			goto err_close_and_put;
-
-		vm->usm.asid = asid;
-	}
-
-	vm->xef = xe_file_get(xef);
-
-	/* Record BO memory for VM pagetable created against client */
-	for_each_tile(tile, xe, id)
-		if (vm->pt_root[id])
-			xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
-
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
 	/* Warning: Security issue - never enable by default */
 	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
@@ -1834,6 +2166,35 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	return err;
 }

+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
+{
+	if (page_addr > xe_vma_end(vma) - 1 ||
+	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
+		return false;
+
+	return true;
+}
+
+/**
+ * xe_vm_find_vma_by_addr() - Find a VMA by its address
+ *
+ * @vm: the xe_vm the vma belongs to
+ * @page_addr: address to look up
+ */
+struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
+{
+	struct xe_vma *vma = NULL;
+
+	if (vm->usm.last_fault_vma) {	/* Fast lookup */
+		if (vma_matches(vm->usm.last_fault_vma, page_addr))
+			vma = vm->usm.last_fault_vma;
+	}
+	if (!vma)
+		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
+
+	return vma;
+}
+
 static const u32 region_to_mem_type[] = {
 	XE_PL_TT,
 	XE_PL_VRAM0,
@@ -1900,13 +2261,39 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 }
 #endif

+static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
+{
+	if (!xe_vm_in_fault_mode(vm))
+		return false;
+
+	if (!xe_vm_has_scratch(vm))
+		return false;
+
+	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
+		return false;
+
+	return true;
+}
+
+static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
+{
+	struct drm_gpuva_op *__op;
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		xe_vma_svm_prefetch_op_fini(op);
+	}
+}
+
 /*
  * Create operations list from IOCTL arguments, setup operations fields so parse
  * and commit steps are decoupled from IOCTL arguments. This step can fail.
 */
 static struct drm_gpuva_ops *
-vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
-			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
+			 struct xe_bo *bo, u64 bo_offset_or_userptr,
+			 u64 addr, u64 range,
 			 u32 operation, u32 flags,
 			 u32 prefetch_region, u16 pat_index)
 {
@@ -1914,6 +2301,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	struct drm_gpuva_ops *ops;
 	struct drm_gpuva_op *__op;
 	struct drm_gpuvm_bo *vm_bo;
+	u64 range_end = addr + range;
 	int err;

 	lockdep_assert_held_write(&vm->lock);
@@ -1968,17 +2356,87 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			op->map.read_only =
 				flags & DRM_XE_VM_BIND_FLAG_READONLY;
 			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.is_cpu_addr_mirror = flags &
+				DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
 			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
 			op->map.pat_index = pat_index;
+			op->map.invalidate_on_bind =
+				__xe_vm_needs_clear_scratch_pages(vm, flags);
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
-			op->prefetch.region = prefetch_region;
-		}
+			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+			struct xe_svm_range *svm_range;
+			struct drm_gpusvm_ctx ctx = {};
+			struct xe_tile *tile;
+			u8 id, tile_mask = 0;
+			u32 i;
+
+			if (!xe_vma_is_cpu_addr_mirror(vma)) {
+				op->prefetch.region = prefetch_region;
+				break;
+			}
+
+			ctx.read_only = xe_vma_read_only(vma);
+			ctx.devmem_possible = IS_DGFX(vm->xe) &&
+					      IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
+
+			for_each_tile(tile, vm->xe, id)
+				tile_mask |= 0x1 << id;
+
+			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
+			op->prefetch_range.region = prefetch_region;
+			op->prefetch_range.ranges_count = 0;
+alloc_next_range:
+			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
+
+			if (PTR_ERR(svm_range) == -ENOENT) {
+				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
+
+				addr = ret == ULONG_MAX ? 0 : ret;
+				if (addr)
+					goto alloc_next_range;
+				else
+					goto print_op_label;
+			}
+
+			if (IS_ERR(svm_range)) {
+				err = PTR_ERR(svm_range);
+				goto unwind_prefetch_ops;
+			}
+
+			if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) {
+				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
+				goto check_next_range;
+			}
+
+			err = xa_alloc(&op->prefetch_range.range,
+				       &i, svm_range, xa_limit_32b,
+				       GFP_KERNEL);
+
+			if (err)
+				goto unwind_prefetch_ops;
+
+			op->prefetch_range.ranges_count++;
+			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
+			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
check_next_range:
+			if (range_end > xe_svm_range_end(svm_range) &&
+			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
+				addr = xe_svm_range_end(svm_range);
+				goto alloc_next_range;
+			}
+		}
+print_op_label:
 		print_op(vm->xe, __op);
 	}

 	return ops;
+
+unwind_prefetch_ops:
+	xe_svm_prefetch_gpuva_ops_fini(ops);
+	drm_gpuva_ops_free(&vm->gpuvm, ops);
+	return ERR_PTR(err);
 }
+
 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);

 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
@@ -2160,6 +2618,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				VMA_CREATE_FLAG_IS_NULL : 0;
 			flags |= op->map.dumpable ?
 				VMA_CREATE_FLAG_DUMPABLE : 0;
+			flags |= op->map.is_cpu_addr_mirror ?
+				VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;

 			vma = new_vma(vm, &op->base.map, op->map.pat_index,
 				      flags);
@@ -2167,30 +2627,47 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				return PTR_ERR(vma);

 			op->map.vma = vma;
-			if (op->map.immediate || !xe_vm_in_fault_mode(vm))
+			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
+			     !op->map.is_cpu_addr_mirror) ||
+			    op->map.invalidate_on_bind)
 				xe_vma_ops_incr_pt_update_ops(vops,
-							      op->tile_mask);
+							      op->tile_mask, 1);
 			break;
 		}
 		case DRM_GPUVA_OP_REMAP:
 		{
 			struct xe_vma *old =
 				gpuva_to_vma(op->base.remap.unmap->va);
+			bool skip = xe_vma_is_cpu_addr_mirror(old);
+			u64 start = xe_vma_start(old), end = xe_vma_end(old);
+			int num_remap_ops = 0;
+
+			if (op->base.remap.prev)
+				start = op->base.remap.prev->va.addr +
+					op->base.remap.prev->va.range;
+			if (op->base.remap.next)
+				end = op->base.remap.next->va.addr;
+
+			if (xe_vma_is_cpu_addr_mirror(old) &&
+			    xe_svm_has_mapping(vm, start, end))
+				return -EBUSY;

 			op->remap.start = xe_vma_start(old);
 			op->remap.range = xe_vma_size(old);

-			if (op->base.remap.prev) {
-				flags |= op->base.remap.unmap->va->flags &
-					XE_VMA_READ_ONLY ?
-					VMA_CREATE_FLAG_READ_ONLY : 0;
-				flags |= op->base.remap.unmap->va->flags &
-					DRM_GPUVA_SPARSE ?
-					VMA_CREATE_FLAG_IS_NULL : 0;
-				flags |= op->base.remap.unmap->va->flags &
-					XE_VMA_DUMPABLE ?
-					VMA_CREATE_FLAG_DUMPABLE : 0;
+			flags |= op->base.remap.unmap->va->flags &
+				XE_VMA_READ_ONLY ?
+				VMA_CREATE_FLAG_READ_ONLY : 0;
+			flags |= op->base.remap.unmap->va->flags &
+				DRM_GPUVA_SPARSE ?
+				VMA_CREATE_FLAG_IS_NULL : 0;
+			flags |= op->base.remap.unmap->va->flags &
+				XE_VMA_DUMPABLE ?
+				VMA_CREATE_FLAG_DUMPABLE : 0;
+			flags |= xe_vma_is_cpu_addr_mirror(old) ?
+				VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;

+			if (op->base.remap.prev) {
 				vma = new_vma(vm, op->base.remap.prev,
 					      old->pat_index, flags);
 				if (IS_ERR(vma))
@@ -2202,9 +2679,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				 * Userptr creates a new SG mapping so
 				 * we must also rebind.
 				 */
-				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
+				op->remap.skip_prev = skip ||
+					(!xe_vma_is_userptr(old) &&
 					IS_ALIGNED(xe_vma_end(vma),
-						   xe_vma_max_pte_size(old));
+						   xe_vma_max_pte_size(old)));
 				if (op->remap.skip_prev) {
 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
 					op->remap.range -=
@@ -2215,21 +2693,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 					       (ULL)op->remap.start,
 					       (ULL)op->remap.range);
 				} else {
-					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+					num_remap_ops++;
 				}
 			}

 			if (op->base.remap.next) {
-				flags |= op->base.remap.unmap->va->flags &
-					XE_VMA_READ_ONLY ?
-					VMA_CREATE_FLAG_READ_ONLY : 0;
-				flags |= op->base.remap.unmap->va->flags &
-					DRM_GPUVA_SPARSE ?
-					VMA_CREATE_FLAG_IS_NULL : 0;
-				flags |= op->base.remap.unmap->va->flags &
-					XE_VMA_DUMPABLE ?
-					VMA_CREATE_FLAG_DUMPABLE : 0;
-
 				vma = new_vma(vm, op->base.remap.next,
 					      old->pat_index, flags);
 				if (IS_ERR(vma))
@@ -2241,9 +2709,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				 * Userptr creates a new SG mapping so
 				 * we must also rebind.
 				 */
-				op->remap.skip_next = !xe_vma_is_userptr(old) &&
+				op->remap.skip_next = skip ||
+					(!xe_vma_is_userptr(old) &&
 					IS_ALIGNED(xe_vma_start(vma),
-						   xe_vma_max_pte_size(old));
+						   xe_vma_max_pte_size(old)));
 				if (op->remap.skip_next) {
 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
 					op->remap.range -=
@@ -2253,16 +2722,41 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 					       (ULL)op->remap.start,
 					       (ULL)op->remap.range);
 				} else {
-					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+					num_remap_ops++;
 				}
 			}
-			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+
+			if (!skip)
+				num_remap_ops++;
+
+			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
 			break;
 		}
 		case DRM_GPUVA_OP_UNMAP:
+			vma = gpuva_to_vma(op->base.unmap.va);
+
+			if (xe_vma_is_cpu_addr_mirror(vma) &&
+			    xe_svm_has_mapping(vm, xe_vma_start(vma),
+					       xe_vma_end(vma)))
+				return -EBUSY;
+
+			if (!xe_vma_is_cpu_addr_mirror(vma))
+				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
+			break;
 		case DRM_GPUVA_OP_PREFETCH:
-			/* FIXME: Need to skip some prefetch ops */
-			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+			vma = gpuva_to_vma(op->base.prefetch.va);
+
+			if (xe_vma_is_userptr(vma)) {
+				err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+				if (err)
+					return err;
+			}
+
+			if (xe_vma_is_cpu_addr_mirror(vma))
+				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
+							      op->prefetch_range.ranges_count);
+			else
+				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
 			break;
 		default:
 			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
@@ -2388,6 +2882,57 @@ static int check_ufence(struct xe_vma *vma)
 	return 0;
 }

+static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
+	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+	int err = 0;
+
+	struct xe_svm_range *svm_range;
+	struct drm_gpusvm_ctx ctx = {};
+	struct xe_tile *tile;
+	unsigned long i;
+	u32 region;
+
+	if (!xe_vma_is_cpu_addr_mirror(vma))
+		return 0;
+
+	region = op->prefetch_range.region;
+
+	ctx.read_only = xe_vma_read_only(vma);
+	ctx.devmem_possible = devmem_possible;
+	ctx.check_pages_threshold = devmem_possible ?
+				    SZ_64K : 0;
+
+	/* TODO: Threading the migration */
+	xa_for_each(&op->prefetch_range.range, i, svm_range) {
+		if (!region)
+			xe_svm_range_migrate_to_smem(vm, svm_range);
+
+		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
+			tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
+			err = xe_svm_alloc_vram(tile, svm_range, &ctx);
+			if (err) {
+				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
+					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+				return -ENODATA;
+			}
+			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
+		}
+
+		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
+		if (err) {
+			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
+				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
+				err = -ENODATA;
+			return err;
+		}
+		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+	}
+
+	return err;
+}
+
 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 			    struct xe_vma_op *op)
 {
@@ -2395,9 +2940,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,

 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		err = vma_lock_and_validate(exec, op->map.vma,
-					    !xe_vm_in_fault_mode(vm) ||
-					    op->map.immediate);
+		if (!op->map.invalidate_on_bind)
+			err = vma_lock_and_validate(exec, op->map.vma,
+						    !xe_vm_in_fault_mode(vm) ||
+						    op->map.immediate);
 		break;
 	case DRM_GPUVA_OP_REMAP:
 		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
@@ -2424,7 +2970,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 	case DRM_GPUVA_OP_PREFETCH:
 	{
 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
-		u32 region = op->prefetch.region;
+		u32 region;
+
+		if (xe_vma_is_cpu_addr_mirror(vma))
+			region = op->prefetch_range.region;
+		else
+			region = op->prefetch.region;

 		xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));

@@ -2443,6 +2994,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 	return err;
 }

+static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *op;
+	int err;
+
+	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+		return 0;
+
+	list_for_each_entry(op, &vops->list, link) {
+		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
+			err = prefetch_ranges(vm, op);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 					   struct xe_vm *vm,
 					   struct xe_vma_ops *vops)
@@ -2488,6 +3058,8 @@ static void op_trace(struct xe_vma_op *op)
 	case DRM_GPUVA_OP_PREFETCH:
 		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
 		break;
+	case DRM_GPUVA_OP_DRIVER:
+		break;
 	default:
 		XE_WARN_ON("NOT POSSIBLE");
 	}
@@ -2665,14 +3237,15 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
 	}
 	if (ufence)
 		xe_sync_ufence_put(ufence);
-	for (i = 0; i < vops->num_syncs; i++)
-		xe_sync_entry_signal(vops->syncs + i, fence);
-	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-	dma_fence_put(fence);
+	if (fence) {
+		for (i = 0; i < vops->num_syncs; i++)
+			xe_sync_entry_signal(vops->syncs + i, fence);
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	}
 }

-static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
-				     struct xe_vma_ops *vops)
+static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+						   struct xe_vma_ops *vops)
 {
 	struct drm_exec exec;
 	struct dma_fence *fence;
@@ -2685,12 +3258,15 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 	drm_exec_until_all_locked(&exec) {
 		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
 		drm_exec_retry_on_contention(&exec);
-		if (err)
+		if (err) {
+			fence = ERR_PTR(err);
 			goto unlock;
+		}

 		fence = ops_execute(vm, vops);
 		if (IS_ERR(fence)) {
-			err = PTR_ERR(fence);
+			if (PTR_ERR(fence) == -ENODATA)
+				vm_bind_ioctl_ops_fini(vm, vops, NULL);
 			goto unlock;
 		}

@@ -2699,7 +3275,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,

 unlock:
 	drm_exec_fini(&exec);
-	return err;
+	return fence;
 }
 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);

@@ -2707,7 +3283,9 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
 	(DRM_XE_VM_BIND_FLAG_READONLY | \
 	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
 	 DRM_XE_VM_BIND_FLAG_NULL | \
-	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
+	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
+	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
+	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)

 #ifdef TEST_VM_OPS_ERROR
 #define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
@@ -2718,7 +3296,7 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
 #define XE_64K_PAGE_MASK 0xffffull
 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)

-static int vm_bind_ioctl_check_args(struct xe_device *xe,
+static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
 				    struct drm_xe_vm_bind *args,
 				    struct drm_xe_vm_bind_op **bind_ops)
 {
@@ -2743,9 +3321,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 	if (!*bind_ops)
 		return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;

-	err = __copy_from_user(*bind_ops, bind_user,
-			       sizeof(struct drm_xe_vm_bind_op) *
-			       args->num_binds);
+	err = copy_from_user(*bind_ops, bind_user,
+			     sizeof(struct drm_xe_vm_bind_op) *
+			     args->num_binds);
 	if (XE_IOCTL_DBG(xe, err)) {
 		err = -EFAULT;
 		goto free_bind_ops;
@@ -2763,9 +3341,18 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
 		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
 		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+		bool is_cpu_addr_mirror = flags &
+			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
 		u16 pat_index = (*bind_ops)[i].pat_index;
 		u16 coh_mode;

+		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
+				 (!xe_vm_in_fault_mode(vm) ||
+				  !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
 		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
 			err = -EINVAL;
 			goto free_bind_ops;
@@ -2786,13 +3373,14 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,

 		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
-		    XE_IOCTL_DBG(xe, obj && is_null) ||
-		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
+		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
+		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
+				 is_cpu_addr_mirror)) ||
 		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
-				 is_null) ||
+				 (is_null || is_cpu_addr_mirror)) ||
 		    XE_IOCTL_DBG(xe, !obj &&
 				 op == DRM_XE_VM_BIND_OP_MAP &&
-				 !is_null) ||
+				 !is_null && !is_cpu_addr_mirror) ||
 		    XE_IOCTL_DBG(xe, !obj &&
 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, addr &&
@@ -2830,6 +3418,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 free_bind_ops:
 	if (args->num_binds > 1)
 		kvfree(*bind_ops);
+	*bind_ops = NULL;
 	return err;
 }

@@ -2866,17 +3455,18 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
 	vops->q = q;
 	vops->syncs = syncs;
 	vops->num_syncs = num_syncs;
+	vops->flags = 0;
 }

 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 					u64 addr, u64 range, u64 obj_offset,
-					u16 pat_index)
+					u16 pat_index, u32 op, u32 bind_flags)
 {
 	u16 coh_mode;

-	if (XE_IOCTL_DBG(xe, range > bo->size) ||
+	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
 	    XE_IOCTL_DBG(xe, obj_offset >
-			 bo->size - range)) {
+			 xe_bo_size(bo) - range)) {
 		return -EINVAL;
 	}

@@ -2894,7 +3484,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 		    XE_64K_PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
-			return  -EINVAL;
+			return -EINVAL;
 		}
 	}

@@ -2902,7 +3492,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 	if (bo->cpu_caching) {
 		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
 				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
-			return  -EINVAL;
+			return -EINVAL;
 		}
 	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
 		/*
@@ -2911,9 +3501,15 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 		 * how it was mapped on the CPU. Just assume is it
 		 * potentially cached on CPU side.
 		 */
-		return  -EINVAL;
+		return -EINVAL;
 	}

+	/* If a BO is protected it can only be mapped if the key is still valid */
+	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
+	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
+		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
+			return -ENOEXEC;
+
 	return 0;
 }

@@ -2929,20 +3525,25 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_exec_queue *q = NULL;
 	u32 num_syncs, num_ufence = 0;
 	struct xe_sync_entry *syncs = NULL;
-	struct drm_xe_vm_bind_op *bind_ops;
+	struct drm_xe_vm_bind_op *bind_ops = NULL;
 	struct xe_vma_ops vops;
+	struct dma_fence *fence;
 	int err;
 	int i;

-	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm))
+		return -EINVAL;
+
+	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
 	if (err)
-		return err;
+		goto put_vm;

 	if (args->exec_queue_id) {
 		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
 		if (XE_IOCTL_DBG(xe, !q)) {
 			err = -ENOENT;
-			goto free_objs;
+			goto put_vm;
 		}

 		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
@@ -2951,15 +3552,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}

-	vm = xe_vm_lookup(xef, args->vm_id);
-	if (XE_IOCTL_DBG(xe, !vm)) {
-		err = -EINVAL;
-		goto put_exec_queue;
-	}
+	/* Ensure all UNMAPs visible */
+	xe_svm_flush(vm);

 	err = down_write_killable(&vm->lock);
 	if (err)
-		goto put_vm;
+		goto put_exec_queue;

 	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
 		err = -ENOENT;
@@ -3002,6 +3600,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u32 obj = bind_ops[i].obj;
 		u64 obj_offset = bind_ops[i].obj_offset;
 		u16 pat_index = bind_ops[i].pat_index;
+		u32 op = bind_ops[i].op;
+		u32 bind_flags = bind_ops[i].flags;

 		if (!obj)
 			continue;
@@ -3014,7 +3614,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		bos[i] = gem_to_xe_bo(gem_obj);

 		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
-						   obj_offset, pat_index);
+						   obj_offset, pat_index, op,
+						   bind_flags);
 		if (err)
 			goto put_obj;
 	}
@@ -3062,7 +3663,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
 		u16 pat_index = bind_ops[i].pat_index;

-		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
 						  addr, range, op, flags,
 						  prefetch_region, pat_index);
 		if (IS_ERR(ops[i])) {
@@ -3095,7 +3696,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		goto unwind_ops;

-	err = vm_bind_ioctl_ops_execute(vm, &vops);
+	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
+	if (err)
+		goto unwind_ops;
+
+	fence = vm_bind_ioctl_ops_execute(vm, &vops);
+	if (IS_ERR(fence))
+		err = PTR_ERR(fence);
+	else
+		dma_fence_put(fence);

 unwind_ops:
 	if (err && err != -ENODATA)
@@ -3116,12 +3725,11 @@ put_obj:
 		xe_bo_put(bos[i]);
 release_vm_lock:
 	up_write(&vm->lock);
-put_vm:
-	xe_vm_put(vm);
 put_exec_queue:
 	if (q)
 		xe_exec_queue_put(q);
-free_objs:
+put_vm:
+	xe_vm_put(vm);
 	kvfree(bos);
 	kvfree(ops);
 	if (args->num_binds > 1)
@@ -3130,6 +3738,81 @@ free_objs:
 }

 /**
+ * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
+ * @vm: VM to bind the BO to
+ * @bo: BO to bind
+ * @q: exec queue to use for the bind (optional)
+ * @addr: address at which to bind the BO
+ * @cache_lvl: PAT cache level to use
+ *
+ * Execute a VM bind map operation on a kernel-owned BO to bind it into a
+ * kernel-owned VM.
+ *
+ * Returns a dma_fence to track the binding completion if the job to do so was
+ * successfully submitted, an error pointer otherwise.
+ */
+struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
+				       struct xe_exec_queue *q, u64 addr,
+				       enum xe_cache_level cache_lvl)
+{
+	struct xe_vma_ops vops;
+	struct drm_gpuva_ops *ops = NULL;
+	struct dma_fence *fence;
+	int err;
+
+	xe_bo_get(bo);
+	xe_vm_get(vm);
+	if (q)
+		xe_exec_queue_get(q);
+
+	down_write(&vm->lock);
+
+	xe_vma_ops_init(&vops, vm, q, NULL, 0);
+
+	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
+				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
+				       vm->xe->pat.idx[cache_lvl]);
+	if (IS_ERR(ops)) {
+		err = PTR_ERR(ops);
+		goto release_vm_lock;
+	}
+
+	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
+	if (err)
+		goto release_vm_lock;
+
+	xe_assert(vm->xe, !list_empty(&vops.list));
+
+	err = xe_vma_ops_alloc(&vops, false);
+	if (err)
+		goto unwind_ops;
+
+	fence = vm_bind_ioctl_ops_execute(vm, &vops);
+	if (IS_ERR(fence))
+		err = PTR_ERR(fence);
+
+unwind_ops:
+	if (err && err != -ENODATA)
+		vm_bind_ioctl_ops_unwind(vm, &ops, 1);
+
+	xe_vma_ops_fini(&vops);
+	drm_gpuva_ops_free(&vm->gpuvm, ops);
+
+release_vm_lock:
+	up_write(&vm->lock);
+
+	if (q)
+		xe_exec_queue_put(q);
+	xe_vm_put(vm);
+	xe_bo_put(bo);
+
+	if (err)
+		fence = ERR_PTR(err);
+
+	return fence;
+}
+
+/**
  * xe_vm_lock() - Lock the vm's dma_resv object
  * @vm: The struct xe_vm whose lock is to be locked
  * @intr: Whether to perform any wait interruptible
@@ -3158,6 +3841,68 @@ void xe_vm_unlock(struct xe_vm *vm)
 }

 /**
+ * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an
+ * address range
+ * @vm: The VM
+ * @start: start address
+ * @end: end address
+ * @tile_mask: mask for which gt's issue tlb invalidation
+ *
+ * Issue a range based TLB invalidation for gt's in tilemask
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start,
+					  u64 end, u8 tile_mask)
+{
+	struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+	struct xe_tile *tile;
+	u32 fence_id = 0;
+	u8 id;
+	int err;
+
+	if (!tile_mask)
+		return 0;
+
+	for_each_tile(tile, vm->xe, id) {
+		if (tile_mask & BIT(id)) {
+			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+							  &fence[fence_id], true);
+
+			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
+							   &fence[fence_id],
+							   start,
+							   end,
+							   vm->usm.asid);
+			if (err)
+				goto wait;
+			++fence_id;
+
+			if (!tile->media_gt)
+				continue;
+
+			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+							  &fence[fence_id], true);
+
+			err = xe_gt_tlb_invalidation_range(tile->media_gt,
+							   &fence[fence_id],
+							   start,
+							   end,
+							   vm->usm.asid);
+			if (err)
+				goto wait;
+			++fence_id;
+		}
+	}
+
+wait:
+	for (id = 0; id < fence_id; ++id)
+		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
+	return err;
+}
+
+/**
  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
  * @vma: VMA to invalidate
  *
@@ -3170,27 +3915,34 @@ void xe_vm_unlock(struct xe_vm *vm)
 int xe_vm_invalidate_vma(struct xe_vma *vma)
 {
 	struct xe_device *xe = xe_vma_vm(vma)->xe;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_tile *tile;
-	struct xe_gt_tlb_invalidation_fence
-		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
-	u8 id;
-	u32 fence_id = 0;
+	u8 tile_mask = 0;
 	int ret = 0;
+	u8 id;

 	xe_assert(xe, !xe_vma_is_null(vma));
+	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
 	trace_xe_vma_invalidate(vma);

-	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	vm_dbg(&vm->xe->drm,
 	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
 	       xe_vma_start(vma), xe_vma_size(vma));

-	/* Check that we don't race with page-table updates */
+	/*
+	 * Check that we don't race with page-table updates, tile_invalidated
+	 * update is safe
+	 */
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
 		if (xe_vma_is_userptr(vma)) {
+			lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
+				       (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
+					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
+
 			WARN_ON_ONCE(!mmu_interval_check_retry
 				     (&to_userptr_vma(vma)->userptr.notifier,
 				      to_userptr_vma(vma)->userptr.notifier_seq));
-			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
 							     DMA_RESV_USAGE_BOOKKEEP));

 		} else {
@@ -3198,41 +3950,48 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		}
 	}

-	for_each_tile(tile, xe, id) {
-		if (xe_pt_zap_ptes(tile, vma)) {
-			xe_device_wmb(xe);
-			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
-							  &fence[fence_id],
-							  true);
+	for_each_tile(tile, xe, id)
+		if (xe_pt_zap_ptes(tile, vma))
+			tile_mask |= BIT(id);

-			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
-							 &fence[fence_id], vma);
-			if (ret)
-				goto wait;
-			++fence_id;
+	xe_device_wmb(xe);

-			if (!tile->media_gt)
-				continue;
+	ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma),
+						    xe_vma_end(vma), tile_mask);

-			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
-							  &fence[fence_id],
-							  true);
+	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
+	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);

-			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
-							 &fence[fence_id], vma);
-			if (ret)
-				goto wait;
-			++fence_id;
-		}
-	}
+	return ret;
+}

-wait:
-	for (id = 0; id < fence_id; ++id)
-		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+int xe_vm_validate_protected(struct xe_vm *vm)
+{
+	struct drm_gpuva *gpuva;
+	int err = 0;

-	vma->tile_invalidated = vma->tile_mask;
+	if (!vm)
+		return -ENODEV;

-	return ret;
+	mutex_lock(&vm->snap_mutex);
+
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
+		struct xe_bo *bo = vma->gpuva.gem.obj ?
+			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
+
+		if (!bo)
+			continue;
+
+		if (xe_bo_is_protected(bo)) {
+			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
+			if (err)
+				break;
+		}
+	}
+
+	mutex_unlock(&vm->snap_mutex);
+	return err;
 }

 struct xe_vm_snapshot {
@@ -3378,6 +4137,9 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
 		}
 		drm_puts(p, "\n");
+
+		if (drm_coredump_printer_is_full(p))
+			return;
 	}
 }
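
The vma_matches()/xe_vm_find_vma_by_addr() helpers added above reduce to a plain interval-overlap test between an SZ_4K fault page and a VMA. A minimal, userspace-compilable sketch of that test, assuming an exclusive range end as in xe_vma_end(); the struct and names below are illustrative stand-ins, not the driver's types:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000ull

struct range {
	uint64_t start;	/* inclusive, like xe_vma_start() */
	uint64_t end;	/* exclusive, like xe_vma_end() */
};

static bool range_matches(const struct range *r, uint64_t page_addr)
{
	/* No overlap iff the page lies entirely above or below the range. */
	if (page_addr > r->end - 1 || page_addr + SZ_4K - 1 < r->start)
		return false;

	return true;
}

int main(void)
{
	struct range r = { .start = 0x10000, .end = 0x20000 };

	printf("%d %d %d\n",
	       range_matches(&r, 0x0f000),	/* 0: page ends at 0xffff, below start */
	       range_matches(&r, 0x10000),	/* 1: first page of the range */
	       range_matches(&r, 0x20000));	/* 0: starts at the exclusive end */
	return 0;
}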
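
The new inc_val parameter of xe_vma_ops_incr_pt_update_ops() lets a single operation account for several page-table updates at once (one per SVM range in a prefetch) instead of always bumping by one per tile. A standalone sketch of the accounting loop, with MAX_TILES standing in for XE_MAX_TILES_PER_DEVICE:

#include <stdint.h>
#include <stdio.h>

#define MAX_TILES 4

struct pt_update_ops {
	unsigned int num_ops;	/* pending page-table updates for one tile */
};

static void incr_pt_update_ops(struct pt_update_ops ops[MAX_TILES],
			       uint8_t tile_mask, int inc_val)
{
	if (!inc_val)	/* e.g. a remap where both halves were skipped */
		return;

	for (int i = 0; i < MAX_TILES; i++)
		if (tile_mask & (1u << i))
			ops[i].num_ops += inc_val;
}

int main(void)
{
	struct pt_update_ops ops[MAX_TILES] = {0};

	incr_pt_update_ops(ops, 0x5, 2);	/* tiles 0 and 2, two ranges each */
	for (int i = 0; i < MAX_TILES; i++)
		printf("tile %d: %u\n", i, ops[i].num_ops);
	return 0;
}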
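
The alloc_next_range/check_next_range flow in vm_bind_ioctl_ops_create() covers [addr, addr + range) by repeatedly taking the SVM range containing addr and continuing from that range's end. A toy model of the walk, assuming fixed-size, naturally aligned ranges (the real ranges come from xe_svm_range_find_or_insert() and vary in size):

#include <stdint.h>
#include <stdio.h>

#define RANGE_SZ 0x200000ull	/* stand-in for one SVM range */

/* Round addr down to the start of the covering range. */
static uint64_t range_start(uint64_t addr)
{
	return addr & ~(RANGE_SZ - 1);
}

int main(void)
{
	uint64_t addr = 0x180000, range_end = 0x700000;
	unsigned int ranges_count = 0;

	while (addr < range_end) {
		uint64_t end = range_start(addr) + RANGE_SZ;

		printf("range [%#llx, %#llx)\n",
		       (unsigned long long)range_start(addr),
		       (unsigned long long)end);
		ranges_count++;	/* mirrors op->prefetch_range.ranges_count++ */
		addr = end;	/* addr = xe_svm_range_end(svm_range) */
	}
	printf("ranges_count = %u\n", ranges_count);
	return 0;
}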