 drivers/gpu/drm/xe/xe_pagefault.c | 26 ++++++++++++++
 drivers/gpu/drm/xe/xe_vm.c        | 74 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_vm.h        |  9 +++++
 drivers/gpu/drm/xe/xe_vm_types.h  | 29 +++++++++++++++
 4 files changed, 138 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
index 918d595d1c1b..2fd55d7c98f9 100644
--- a/drivers/gpu/drm/xe/xe_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_pagefault.c
@@ -250,6 +250,31 @@ static void xe_pagefault_print(struct xe_pagefault *pf)
 		   pf->consumer.engine_instance);
 }
 
+static void xe_pagefault_save_to_vm(struct xe_device *xe, struct xe_pagefault *pf)
+{
+	struct xe_vm *vm;
+
+	/*
+	 * The pagefault may be associated with a VM that is not in fault
+	 * mode. Perform the same lookup as asid_to_vm(), but return the VM
+	 * regardless of whether it is in fault mode.
+	 */
+	down_read(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, pf->consumer.asid);
+	if (vm)
+		xe_vm_get(vm);
+	else
+		vm = ERR_PTR(-EINVAL);
+	up_read(&xe->usm.lock);
+
+	if (IS_ERR(vm))
+		return;
+
+	xe_vm_add_fault_entry_pf(vm, pf);
+
+	xe_vm_put(vm);
+}
+
 static void xe_pagefault_queue_work(struct work_struct *w)
 {
 	struct xe_pagefault_queue *pf_queue =
@@ -268,6 +293,7 @@ static void xe_pagefault_queue_work(struct work_struct *w)
 
 		err = xe_pagefault_service(&pf);
 		if (err) {
+			xe_pagefault_save_to_vm(gt_to_xe(pf.gt), &pf);
 			if (!(pf.consumer.access_type & XE_PAGEFAULT_ACCESS_PREFETCH)) {
 				xe_pagefault_print(&pf);
 				xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n",
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ff162e4fc84b..5ed98ec8674b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -27,6 +27,7 @@
 #include "xe_device.h"
 #include "xe_drm_client.h"
 #include "xe_exec_queue.h"
+#include "xe_gt.h"
 #include "xe_migrate.h"
 #include "xe_pat.h"
 #include "xe_pm.h"
@@ -577,6 +578,74 @@ out_unlock_outer:
 	trace_xe_vm_rebind_worker_exit(vm);
 }
 
+/**
+ * xe_vm_add_fault_entry_pf() - Add pagefault to VM fault list
+ * @vm: The VM.
+ * @pf: The pagefault.
+ *
+ * This function takes the data from the pagefault @pf and saves it to @vm->faults.list.
+ *
+ * The function exits silently if the list is full, and reports a warning if the fault
+ * entry could not be allocated.
+ */
+void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf)
+{
+	struct xe_vm_fault_entry *e;
+	struct xe_hw_engine *hwe;
+
+	/* Do not report faults on reserved engines */
+	hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class,
+			      pf->consumer.engine_instance, false);
+	if (!hwe || xe_hw_engine_is_reserved(hwe))
+		return;
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (!e) {
+		drm_warn(&vm->xe->drm,
+			 "Could not allocate memory for fault!\n");
+		return;
+	}
+
+	guard(spinlock)(&vm->faults.lock);
+
+	/*
+	 * Limit the number of faults in the fault list to prevent
+	 * memory overuse.
+	 */
+	if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) {
+		kfree(e);
+		return;
+	}
+
+	e->address = pf->consumer.page_addr;
+	/*
+	 * TODO:
+	 * Address precision is currently always SZ_4K, but this may change
+	 * in the future.
+	 */
+	e->address_precision = SZ_4K;
+	e->access_type = pf->consumer.access_type;
+	e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK,
+				  pf->consumer.fault_type_level);
+	e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK,
+				  pf->consumer.fault_type_level);
+
+	list_add_tail(&e->list, &vm->faults.list);
+	vm->faults.len++;
+}
+
+static void xe_vm_clear_fault_entries(struct xe_vm *vm)
+{
+	struct xe_vm_fault_entry *e, *tmp;
+
+	guard(spinlock)(&vm->faults.lock);
+	list_for_each_entry_safe(e, tmp, &vm->faults.list, list) {
+		list_del(&e->list);
+		kfree(e);
+	}
+	vm->faults.len = 0;
+}
+
 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
 {
 	int i;
@@ -1538,6 +1607,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
 	INIT_LIST_HEAD(&vm->userptr.invalidated);
 	spin_lock_init(&vm->userptr.invalidated_lock);
 
+	INIT_LIST_HEAD(&vm->faults.list);
+	spin_lock_init(&vm->faults.lock);
+
 	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
 
 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
@@ -1854,6 +1926,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	}
 	up_write(&xe->usm.lock);
 
+	xe_vm_clear_fault_entries(vm);
+
 	for_each_tile(tile, xe, id)
 		xe_range_fence_tree_fini(&vm->rftree[id]);
 
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 0bc7ed23eeae..42767d2aebac 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -12,6 +12,12 @@
 #include "xe_map.h"
 #include "xe_vm_types.h"
 
+/**
+ * MAX_FAULTS_SAVED_PER_VM - Maximum number of faults each VM can store before
+ * further faults are discarded to prevent memory overuse
+ */
+#define MAX_FAULTS_SAVED_PER_VM	50
+
 struct drm_device;
 struct drm_printer;
 struct drm_file;
@@ -22,6 +28,7 @@ struct dma_fence;
 
 struct xe_exec_queue;
 struct xe_file;
+struct xe_pagefault;
 struct xe_sync_entry;
 struct xe_svm_range;
 struct drm_exec;
@@ -318,6 +325,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
 
+void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf);
+
 /**
  * xe_vm_set_validating() - Register this task as currently making bos resident
  * @allow_res_evict: Allow eviction of buffer objects bound to @vm when
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index fc811b5e308c..3ab2cef25426 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -24,6 +24,7 @@
 struct drm_pagemap;
 
 struct xe_bo;
+struct xe_pagefault;
 struct xe_svm_range;
 struct xe_sync_entry;
 struct xe_user_fence;
@@ -176,6 +177,24 @@ struct xe_userptr_vma {
 
 struct xe_device;
 
+/**
+ * struct xe_vm_fault_entry - Elements of vm->faults.list
+ * @list: link into @xe_vm.faults.list
+ * @address: address of the fault
+ * @address_precision: precision of the faulted address
+ * @access_type: type of access that resulted in the fault
+ * @fault_type: type of fault reported
+ * @fault_level: fault level of the fault
+ */
+struct xe_vm_fault_entry {
+	struct list_head list;
+	u64 address;
+	u32 address_precision;
+	u8 access_type;
+	u8 fault_type;
+	u8 fault_level;
+};
+
 struct xe_vm {
 	/** @gpuvm: base GPUVM used to track VMAs */
 	struct drm_gpuvm gpuvm;
@@ -333,6 +352,16 @@ struct xe_vm {
 		bool capture_once;
 	} error_capture;
 
+	/** @faults: List of all faults associated with this VM */
+	struct {
+		/** @faults.lock: lock protecting @faults.list */
+		spinlock_t lock;
+		/** @faults.list: list of xe_vm_fault_entry entries */
+		struct list_head list;
+		/** @faults.len: length of @faults.list */
+		unsigned int len;
+	} faults;
+
 	/**
 	 * @validation: Validation data only valid with the vm resv held.
 	 * Note: This is really task state of the task holding the vm resv,
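For context, a minimal sketch of how the new per-VM fault list might be consumed follows. It is not part of this patch: the helper name xe_vm_print_fault_entries() and its drm_printer plumbing are assumptions for illustration; only vm->faults and struct xe_vm_fault_entry come from the diff above.

/*
 * Hypothetical consumer of vm->faults.list (e.g. a debugfs or query path).
 * Illustration only; not part of this patch.
 */
static void xe_vm_print_fault_entries(struct xe_vm *vm, struct drm_printer *p)
{
	struct xe_vm_fault_entry *e;

	/* Same lock that xe_vm_add_fault_entry_pf() takes while appending */
	guard(spinlock)(&vm->faults.lock);

	list_for_each_entry(e, &vm->faults.list, list)
		drm_printf(p, "Fault: addr=%#llx precision=%u access=%u type=%u level=%u\n",
			   e->address, e->address_precision,
			   e->access_type, e->fault_type, e->fault_level);
}

Entries stay on the list until xe_vm_close_and_put() calls xe_vm_clear_fault_entries(), so a reader like this only needs the spinlock, and the list is bounded at MAX_FAULTS_SAVED_PER_VM entries.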
