diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-30 21:33:14 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-30 21:33:14 +0300 |
commit | aa32f1169148beb90d71494e2f2a1999ba7b5366 (patch) | |
tree | ec8c434bff07bf0beb2df08629089824927f62f9 /drivers/gpu/drm | |
parent | d5bb349dbbe27537e90a03b9597deeb07723a86d (diff) | |
parent | 93f4e735b6d98ee4b7a1252d81e815a983e359f2 (diff) | |
download | linux-aa32f1169148beb90d71494e2f2a1999ba7b5366.tar.xz |
Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull hmm updates from Jason Gunthorpe:
"This is another round of bug fixing and cleanup. This time the focus
is on the driver pattern to use mmu notifiers to monitor a VA range.
This code is lifted out of many drivers and hmm_mirror directly into
the mmu_notifier core and written using the best ideas from all the
driver implementations.
This removes many bugs from the drivers and has a very pleasing
diffstat. More drivers can still be converted, but that is for another
cycle.
- A shared branch with RDMA reworking the RDMA ODP implementation
- New mmu_interval_notifier API. This is focused on the use case of
monitoring a VA and simplifies the process for drivers
- A common seq-count locking scheme built into the
mmu_interval_notifier API usable by drivers that call
get_user_pages() or hmm_range_fault() with the VA range
- Conversion of mlx5 ODP, hfi1, radeon, nouveau, AMD GPU, and Xen
GntDev drivers to the new API. This deletes a lot of wonky driver
code.
- Two improvements for hmm_range_fault(), from testing done by Ralph"
* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
mm/hmm: remove hmm_range_dma_map and hmm_range_dma_unmap
mm/hmm: make full use of walk_page_range()
xen/gntdev: use mmu_interval_notifier_insert
mm/hmm: remove hmm_mirror and related
drm/amdgpu: Use mmu_interval_notifier instead of hmm_mirror
drm/amdgpu: Use mmu_interval_insert instead of hmm_mirror
drm/amdgpu: Call find_vma under mmap_sem
nouveau: use mmu_interval_notifier instead of hmm_mirror
nouveau: use mmu_notifier directly for invalidate_range_start
drm/radeon: use mmu_interval_notifier_insert
RDMA/hfi1: Use mmu_interval_notifier_insert for user_exp_rcv
RDMA/odp: Use mmu_interval_notifier_insert()
mm/hmm: define the pre-processor related parts of hmm.h even if disabled
mm/hmm: allow hmm_range to be used with a mmu_interval_notifier or hmm_mirror
mm/mmu_notifier: add an interval tree notifier
mm/mmu_notifier: define the header pre-processor parts even if disabled
mm/hmm: allow snapshot of the special zero page
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 444 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 53 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 13 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 145 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_svm.c | 230 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_mn.c | 218 |
11 files changed, 360 insertions, 778 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bcc5d40a8d5f..0c229a92a24b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -967,6 +967,8 @@ struct amdgpu_device { struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; + struct mutex notifier_lock; + int asic_reset_res; struct work_struct xgmi_reset_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ae6f5446262c..7d35b5b66229 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -505,8 +505,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, * * Returns 0 for success, negative errno for errors. */ -static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, - uint64_t user_addr) +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; @@ -1199,7 +1198,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - ret = init_user_pages(*mem, current->mm, user_addr); + ret = init_user_pages(*mem, user_addr); if (ret) goto allocate_init_user_pages_failed; } @@ -1744,6 +1743,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, return ret; } + /* + * FIXME: Cannot ignore the return code, must hold + * notifier_lock + */ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a169ff16277f..5ca905b4a0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -538,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->tv.num_shared = 2; amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -1219,11 +1217,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock. - * p->mn is hold until amdgpu_cs_submit is finished and fence is added - * to BOs. + /* No memory allocation is allowed while holding the notifier lock. + * The lock is held until amdgpu_cs_submit is finished and fence is + * added to BOs. */ - amdgpu_mn_lock(p->mn); + mutex_lock(&p->adev->notifier_lock); /* If userptr are invalidated after amdgpu_cs_parser_bos(), return * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. @@ -1266,13 +1264,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); return 0; error_abort: drm_sched_job_cleanup(&job->base); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); error_unlock: amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4f76beafb2fa..c17505fba988 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2794,6 +2794,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); + mutex_init(&adev->notifier_lock); mutex_init(&adev->virt.dpm_mutex); mutex_init(&adev->psp.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 392300f77b13..828b5167ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -51,439 +51,107 @@ #include "amdgpu_amdkfd.h" /** - * struct amdgpu_mn_node + * amdgpu_mn_invalidate_gfx - callback to notify about mm change * - * @it: interval node defining start-last of the affected address range - * @bos: list of all BOs in the affected address range - * - * Manages all BOs which are affected of a certain range of address space. - */ -struct amdgpu_mn_node { - struct interval_tree_node it; - struct list_head bos; -}; - -/** - * amdgpu_mn_destroy - destroy the HMM mirror - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void amdgpu_mn_destroy(struct work_struct *work) -{ - struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = amn->adev; - struct amdgpu_mn_node *node, *next_node; - struct amdgpu_bo *bo, *next_bo; - - mutex_lock(&adev->mn_lock); - down_write(&amn->lock); - hash_del(&amn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &amn->objects.rb_root, it.rb) { - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); - - hmm_mirror_unregister(&amn->mirror); - kfree(amn); -} - -/** - * amdgpu_hmm_mirror_release - callback to notify about mm destruction - * - * @mirror: the HMM mirror (mm) this callback is about - * - * Shedule a work item to lazy destroy HMM mirror. - */ -static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - - INIT_WORK(&amn->work, amdgpu_mn_destroy); - schedule_work(&amn->work); -} - -/** - * amdgpu_mn_lock - take the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_lock(struct amdgpu_mn *mn) -{ - if (mn) - down_write(&mn->lock); -} - -/** - * amdgpu_mn_unlock - drop the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_unlock(struct amdgpu_mn *mn) -{ - if (mn) - up_write(&mn->lock); -} - -/** - * amdgpu_mn_read_lock - take the read side lock for this notifier - * - * @amn: our notifier - * @blockable: is the notifier blockable - */ -static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) -{ - if (blockable) - down_read(&amn->lock); - else if (!down_read_trylock(&amn->lock)) - return -EAGAIN; - - return 0; -} - -/** - * amdgpu_mn_read_unlock - drop the read side lock for this notifier - * - * @amn: our notifier - */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) -{ - up_read(&amn->lock); -} - -/** - * amdgpu_mn_invalidate_node - unmap all BOs of a node - * - * @node: the node with the BOs to unmap - * @start: start of address range affected - * @end: end of address range affected + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, - unsigned long start, - unsigned long end) +static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); long r; - list_for_each_entry(bo, &node->bos, mn_list) { - - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) - continue; - - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, - true, false, MAX_SCHEDULE_TIMEOUT); - if (r <= 0) - DRM_ERROR("(%ld) failed to wait for user bo\n", r); - } -} - -/** - * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change - * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address - * - * Block for operations on BOs to finish and mark pages as accessed and - * potentially dirty. - */ -static int -amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = mmu_notifier_range_blockable(update); - struct interval_tree_node *it; + if (!mmu_notifier_range_blockable(range)) + return false; - /* notification is exclusive, but interval is inclusive */ - end -= 1; + mutex_lock(&adev->notifier_lock); - /* TODO we should be able to split locking for interval tree and - * amdgpu_mn_invalidate_node - */ - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; + mmu_interval_set_seq(mni, cur_seq); - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); - - amdgpu_mn_invalidate_node(node, start, end); - } - - amdgpu_mn_read_unlock(amn); - - return 0; -} - -/** - * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change - * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address - * - * We temporarily evict all BOs between start and end. This - * necessitates evicting all user-mode queues of the process. The BOs - * are restorted in amdgpu_mn_invalidate_range_end_hsa. - */ -static int -amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = mmu_notifier_range_blockable(update); - struct interval_tree_node *it; - - /* notification is exclusive, but interval is inclusive */ - end -= 1; - - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; - - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - struct amdgpu_bo *bo; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); - - list_for_each_entry(bo, &node->bos, mn_list) { - struct kgd_mem *mem = bo->kfd_bo; - - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - start, end)) - amdgpu_amdkfd_evict_userptr(mem, amn->mm); - } - } - - amdgpu_mn_read_unlock(amn); - - return 0; + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&adev->notifier_lock); + if (r <= 0) + DRM_ERROR("(%ld) failed to wait for user bo\n", r); + return true; } -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - -static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { - [AMDGPU_MN_TYPE_GFX] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, - .release = amdgpu_hmm_mirror_release - }, - [AMDGPU_MN_TYPE_HSA] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, - .release = amdgpu_hmm_mirror_release - }, +static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = { + .invalidate = amdgpu_mn_invalidate_gfx, }; /** - * amdgpu_mn_get - create HMM mirror context + * amdgpu_mn_invalidate_hsa - callback to notify about mm change * - * @adev: amdgpu device pointer - * @type: type of MMU notifier context + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * - * Creates a HMM mirror context for current->mm. + * We temporarily evict the BO attached to this range. This necessitates + * evicting all user-mode queues of the process. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) +static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct mm_struct *mm = current->mm; - struct amdgpu_mn *amn; - unsigned long key = AMDGPU_MN_KEY(mm, type); - int r; - - mutex_lock(&adev->mn_lock); - if (down_write_killable(&mm->mmap_sem)) { - mutex_unlock(&adev->mn_lock); - return ERR_PTR(-EINTR); - } - - hash_for_each_possible(adev->mn_hash, amn, node, key) - if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) - goto release_locks; - - amn = kzalloc(sizeof(*amn), GFP_KERNEL); - if (!amn) { - amn = ERR_PTR(-ENOMEM); - goto release_locks; - } - - amn->adev = adev; - amn->mm = mm; - init_rwsem(&amn->lock); - amn->type = type; - amn->objects = RB_ROOT_CACHED; - - amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; - r = hmm_mirror_register(&amn->mirror, mm); - if (r) - goto free_amn; + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); + if (!mmu_notifier_range_blockable(range)) + return false; -release_locks: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); + mutex_lock(&adev->notifier_lock); - return amn; + mmu_interval_set_seq(mni, cur_seq); -free_amn: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); - kfree(amn); + amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); + mutex_unlock(&adev->notifier_lock); - return ERR_PTR(r); + return true; } +static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = { + .invalidate = amdgpu_mn_invalidate_hsa, +}; + /** * amdgpu_mn_register - register a BO for notifier updates * * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an HMM mirror for the given BO at the specified address. + * Registers a mmu_notifier for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { - unsigned long end = addr + amdgpu_bo_size(bo) - 1; - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - enum amdgpu_mn_type type = - bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; - struct amdgpu_mn *amn; - struct amdgpu_mn_node *node = NULL, *new_node; - struct list_head bos; - struct interval_tree_node *it; - - amn = amdgpu_mn_get(adev, type); - if (IS_ERR(amn)) - return PTR_ERR(amn); - - new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); - if (!new_node) - return -ENOMEM; - - INIT_LIST_HEAD(&bos); - - down_write(&amn->lock); - - while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { - kfree(node); - node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &amn->objects); - addr = min(it->start, addr); - end = max(it->last, end); - list_splice(&node->bos, &bos); - } - - if (!node) - node = new_node; - else - kfree(new_node); - - bo->mn = amn; - - node->it.start = addr; - node->it.last = end; - INIT_LIST_HEAD(&node->bos); - list_splice(&bos, &node->bos); - list_add(&bo->mn_list, &node->bos); - - interval_tree_insert(&node->it, &amn->objects); - - up_write(&amn->lock); - - return 0; + if (bo->kfd_bo) + return mmu_interval_notifier_insert(&bo->notifier, current->mm, + addr, amdgpu_bo_size(bo), + &amdgpu_mn_hsa_ops); + return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, + amdgpu_bo_size(bo), + &amdgpu_mn_gfx_ops); } /** - * amdgpu_mn_unregister - unregister a BO for HMM mirror updates + * amdgpu_mn_unregister - unregister a BO for notifier updates * * @bo: amdgpu buffer object * - * Remove any registration of HMM mirror updates from the buffer object. + * Remove any registration of mmu notifier updates from the buffer object. */ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *amn; - struct list_head *head; - - mutex_lock(&adev->mn_lock); - - amn = bo->mn; - if (amn == NULL) { - mutex_unlock(&adev->mn_lock); + if (!bo->notifier.mm) return; - } - - down_write(&amn->lock); - - /* save the next list entry for later */ - head = bo->mn_list.next; - - bo->mn = NULL; - list_del_init(&bo->mn_list); - - if (list_empty(head)) { - struct amdgpu_mn_node *node; - - node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &amn->objects); - kfree(node); - } - - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); -} - -/* flags used by HMM internal, not related to CPU/GPU PTE flags */ -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { - (1 << 0), /* HMM_PFN_VALID */ - (1 << 1), /* HMM_PFN_WRITE */ - 0 /* HMM_PFN_DEVICE_PRIVATE */ -}; - -static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ - 0, /* HMM_PFN_NONE */ - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ -}; - -void amdgpu_hmm_init_range(struct hmm_range *range) -{ - if (range) { - range->flags = hmm_range_flags; - range->values = hmm_range_values; - range->pfn_shift = PAGE_SHIFT; - } + mmu_interval_notifier_remove(&bo->notifier); + bo->notifier.mm = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index b8ed68943625..a292238f75eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,63 +30,10 @@ #include <linux/workqueue.h> #include <linux/interval_tree.h> -enum amdgpu_mn_type { - AMDGPU_MN_TYPE_GFX, - AMDGPU_MN_TYPE_HSA, -}; - -/** - * struct amdgpu_mn - * - * @adev: amdgpu device pointer - * @mm: process address space - * @type: type of MMU notifier - * @work: destruction work item - * @node: hash table node to find structure by adev and mn - * @lock: rw semaphore protecting the notifier nodes - * @objects: interval tree containing amdgpu_mn_nodes - * @mirror: HMM mirror function support - * - * Data for each amdgpu device and process address space. - */ -struct amdgpu_mn { - /* constant after initialisation */ - struct amdgpu_device *adev; - struct mm_struct *mm; - enum amdgpu_mn_type type; - - /* only used on destruction */ - struct work_struct work; - - /* protected by adev->mn_lock */ - struct hlist_node node; - - /* objects protected by lock */ - struct rw_semaphore lock; - struct rb_root_cached objects; - -#ifdef CONFIG_HMM_MIRROR - /* HMM mirror */ - struct hmm_mirror mirror; -#endif -}; - #if defined(CONFIG_HMM_MIRROR) -void amdgpu_mn_lock(struct amdgpu_mn *mn); -void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); -void amdgpu_hmm_init_range(struct hmm_range *range); #else -static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} -static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) -{ - return NULL; -} static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7e99f6c58c48..36dec51d1ef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -30,6 +30,9 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#ifdef CONFIG_MMU_NOTIFIER +#include <linux/mmu_notifier.h> +#endif #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 @@ -101,10 +104,12 @@ struct amdgpu_bo { struct ttm_bo_kmap_obj dma_buf_vmap; struct amdgpu_mn *mn; - union { - struct list_head mn_list; - struct list_head shadow_list; - }; + +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_interval_notifier notifier; +#endif + + struct list_head shadow_list; struct kgd_mem *kfd_bo; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 61d9b7774d42..2616e2eafdeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -35,6 +35,7 @@ #include <linux/hmm.h> #include <linux/pagemap.h> #include <linux/sched/task.h> +#include <linux/sched/mm.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/swap.h> @@ -769,6 +770,20 @@ struct amdgpu_ttm_tt { #endif }; +#ifdef CONFIG_DRM_AMDGPU_USERPTR +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + /** * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update @@ -776,85 +791,89 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - -#define MAX_RETRY_HMM_RANGE_FAULT 16 - int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { - struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct mm_struct *mm = gtt->usertask->mm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; struct hmm_range *range; + unsigned long timeout; + struct mm_struct *mm; unsigned long i; - uint64_t *pfns; int r = 0; - if (!mm) /* Happens during process shutdown */ - return -ESRCH; - - if (unlikely(!mirror)) { - DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); - r = -EFAULT; - goto out; + mm = bo->notifier.mm; + if (unlikely(!mm)) { + DRM_DEBUG_DRIVER("BO is not registered?\n"); + return -EFAULT; } - vma = find_vma(mm, start); - if (unlikely(!vma || start < vma->vm_start)) { - r = -EFAULT; - goto out; - } - if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && - vma->vm_file)) { - r = -EPERM; - goto out; - } + /* Another get_user_pages is running at the same time?? */ + if (WARN_ON(gtt->range)) + return -EFAULT; + + if (!mmget_not_zero(mm)) /* Happens during process shutdown */ + return -ESRCH; range = kzalloc(sizeof(*range), GFP_KERNEL); if (unlikely(!range)) { r = -ENOMEM; goto out; } + range->notifier = &bo->notifier; + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->start = bo->notifier.interval_tree.start; + range->end = bo->notifier.interval_tree.last + 1; + range->default_flags = hmm_range_flags[HMM_PFN_VALID]; + if (!amdgpu_ttm_tt_is_readonly(ttm)) + range->default_flags |= range->flags[HMM_PFN_WRITE]; - pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) { + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), + GFP_KERNEL); + if (unlikely(!range->pfns)) { r = -ENOMEM; goto out_free_ranges; } - amdgpu_hmm_init_range(range); - range->default_flags = range->flags[HMM_PFN_VALID]; - range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : range->flags[HMM_PFN_WRITE]; - range->pfn_flags_mask = 0; - range->pfns = pfns; - range->start = start; - range->end = start + ttm->num_pages * PAGE_SIZE; - - hmm_range_register(range, mirror); + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (unlikely(!vma || start < vma->vm_start)) { + r = -EFAULT; + goto out_unlock; + } + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vma->vm_file)) { + r = -EPERM; + goto out_unlock; + } + up_read(&mm->mmap_sem); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - /* - * Just wait for range to be valid, safe to ignore return value as we - * will use the return value of hmm_range_fault() below under the - * mmap_sem to ascertain the validity of the range. - */ - hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); +retry: + range->notifier_seq = mmu_interval_read_begin(&bo->notifier); down_read(&mm->mmap_sem); r = hmm_range_fault(range, 0); up_read(&mm->mmap_sem); - - if (unlikely(r < 0)) + if (unlikely(r <= 0)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) + goto retry; goto out_free_pfns; + } for (i = 0; i < ttm->num_pages; i++) { - pages[i] = hmm_device_entry_to_page(range, pfns[i]); + /* FIXME: The pages cannot be touched outside the notifier_lock */ + pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); if (unlikely(!pages[i])) { pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", - i, pfns[i]); + i, range->pfns[i]); r = -ENOMEM; goto out_free_pfns; @@ -862,15 +881,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) } gtt->range = range; + mmput(mm); return 0; +out_unlock: + up_read(&mm->mmap_sem); out_free_pfns: - hmm_range_unregister(range); - kvfree(pfns); + kvfree(range->pfns); out_free_ranges: kfree(range); out: + mmput(mm); return r; } @@ -895,15 +917,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) "No user pages to check\n"); if (gtt->range) { - r = hmm_range_valid(gtt->range); - hmm_range_unregister(gtt->range); - + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + r = mmu_interval_read_retry(gtt->range->notifier, + gtt->range->notifier_seq); kvfree(gtt->range->pfns); kfree(gtt->range); gtt->range = NULL; } - return r; + return !r; } #endif @@ -984,10 +1009,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range && - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, - gtt->range->pfns[0])) - WARN_ONCE(1, "Missing get_user_page_done\n"); + if (gtt->range) { + unsigned long i; + + for (i = 0; i < ttm->num_pages; i++) { + if (ttm->pages[i] != + hmm_device_entry_to_page(gtt->range, + gtt->range->pfns[i])) + break; + } + + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); + } #endif } diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 668d4bd0c118..df9bf1fd1bc0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -88,6 +88,7 @@ nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst) } struct nouveau_svmm { + struct mmu_notifier notifier; struct nouveau_vmm *vmm; struct { unsigned long start; @@ -95,9 +96,6 @@ struct nouveau_svmm { } unmanaged; struct mutex mutex; - - struct mm_struct *mm; - struct hmm_mirror mirror; }; #define SVMM_DBG(s,f,a...) \ @@ -251,10 +249,11 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) } static int -nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) +nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn, + const struct mmu_notifier_range *update) { - struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror); + struct nouveau_svmm *svmm = + container_of(mn, struct nouveau_svmm, notifier); unsigned long start = update->start; unsigned long limit = update->end; @@ -264,6 +263,9 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit); mutex_lock(&svmm->mutex); + if (unlikely(!svmm->vmm)) + goto out; + if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) { if (start < svmm->unmanaged.start) { nouveau_svmm_invalidate(svmm, start, @@ -273,19 +275,20 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, } nouveau_svmm_invalidate(svmm, start, limit); + +out: mutex_unlock(&svmm->mutex); return 0; } -static void -nouveau_svmm_release(struct hmm_mirror *mirror) +static void nouveau_svmm_free_notifier(struct mmu_notifier *mn) { + kfree(container_of(mn, struct nouveau_svmm, notifier)); } -static const struct hmm_mirror_ops -nouveau_svmm = { - .sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables, - .release = nouveau_svmm_release, +static const struct mmu_notifier_ops nouveau_mn_ops = { + .invalidate_range_start = nouveau_svmm_invalidate_range_start, + .free_notifier = nouveau_svmm_free_notifier, }; void @@ -293,8 +296,10 @@ nouveau_svmm_fini(struct nouveau_svmm **psvmm) { struct nouveau_svmm *svmm = *psvmm; if (svmm) { - hmm_mirror_unregister(&svmm->mirror); - kfree(*psvmm); + mutex_lock(&svmm->mutex); + svmm->vmm = NULL; + mutex_unlock(&svmm->mutex); + mmu_notifier_put(&svmm->notifier); *psvmm = NULL; } } @@ -320,7 +325,7 @@ nouveau_svmm_init(struct drm_device *dev, void *data, mutex_lock(&cli->mutex); if (cli->svm.cli) { ret = -EBUSY; - goto done; + goto out_free; } /* Allocate a new GPU VMM that can support SVM (managed by the @@ -335,24 +340,26 @@ nouveau_svmm_init(struct drm_device *dev, void *data, .fault_replay = true, }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm); if (ret) - goto done; - - /* Enable HMM mirroring of CPU address-space to VMM. */ - svmm->mm = get_task_mm(current); - down_write(&svmm->mm->mmap_sem); - svmm->mirror.ops = &nouveau_svmm; - ret = hmm_mirror_register(&svmm->mirror, svmm->mm); - if (ret == 0) { - cli->svm.svmm = svmm; - cli->svm.cli = cli; - } - up_write(&svmm->mm->mmap_sem); - mmput(svmm->mm); + goto out_free; -done: + down_write(¤t->mm->mmap_sem); + svmm->notifier.ops = &nouveau_mn_ops; + ret = __mmu_notifier_register(&svmm->notifier, current->mm); if (ret) - nouveau_svmm_fini(&svmm); + goto out_mm_unlock; + /* Note, ownership of svmm transfers to mmu_notifier */ + + cli->svm.svmm = svmm; + cli->svm.cli = cli; + up_write(¤t->mm->mmap_sem); mutex_unlock(&cli->mutex); + return 0; + +out_mm_unlock: + up_write(¤t->mm->mmap_sem); +out_free: + mutex_unlock(&cli->mutex); + kfree(svmm); return ret; } @@ -475,43 +482,90 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm, fault->inst, fault->addr, fault->access); } -static inline bool -nouveau_range_done(struct hmm_range *range) +struct svm_notifier { + struct mmu_interval_notifier notifier; + struct nouveau_svmm *svmm; +}; + +static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - bool ret = hmm_range_valid(range); + struct svm_notifier *sn = + container_of(mni, struct svm_notifier, notifier); - hmm_range_unregister(range); - return ret; + /* + * serializes the update to mni->invalidate_seq done by caller and + * prevents invalidation of the PTE from progressing while HW is being + * programmed. This is very hacky and only works because the normal + * notifier that does invalidation is always called after the range + * notifier. + */ + if (mmu_notifier_range_blockable(range)) + mutex_lock(&sn->svmm->mutex); + else if (!mutex_trylock(&sn->svmm->mutex)) + return false; + mmu_interval_set_seq(mni, cur_seq); + mutex_unlock(&sn->svmm->mutex); + return true; } -static int -nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) +static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = { + .invalidate = nouveau_svm_range_invalidate, +}; + +static int nouveau_range_fault(struct nouveau_svmm *svmm, + struct nouveau_drm *drm, void *data, u32 size, + u64 *pfns, struct svm_notifier *notifier) { + unsigned long timeout = + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + /* Have HMM fault pages within the fault window to the GPU. */ + struct hmm_range range = { + .notifier = ¬ifier->notifier, + .start = notifier->notifier.interval_tree.start, + .end = notifier->notifier.interval_tree.last + 1, + .pfns = pfns, + .flags = nouveau_svm_pfn_flags, + .values = nouveau_svm_pfn_values, + .pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT, + }; + struct mm_struct *mm = notifier->notifier.mm; long ret; - range->default_flags = 0; - range->pfn_flags_mask = -1UL; + while (true) { + if (time_after(jiffies, timeout)) + return -EBUSY; + + range.notifier_seq = mmu_interval_read_begin(range.notifier); + range.default_flags = 0; + range.pfn_flags_mask = -1UL; + down_read(&mm->mmap_sem); + ret = hmm_range_fault(&range, 0); + up_read(&mm->mmap_sem); + if (ret <= 0) { + if (ret == 0 || ret == -EBUSY) + continue; + return ret; + } - ret = hmm_range_register(range, &svmm->mirror); - if (ret) { - up_read(&svmm->mm->mmap_sem); - return (int)ret; + mutex_lock(&svmm->mutex); + if (mmu_interval_read_retry(range.notifier, + range.notifier_seq)) { + mutex_unlock(&svmm->mutex); + continue; + } + break; } - if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { - up_read(&svmm->mm->mmap_sem); - return -EBUSY; - } + nouveau_dmem_convert_pfn(drm, &range); - ret = hmm_range_fault(range, 0); - if (ret <= 0) { - if (ret == 0) - ret = -EBUSY; - up_read(&svmm->mm->mmap_sem); - hmm_range_unregister(range); - return ret; - } - return 0; + svmm->vmm->vmm.object.client->super = true; + ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL); + svmm->vmm->vmm.object.client->super = false; + mutex_unlock(&svmm->mutex); + + return ret; } static int @@ -531,7 +585,6 @@ nouveau_svm_fault(struct nvif_notify *notify) } i; u64 phys[16]; } args; - struct hmm_range range; struct vm_area_struct *vma; u64 inst, start, limit; int fi, fn, pi, fill; @@ -587,6 +640,9 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.version = 0; for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) { + struct svm_notifier notifier; + struct mm_struct *mm; + /* Cancel any faults from non-SVM channels. */ if (!(svmm = buffer->fault[fi]->svmm)) { nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); @@ -606,24 +662,32 @@ nouveau_svm_fault(struct nvif_notify *notify) start = max_t(u64, start, svmm->unmanaged.limit); SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit); + mm = svmm->notifier.mm; + if (!mmget_not_zero(mm)) { + nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); + continue; + } + /* Intersect fault window with the CPU VMA, cancelling * the fault if the address is invalid. */ - down_read(&svmm->mm->mmap_sem); - vma = find_vma_intersection(svmm->mm, start, limit); + down_read(&mm->mmap_sem); + vma = find_vma_intersection(mm, start, limit); if (!vma) { SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit); - up_read(&svmm->mm->mmap_sem); + up_read(&mm->mmap_sem); + mmput(mm); nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); continue; } start = max_t(u64, start, vma->vm_start); limit = min_t(u64, limit, vma->vm_end); + up_read(&mm->mmap_sem); SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit); if (buffer->fault[fi]->addr != start) { SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr); - up_read(&svmm->mm->mmap_sem); + mmput(mm); nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); continue; } @@ -679,33 +743,19 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr, args.i.p.addr + args.i.p.size, fn - fi); - /* Have HMM fault pages within the fault window to the GPU. */ - range.start = args.i.p.addr; - range.end = args.i.p.addr + args.i.p.size; - range.pfns = args.phys; - range.flags = nouveau_svm_pfn_flags; - range.values = nouveau_svm_pfn_values; - range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; -again: - ret = nouveau_range_fault(svmm, &range); - if (ret == 0) { - mutex_lock(&svmm->mutex); - if (!nouveau_range_done(&range)) { - mutex_unlock(&svmm->mutex); - goto again; - } - - nouveau_dmem_convert_pfn(svm->drm, &range); - - svmm->vmm->vmm.object.client->super = true; - ret = nvif_object_ioctl(&svmm->vmm->vmm.object, - &args, sizeof(args.i) + - pi * sizeof(args.phys[0]), - NULL); - svmm->vmm->vmm.object.client->super = false; - mutex_unlock(&svmm->mutex); - up_read(&svmm->mm->mmap_sem); + notifier.svmm = svmm; + ret = mmu_interval_notifier_insert(¬ifier.notifier, + svmm->notifier.mm, + args.i.p.addr, args.i.p.size, + &nouveau_svm_mni_ops); + if (!ret) { + ret = nouveau_range_fault( + svmm, svm->drm, &args, + sizeof(args.i) + pi * sizeof(args.phys[0]), + args.phys, ¬ifier); + mmu_interval_notifier_remove(¬ifier.notifier); } + mmput(mm); /* Cancel any faults in the window whose pages didn't manage * to keep their valid bit, or stay writeable when required. @@ -714,10 +764,10 @@ again: */ while (fi < fn) { struct nouveau_svm_fault *fault = buffer->fault[fi++]; - pi = (fault->addr - range.start) >> PAGE_SHIFT; + pi = (fault->addr - args.i.p.addr) >> PAGE_SHIFT; if (ret || - !(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_V) || - (!(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_W) && + !(args.phys[pi] & NVIF_VMM_PFNMAP_V0_V) || + (!(args.phys[pi] & NVIF_VMM_PFNMAP_V0_W) && fault->access != 0 && fault->access != 3)) { nouveau_svm_fault_cancel_fault(svm, fault); continue; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index d59b004f6695..30e32adc1fc6 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -68,6 +68,10 @@ #include <linux/hashtable.h> #include <linux/dma-fence.h> +#ifdef CONFIG_MMU_NOTIFIER +#include <linux/mmu_notifier.h> +#endif + #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> @@ -509,8 +513,9 @@ struct radeon_bo { struct ttm_bo_kmap_obj dma_buf_vmap; pid_t pid; - struct radeon_mn *mn; - struct list_head mn_list; +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_interval_notifier notifier; +#endif }; #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, tbo.base) diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index dbab9a3a969b..f93829f08a4d 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -36,131 +36,51 @@ #include "radeon.h" -struct radeon_mn { - struct mmu_notifier mn; - - /* objects protected by lock */ - struct mutex lock; - struct rb_root_cached objects; -}; - -struct radeon_mn_node { - struct interval_tree_node it; - struct list_head bos; -}; - /** - * radeon_mn_invalidate_range_start - callback to notify about mm change + * radeon_mn_invalidate - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about - * @start: start of updated range - * @end: end of updated range + * @range: the VMA under invalidation * * We block for all BOs between start and end to be idle and * unmap them by move them into system domain again. */ -static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); + struct radeon_bo *bo = container_of(mn, struct radeon_bo, notifier); struct ttm_operation_ctx ctx = { false, false }; - struct interval_tree_node *it; - unsigned long end; - int ret = 0; - - /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; - - /* TODO we should be able to split locking for interval tree and - * the tear down. - */ - if (mmu_notifier_range_blockable(range)) - mutex_lock(&rmn->lock); - else if (!mutex_trylock(&rmn->lock)) - return -EAGAIN; - - it = interval_tree_iter_first(&rmn->objects, range->start, end); - while (it) { - struct radeon_mn_node *node; - struct radeon_bo *bo; - long r; - - if (!mmu_notifier_range_blockable(range)) { - ret = -EAGAIN; - goto out_unlock; - } - - node = container_of(it, struct radeon_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + long r; - list_for_each_entry(bo, &node->bos, mn_list) { + if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound) + return true; - if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound) - continue; + if (!mmu_notifier_range_blockable(range)) + return false; - r = radeon_bo_reserve(bo, true); - if (r) { - DRM_ERROR("(%ld) failed to reserve user bo\n", r); - continue; - } - - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, - true, false, MAX_SCHEDULE_TIMEOUT); - if (r <= 0) - DRM_ERROR("(%ld) failed to wait for user bo\n", r); - - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - DRM_ERROR("(%ld) failed to validate user bo\n", r); - - radeon_bo_unreserve(bo); - } + r = radeon_bo_reserve(bo, true); + if (r) { + DRM_ERROR("(%ld) failed to reserve user bo\n", r); + return true; } - -out_unlock: - mutex_unlock(&rmn->lock); - - return ret; -} - -static void radeon_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) -{ - struct mmu_notifier_range range = { - .mm = mm, - .start = 0, - .end = ULONG_MAX, - .flags = 0, - .event = MMU_NOTIFY_UNMAP, - }; - - radeon_mn_invalidate_range_start(mn, &range); -} - -static struct mmu_notifier *radeon_mn_alloc_notifier(struct mm_struct *mm) -{ - struct radeon_mn *rmn; - rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); - if (!rmn) - return ERR_PTR(-ENOMEM); + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + if (r <= 0) + DRM_ERROR("(%ld) failed to wait for user bo\n", r); - mutex_init(&rmn->lock); - rmn->objects = RB_ROOT_CACHED; - return &rmn->mn; -} + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + DRM_ERROR("(%ld) failed to validate user bo\n", r); -static void radeon_mn_free_notifier(struct mmu_notifier *mn) -{ - kfree(container_of(mn, struct radeon_mn, mn)); + radeon_bo_unreserve(bo); + return true; } -static const struct mmu_notifier_ops radeon_mn_ops = { - .release = radeon_mn_release, - .invalidate_range_start = radeon_mn_invalidate_range_start, - .alloc_notifier = radeon_mn_alloc_notifier, - .free_notifier = radeon_mn_free_notifier, +static const struct mmu_interval_notifier_ops radeon_mn_ops = { + .invalidate = radeon_mn_invalidate, }; /** @@ -174,51 +94,20 @@ static const struct mmu_notifier_ops radeon_mn_ops = { */ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) { - unsigned long end = addr + radeon_bo_size(bo) - 1; - struct mmu_notifier *mn; - struct radeon_mn *rmn; - struct radeon_mn_node *node = NULL; - struct list_head bos; - struct interval_tree_node *it; - - mn = mmu_notifier_get(&radeon_mn_ops, current->mm); - if (IS_ERR(mn)) - return PTR_ERR(mn); - rmn = container_of(mn, struct radeon_mn, mn); - - INIT_LIST_HEAD(&bos); - - mutex_lock(&rmn->lock); - - while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { - kfree(node); - node = container_of(it, struct radeon_mn_node, it); - interval_tree_remove(&node->it, &rmn->objects); - addr = min(it->start, addr); - end = max(it->last, end); - list_splice(&node->bos, &bos); - } - - if (!node) { - node = kmalloc(sizeof(struct radeon_mn_node), GFP_KERNEL); - if (!node) { - mutex_unlock(&rmn->lock); - return -ENOMEM; - } - } - - bo->mn = rmn; - - node->it.start = addr; - node->it.last = end; - INIT_LIST_HEAD(&node->bos); - list_splice(&bos, &node->bos); - list_add(&bo->mn_list, &node->bos); - - interval_tree_insert(&node->it, &rmn->objects); - - mutex_unlock(&rmn->lock); - + int ret; + + ret = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, + radeon_bo_size(bo), &radeon_mn_ops); + if (ret) + return ret; + + /* + * FIXME: radeon appears to allow get_user_pages to run during + * invalidate_range_start/end, which is not a safe way to read the + * PTEs. It should use the mmu_interval_read_begin() scheme around the + * get_user_pages to ensure that the PTEs are read properly + */ + mmu_interval_read_begin(&bo->notifier); return 0; } @@ -231,27 +120,8 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) */ void radeon_mn_unregister(struct radeon_bo *bo) { - struct radeon_mn *rmn = bo->mn; - struct list_head *head; - - if (!rmn) + if (!bo->notifier.mm) return; - - mutex_lock(&rmn->lock); - /* save the next list entry for later */ - head = bo->mn_list.next; - - list_del(&bo->mn_list); - - if (list_empty(head)) { - struct radeon_mn_node *node; - node = container_of(head, struct radeon_mn_node, bos); - interval_tree_remove(&node->it, &rmn->objects); - kfree(node); - } - - mutex_unlock(&rmn->lock); - - mmu_notifier_put(&rmn->mn); - bo->mn = NULL; + mmu_interval_notifier_remove(&bo->notifier); + bo->notifier.mm = NULL; } |