diff options
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/Kconfig | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 31 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 88 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/Kconfig | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_dmem.c | 456 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_dmem.h | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_drm.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_svm.c | 23 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_device.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_drv.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_mn.c | 156 |
15 files changed, 264 insertions, 540 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index f6e5c0282fc1..2e98c016cb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,7 +27,9 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - depends on HMM_MIRROR + depends on MMU + select HMM_MIRROR + select MMU_NOTIFIER help This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it isn't already selected to enabled full userptr support. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 48a2070e72f2..bdf849da32e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> #include <drm/drm_probe_helper.h> +#include <linux/mmu_notifier.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -1469,6 +1470,7 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + mmu_notifier_synchronize(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index f1f8cdd695d3..31d4deb5d294 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -195,13 +195,14 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, - const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, + const struct mmu_notifier_range *update) { struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); unsigned long start = update->start; unsigned long end = update->end; - bool blockable = update->blockable; + bool blockable = mmu_notifier_range_blockable(update); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ @@ -243,13 +244,14 @@ static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. */ -static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, - const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, + const struct mmu_notifier_range *update) { struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); unsigned long start = update->start; unsigned long end = update->end; - bool blockable = update->blockable; + bool blockable = mmu_notifier_range_blockable(update); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ @@ -482,6 +484,5 @@ void amdgpu_hmm_init_range(struct hmm_range *range) range->flags = hmm_range_flags; range->values = hmm_range_values; range->pfn_shift = PAGE_SHIFT; - INIT_LIST_HEAD(&range->list); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 13b144c8f67d..dff41d0a85fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -794,7 +794,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) struct hmm_range *range; unsigned long i; uint64_t *pfns; - int retry = 0; int r = 0; if (!mm) /* Happens during process shutdown */ @@ -835,10 +834,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 0 : range->flags[HMM_PFN_WRITE]; range->pfn_flags_mask = 0; range->pfns = pfns; - hmm_range_register(range, mirror, start, - start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); + range->start = start; + range->end = start + ttm->num_pages * PAGE_SIZE; + + hmm_range_register(range, mirror); -retry: /* * Just wait for range to be valid, safe to ignore return value as we * will use the return value of hmm_range_fault() below under the @@ -847,24 +847,12 @@ retry: hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); down_read(&mm->mmap_sem); - - r = hmm_range_fault(range, true); - if (unlikely(r < 0)) { - if (likely(r == -EAGAIN)) { - /* - * return -EAGAIN, mmap_sem is dropped - */ - if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) - goto retry; - else - pr_err("Retry hmm fault too many times\n"); - } - - goto out_up_read; - } - + r = hmm_range_fault(range, 0); up_read(&mm->mmap_sem); + if (unlikely(r < 0)) + goto out_free_pfns; + for (i = 0; i < ttm->num_pages; i++) { pages[i] = hmm_device_entry_to_page(range, pfns[i]); if (unlikely(!pages[i])) { @@ -880,9 +868,6 @@ retry: return 0; -out_up_read: - if (likely(r != -EAGAIN)) - up_read(&mm->mmap_sem); out_free_pfns: hmm_range_unregister(range); kvfree(pfns); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3bb75d11a662..c89326125d71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -687,9 +687,6 @@ struct kfd_process { /* We want to receive a notification when the mm_struct is destroyed */ struct mmu_notifier mmu_notifier; - /* Use for delayed freeing of kfd_process structure */ - struct rcu_head rcu; - unsigned int pasid; unsigned int doorbell_index; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 0c6ac043ae3c..40e3fc0c6942 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -62,8 +62,8 @@ static struct workqueue_struct *kfd_restore_wq; static struct kfd_process *find_process(const struct task_struct *thread); static void kfd_process_ref_release(struct kref *ref); -static struct kfd_process *create_process(const struct task_struct *thread, - struct file *filep); +static struct kfd_process *create_process(const struct task_struct *thread); +static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep); static void evict_process_worker(struct work_struct *work); static void restore_process_worker(struct work_struct *work); @@ -289,7 +289,15 @@ struct kfd_process *kfd_create_process(struct file *filep) if (process) { pr_debug("Process already found\n"); } else { - process = create_process(thread, filep); + process = create_process(thread); + if (IS_ERR(process)) + goto out; + + ret = kfd_process_init_cwsr_apu(process, filep); + if (ret) { + process = ERR_PTR(ret); + goto out; + } if (!procfs.kobj) goto out; @@ -478,11 +486,9 @@ static void kfd_process_ref_release(struct kref *ref) queue_work(kfd_process_wq, &p->release_work); } -static void kfd_process_destroy_delayed(struct rcu_head *rcu) +static void kfd_process_free_notifier(struct mmu_notifier *mn) { - struct kfd_process *p = container_of(rcu, struct kfd_process, rcu); - - kfd_unref_process(p); + kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } static void kfd_process_notifier_release(struct mmu_notifier *mn, @@ -534,12 +540,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_unlock(&p->mutex); - mmu_notifier_unregister_no_release(&p->mmu_notifier, mm); - mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); + mmu_notifier_put(&p->mmu_notifier); } static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, + .free_notifier = kfd_process_free_notifier, }; static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) @@ -609,81 +615,69 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) return 0; } -static struct kfd_process *create_process(const struct task_struct *thread, - struct file *filep) +/* + * On return the kfd_process is fully operational and will be freed when the + * mm is released + */ +static struct kfd_process *create_process(const struct task_struct *thread) { struct kfd_process *process; int err = -ENOMEM; process = kzalloc(sizeof(*process), GFP_KERNEL); - if (!process) goto err_alloc_process; - process->pasid = kfd_pasid_alloc(); - if (process->pasid == 0) - goto err_alloc_pasid; - - if (kfd_alloc_process_doorbells(process) < 0) - goto err_alloc_doorbells; - kref_init(&process->ref); - mutex_init(&process->mutex); - process->mm = thread->mm; - - /* register notifier */ - process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; - err = mmu_notifier_register(&process->mmu_notifier, process->mm); - if (err) - goto err_mmu_notifier; - - hash_add_rcu(kfd_processes_table, &process->kfd_processes, - (uintptr_t)process->mm); - process->lead_thread = thread->group_leader; - get_task_struct(process->lead_thread); - INIT_LIST_HEAD(&process->per_device_data); - + INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); + INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); + process->last_restore_timestamp = get_jiffies_64(); kfd_event_init_process(process); + process->is_32bit_user_mode = in_compat_syscall(); + + process->pasid = kfd_pasid_alloc(); + if (process->pasid == 0) + goto err_alloc_pasid; + + if (kfd_alloc_process_doorbells(process) < 0) + goto err_alloc_doorbells; err = pqm_init(&process->pqm, process); if (err != 0) goto err_process_pqm_init; /* init process apertures*/ - process->is_32bit_user_mode = in_compat_syscall(); err = kfd_init_apertures(process); if (err != 0) goto err_init_apertures; - INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); - INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); - process->last_restore_timestamp = get_jiffies_64(); - - err = kfd_process_init_cwsr_apu(process, filep); + /* Must be last, have to use release destruction after this */ + process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; + err = mmu_notifier_register(&process->mmu_notifier, process->mm); if (err) - goto err_init_cwsr; + goto err_register_notifier; + + get_task_struct(process->lead_thread); + hash_add_rcu(kfd_processes_table, &process->kfd_processes, + (uintptr_t)process->mm); return process; -err_init_cwsr: +err_register_notifier: kfd_process_free_outstanding_kfd_bos(process); kfd_process_destroy_pdds(process); err_init_apertures: pqm_uninit(&process->pqm); err_process_pqm_init: - hash_del_rcu(&process->kfd_processes); - synchronize_rcu(); - mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); -err_mmu_notifier: - mutex_destroy(&process->mutex); kfd_free_process_doorbells(process); err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: + mutex_destroy(&process->mutex); kfree(process); err_alloc_process: return ERR_PTR(err); diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 96b9814e6d06..3558df043592 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -86,9 +86,10 @@ config DRM_NOUVEAU_SVM bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support" depends on DEVICE_PRIVATE depends on DRM_NOUVEAU - depends on HMM_MIRROR + depends on MMU depends on STAGING - select MIGRATE_VMA_HELPER + select HMM_MIRROR + select MMU_NOTIFIER default n help Say Y here if you want to enable experimental support for diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 1333220787a1..fa1439941596 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -44,8 +44,6 @@ #define DMEM_CHUNK_SIZE (2UL << 20) #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) -struct nouveau_migrate; - enum nouveau_aper { NOUVEAU_APER_VIRT, NOUVEAU_APER_VRAM, @@ -86,21 +84,13 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page) return container_of(page->pgmap, struct nouveau_dmem, pagemap); } -struct nouveau_dmem_fault { - struct nouveau_drm *drm; - struct nouveau_fence *fence; - dma_addr_t *dma; - unsigned long npages; -}; +static unsigned long nouveau_dmem_page_addr(struct page *page) +{ + struct nouveau_dmem_chunk *chunk = page->zone_device_data; + unsigned long idx = page_to_pfn(page) - chunk->pfn_first; -struct nouveau_migrate { - struct vm_area_struct *vma; - struct nouveau_drm *drm; - struct nouveau_fence *fence; - unsigned long npages; - dma_addr_t *dma; - unsigned long dma_nr; -}; + return (idx << PAGE_SHIFT) + chunk->bo->bo.offset; +} static void nouveau_dmem_page_free(struct page *page) { @@ -125,165 +115,90 @@ static void nouveau_dmem_page_free(struct page *page) spin_unlock(&chunk->lock); } -static void -nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { - struct nouveau_dmem_fault *fault = private; - struct nouveau_drm *drm = fault->drm; - struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr); - if (!dpage) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - continue; - } - lock_page(dpage); - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED; - npages++; - } - - /* Allocate storage for DMA addresses, so we can unmap later. */ - fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL); - if (!fault->dma) - goto error; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; - struct page *spage, *dpage; - u64 src_addr, dst_addr; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - - fault->dma[fault->npages] = - dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, fault->dma[fault->npages])) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - - dst_addr = fault->dma[fault->npages++]; - - chunk = spage->zone_device_data; - src_addr = page_to_pfn(spage) - chunk->pfn_first; - src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - - ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr, - NOUVEAU_APER_VRAM, src_addr); - if (ret) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } + if (fence) { + nouveau_fence_wait(*fence, true, false); + nouveau_fence_unref(fence); + } else { + /* + * FIXME wait for channel to be IDLE before calling finalizing + * the hmem object. + */ } +} - nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence); - - return; - -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; +static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm, + struct vm_fault *vmf, struct migrate_vma *args, + dma_addr_t *dma_addr) +{ + struct device *dev = drm->dev->dev; + struct page *dpage, *spage; - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; + spage = migrate_pfn_to_page(args->src[0]); + if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE)) + return 0; - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; + dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address); + if (!dpage) + return VM_FAULT_SIGBUS; + lock_page(dpage); - __free_page(page); - } -} + *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto error_free_page; -void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) -{ - struct nouveau_dmem_fault *fault = private; - struct nouveau_drm *drm = fault->drm; + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr, + NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) + goto error_dma_unmap; - if (fault->fence) { - nouveau_fence_wait(fault->fence, true, false); - nouveau_fence_unref(&fault->fence); - } else { - /* - * FIXME wait for channel to be IDLE before calling finalizing - * the hmem object below (nouveau_migrate_hmem_fini()). - */ - } + args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + return 0; - while (fault->npages--) { - dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - } - kfree(fault->dma); +error_dma_unmap: + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +error_free_page: + __free_page(dpage); + return VM_FAULT_SIGBUS; } -static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { - .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy, - .finalize_and_map = nouveau_dmem_fault_finalize_and_map, -}; - static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) { struct nouveau_dmem *dmem = page_to_dmem(vmf->page); - unsigned long src[1] = {0}, dst[1] = {0}; - struct nouveau_dmem_fault fault = { .drm = dmem->drm }; - int ret; + struct nouveau_drm *drm = dmem->drm; + struct nouveau_fence *fence; + unsigned long src = 0, dst = 0; + dma_addr_t dma_addr = 0; + vm_fault_t ret; + struct migrate_vma args = { + .vma = vmf->vma, + .start = vmf->address, + .end = vmf->address + PAGE_SIZE, + .src = &src, + .dst = &dst, + }; /* * FIXME what we really want is to find some heuristic to migrate more * than just one page on CPU fault. When such fault happens it is very * likely that more surrounding page will CPU fault too. */ - ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma, - vmf->address, vmf->address + PAGE_SIZE, - src, dst, &fault); - if (ret) + if (migrate_vma_setup(&args) < 0) return VM_FAULT_SIGBUS; + if (!args.cpages) + return 0; - if (dst[0] == MIGRATE_PFN_ERROR) - return VM_FAULT_SIGBUS; + ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr); + if (ret || dst == 0) + goto done; - return 0; + nouveau_fence_new(dmem->migrate.chan, false, &fence); + migrate_vma_pages(&args); + nouveau_dmem_fence_done(&fence); + dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +done: + migrate_vma_finalize(&args); + return ret; } static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { @@ -642,188 +557,115 @@ out_free: drm->dmem = NULL; } -static void -nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, + unsigned long src, dma_addr_t *dma_addr) { - struct nouveau_migrate *migrate = private; - struct nouveau_drm *drm = migrate->drm; struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = nouveau_dmem_page_alloc_locked(drm); - if (!dpage) - continue; - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED | - MIGRATE_PFN_DEVICE; - npages++; - } - - if (!npages) - return; - - /* Allocate storage for DMA addresses, so we can unmap later. */ - migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); - if (!migrate->dma) - goto error; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; - struct page *spage, *dpage; - u64 src_addr, dst_addr; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - chunk = dpage->zone_device_data; - dst_addr = page_to_pfn(dpage) - chunk->pfn_first; - dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - - migrate->dma[migrate->dma_nr] = - dma_map_page_attrs(dev, spage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - - src_addr = migrate->dma[migrate->dma_nr++]; + struct page *dpage, *spage; - ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, - NOUVEAU_APER_HOST, src_addr); - if (ret) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - } - - nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence); + spage = migrate_pfn_to_page(src); + if (!spage || !(src & MIGRATE_PFN_MIGRATE)) + goto out; - return; + dpage = nouveau_dmem_page_alloc_locked(drm); + if (!dpage) + return 0; -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; + *dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto out_free_page; - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM, + nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST, + *dma_addr)) + goto out_dma_unmap; - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; + return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; - __free_page(page); - } +out_dma_unmap: + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +out_free_page: + nouveau_dmem_page_free_locked(drm, dpage); +out: + return 0; } -void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, + struct migrate_vma *args, dma_addr_t *dma_addrs) { - struct nouveau_migrate *migrate = private; - struct nouveau_drm *drm = migrate->drm; - - if (migrate->fence) { - nouveau_fence_wait(migrate->fence, true, false); - nouveau_fence_unref(&migrate->fence); - } else { - /* - * FIXME wait for channel to be IDLE before finalizing - * the hmem object below (nouveau_migrate_hmem_fini()) ? - */ + struct nouveau_fence *fence; + unsigned long addr = args->start, nr_dma = 0, i; + + for (i = 0; addr < args->end; i++) { + args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i], + dma_addrs + nr_dma); + if (args->dst[i]) + nr_dma++; + addr += PAGE_SIZE; } - while (migrate->dma_nr--) { - dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - } - kfree(migrate->dma); + nouveau_fence_new(drm->dmem->migrate.chan, false, &fence); + migrate_vma_pages(args); + nouveau_dmem_fence_done(&fence); + while (nr_dma--) { + dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE, + DMA_BIDIRECTIONAL); + } /* - * FIXME optimization: update GPU page table to point to newly - * migrated memory. + * FIXME optimization: update GPU page table to point to newly migrated + * memory. */ + migrate_vma_finalize(args); } -static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { - .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, - .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, -}; - int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long *src_pfns, *dst_pfns, npages; - struct nouveau_migrate migrate = {0}; - unsigned long i, c, max; - int ret = 0; - - npages = (end - start) >> PAGE_SHIFT; - max = min(SG_MAX_SINGLE_ALLOC, npages); - src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (src_pfns == NULL) - return -ENOMEM; - dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (dst_pfns == NULL) { - kfree(src_pfns); - return -ENOMEM; - } + unsigned long npages = (end - start) >> PAGE_SHIFT; + unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); + dma_addr_t *dma_addrs; + struct migrate_vma args = { + .vma = vma, + .start = start, + }; + unsigned long c, i; + int ret = -ENOMEM; + + args.src = kcalloc(max, sizeof(args.src), GFP_KERNEL); + if (!args.src) + goto out; + args.dst = kcalloc(max, sizeof(args.dst), GFP_KERNEL); + if (!args.dst) + goto out_free_src; - migrate.drm = drm; - migrate.vma = vma; - migrate.npages = npages; - for (i = 0; i < npages; i += c) { - unsigned long next; + dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL); + if (!dma_addrs) + goto out_free_dst; + for (i = 0; i < npages; i += c) { c = min(SG_MAX_SINGLE_ALLOC, npages); - next = start + (c << PAGE_SHIFT); - ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, - next, src_pfns, dst_pfns, &migrate); + args.end = start + (c << PAGE_SHIFT); + ret = migrate_vma_setup(&args); if (ret) - goto out; - start = next; + goto out_free_dma; + + if (args.cpages) + nouveau_dmem_migrate_chunk(drm, &args, dma_addrs); + args.start = args.end; } + ret = 0; +out_free_dma: + kfree(dma_addrs); +out_free_dst: + kfree(args.dst); +out_free_src: + kfree(args.src); out: - kfree(dst_pfns); - kfree(src_pfns); return ret; } @@ -841,11 +683,10 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, npages = (range->end - range->start) >> PAGE_SHIFT; for (i = 0; i < npages; ++i) { - struct nouveau_dmem_chunk *chunk; struct page *page; uint64_t addr; - page = hmm_pfn_to_page(range, range->pfns[i]); + page = hmm_device_entry_to_page(range, range->pfns[i]); if (page == NULL) continue; @@ -859,10 +700,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, continue; } - chunk = page->zone_device_data; - addr = page_to_pfn(page) - chunk->pfn_first; - addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; - + addr = nouveau_dmem_page_addr(page); range->pfns[i] &= ((1UL << range->pfn_shift) - 1); range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h index 9d97d756fb7d..92394be5d649 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.h +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h @@ -45,16 +45,5 @@ static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} static inline void nouveau_dmem_suspend(struct nouveau_drm *drm) {} static inline void nouveau_dmem_resume(struct nouveau_drm *drm) {} - -static inline int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, - struct hmm_range *range) {} #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index bdc948352467..2cd83849600f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -28,6 +28,7 @@ #include <linux/pci.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> +#include <linux/mmu_notifier.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_ioctl.h> @@ -1290,6 +1291,8 @@ nouveau_drm_exit(void) #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_unregister(&nouveau_platform_driver); #endif + if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)) + mmu_notifier_synchronize(); } module_init(nouveau_drm_init); diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index a835cebb6d90..668d4bd0c118 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -252,13 +252,13 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) static int nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, - const struct hmm_update *update) + const struct mmu_notifier_range *update) { struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror); unsigned long start = update->start; unsigned long limit = update->end; - if (!update->blockable) + if (!mmu_notifier_range_blockable(update)) return -EAGAIN; SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit); @@ -485,31 +485,29 @@ nouveau_range_done(struct hmm_range *range) } static int -nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) +nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) { long ret; range->default_flags = 0; range->pfn_flags_mask = -1UL; - ret = hmm_range_register(range, mirror, - range->start, range->end, - PAGE_SHIFT); + ret = hmm_range_register(range, &svmm->mirror); if (ret) { - up_read(&range->vma->vm_mm->mmap_sem); + up_read(&svmm->mm->mmap_sem); return (int)ret; } if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { - up_read(&range->vma->vm_mm->mmap_sem); - return -EAGAIN; + up_read(&svmm->mm->mmap_sem); + return -EBUSY; } - ret = hmm_range_fault(range, true); + ret = hmm_range_fault(range, 0); if (ret <= 0) { if (ret == 0) ret = -EBUSY; - up_read(&range->vma->vm_mm->mmap_sem); + up_read(&svmm->mm->mmap_sem); hmm_range_unregister(range); return ret; } @@ -682,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr + args.i.p.size, fn - fi); /* Have HMM fault pages within the fault window to the GPU. */ - range.vma = vma; range.start = args.i.p.addr; range.end = args.i.p.addr + args.i.p.size; range.pfns = args.phys; @@ -690,7 +687,7 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = nouveau_range_fault(&svmm->mirror, &range); + ret = nouveau_range_fault(svmm, &range); if (ret == 0) { mutex_lock(&svmm->mutex); if (!nouveau_range_done(&range)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 05b88491ccb9..d59b004f6695 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2449,9 +2449,6 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; - - struct mutex mn_lock; - DECLARE_HASHTABLE(mn_hash, 7); }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 88eb7cb522bb..5d017f0aec66 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1325,8 +1325,6 @@ int radeon_device_init(struct radeon_device *rdev, init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); - mutex_init(&rdev->mn_lock); - hash_init(rdev->mn_hash); r = radeon_gem_init(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 5838162f687f..431e6b64b77d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -35,6 +35,7 @@ #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> +#include <linux/mmu_notifier.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> @@ -623,6 +624,7 @@ static void __exit radeon_exit(void) { pci_unregister_driver(pdriver); radeon_unregister_atpx_handler(); + mmu_notifier_synchronize(); } module_init(radeon_init); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 6902f998ede9..dbab9a3a969b 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -37,17 +37,8 @@ #include "radeon.h" struct radeon_mn { - /* constant after initialisation */ - struct radeon_device *rdev; - struct mm_struct *mm; struct mmu_notifier mn; - /* only used on destruction */ - struct work_struct work; - - /* protected by rdev->mn_lock */ - struct hlist_node node; - /* objects protected by lock */ struct mutex lock; struct rb_root_cached objects; @@ -59,55 +50,6 @@ struct radeon_mn_node { }; /** - * radeon_mn_destroy - destroy the rmn - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void radeon_mn_destroy(struct work_struct *work) -{ - struct radeon_mn *rmn = container_of(work, struct radeon_mn, work); - struct radeon_device *rdev = rmn->rdev; - struct radeon_mn_node *node, *next_node; - struct radeon_bo *bo, *next_bo; - - mutex_lock(&rdev->mn_lock); - mutex_lock(&rmn->lock); - hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &rmn->objects.rb_root, it.rb) { - - interval_tree_remove(&node->it, &rmn->objects); - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - mutex_unlock(&rmn->lock); - mutex_unlock(&rdev->mn_lock); - mmu_notifier_unregister(&rmn->mn, rmn->mm); - kfree(rmn); -} - -/** - * radeon_mn_release - callback to notify about mm destruction - * - * @mn: our notifier - * @mn: the mm this callback is about - * - * Shedule a work item to lazy destroy our notifier. - */ -static void radeon_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); - INIT_WORK(&rmn->work, radeon_mn_destroy); - schedule_work(&rmn->work); -} - -/** * radeon_mn_invalidate_range_start - callback to notify about mm change * * @mn: our notifier @@ -183,65 +125,44 @@ out_unlock: return ret; } -static const struct mmu_notifier_ops radeon_mn_ops = { - .release = radeon_mn_release, - .invalidate_range_start = radeon_mn_invalidate_range_start, -}; +static void radeon_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct mmu_notifier_range range = { + .mm = mm, + .start = 0, + .end = ULONG_MAX, + .flags = 0, + .event = MMU_NOTIFY_UNMAP, + }; + + radeon_mn_invalidate_range_start(mn, &range); +} -/** - * radeon_mn_get - create notifier context - * - * @rdev: radeon device pointer - * - * Creates a notifier context for current->mm. - */ -static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) +static struct mmu_notifier *radeon_mn_alloc_notifier(struct mm_struct *mm) { - struct mm_struct *mm = current->mm; struct radeon_mn *rmn; - int r; - - if (down_write_killable(&mm->mmap_sem)) - return ERR_PTR(-EINTR); - - mutex_lock(&rdev->mn_lock); - - hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) - goto release_locks; rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); - if (!rmn) { - rmn = ERR_PTR(-ENOMEM); - goto release_locks; - } + if (!rmn) + return ERR_PTR(-ENOMEM); - rmn->rdev = rdev; - rmn->mm = mm; - rmn->mn.ops = &radeon_mn_ops; mutex_init(&rmn->lock); rmn->objects = RB_ROOT_CACHED; - - r = __mmu_notifier_register(&rmn->mn, mm); - if (r) - goto free_rmn; - - hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm); - -release_locks: - mutex_unlock(&rdev->mn_lock); - up_write(&mm->mmap_sem); - - return rmn; - -free_rmn: - mutex_unlock(&rdev->mn_lock); - up_write(&mm->mmap_sem); - kfree(rmn); + return &rmn->mn; +} - return ERR_PTR(r); +static void radeon_mn_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct radeon_mn, mn)); } +static const struct mmu_notifier_ops radeon_mn_ops = { + .release = radeon_mn_release, + .invalidate_range_start = radeon_mn_invalidate_range_start, + .alloc_notifier = radeon_mn_alloc_notifier, + .free_notifier = radeon_mn_free_notifier, +}; + /** * radeon_mn_register - register a BO for notifier updates * @@ -254,15 +175,16 @@ free_rmn: int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) { unsigned long end = addr + radeon_bo_size(bo) - 1; - struct radeon_device *rdev = bo->rdev; + struct mmu_notifier *mn; struct radeon_mn *rmn; struct radeon_mn_node *node = NULL; struct list_head bos; struct interval_tree_node *it; - rmn = radeon_mn_get(rdev); - if (IS_ERR(rmn)) - return PTR_ERR(rmn); + mn = mmu_notifier_get(&radeon_mn_ops, current->mm); + if (IS_ERR(mn)) + return PTR_ERR(mn); + rmn = container_of(mn, struct radeon_mn, mn); INIT_LIST_HEAD(&bos); @@ -309,22 +231,16 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) */ void radeon_mn_unregister(struct radeon_bo *bo) { - struct radeon_device *rdev = bo->rdev; - struct radeon_mn *rmn; + struct radeon_mn *rmn = bo->mn; struct list_head *head; - mutex_lock(&rdev->mn_lock); - rmn = bo->mn; - if (rmn == NULL) { - mutex_unlock(&rdev->mn_lock); + if (!rmn) return; - } mutex_lock(&rmn->lock); /* save the next list entry for later */ head = bo->mn_list.next; - bo->mn = NULL; list_del(&bo->mn_list); if (list_empty(head)) { @@ -335,5 +251,7 @@ void radeon_mn_unregister(struct radeon_bo *bo) } mutex_unlock(&rmn->lock); - mutex_unlock(&rdev->mn_lock); + + mmu_notifier_put(&rmn->mn); + bo->mn = NULL; } |