Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c'):

 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 314 +-
 1 file changed, 156 insertions(+), 158 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c52d1f9fe0f..5c05644b9b96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -29,20 +29,26 @@
  * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  * Dave Airlie
  */
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/hmm.h>
+#include <linux/pagemap.h>
+#include <linux/sched/task.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/swiotlb.h>
+
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_module.h>
 #include <drm/ttm/ttm_page_alloc.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
 #include <drm/amdgpu_drm.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/debugfs.h>
-#include <linux/iommu.h>
+
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -703,143 +709,183 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
  * TTM backend functions.
  */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
 	uint64_t		userptr;
 	struct task_struct	*usertask;
 	uint32_t		userflags;
-	spinlock_t		guptasklock;
-	struct list_head	guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+	struct hmm_range	*range;
+#endif
 };
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
  *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
  */
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+#define MAX_RETRY_HMM_RANGE_FAULT	16
+
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 {
+	struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
+	struct ttm_tt *ttm = bo->tbo.ttm;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
-	unsigned int flags = 0;
-	unsigned pinned = 0;
-	int r;
+	unsigned long start = gtt->userptr;
+	struct vm_area_struct *vma;
+	struct hmm_range *range;
+	unsigned long i;
+	uint64_t *pfns;
+	int retry = 0;
+	int r = 0;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-		flags |= FOLL_WRITE;
+	if (unlikely(!mirror)) {
+		DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
+		r = -EFAULT;
+		goto out;
+	}
 
-	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, start);
+	if (unlikely(!vma || start < vma->vm_start)) {
+		r = -EFAULT;
+		goto out;
+	}
+	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+		vma->vm_file)) {
+		r = -EPERM;
+		goto out;
+	}
 
-	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/*
-		 * check that we only use anonymous memory to prevent problems
-		 * with writeback
-		 */
-		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-		struct vm_area_struct *vma;
+	range = kzalloc(sizeof(*range), GFP_KERNEL);
+	if (unlikely(!range)) {
+		r = -ENOMEM;
+		goto out;
+	}
 
-		vma = find_vma(mm, gtt->userptr);
-		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&mm->mmap_sem);
-			return -EPERM;
-		}
+	pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+	if (unlikely(!pfns)) {
+		r = -ENOMEM;
+		goto out_free_ranges;
 	}
 
-	/* loop enough times using contiguous pages of memory */
-	do {
-		unsigned num_pages = ttm->num_pages - pinned;
-		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **p = pages + pinned;
-		struct amdgpu_ttm_gup_task_list guptask;
+	amdgpu_hmm_init_range(range);
+	range->default_flags = range->flags[HMM_PFN_VALID];
+	range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
+				0 : range->flags[HMM_PFN_WRITE];
+	range->pfn_flags_mask = 0;
+	range->pfns = pfns;
+	hmm_range_register(range, mirror, start,
+			   start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
+
+retry:
+	/*
+	 * Just wait for range to be valid, safe to ignore return value as we
+	 * will use the return value of hmm_range_fault() below under the
+	 * mmap_sem to ascertain the validity of the range.
+	 */
+	hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+
+	down_read(&mm->mmap_sem);
 
-		guptask.task = current;
-		spin_lock(&gtt->guptasklock);
-		list_add(&guptask.list, &gtt->guptasks);
-		spin_unlock(&gtt->guptasklock);
+	r = hmm_range_fault(range, true);
+	if (unlikely(r < 0)) {
+		if (likely(r == -EAGAIN)) {
+			/*
+			 * return -EAGAIN, mmap_sem is dropped
+			 */
+			if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
+				goto retry;
+			else
+				pr_err("Retry hmm fault too many times\n");
+		}
 
-		if (mm == current->mm)
-			r = get_user_pages(userptr, num_pages, flags, p, NULL);
-		else
-			r = get_user_pages_remote(gtt->usertask,
-					mm, userptr, num_pages,
-					flags, p, NULL, NULL);
+		goto out_up_read;
+	}
 
-		spin_lock(&gtt->guptasklock);
-		list_del(&guptask.list);
-		spin_unlock(&gtt->guptasklock);
+	up_read(&mm->mmap_sem);
 
-		if (r < 0)
-			goto release_pages;
+	for (i = 0; i < ttm->num_pages; i++) {
+		pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+		if (unlikely(!pages[i])) {
+			pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
+			       i, pfns[i]);
+			r = -ENOMEM;
 
-		pinned += r;
+			goto out_free_pfns;
+		}
+	}
 
-	} while (pinned < ttm->num_pages);
+	gtt->range = range;
 
-	up_read(&mm->mmap_sem);
 	return 0;
 
-release_pages:
-	release_pages(pages, pinned);
-	up_read(&mm->mmap_sem);
+out_up_read:
+	if (likely(r != -EAGAIN))
+		up_read(&mm->mmap_sem);
+out_free_pfns:
+	hmm_range_unregister(range);
+	kvfree(pfns);
+out_free_ranges:
+	kfree(range);
+out:
 	return r;
 }
 
 /**
- * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
  *
- * Called by amdgpu_cs_list_validate(). This creates the page list
- * that backs user memory and will ultimately be mapped into the device
- * address space.
+ * Returns: true if pages are still valid
  */
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned i;
+	bool r = false;
+
+	if (!gtt || !gtt->userptr)
+		return false;
 
-	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
-	for (i = 0; i < ttm->num_pages; ++i) {
-		if (ttm->pages[i])
-			put_page(ttm->pages[i]);
+	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
+		gtt->userptr, ttm->num_pages);
 
-		ttm->pages[i] = pages ? pages[i] : NULL;
+	WARN_ONCE(!gtt->range || !gtt->range->pfns,
+		"No user pages to check\n");
+
+	if (gtt->range) {
+		r = hmm_range_valid(gtt->range);
+		hmm_range_unregister(gtt->range);
+
+		kvfree(gtt->range->pfns);
+		kfree(gtt->range);
+		gtt->range = NULL;
 	}
+
+	return r;
 }
+#endif
 
 /**
- * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
  *
- * Called while unpinning userptr pages
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
  */
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned i;
-
-	for (i = 0; i < ttm->num_pages; ++i) {
-		struct page *page = ttm->pages[i];
+	unsigned long i;
 
-		if (!page)
-			continue;
-
-		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-			set_page_dirty(page);
-
-		mark_page_accessed(page);
-	}
+	for (i = 0; i < ttm->num_pages; ++i)
+		ttm->pages[i] = pages ? pages[i] : NULL;
 }
 
 /**
@@ -901,10 +947,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-	/* mark the pages as dirty */
-	amdgpu_ttm_tt_mark_user_pages(ttm);
-
 	sg_free_table(ttm->sg);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+	if (gtt->range &&
+	    ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
+						      gtt->range->pfns[0]))
+		WARN_ONCE(1, "Missing get_user_page_done\n");
+#endif
 }
 
 int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -925,8 +975,8 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 			goto gart_bind_fail;
 
 		/* Patch mtype of the second part BO */
-		flags &= ~AMDGPU_PTE_MTYPE_MASK;
-		flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
 		r = amdgpu_gart_bind(adev,
 				gtt->offset + (page_idx << PAGE_SHIFT),
@@ -1254,11 +1304,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 	gtt->usertask = current->group_leader;
 	get_task_struct(gtt->usertask);
 
-	spin_lock_init(&gtt->guptasklock);
-	INIT_LIST_HEAD(&gtt->guptasks);
-	atomic_set(&gtt->mmu_invalidations, 0);
-	gtt->last_set_pages = 0;
-
 	return 0;
 }
 
@@ -1287,7 +1332,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 			      unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
 	if (gtt == NULL || !gtt->userptr)
@@ -1300,48 +1344,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
-	/* Search the lists of tasks that hold this mapping and see
-	 * if current is one of them.  If it is return false.
-	 */
-	spin_lock(&gtt->guptasklock);
-	list_for_each_entry(entry, &gtt->guptasks, list) {
-		if (entry->task == current) {
-			spin_unlock(&gtt->guptasklock);
-			return false;
-		}
-	}
-	spin_unlock(&gtt->guptasklock);
-
-	atomic_inc(&gtt->mmu_invalidations);
-
 	return true;
 }
 
 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
- */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	int prev_invalidated = *last_invalidated;
-
-	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
-	return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
  */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
-	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+	return true;
 }
 
 /**
@@ -1753,44 +1769,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 
 	/* Initialize various on-chip memory pools */
 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
-			   adev->gds.mem.total_size);
+			   adev->gds.gds_size);
 	if (r) {
 		DRM_ERROR("Failed initializing GDS heap.\n");
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
-				    4, AMDGPU_GEM_DOMAIN_GDS,
-				    &adev->gds.gds_gfx_bo, NULL, NULL);
-	if (r)
-		return r;
-
 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
-			   adev->gds.gws.total_size);
+			   adev->gds.gws_size);
 	if (r) {
 		DRM_ERROR("Failed initializing gws heap.\n");
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
-				    1, AMDGPU_GEM_DOMAIN_GWS,
-				    &adev->gds.gws_gfx_bo, NULL, NULL);
-	if (r)
-		return r;
-
 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
-			   adev->gds.oa.total_size);
+			   adev->gds.oa_size);
 	if (r) {
 		DRM_ERROR("Failed initializing oa heap.\n");
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
-				    1, AMDGPU_GEM_DOMAIN_OA,
-				    &adev->gds.oa_gfx_bo, NULL, NULL);
-	if (r)
-		return r;
-
 	/* Register debugfs entries for amdgpu_ttm */
 	r = amdgpu_ttm_debugfs_init(adev);
 	if (r) {
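A detail of the hmm_range setup in the new amdgpu_ttm_tt_get_user_pages() that is easy to miss: because pfn_flags_mask is set to zero, the per-pfn input values in range->pfns are ignored and every entry takes default_flags, so the whole range is requested as valid (and writable unless the userptr is read-only). A minimal sketch of just that setup, assuming the hmm.h interfaces used by this series; init_range_flags() is a hypothetical helper, not amdgpu code:

#include <linux/hmm.h>

/* Hypothetical helper mirroring the default_flags/pfn_flags_mask setup
 * in amdgpu_ttm_tt_get_user_pages() above. */
static void init_range_flags(struct hmm_range *range, bool readonly)
{
	/* Request the whole range valid, and writable when allowed. */
	range->default_flags = range->flags[HMM_PFN_VALID];
	if (!readonly)
		range->default_flags |= range->flags[HMM_PFN_WRITE];

	/* Zero mask: ignore per-pfn input flags, apply default_flags
	 * to every entry in range->pfns. */
	range->pfn_flags_mask = 0;
}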
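The heart of the change is the register/wait/fault sequence with its -EAGAIN retry loop. Below is a condensed sketch of that control flow, under the assumption of the hmm.h API this series targets (hmm_range_fault() still takes a block argument and returns with mmap_sem already dropped on -EAGAIN); fault_user_range() and MAX_RETRY are illustrative names, not amdgpu functions:

#include <linux/hmm.h>
#include <linux/mm.h>

#define MAX_RETRY	16	/* plays the role of MAX_RETRY_HMM_RANGE_FAULT */

/* Illustrative only: the register/wait/fault/retry skeleton used by
 * amdgpu_ttm_tt_get_user_pages() above, with the amdgpu-specific
 * bookkeeping stripped out. */
static long fault_user_range(struct hmm_mirror *mirror,
			     struct hmm_range *range,
			     struct mm_struct *mm,
			     unsigned long start, unsigned long npages)
{
	int retry = 0;
	long r;

	hmm_range_register(range, mirror, start,
			   start + (npages << PAGE_SHIFT), PAGE_SHIFT);

retry:
	/* Advisory wait; validity is really decided by hmm_range_fault()
	 * below, which runs under mmap_sem. */
	hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);

	down_read(&mm->mmap_sem);
	r = hmm_range_fault(range, true);
	if (r < 0) {
		if (r == -EAGAIN) {
			/* A concurrent CPU invalidation raced with us and
			 * mmap_sem was already dropped; just loop again. */
			if (retry++ < MAX_RETRY)
				goto retry;
		} else {
			up_read(&mm->mmap_sem);
		}
		hmm_range_unregister(range);
		return r;
	}
	up_read(&mm->mmap_sem);

	return 0;	/* caller translates range->pfns[] into pages */
}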
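Because amdgpu_ttm_tt_get_user_pages() leaves the HMM range registered, every successful call must be paired with exactly one amdgpu_ttm_tt_get_user_pages_done(), which both stops the tracking and reports whether the CPU page tables changed in between; the WARN_ONCE added to amdgpu_ttm_tt_unpin_userptr() polices exactly this pairing. A hedged sketch of the calling pattern; userptr_populate() and setup_and_bind() are stand-ins, not amdgpu functions:

#include "amdgpu.h"		/* struct amdgpu_bo, userptr prototypes */

/* Hypothetical stand-in for the real validate/DMA-map/bind step. */
static int setup_and_bind(struct amdgpu_bo *bo, struct page **pages);

/* Illustrative caller: pair get_user_pages with get_user_pages_done. */
static int userptr_populate(struct amdgpu_bo *bo)
{
	struct page **pages = bo->tbo.ttm->pages;
	int r;

	/* Faults the pages in and starts HMM tracking of the range. */
	r = amdgpu_ttm_tt_get_user_pages(bo, pages);
	if (r)
		return r;

	r = setup_and_bind(bo, pages);

	/* Stops tracking. Returns false if the range was invalidated
	 * while we worked, in which case the whole sequence has to be
	 * restarted from the top. */
	if (!amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm))
		r = -EAGAIN;

	return r;
}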