diff options
author | Tony Lindgren <tony@atomide.com> | 2016-03-30 20:36:06 +0300 |
---|---|---|
committer | Tony Lindgren <tony@atomide.com> | 2016-03-30 20:36:06 +0300 |
commit | 1809de7e7d37c585e01a1bcc583ea92b78fc759d (patch) | |
tree | 76c5b35c2b04eafce86a1a729c02ab705eba44bc /drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |
parent | ebf24414809200915b9ddf7f109bba7c278c8210 (diff) | |
parent | 3ca4a238106dedc285193ee47f494a6584b6fd2f (diff) | |
download | linux-1809de7e7d37c585e01a1bcc583ea92b78fc759d.tar.xz |
Merge tag 'for-v4.6-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v4.6/fixes
ARM: OMAP2+: first hwmod fix for v4.6-rc
Fix a longstanding bug in the hwmod code that could cause
hardware SYSCONFIG register values to not match the kernel's
idea of what they should be, and that could result in lower
performance during IP block idle entry.
Basic build, boot, and PM test logs are available here:
http://www.pwsan.com/omap/testlogs/omap-hwmod-fixes-a-for-v4.6-rc/20160326231727/
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 179 |
1 files changed, 125 insertions, 54 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8a1752ff3d8e..ab34190859a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -77,6 +77,8 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; int r; adev->mman.mem_global_referenced = false; @@ -106,13 +108,27 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) return r; } + ring = adev->mman.buffer_funcs_ring; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; + r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up TTM BO move run queue.\n"); + drm_global_item_unref(&adev->mman.mem_global_ref); + drm_global_item_unref(&adev->mman.bo_global_ref.ref); + return r; + } + adev->mman.mem_global_referenced = true; + return 0; } static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { + amd_sched_entity_fini(adev->mman.entity.sched, + &adev->mman.entity); drm_global_item_unref(&adev->mman.bo_global_ref.ref); drm_global_item_unref(&adev->mman.mem_global_ref); adev->mman.mem_global_referenced = false; @@ -478,32 +494,32 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re /* * TTM backend functions. 
*/ +struct amdgpu_ttm_gup_task_list { + struct list_head list; + struct task_struct *task; +}; + struct amdgpu_ttm_tt { - struct ttm_dma_tt ttm; - struct amdgpu_device *adev; - u64 offset; - uint64_t userptr; - struct mm_struct *usermm; - uint32_t userflags; + struct ttm_dma_tt ttm; + struct amdgpu_device *adev; + u64 offset; + uint64_t userptr; + struct mm_struct *usermm; + uint32_t userflags; + spinlock_t guptasklock; + struct list_head guptasks; + atomic_t mmu_invalidations; }; -/* prepare the sg table with the user pages */ -static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) +int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned pinned = 0, nents; - int r; - int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); - enum dma_data_direction direction = write ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - - if (current->mm != gtt->usermm) - return -EPERM; + unsigned pinned = 0; + int r; if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* check that we only pin down anonymous memory + /* check that we only use anonymous memory to prevent problems with writeback */ unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; @@ -516,10 +532,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) do { unsigned num_pages = ttm->num_pages - pinned; uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **pages = ttm->pages + pinned; + struct page **p = pages + pinned; + struct amdgpu_ttm_gup_task_list guptask; + + guptask.task = current; + spin_lock(&gtt->guptasklock); + list_add(&guptask.list, &gtt->guptasks); + spin_unlock(&gtt->guptasklock); + + r = get_user_pages(userptr, num_pages, write, 0, p, NULL); + + spin_lock(&gtt->guptasklock); + list_del(&guptask.list); + spin_unlock(&gtt->guptasklock); - r = get_user_pages(current, current->mm, userptr, num_pages, - write, 0, pages, NULL); if 
(r < 0) goto release_pages; @@ -527,6 +553,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) } while (pinned < ttm->num_pages); + return 0; + +release_pages: + release_pages(pages, pinned, 0); + return r; +} + +/* prepare the sg table with the user pages */ +static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) +{ + struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev); + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned nents; + int r; + + int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, ttm->num_pages << PAGE_SHIFT, GFP_KERNEL); @@ -545,9 +590,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) release_sg: kfree(ttm->sg); - -release_pages: - release_pages(ttm->pages, pinned, 0); return r; } @@ -712,7 +754,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) { - while (--i) { + while (i--) { pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i], PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); gtt->ttm.dma_address[i] = 0; @@ -770,17 +812,59 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->userptr = addr; gtt->usermm = current->mm; gtt->userflags = flags; + spin_lock_init(&gtt->guptasklock); + INIT_LIST_HEAD(&gtt->guptasks); + atomic_set(&gtt->mmu_invalidations, 0); + return 0; } -bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm) +struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL) + return NULL; + + return gtt->usermm; +} + +bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, + unsigned long end) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_gup_task_list *entry; + unsigned long size; + + if (gtt == NULL || !gtt->userptr) + return false; + + size = 
(unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; + if (gtt->userptr > end || gtt->userptr + size <= start) return false; - return !!gtt->userptr; + spin_lock(&gtt->guptasklock); + list_for_each_entry(entry, &gtt->guptasks, list) { + if (entry->task == current) { + spin_unlock(&gtt->guptasklock); + return false; + } + } + spin_unlock(&gtt->guptasklock); + + atomic_inc(&gtt->mmu_invalidations); + + return true; +} + +bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, + int *last_invalidated) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int prev_invalidated = *last_invalidated; + + *last_invalidated = atomic_read(&gtt->mmu_invalidations); + return prev_invalidated != *last_invalidated; } bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) @@ -808,7 +892,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, flags |= AMDGPU_PTE_SNOOPED; } - if (adev->asic_type >= CHIP_TOPAZ) + if (adev->asic_type >= CHIP_TONGA) flags |= AMDGPU_PTE_EXECUTABLE; flags |= AMDGPU_PTE_READABLE; @@ -996,9 +1080,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, struct fence **fence) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + uint32_t max_bytes; unsigned num_loops, num_dw; - struct amdgpu_ib *ib; unsigned i; int r; @@ -1010,20 +1095,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, while (num_dw & 0x7) num_dw++; - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) - return -ENOMEM; - - r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib); - if (r) { - kfree(ib); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); + if (r) return r; - } - - ib->length_dw = 0; if (resv) { - r = amdgpu_sync_resv(adev, &ib->sync, resv, + r = amdgpu_sync_resv(adev, &job->sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED); if (r) { DRM_ERROR("sync failed (%d).\n", r); @@ -1034,31 +1111,25 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, for (i = 0; i < num_loops; i++) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - 
amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, - cur_size_in_bytes); + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, + dst_offset, cur_size_in_bytes); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; byte_count -= cur_size_in_bytes; } - amdgpu_vm_pad_ib(adev, ib); - WARN_ON(ib->length_dw > num_dw); - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_vm_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED, - fence); + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + r = amdgpu_job_submit(job, ring, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, fence); if (r) goto error_free; - if (!amdgpu_enable_scheduler) { - amdgpu_ib_free(adev, ib); - kfree(ib); - } return 0; + error_free: - amdgpu_ib_free(adev, ib); - kfree(ib); + amdgpu_job_free(job); return r; } |