diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c')
-rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 192 |
1 files changed, 85 insertions, 107 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 14107b5b7811..4c20fec64d96 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -56,7 +56,6 @@ struct gk20a_instobj { /* CPU mapping */ u32 *vaddr; - struct list_head vaddr_node; }; #define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory) @@ -66,7 +65,6 @@ struct gk20a_instobj { struct gk20a_instobj_dma { struct gk20a_instobj base; - u32 *cpuaddr; dma_addr_t handle; struct nvkm_mm_node r; }; @@ -79,6 +77,11 @@ struct gk20a_instobj_dma { struct gk20a_instobj_iommu { struct gk20a_instobj base; + /* to link into gk20a_instmem::vaddr_lru */ + struct list_head vaddr_node; + /* how many clients are using vaddr? */ + u32 use_cpt; + /* will point to the higher half of pages */ dma_addr_t *dma_addrs; /* array of base.mem->size pages (+ dma_addr_ts) */ @@ -107,8 +110,6 @@ struct gk20a_instmem { /* Only used by DMA API */ struct dma_attrs attrs; - - void __iomem * (*cpu_map)(struct nvkm_memory *); }; #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base) @@ -130,70 +131,58 @@ gk20a_instobj_size(struct nvkm_memory *memory) return (u64)gk20a_instobj(memory)->mem.size << 12; } -static void __iomem * -gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory) -{ -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) - struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory); - struct device *dev = node->base.imem->base.subdev.device->dev; - int npages = nvkm_memory_size(memory) >> 12; - struct page *pages[npages]; - int i; - - /* we shouldn't see a gk20a on anything but arm/arm64 anyways */ - /* phys_to_page does not exist on all platforms... */ - pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT); - for (i = 1; i < npages; i++) - pages[i] = pages[0] + i; - - return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); -#else - BUG(); - return NULL; -#endif -} - -static void __iomem * -gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory) +/* + * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held. + */ +static void +gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj) { - struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); - int npages = nvkm_memory_size(memory) >> 12; - - return vmap(node->pages, npages, VM_MAP, - pgprot_writecombine(PAGE_KERNEL)); + struct gk20a_instmem *imem = obj->base.imem; + /* there should not be any user left... */ + WARN_ON(obj->use_cpt); + list_del(&obj->vaddr_node); + vunmap(obj->base.vaddr); + obj->base.vaddr = NULL; + imem->vaddr_use -= nvkm_memory_size(&obj->base.memory); + nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use, + imem->vaddr_max); } /* - * Must be called while holding gk20a_instmem_lock + * Must be called while holding gk20a_instmem::lock */ static void gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size) { while (imem->vaddr_use + size > imem->vaddr_max) { - struct gk20a_instobj *obj; - /* no candidate that can be unmapped, abort... */ if (list_empty(&imem->vaddr_lru)) break; - obj = list_first_entry(&imem->vaddr_lru, struct gk20a_instobj, - vaddr_node); - list_del(&obj->vaddr_node); - vunmap(obj->vaddr); - obj->vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&obj->memory); - nvkm_debug(&imem->base.subdev, "(GC) vaddr used: %x/%x\n", - imem->vaddr_use, imem->vaddr_max); - + gk20a_instobj_iommu_recycle_vaddr( + list_first_entry(&imem->vaddr_lru, + struct gk20a_instobj_iommu, vaddr_node)); } } static void __iomem * -gk20a_instobj_acquire(struct nvkm_memory *memory) +gk20a_instobj_acquire_dma(struct nvkm_memory *memory) { struct gk20a_instobj *node = gk20a_instobj(memory); struct gk20a_instmem *imem = node->imem; struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; + + nvkm_ltc_flush(ltc); + + return node->vaddr; +} + +static void __iomem * +gk20a_instobj_acquire_iommu(struct nvkm_memory *memory) +{ + struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); + struct gk20a_instmem *imem = node->base.imem; + struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; const u64 size = nvkm_memory_size(memory); unsigned long flags; @@ -201,19 +190,21 @@ gk20a_instobj_acquire(struct nvkm_memory *memory) spin_lock_irqsave(&imem->lock, flags); - if (node->vaddr) { - /* remove us from the LRU list since we cannot be unmapped */ - list_del(&node->vaddr_node); - + if (node->base.vaddr) { + if (!node->use_cpt) { + /* remove from LRU list since mapping in use again */ + list_del(&node->vaddr_node); + } goto out; } /* try to free some address space if we reached the limit */ gk20a_instmem_vaddr_gc(imem, size); - node->vaddr = imem->cpu_map(memory); - - if (!node->vaddr) { + /* map the pages */ + node->base.vaddr = vmap(node->pages, size >> PAGE_SHIFT, VM_MAP, + pgprot_writecombine(PAGE_KERNEL)); + if (!node->base.vaddr) { nvkm_error(&imem->base.subdev, "cannot map instobj - " "this is not going to end well...\n"); goto out; @@ -224,24 +215,41 @@ gk20a_instobj_acquire(struct nvkm_memory *memory) imem->vaddr_use, imem->vaddr_max); out: + node->use_cpt++; spin_unlock_irqrestore(&imem->lock, flags); - return node->vaddr; + return node->base.vaddr; } static void -gk20a_instobj_release(struct nvkm_memory *memory) +gk20a_instobj_release_dma(struct nvkm_memory *memory) { struct gk20a_instobj *node = gk20a_instobj(memory); struct gk20a_instmem *imem = node->imem; struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; + + nvkm_ltc_invalidate(ltc); +} + +static void +gk20a_instobj_release_iommu(struct nvkm_memory *memory) +{ + struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); + struct gk20a_instmem *imem = node->base.imem; + struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; unsigned long flags; spin_lock_irqsave(&imem->lock, flags); - /* add ourselves to the LRU list so our CPU mapping can be freed */ - list_add_tail(&node->vaddr_node, &imem->vaddr_lru); + /* we should at least have one user to release... */ + if (WARN_ON(node->use_cpt == 0)) + goto out; + + /* add unused objs to the LRU list to recycle their mapping */ + if (--node->use_cpt == 0) + list_add_tail(&node->vaddr_node, &imem->vaddr_lru); +out: spin_unlock_irqrestore(&imem->lock, flags); wmb(); @@ -272,37 +280,6 @@ gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) nvkm_vm_map_at(vma, offset, &node->mem); } -/* - * Clear the CPU mapping of an instobj if it exists - */ -static void -gk20a_instobj_dtor(struct gk20a_instobj *node) -{ - struct gk20a_instmem *imem = node->imem; - struct gk20a_instobj *obj; - unsigned long flags; - - spin_lock_irqsave(&imem->lock, flags); - - if (!node->vaddr) - goto out; - - list_for_each_entry(obj, &imem->vaddr_lru, vaddr_node) { - if (obj == node) { - list_del(&obj->vaddr_node); - break; - } - } - vunmap(node->vaddr); - node->vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&node->memory); - nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", - imem->vaddr_use, imem->vaddr_max); - -out: - spin_unlock_irqrestore(&imem->lock, flags); -} - static void * gk20a_instobj_dtor_dma(struct nvkm_memory *memory) { @@ -310,12 +287,10 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory) struct gk20a_instmem *imem = node->base.imem; struct device *dev = imem->base.subdev.device->dev; - gk20a_instobj_dtor(&node->base); - - if (unlikely(!node->cpuaddr)) + if (unlikely(!node->base.vaddr)) goto out; - dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr, + dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr, node->handle, &imem->attrs); out: @@ -329,13 +304,20 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) struct gk20a_instmem *imem = node->base.imem; struct device *dev = imem->base.subdev.device->dev; struct nvkm_mm_node *r; + unsigned long flags; int i; - gk20a_instobj_dtor(&node->base); - if (unlikely(list_empty(&node->base.mem.regions))) goto out; + spin_lock_irqsave(&imem->lock, flags); + + /* vaddr has already been recycled */ + if (node->base.vaddr) + gk20a_instobj_iommu_recycle_vaddr(node); + + spin_unlock_irqrestore(&imem->lock, flags); + r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node, rl_entry); @@ -366,8 +348,8 @@ gk20a_instobj_func_dma = { .target = gk20a_instobj_target, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, - .acquire = gk20a_instobj_acquire, - .release = gk20a_instobj_release, + .acquire = gk20a_instobj_acquire_dma, + .release = gk20a_instobj_release_dma, .rd32 = gk20a_instobj_rd32, .wr32 = gk20a_instobj_wr32, .map = gk20a_instobj_map, @@ -379,8 +361,8 @@ gk20a_instobj_func_iommu = { .target = gk20a_instobj_target, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, - .acquire = gk20a_instobj_acquire, - .release = gk20a_instobj_release, + .acquire = gk20a_instobj_acquire_iommu, + .release = gk20a_instobj_release_iommu, .rd32 = gk20a_instobj_rd32, .wr32 = gk20a_instobj_wr32, .map = gk20a_instobj_map, @@ -400,10 +382,10 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); - node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, - &node->handle, GFP_KERNEL, - &imem->attrs); - if (!node->cpuaddr) { + node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, + &node->handle, GFP_KERNEL, + &imem->attrs); + if (!node->base.vaddr) { nvkm_error(subdev, "cannot allocate DMA memory\n"); return -ENOMEM; } @@ -609,18 +591,14 @@ gk20a_instmem_new(struct nvkm_device *device, int index, imem->mm = &tdev->iommu.mm; imem->domain = tdev->iommu.domain; imem->iommu_pgshift = tdev->iommu.pgshift; - imem->cpu_map = gk20a_instobj_cpu_map_iommu; imem->iommu_bit = tdev->func->iommu_bit; nvkm_info(&imem->base.subdev, "using IOMMU\n"); } else { init_dma_attrs(&imem->attrs); - /* We will access the memory through our own mapping */ dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs); dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs); - imem->cpu_map = gk20a_instobj_cpu_map_dma; nvkm_info(&imem->base.subdev, "using DMA API\n"); } |