summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/nouveau/nouveau_bo.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/nouveau/nouveau_bo.c')
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c335
1 files changed, 225 insertions, 110 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index c41e1c200ef5..a52184007f5f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -32,6 +32,8 @@
#include "nouveau_drm.h"
#include "nouveau_drv.h"
#include "nouveau_dma.h"
+#include "nouveau_mm.h"
+#include "nouveau_vm.h"
#include <linux/log2.h>
#include <linux/slab.h>
@@ -46,82 +48,54 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
if (unlikely(nvbo->gem))
DRM_ERROR("bo %p still attached to GEM object\n", bo);
- if (nvbo->tile)
- nv10_mem_expire_tiling(dev, nvbo->tile, NULL);
-
+ nv10_mem_put_tile_region(dev, nvbo->tile, NULL);
+ if (nvbo->vma.node) {
+ nouveau_vm_unmap(&nvbo->vma);
+ nouveau_vm_put(&nvbo->vma);
+ }
kfree(nvbo);
}
static void
-nouveau_bo_fixup_align(struct drm_device *dev,
- uint32_t tile_mode, uint32_t tile_flags,
- int *align, int *size)
+nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, int *size,
+ int *page_shift)
{
- struct drm_nouveau_private *dev_priv = dev->dev_private;
-
- /*
- * Some of the tile_flags have a periodic structure of N*4096 bytes,
- * align to to that as well as the page size. Align the size to the
- * appropriate boundaries. This does imply that sizes are rounded up
- * 3-7 pages, so be aware of this and do not waste memory by allocating
- * many small buffers.
- */
- if (dev_priv->card_type == NV_50) {
- uint32_t block_size = dev_priv->vram_size >> 15;
- int i;
-
- switch (tile_flags) {
- case 0x1800:
- case 0x2800:
- case 0x4800:
- case 0x7a00:
- if (is_power_of_2(block_size)) {
- for (i = 1; i < 10; i++) {
- *align = 12 * i * block_size;
- if (!(*align % 65536))
- break;
- }
- } else {
- for (i = 1; i < 10; i++) {
- *align = 8 * i * block_size;
- if (!(*align % 65536))
- break;
- }
- }
- *size = roundup(*size, *align);
- break;
- default:
- break;
- }
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev);
- } else {
- if (tile_mode) {
+ if (dev_priv->card_type < NV_50) {
+ if (nvbo->tile_mode) {
if (dev_priv->chipset >= 0x40) {
*align = 65536;
- *size = roundup(*size, 64 * tile_mode);
+ *size = roundup(*size, 64 * nvbo->tile_mode);
} else if (dev_priv->chipset >= 0x30) {
*align = 32768;
- *size = roundup(*size, 64 * tile_mode);
+ *size = roundup(*size, 64 * nvbo->tile_mode);
} else if (dev_priv->chipset >= 0x20) {
*align = 16384;
- *size = roundup(*size, 64 * tile_mode);
+ *size = roundup(*size, 64 * nvbo->tile_mode);
} else if (dev_priv->chipset >= 0x10) {
*align = 16384;
- *size = roundup(*size, 32 * tile_mode);
+ *size = roundup(*size, 32 * nvbo->tile_mode);
}
}
+ } else {
+ if (likely(dev_priv->chan_vm)) {
+ if (*size > 256 * 1024)
+ *page_shift = dev_priv->chan_vm->lpg_shift;
+ else
+ *page_shift = dev_priv->chan_vm->spg_shift;
+ } else {
+ *page_shift = 12;
+ }
+
+ *size = roundup(*size, (1 << *page_shift));
+ *align = max((1 << *page_shift), *align);
}
- /* ALIGN works only on powers of two. */
*size = roundup(*size, PAGE_SIZE);
-
- if (dev_priv->card_type == NV_50) {
- *size = roundup(*size, 65536);
- *align = max(65536, *align);
- }
}
int
@@ -132,7 +106,7 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_bo *nvbo;
- int ret = 0;
+ int ret = 0, page_shift = 0;
nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL);
if (!nvbo)
@@ -145,10 +119,19 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
nvbo->tile_flags = tile_flags;
nvbo->bo.bdev = &dev_priv->ttm.bdev;
- nouveau_bo_fixup_align(dev, tile_mode, nouveau_bo_tile_layout(nvbo),
- &align, &size);
+ nouveau_bo_fixup_align(nvbo, &align, &size, &page_shift);
align >>= PAGE_SHIFT;
+ if (!nvbo->no_vm && dev_priv->chan_vm) {
+ ret = nouveau_vm_get(dev_priv->chan_vm, size, page_shift,
+ NV_MEM_ACCESS_RW, &nvbo->vma);
+ if (ret) {
+ kfree(nvbo);
+ return ret;
+ }
+ }
+
+ nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
nouveau_bo_placement_set(nvbo, flags, 0);
nvbo->channel = chan;
@@ -161,6 +144,11 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
}
nvbo->channel = NULL;
+ if (nvbo->vma.node) {
+ if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
+ nvbo->bo.offset = nvbo->vma.offset;
+ }
+
*pnvbo = nvbo;
return 0;
}
@@ -182,17 +170,17 @@ static void
set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev);
+ int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
if (dev_priv->card_type == NV_10 &&
- nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM)) {
+ nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) &&
+ nvbo->bo.mem.num_pages < vram_pages / 2) {
/*
* Make sure that the color and depth buffers are handled
* by independent memory controller units. Up to a 9x
* speed up when alpha-blending and depth-test are enabled
* at the same time.
*/
- int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
-
if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) {
nvbo->placement.fpfn = vram_pages / 2;
nvbo->placement.lpfn = ~0;
@@ -244,7 +232,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype)
nouveau_bo_placement_set(nvbo, memtype, 0);
- ret = ttm_bo_validate(bo, &nvbo->placement, false, false, false);
+ ret = nouveau_bo_validate(nvbo, false, false, false);
if (ret == 0) {
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
@@ -280,7 +268,7 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
nouveau_bo_placement_set(nvbo, bo->mem.placement, 0);
- ret = ttm_bo_validate(bo, &nvbo->placement, false, false, false);
+ ret = nouveau_bo_validate(nvbo, false, false, false);
if (ret == 0) {
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
@@ -319,6 +307,25 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
ttm_bo_kunmap(&nvbo->kmap);
}
+int
+nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu)
+{
+ int ret;
+
+ ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement, interruptible,
+ no_wait_reserve, no_wait_gpu);
+ if (ret)
+ return ret;
+
+ if (nvbo->vma.node) {
+ if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
+ nvbo->bo.offset = nvbo->vma.offset;
+ }
+
+ return 0;
+}
+
u16
nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index)
{
@@ -410,37 +417,40 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
man->default_caching = TTM_PL_FLAG_CACHED;
break;
case TTM_PL_VRAM:
- man->func = &ttm_bo_manager_func;
+ if (dev_priv->card_type >= NV_50) {
+ man->func = &nouveau_vram_manager;
+ man->io_reserve_fastpath = false;
+ man->use_io_reserve_lru = true;
+ } else {
+ man->func = &ttm_bo_manager_func;
+ }
man->flags = TTM_MEMTYPE_FLAG_FIXED |
TTM_MEMTYPE_FLAG_MAPPABLE;
man->available_caching = TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_WC;
man->default_caching = TTM_PL_FLAG_WC;
- if (dev_priv->card_type == NV_50)
- man->gpu_offset = 0x40000000;
- else
- man->gpu_offset = 0;
break;
case TTM_PL_TT:
man->func = &ttm_bo_manager_func;
switch (dev_priv->gart_info.type) {
case NOUVEAU_GART_AGP:
man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
- man->available_caching = TTM_PL_FLAG_UNCACHED;
- man->default_caching = TTM_PL_FLAG_UNCACHED;
+ man->available_caching = TTM_PL_FLAG_UNCACHED |
+ TTM_PL_FLAG_WC;
+ man->default_caching = TTM_PL_FLAG_WC;
break;
case NOUVEAU_GART_SGDMA:
man->flags = TTM_MEMTYPE_FLAG_MAPPABLE |
TTM_MEMTYPE_FLAG_CMA;
man->available_caching = TTM_PL_MASK_CACHING;
man->default_caching = TTM_PL_FLAG_CACHED;
+ man->gpu_offset = dev_priv->gart_info.aper_base;
break;
default:
NV_ERROR(dev, "Unknown GART type: %d\n",
dev_priv->gart_info.type);
return -EINVAL;
}
- man->gpu_offset = dev_priv->vm_gart_base;
break;
default:
NV_ERROR(dev, "Unsupported memory type %u\n", (unsigned)type);
@@ -485,16 +495,9 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
if (ret)
return ret;
- if (nvbo->channel) {
- ret = nouveau_fence_sync(fence, nvbo->channel);
- if (ret)
- goto out;
- }
-
ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL, evict,
no_wait_reserve, no_wait_gpu, new_mem);
-out:
- nouveau_fence_unref((void *)&fence);
+ nouveau_fence_unref(&fence);
return ret;
}
@@ -516,6 +519,58 @@ nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
}
static int
+nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
+ struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ u64 src_offset = old_mem->start << PAGE_SHIFT;
+ u64 dst_offset = new_mem->start << PAGE_SHIFT;
+ u32 page_count = new_mem->num_pages;
+ int ret;
+
+ if (!nvbo->no_vm) {
+ if (old_mem->mem_type == TTM_PL_VRAM)
+ src_offset = nvbo->vma.offset;
+ else
+ src_offset += dev_priv->gart_info.aper_base;
+
+ if (new_mem->mem_type == TTM_PL_VRAM)
+ dst_offset = nvbo->vma.offset;
+ else
+ dst_offset += dev_priv->gart_info.aper_base;
+ }
+
+ page_count = new_mem->num_pages;
+ while (page_count) {
+ int line_count = (page_count > 2047) ? 2047 : page_count;
+
+ ret = RING_SPACE(chan, 12);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0238, 2);
+ OUT_RING (chan, upper_32_bits(dst_offset));
+ OUT_RING (chan, lower_32_bits(dst_offset));
+ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x030c, 6);
+ OUT_RING (chan, upper_32_bits(src_offset));
+ OUT_RING (chan, lower_32_bits(src_offset));
+ OUT_RING (chan, PAGE_SIZE); /* src_pitch */
+ OUT_RING (chan, PAGE_SIZE); /* dst_pitch */
+ OUT_RING (chan, PAGE_SIZE); /* line_length */
+ OUT_RING (chan, line_count);
+ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0300, 1);
+ OUT_RING (chan, 0x00100110);
+
+ page_count -= line_count;
+ src_offset += (PAGE_SIZE * line_count);
+ dst_offset += (PAGE_SIZE * line_count);
+ }
+
+ return 0;
+}
+
+static int
nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
{
@@ -529,14 +584,14 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
dst_offset = new_mem->start << PAGE_SHIFT;
if (!nvbo->no_vm) {
if (old_mem->mem_type == TTM_PL_VRAM)
- src_offset += dev_priv->vm_vram_base;
+ src_offset = nvbo->vma.offset;
else
- src_offset += dev_priv->vm_gart_base;
+ src_offset += dev_priv->gart_info.aper_base;
if (new_mem->mem_type == TTM_PL_VRAM)
- dst_offset += dev_priv->vm_vram_base;
+ dst_offset = nvbo->vma.offset;
else
- dst_offset += dev_priv->vm_gart_base;
+ dst_offset += dev_priv->gart_info.aper_base;
}
ret = RING_SPACE(chan, 3);
@@ -683,17 +738,27 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
int ret;
chan = nvbo->channel;
- if (!chan || nvbo->no_vm)
+ if (!chan || nvbo->no_vm) {
chan = dev_priv->channel;
+ mutex_lock_nested(&chan->mutex, NOUVEAU_KCHANNEL_MUTEX);
+ }
if (dev_priv->card_type < NV_50)
ret = nv04_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
else
+ if (dev_priv->card_type < NV_C0)
ret = nv50_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
- if (ret)
- return ret;
+ else
+ ret = nvc0_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
+ if (ret == 0) {
+ ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
+ no_wait_reserve,
+ no_wait_gpu, new_mem);
+ }
- return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+ if (chan == dev_priv->channel)
+ mutex_unlock(&chan->mutex);
+ return ret;
}
static int
@@ -724,7 +789,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
goto out;
- ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+ ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
out:
ttm_bo_mem_put(bo, &tmp_mem);
return ret;
@@ -750,11 +815,11 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
return ret;
- ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, &tmp_mem);
+ ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, new_mem);
if (ret)
goto out;
@@ -771,7 +836,6 @@ nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
struct drm_device *dev = dev_priv->dev;
struct nouveau_bo *nvbo = nouveau_bo(bo);
uint64_t offset;
- int ret;
if (nvbo->no_vm || new_mem->mem_type != TTM_PL_VRAM) {
/* Nothing to do. */
@@ -781,18 +845,12 @@ nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
offset = new_mem->start << PAGE_SHIFT;
- if (dev_priv->card_type == NV_50) {
- ret = nv50_mem_vm_bind_linear(dev,
- offset + dev_priv->vm_vram_base,
- new_mem->size,
- nouveau_bo_tile_layout(nvbo),
- offset);
- if (ret)
- return ret;
-
+ if (dev_priv->chan_vm) {
+ nouveau_vm_map(&nvbo->vma, new_mem->mm_node);
} else if (dev_priv->card_type >= NV_10) {
*new_tile = nv10_mem_set_tiling(dev, offset, new_mem->size,
- nvbo->tile_mode);
+ nvbo->tile_mode,
+ nvbo->tile_flags);
}
return 0;
@@ -808,9 +866,7 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
if (dev_priv->card_type >= NV_10 &&
dev_priv->card_type < NV_50) {
- if (*old_tile)
- nv10_mem_expire_tiling(dev, *old_tile, bo->sync_obj);
-
+ nv10_mem_put_tile_region(dev, *old_tile, bo->sync_obj);
*old_tile = new_tile;
}
}
@@ -879,6 +935,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
struct drm_device *dev = dev_priv->dev;
+ int ret;
mem->bus.addr = NULL;
mem->bus.offset = 0;
@@ -901,9 +958,40 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
#endif
break;
case TTM_PL_VRAM:
- mem->bus.offset = mem->start << PAGE_SHIFT;
+ {
+ struct nouveau_vram *vram = mem->mm_node;
+ u8 page_shift;
+
+ if (!dev_priv->bar1_vm) {
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.base = pci_resource_start(dev->pdev, 1);
+ mem->bus.is_iomem = true;
+ break;
+ }
+
+ if (dev_priv->card_type == NV_C0)
+ page_shift = vram->page_shift;
+ else
+ page_shift = 12;
+
+ ret = nouveau_vm_get(dev_priv->bar1_vm, mem->bus.size,
+ page_shift, NV_MEM_ACCESS_RW,
+ &vram->bar_vma);
+ if (ret)
+ return ret;
+
+ nouveau_vm_map(&vram->bar_vma, vram);
+ if (ret) {
+ nouveau_vm_put(&vram->bar_vma);
+ return ret;
+ }
+
+ mem->bus.offset = vram->bar_vma.offset;
+ if (dev_priv->card_type == NV_50) /*XXX*/
+ mem->bus.offset -= 0x0020000000ULL;
mem->bus.base = pci_resource_start(dev->pdev, 1);
mem->bus.is_iomem = true;
+ }
break;
default:
return -EINVAL;
@@ -914,6 +1002,17 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
static void
nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
+ struct nouveau_vram *vram = mem->mm_node;
+
+ if (!dev_priv->bar1_vm || mem->mem_type != TTM_PL_VRAM)
+ return;
+
+ if (!vram->bar_vma.node)
+ return;
+
+ nouveau_vm_unmap(&vram->bar_vma);
+ nouveau_vm_put(&vram->bar_vma);
}
static int
@@ -939,7 +1038,23 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
nvbo->placement.fpfn = 0;
nvbo->placement.lpfn = dev_priv->fb_mappable_pages;
nouveau_bo_placement_set(nvbo, TTM_PL_VRAM, 0);
- return ttm_bo_validate(bo, &nvbo->placement, false, true, false);
+ return nouveau_bo_validate(nvbo, false, true, false);
+}
+
+void
+nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
+{
+ struct nouveau_fence *old_fence;
+
+ if (likely(fence))
+ nouveau_fence_ref(fence);
+
+ spin_lock(&nvbo->bo.bdev->fence_lock);
+ old_fence = nvbo->bo.sync_obj;
+ nvbo->bo.sync_obj = fence;
+ spin_unlock(&nvbo->bo.bdev->fence_lock);
+
+ nouveau_fence_unref(&old_fence);
}
struct ttm_bo_driver nouveau_bo_driver = {
@@ -949,11 +1064,11 @@ struct ttm_bo_driver nouveau_bo_driver = {
.evict_flags = nouveau_bo_evict_flags,
.move = nouveau_bo_move,
.verify_access = nouveau_bo_verify_access,
- .sync_obj_signaled = nouveau_fence_signalled,
- .sync_obj_wait = nouveau_fence_wait,
- .sync_obj_flush = nouveau_fence_flush,
- .sync_obj_unref = nouveau_fence_unref,
- .sync_obj_ref = nouveau_fence_ref,
+ .sync_obj_signaled = __nouveau_fence_signalled,
+ .sync_obj_wait = __nouveau_fence_wait,
+ .sync_obj_flush = __nouveau_fence_flush,
+ .sync_obj_unref = __nouveau_fence_unref,
+ .sync_obj_ref = __nouveau_fence_ref,
.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
.io_mem_free = &nouveau_ttm_io_mem_free,