diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-24 23:42:54 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-24 23:42:54 +0400 |
commit | f2fde3a65e88330017b816faf2ef75f141d21375 (patch) | |
tree | 57152ab5756e7ed1c58742e7e16f13a45ff11f21 /drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
parent | 28f3d717618156c0dcd2f497d791b578a7931d87 (diff) | |
parent | 8c914028f5ddaa417b7d0f4b7fdc24caceaa8043 (diff) | |
download | linux-f2fde3a65e88330017b816faf2ef75f141d21375.tar.xz |
Merge branch 'drm-core-next' of git://people.freedesktop.org/~airlied/linux
Pull main drm updates from Dave Airlie:
"This is the main merge window request for the drm.
It's big, but jam packed will lots of features and of course 0
regressions. (okay maybe there'll be one).
Highlights:
- new KMS drivers for server GPU chipsets: ast, mgag200 and cirrus
(qemu only). These drivers use the generic modesetting drivers.
- initial prime/dma-buf support for i915, nouveau, radeon, udl and
exynos
- switcheroo audio support: so GPUs with HDMI can turn off the sound
driver without crashing stuff.
- There are some patches drifting outside drivers/gpu into x86 and
EFI for better handling of multiple video adapters in Apple Macs,
they've got correct acks except one trivial fixup.
- Core:
edid parser has better DMT and reduced blanking support,
crtc properties,
plane properties,
- Drivers:
exynos: add 2D core accel support, prime support, hdmi features
intel: more Haswell support, initial Valleyview support, more
hdmi infoframe fixes, update MAINTAINERS for Daniel, lots of
cleanups and fixes
radeon: more HDMI audio support, improved GPU lockup recovery
support, remove nested mutexes, less memory copying on PCIE, fix
bus master enable race (kexec), improved fence handling
gma500: cleanups, 1080p support, acpi fixes
nouveau: better nva3 memory reclocking, kepler accel (needs
external firmware rip), async buffer moves on nv84+ hw.
I've some more dma-buf patches that rely on the dma-buf merge for vmap
stuff, and I've a few fixes building up, but I'd decided I'd better
get rid of the main pull sooner rather than later, so the audio guys
are also unblocked."
Fix up trivial conflict due to some duplicated changes in
drivers/gpu/drm/i915/intel_ringbuffer.c
* 'drm-core-next' of git://people.freedesktop.org/~airlied/linux: (605 commits)
drm/nouveau/nvd9: Fix GPIO initialisation sequence.
drm/nouveau: Unregister switcheroo client on exit
drm/nouveau: Check dsm on switcheroo unregister
drm/nouveau: fix a minor annoyance in an output string
drm/nouveau: turn a BUG into a WARN
drm/nv50: decode PGRAPH DATA_ERROR = 0x24
drm/nouveau/disp: fix dithering not being enabled on some eDP macbooks
drm/nvd9/copy: initialise copy engine, seems to work like nvc0
drm/nvc0/ttm: use copy engines for async buffer moves
drm/nva3/ttm: use copy engine for async buffer moves
drm/nv98/ttm: add in a (disabled) crypto engine buffer copy method
drm/nv84/ttm: use crypto engine for async buffer copies
drm/nouveau/ttm: untangle code to support accelerated buffer moves
drm/nouveau/fbcon: use fence for sync, rather than notifier
drm/nv98/crypt: non-stub implementation of the engine hooks
drm/nouveau/fifo: turn all fifo modules into engine modules
drm/nv50/graph: remove ability to do interrupt-driven context switching
drm/nv50: remove manual context unload on context destruction
drm/nv50: remove execution engine context saves on suspend
drm/nv50/fifo: use hardware channel kickoff functionality
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 200 |
1 files changed, 95 insertions, 105 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index de431942ded4..974a9f1068a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -266,6 +266,12 @@ eb_destroy(struct eb_objects *eb) kfree(eb); } +static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) +{ + return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || + obj->cache_level != I915_CACHE_NONE); +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, @@ -273,6 +279,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; + struct drm_i915_gem_object *target_i915_obj; uint32_t target_offset; int ret = -EINVAL; @@ -281,7 +288,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely(target_obj == NULL)) return -ENOENT; - target_offset = to_intel_bo(target_obj)->gtt_offset; + target_i915_obj = to_intel_bo(target_obj); + target_offset = target_i915_obj->gtt_offset; /* The target buffer should have appeared before us in the * exec_object list, so it should have a GTT space bound by now. @@ -352,11 +360,19 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return ret; } + /* We can't wait for rendering with pagefaults disabled */ + if (obj->active && in_atomic()) + return -EFAULT; + reloc->delta += target_offset; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) { + if (use_cpu_reloc(obj)) { uint32_t page_offset = reloc->offset & ~PAGE_MASK; char *vaddr; + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret) + return ret; + vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]); *(uint32_t *)(vaddr + page_offset) = reloc->delta; kunmap_atomic(vaddr); @@ -365,11 +381,11 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, uint32_t __iomem *reloc_entry; void __iomem *reloc_page; - /* We can't wait for rendering with pagefaults disabled */ - if (obj->active && in_atomic()) - return -EFAULT; + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; - ret = i915_gem_object_set_to_gtt_domain(obj, 1); + ret = i915_gem_object_put_fence(obj); if (ret) return ret; @@ -383,6 +399,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, io_mapping_unmap_atomic(reloc_page); } + /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and + * pipe_control writes because the gpu doesn't properly redirect them + * through the ppgtt for non_secure batchbuffers. */ + if (unlikely(IS_GEN6(dev) && + reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && + !target_i915_obj->has_global_gtt_mapping)) { + i915_gem_gtt_bind_object(target_i915_obj, + target_i915_obj->cache_level); + } + /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; @@ -393,30 +419,46 @@ static int i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, struct eb_objects *eb) { +#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) + struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - int i, ret; + int remain, ret; user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; - for (i = 0; i < entry->relocation_count; i++) { - struct drm_i915_gem_relocation_entry reloc; - if (__copy_from_user_inatomic(&reloc, - user_relocs+i, - sizeof(reloc))) + remain = entry->relocation_count; + while (remain) { + struct drm_i915_gem_relocation_entry *r = stack_reloc; + int count = remain; + if (count > ARRAY_SIZE(stack_reloc)) + count = ARRAY_SIZE(stack_reloc); + remain -= count; + + if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) return -EFAULT; - ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc); - if (ret) - return ret; + do { + u64 offset = r->presumed_offset; - if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset, - &reloc.presumed_offset, - sizeof(reloc.presumed_offset))) - return -EFAULT; + ret = i915_gem_execbuffer_relocate_entry(obj, eb, r); + if (ret) + return ret; + + if (r->presumed_offset != offset && + __copy_to_user_inatomic(&user_relocs->presumed_offset, + &r->presumed_offset, + sizeof(r->presumed_offset))) { + return -EFAULT; + } + + user_relocs++; + r++; + } while (--count); } return 0; +#undef N_RELOC } static int @@ -465,6 +507,13 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, #define __EXEC_OBJECT_HAS_FENCE (1<<31) static int +need_reloc_mappable(struct drm_i915_gem_object *obj) +{ + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + return entry->relocation_count && !use_cpu_reloc(obj); +} + +static int pin_and_fence_object(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring) { @@ -477,8 +526,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = - entry->relocation_count ? true : need_fence; + need_mappable = need_fence || need_reloc_mappable(obj); ret = i915_gem_object_pin(obj, entry->alignment, need_mappable); if (ret) @@ -486,18 +534,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj, if (has_fenced_gpu_access) { if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - if (obj->tiling_mode) { - ret = i915_gem_object_get_fence(obj, ring); - if (ret) - goto err_unpin; + ret = i915_gem_object_get_fence(obj); + if (ret) + goto err_unpin; + if (i915_gem_object_pin_fence(obj)) entry->flags |= __EXEC_OBJECT_HAS_FENCE; - i915_gem_object_pin_fence(obj); - } else { - ret = i915_gem_object_put_fence(obj); - if (ret) - goto err_unpin; - } + obj->pending_fenced_gpu_access = true; } } @@ -535,8 +578,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = - entry->relocation_count ? true : need_fence; + need_mappable = need_fence || need_reloc_mappable(obj); if (need_mappable) list_move(&obj->exec_list, &ordered_objects); @@ -576,8 +618,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = - entry->relocation_count ? true : need_fence; + need_mappable = need_fence || need_reloc_mappable(obj); if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || (need_mappable && !obj->map_and_fenceable)) @@ -798,64 +839,6 @@ i915_gem_execbuffer_flush(struct drm_device *dev, return 0; } -static bool -intel_enable_semaphores(struct drm_device *dev) -{ - if (INTEL_INFO(dev)->gen < 6) - return 0; - - if (i915_semaphores >= 0) - return i915_semaphores; - - /* Disable semaphores on SNB */ - if (INTEL_INFO(dev)->gen == 6) - return 0; - - return 1; -} - -static int -i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *to) -{ - struct intel_ring_buffer *from = obj->ring; - u32 seqno; - int ret, idx; - - if (from == NULL || to == from) - return 0; - - /* XXX gpu semaphores are implicated in various hard hangs on SNB */ - if (!intel_enable_semaphores(obj->base.dev)) - return i915_gem_object_wait_rendering(obj); - - idx = intel_ring_sync_index(from, to); - - seqno = obj->last_rendering_seqno; - if (seqno <= from->sync_seqno[idx]) - return 0; - - if (seqno == from->outstanding_lazy_request) { - struct drm_i915_gem_request *request; - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) - return -ENOMEM; - - ret = i915_add_request(from, NULL, request); - if (ret) { - kfree(request); - return ret; - } - - seqno = request->seqno; - } - - from->sync_seqno[idx] = seqno; - - return to->sync_to(to, from, seqno - 1); -} - static int i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) { @@ -917,7 +900,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, } list_for_each_entry(obj, objects, exec_list) { - ret = i915_gem_execbuffer_sync_rings(obj, ring); + ret = i915_gem_object_sync(obj, ring); if (ret) return ret; } @@ -955,7 +938,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, if (!access_ok(VERIFY_WRITE, ptr, length)) return -EFAULT; - if (fault_in_pages_readable(ptr, length)) + if (fault_in_multipages_readable(ptr, length)) return -EFAULT; } @@ -984,11 +967,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, obj->pending_gpu_write = true; list_move_tail(&obj->gpu_write_list, &ring->gpu_write_list); - intel_mark_busy(ring->dev, obj); + if (obj->pin_count) /* check for potential scanout */ + intel_mark_busy(ring->dev, obj); } trace_i915_gem_object_change_domain(obj, old_read, old_write); } + + intel_mark_busy(ring->dev, NULL); } static void @@ -1078,17 +1064,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ring = &dev_priv->ring[RCS]; break; case I915_EXEC_BSD: - if (!HAS_BSD(dev)) { - DRM_DEBUG("execbuf with invalid ring (BSD)\n"); - return -EINVAL; - } ring = &dev_priv->ring[VCS]; break; case I915_EXEC_BLT: - if (!HAS_BLT(dev)) { - DRM_DEBUG("execbuf with invalid ring (BLT)\n"); - return -EINVAL; - } ring = &dev_priv->ring[BCS]; break; default: @@ -1096,6 +1074,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } + if (!intel_ring_initialized(ring)) { + DRM_DEBUG("execbuf with invalid ring: %d\n", + (int)(args->flags & I915_EXEC_RING_MASK)); + return -EINVAL; + } mode = args->flags & I915_EXEC_CONSTANTS_MASK; mask = I915_EXEC_CONSTANTS_MASK; @@ -1133,11 +1116,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (INTEL_INFO(dev)->gen >= 5) { + DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); + return -EINVAL; + } + if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { DRM_DEBUG("execbuf with %u cliprects\n", args->num_cliprects); return -EINVAL; } + cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects), GFP_KERNEL); if (cliprects == NULL) { @@ -1242,9 +1231,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * so every billion or so execbuffers, we need to stall * the GPU in order to reset the counters. */ - ret = i915_gpu_idle(dev, true); + ret = i915_gpu_idle(dev); if (ret) goto err; + i915_gem_retire_requests(dev); BUG_ON(ring->sync_seqno[i]); } |