diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 200 |
1 files changed, 95 insertions, 105 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index de431942ded4..974a9f1068a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -266,6 +266,12 @@ eb_destroy(struct eb_objects *eb) kfree(eb); } +static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) +{ + return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || + obj->cache_level != I915_CACHE_NONE); +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, @@ -273,6 +279,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; + struct drm_i915_gem_object *target_i915_obj; uint32_t target_offset; int ret = -EINVAL; @@ -281,7 +288,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely(target_obj == NULL)) return -ENOENT; - target_offset = to_intel_bo(target_obj)->gtt_offset; + target_i915_obj = to_intel_bo(target_obj); + target_offset = target_i915_obj->gtt_offset; /* The target buffer should have appeared before us in the * exec_object list, so it should have a GTT space bound by now. @@ -352,11 +360,19 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return ret; } + /* We can't wait for rendering with pagefaults disabled */ + if (obj->active && in_atomic()) + return -EFAULT; + reloc->delta += target_offset; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) { + if (use_cpu_reloc(obj)) { uint32_t page_offset = reloc->offset & ~PAGE_MASK; char *vaddr; + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret) + return ret; + vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]); *(uint32_t *)(vaddr + page_offset) = reloc->delta; kunmap_atomic(vaddr); @@ -365,11 +381,11 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, uint32_t __iomem *reloc_entry; void __iomem *reloc_page; - /* We can't wait for rendering with pagefaults disabled */ - if (obj->active && in_atomic()) - return -EFAULT; + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; - ret = i915_gem_object_set_to_gtt_domain(obj, 1); + ret = i915_gem_object_put_fence(obj); if (ret) return ret; @@ -383,6 +399,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, io_mapping_unmap_atomic(reloc_page); } + /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and + * pipe_control writes because the gpu doesn't properly redirect them + * through the ppgtt for non_secure batchbuffers. */ + if (unlikely(IS_GEN6(dev) && + reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && + !target_i915_obj->has_global_gtt_mapping)) { + i915_gem_gtt_bind_object(target_i915_obj, + target_i915_obj->cache_level); + } + /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; @@ -393,30 +419,46 @@ static int i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, struct eb_objects *eb) { +#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) + struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - int i, ret; + int remain, ret; user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; - for (i = 0; i < entry->relocation_count; i++) { - struct drm_i915_gem_relocation_entry reloc; - if (__copy_from_user_inatomic(&reloc, - user_relocs+i, - sizeof(reloc))) + remain = entry->relocation_count; + while (remain) { + struct drm_i915_gem_relocation_entry *r = stack_reloc; + int count = remain; + if (count > ARRAY_SIZE(stack_reloc)) + count = ARRAY_SIZE(stack_reloc); + remain -= count; + + if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) return -EFAULT; - ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc); - if (ret) - return ret; + do { + u64 offset = r->presumed_offset; - if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset, - &reloc.presumed_offset, - sizeof(reloc.presumed_offset))) - return -EFAULT; + ret = i915_gem_execbuffer_relocate_entry(obj, eb, r); + if (ret) + return ret; + + if (r->presumed_offset != offset && + __copy_to_user_inatomic(&user_relocs->presumed_offset, + &r->presumed_offset, + sizeof(r->presumed_offset))) { + return -EFAULT; + } + + user_relocs++; + r++; + } while (--count); } return 0; +#undef N_RELOC } static int @@ -465,6 +507,13 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, #define __EXEC_OBJECT_HAS_FENCE (1<<31) static int +need_reloc_mappable(struct drm_i915_gem_object *obj) +{ + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + return entry->relocation_count && !use_cpu_reloc(obj); +} + +static int pin_and_fence_object(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring) { @@ -477,8 +526,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = - entry->relocation_count ? true : need_fence; + need_mappable = need_fence || need_reloc_mappable(obj); ret = i915_gem_object_pin(obj, entry->alignment, need_mappable); if (ret) @@ -486,18 +534,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj, if (has_fenced_gpu_access) { if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - if (obj->tiling_mode) { - ret = i915_gem_object_get_fence(obj, ring); - if (ret) - goto err_unpin; + ret = i915_gem_object_get_fence(obj); + if (ret) + goto err_unpin; + if (i915_gem_object_pin_fence(obj)) entry->flags |= __EXEC_OBJECT_HAS_FENCE; - i915_gem_object_pin_fence(obj); - } else { - ret = i915_gem_object_put_fence(obj); - if (ret) - goto err_unpin; - } + obj->pending_fenced_gpu_access = true; } } @@ -535,8 +578,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = - entry->relocation_count ? true : need_fence; + need_mappable = need_fence || need_reloc_mappable(obj); if (need_mappable) list_move(&obj->exec_list, &ordered_objects); @@ -576,8 +618,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = - entry->relocation_count ? true : need_fence; + need_mappable = need_fence || need_reloc_mappable(obj); if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || (need_mappable && !obj->map_and_fenceable)) @@ -798,64 +839,6 @@ i915_gem_execbuffer_flush(struct drm_device *dev, return 0; } -static bool -intel_enable_semaphores(struct drm_device *dev) -{ - if (INTEL_INFO(dev)->gen < 6) - return 0; - - if (i915_semaphores >= 0) - return i915_semaphores; - - /* Disable semaphores on SNB */ - if (INTEL_INFO(dev)->gen == 6) - return 0; - - return 1; -} - -static int -i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *to) -{ - struct intel_ring_buffer *from = obj->ring; - u32 seqno; - int ret, idx; - - if (from == NULL || to == from) - return 0; - - /* XXX gpu semaphores are implicated in various hard hangs on SNB */ - if (!intel_enable_semaphores(obj->base.dev)) - return i915_gem_object_wait_rendering(obj); - - idx = intel_ring_sync_index(from, to); - - seqno = obj->last_rendering_seqno; - if (seqno <= from->sync_seqno[idx]) - return 0; - - if (seqno == from->outstanding_lazy_request) { - struct drm_i915_gem_request *request; - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) - return -ENOMEM; - - ret = i915_add_request(from, NULL, request); - if (ret) { - kfree(request); - return ret; - } - - seqno = request->seqno; - } - - from->sync_seqno[idx] = seqno; - - return to->sync_to(to, from, seqno - 1); -} - static int i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) { @@ -917,7 +900,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, } list_for_each_entry(obj, objects, exec_list) { - ret = i915_gem_execbuffer_sync_rings(obj, ring); + ret = i915_gem_object_sync(obj, ring); if (ret) return ret; } @@ -955,7 +938,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, if (!access_ok(VERIFY_WRITE, ptr, length)) return -EFAULT; - if (fault_in_pages_readable(ptr, length)) + if (fault_in_multipages_readable(ptr, length)) return -EFAULT; } @@ -984,11 +967,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, obj->pending_gpu_write = true; list_move_tail(&obj->gpu_write_list, &ring->gpu_write_list); - intel_mark_busy(ring->dev, obj); + if (obj->pin_count) /* check for potential scanout */ + intel_mark_busy(ring->dev, obj); } trace_i915_gem_object_change_domain(obj, old_read, old_write); } + + intel_mark_busy(ring->dev, NULL); } static void @@ -1078,17 +1064,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ring = &dev_priv->ring[RCS]; break; case I915_EXEC_BSD: - if (!HAS_BSD(dev)) { - DRM_DEBUG("execbuf with invalid ring (BSD)\n"); - return -EINVAL; - } ring = &dev_priv->ring[VCS]; break; case I915_EXEC_BLT: - if (!HAS_BLT(dev)) { - DRM_DEBUG("execbuf with invalid ring (BLT)\n"); - return -EINVAL; - } ring = &dev_priv->ring[BCS]; break; default: @@ -1096,6 +1074,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } + if (!intel_ring_initialized(ring)) { + DRM_DEBUG("execbuf with invalid ring: %d\n", + (int)(args->flags & I915_EXEC_RING_MASK)); + return -EINVAL; + } mode = args->flags & I915_EXEC_CONSTANTS_MASK; mask = I915_EXEC_CONSTANTS_MASK; @@ -1133,11 +1116,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (INTEL_INFO(dev)->gen >= 5) { + DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); + return -EINVAL; + } + if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { DRM_DEBUG("execbuf with %u cliprects\n", args->num_cliprects); return -EINVAL; } + cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects), GFP_KERNEL); if (cliprects == NULL) { @@ -1242,9 +1231,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * so every billion or so execbuffers, we need to stall * the GPU in order to reset the counters. */ - ret = i915_gpu_idle(dev, true); + ret = i915_gpu_idle(dev); if (ret) goto err; + i915_gem_retire_requests(dev); BUG_ON(ring->sync_seqno[i]); } |