summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c200
1 files changed, 95 insertions, 105 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index de431942ded4..974a9f1068a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -266,6 +266,12 @@ eb_destroy(struct eb_objects *eb)
kfree(eb);
}
+static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
+{
+ return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+ obj->cache_level != I915_CACHE_NONE);
+}
+
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
@@ -273,6 +279,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
{
struct drm_device *dev = obj->base.dev;
struct drm_gem_object *target_obj;
+ struct drm_i915_gem_object *target_i915_obj;
uint32_t target_offset;
int ret = -EINVAL;
@@ -281,7 +288,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
if (unlikely(target_obj == NULL))
return -ENOENT;
- target_offset = to_intel_bo(target_obj)->gtt_offset;
+ target_i915_obj = to_intel_bo(target_obj);
+ target_offset = target_i915_obj->gtt_offset;
/* The target buffer should have appeared before us in the
* exec_object list, so it should have a GTT space bound by now.
@@ -352,11 +360,19 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
return ret;
}
+ /* We can't wait for rendering with pagefaults disabled */
+ if (obj->active && in_atomic())
+ return -EFAULT;
+
reloc->delta += target_offset;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+ if (use_cpu_reloc(obj)) {
uint32_t page_offset = reloc->offset & ~PAGE_MASK;
char *vaddr;
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ if (ret)
+ return ret;
+
vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
*(uint32_t *)(vaddr + page_offset) = reloc->delta;
kunmap_atomic(vaddr);
@@ -365,11 +381,11 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
uint32_t __iomem *reloc_entry;
void __iomem *reloc_page;
- /* We can't wait for rendering with pagefaults disabled */
- if (obj->active && in_atomic())
- return -EFAULT;
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (ret)
+ return ret;
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_put_fence(obj);
if (ret)
return ret;
@@ -383,6 +399,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
io_mapping_unmap_atomic(reloc_page);
}
+ /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
+ * pipe_control writes because the gpu doesn't properly redirect them
+ * through the ppgtt for non_secure batchbuffers. */
+ if (unlikely(IS_GEN6(dev) &&
+ reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
+ !target_i915_obj->has_global_gtt_mapping)) {
+ i915_gem_gtt_bind_object(target_i915_obj,
+ target_i915_obj->cache_level);
+ }
+
/* and update the user's relocation entry */
reloc->presumed_offset = target_offset;
@@ -393,30 +419,46 @@ static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
struct eb_objects *eb)
{
+#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
+ struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
struct drm_i915_gem_relocation_entry __user *user_relocs;
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
- int i, ret;
+ int remain, ret;
user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
- for (i = 0; i < entry->relocation_count; i++) {
- struct drm_i915_gem_relocation_entry reloc;
- if (__copy_from_user_inatomic(&reloc,
- user_relocs+i,
- sizeof(reloc)))
+ remain = entry->relocation_count;
+ while (remain) {
+ struct drm_i915_gem_relocation_entry *r = stack_reloc;
+ int count = remain;
+ if (count > ARRAY_SIZE(stack_reloc))
+ count = ARRAY_SIZE(stack_reloc);
+ remain -= count;
+
+ if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
return -EFAULT;
- ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
- if (ret)
- return ret;
+ do {
+ u64 offset = r->presumed_offset;
- if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
- &reloc.presumed_offset,
- sizeof(reloc.presumed_offset)))
- return -EFAULT;
+ ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
+ if (ret)
+ return ret;
+
+ if (r->presumed_offset != offset &&
+ __copy_to_user_inatomic(&user_relocs->presumed_offset,
+ &r->presumed_offset,
+ sizeof(r->presumed_offset))) {
+ return -EFAULT;
+ }
+
+ user_relocs++;
+ r++;
+ } while (--count);
}
return 0;
+#undef N_RELOC
}
static int
@@ -465,6 +507,13 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
#define __EXEC_OBJECT_HAS_FENCE (1<<31)
static int
+need_reloc_mappable(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+ return entry->relocation_count && !use_cpu_reloc(obj);
+}
+
+static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *ring)
{
@@ -477,8 +526,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
if (ret)
@@ -486,18 +534,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
if (has_fenced_gpu_access) {
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
- if (obj->tiling_mode) {
- ret = i915_gem_object_get_fence(obj, ring);
- if (ret)
- goto err_unpin;
+ ret = i915_gem_object_get_fence(obj);
+ if (ret)
+ goto err_unpin;
+ if (i915_gem_object_pin_fence(obj))
entry->flags |= __EXEC_OBJECT_HAS_FENCE;
- i915_gem_object_pin_fence(obj);
- } else {
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- goto err_unpin;
- }
+
obj->pending_fenced_gpu_access = true;
}
}
@@ -535,8 +578,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
if (need_mappable)
list_move(&obj->exec_list, &ordered_objects);
@@ -576,8 +618,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
(need_mappable && !obj->map_and_fenceable))
@@ -798,64 +839,6 @@ i915_gem_execbuffer_flush(struct drm_device *dev,
return 0;
}
-static bool
-intel_enable_semaphores(struct drm_device *dev)
-{
- if (INTEL_INFO(dev)->gen < 6)
- return 0;
-
- if (i915_semaphores >= 0)
- return i915_semaphores;
-
- /* Disable semaphores on SNB */
- if (INTEL_INFO(dev)->gen == 6)
- return 0;
-
- return 1;
-}
-
-static int
-i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
- struct intel_ring_buffer *to)
-{
- struct intel_ring_buffer *from = obj->ring;
- u32 seqno;
- int ret, idx;
-
- if (from == NULL || to == from)
- return 0;
-
- /* XXX gpu semaphores are implicated in various hard hangs on SNB */
- if (!intel_enable_semaphores(obj->base.dev))
- return i915_gem_object_wait_rendering(obj);
-
- idx = intel_ring_sync_index(from, to);
-
- seqno = obj->last_rendering_seqno;
- if (seqno <= from->sync_seqno[idx])
- return 0;
-
- if (seqno == from->outstanding_lazy_request) {
- struct drm_i915_gem_request *request;
-
- request = kzalloc(sizeof(*request), GFP_KERNEL);
- if (request == NULL)
- return -ENOMEM;
-
- ret = i915_add_request(from, NULL, request);
- if (ret) {
- kfree(request);
- return ret;
- }
-
- seqno = request->seqno;
- }
-
- from->sync_seqno[idx] = seqno;
-
- return to->sync_to(to, from, seqno - 1);
-}
-
static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
@@ -917,7 +900,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
}
list_for_each_entry(obj, objects, exec_list) {
- ret = i915_gem_execbuffer_sync_rings(obj, ring);
+ ret = i915_gem_object_sync(obj, ring);
if (ret)
return ret;
}
@@ -955,7 +938,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
if (!access_ok(VERIFY_WRITE, ptr, length))
return -EFAULT;
- if (fault_in_pages_readable(ptr, length))
+ if (fault_in_multipages_readable(ptr, length))
return -EFAULT;
}
@@ -984,11 +967,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
obj->pending_gpu_write = true;
list_move_tail(&obj->gpu_write_list,
&ring->gpu_write_list);
- intel_mark_busy(ring->dev, obj);
+ if (obj->pin_count) /* check for potential scanout */
+ intel_mark_busy(ring->dev, obj);
}
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
+
+ intel_mark_busy(ring->dev, NULL);
}
static void
@@ -1078,17 +1064,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ring = &dev_priv->ring[RCS];
break;
case I915_EXEC_BSD:
- if (!HAS_BSD(dev)) {
- DRM_DEBUG("execbuf with invalid ring (BSD)\n");
- return -EINVAL;
- }
ring = &dev_priv->ring[VCS];
break;
case I915_EXEC_BLT:
- if (!HAS_BLT(dev)) {
- DRM_DEBUG("execbuf with invalid ring (BLT)\n");
- return -EINVAL;
- }
ring = &dev_priv->ring[BCS];
break;
default:
@@ -1096,6 +1074,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
(int)(args->flags & I915_EXEC_RING_MASK));
return -EINVAL;
}
+ if (!intel_ring_initialized(ring)) {
+ DRM_DEBUG("execbuf with invalid ring: %d\n",
+ (int)(args->flags & I915_EXEC_RING_MASK));
+ return -EINVAL;
+ }
mode = args->flags & I915_EXEC_CONSTANTS_MASK;
mask = I915_EXEC_CONSTANTS_MASK;
@@ -1133,11 +1116,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
return -EINVAL;
}
+ if (INTEL_INFO(dev)->gen >= 5) {
+ DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
+ return -EINVAL;
+ }
+
if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
DRM_DEBUG("execbuf with %u cliprects\n",
args->num_cliprects);
return -EINVAL;
}
+
cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
GFP_KERNEL);
if (cliprects == NULL) {
@@ -1242,9 +1231,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* so every billion or so execbuffers, we need to stall
* the GPU in order to reset the counters.
*/
- ret = i915_gpu_idle(dev, true);
+ ret = i915_gpu_idle(dev);
if (ret)
goto err;
+ i915_gem_retire_requests(dev);
BUG_ON(ring->sync_seqno[i]);
}