summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c119
1 files changed, 75 insertions, 44 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 61129e6759eb..e69834341ef0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -268,7 +268,6 @@ eb_destroy(struct eb_objects *eb)
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
- struct drm_i915_gem_exec_object2 *entry,
struct drm_i915_gem_relocation_entry *reloc)
{
struct drm_device *dev = obj->base.dev;
@@ -411,10 +410,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
- struct eb_objects *eb,
- struct drm_i915_gem_exec_object2 *entry)
+ struct eb_objects *eb)
{
struct drm_i915_gem_relocation_entry __user *user_relocs;
+ struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
int i, ret;
user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
@@ -426,7 +425,7 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
sizeof(reloc)))
return -EFAULT;
- ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &reloc);
+ ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
if (ret)
return ret;
@@ -442,13 +441,13 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
- struct drm_i915_gem_exec_object2 *entry,
struct drm_i915_gem_relocation_entry *relocs)
{
+ const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
int i, ret;
for (i = 0; i < entry->relocation_count; i++) {
- ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &relocs[i]);
+ ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
if (ret)
return ret;
}
@@ -459,8 +458,7 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
struct eb_objects *eb,
- struct list_head *objects,
- struct drm_i915_gem_exec_object2 *exec)
+ struct list_head *objects)
{
struct drm_i915_gem_object *obj;
int ret;
@@ -468,7 +466,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
list_for_each_entry(obj, objects, exec_list) {
obj->base.pending_read_domains = 0;
obj->base.pending_write_domain = 0;
- ret = i915_gem_execbuffer_relocate_object(obj, eb, exec++);
+ ret = i915_gem_execbuffer_relocate_object(obj, eb);
if (ret)
return ret;
}
@@ -479,13 +477,36 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct drm_file *file,
- struct list_head *objects,
- struct drm_i915_gem_exec_object2 *exec)
+ struct list_head *objects)
{
struct drm_i915_gem_object *obj;
- struct drm_i915_gem_exec_object2 *entry;
int ret, retry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+ struct list_head ordered_objects;
+
+ INIT_LIST_HEAD(&ordered_objects);
+ while (!list_empty(objects)) {
+ struct drm_i915_gem_exec_object2 *entry;
+ bool need_fence, need_mappable;
+
+ obj = list_first_entry(objects,
+ struct drm_i915_gem_object,
+ exec_list);
+ entry = obj->exec_entry;
+
+ need_fence =
+ has_fenced_gpu_access &&
+ entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+ obj->tiling_mode != I915_TILING_NONE;
+ need_mappable =
+ entry->relocation_count ? true : need_fence;
+
+ if (need_mappable)
+ list_move(&obj->exec_list, &ordered_objects);
+ else
+ list_move_tail(&obj->exec_list, &ordered_objects);
+ }
+ list_splice(&ordered_objects, objects);
/* Attempt to pin all of the buffers into the GTT.
* This is done in 3 phases:
@@ -504,14 +525,11 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
ret = 0;
/* Unbind any ill-fitting objects or pin. */
- entry = exec;
list_for_each_entry(obj, objects, exec_list) {
+ struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
bool need_fence, need_mappable;
-
- if (!obj->gtt_space) {
- entry++;
+ if (!obj->gtt_space)
continue;
- }
need_fence =
has_fenced_gpu_access &&
@@ -534,8 +552,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
}
/* Bind fresh objects */
- entry = exec;
list_for_each_entry(obj, objects, exec_list) {
+ struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
bool need_fence;
need_fence =
@@ -570,7 +588,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
}
entry->offset = obj->gtt_offset;
- entry++;
}
/* Decrement pin count for bound objects */
@@ -622,7 +639,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
int i, total, ret;
/* We may process another execbuffer during the unlock... */
- while (list_empty(objects)) {
+ while (!list_empty(objects)) {
obj = list_first_entry(objects,
struct drm_i915_gem_object,
exec_list);
@@ -665,7 +682,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
}
/* reacquire the objects */
- INIT_LIST_HEAD(objects);
eb_reset(eb);
for (i = 0; i < count; i++) {
struct drm_i915_gem_object *obj;
@@ -681,10 +697,11 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
list_add_tail(&obj->exec_list, objects);
obj->exec_handle = exec[i].handle;
+ obj->exec_entry = &exec[i];
eb_add_object(eb, obj);
}
- ret = i915_gem_execbuffer_reserve(ring, file, objects, exec);
+ ret = i915_gem_execbuffer_reserve(ring, file, objects);
if (ret)
goto err;
@@ -693,7 +710,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
obj->base.pending_read_domains = 0;
obj->base.pending_write_domain = 0;
ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
- exec,
reloc + total);
if (ret)
goto err;
@@ -713,25 +729,34 @@ err:
return ret;
}
-static void
+static int
i915_gem_execbuffer_flush(struct drm_device *dev,
uint32_t invalidate_domains,
uint32_t flush_domains,
uint32_t flush_rings)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- int i;
+ int i, ret;
if (flush_domains & I915_GEM_DOMAIN_CPU)
intel_gtt_chipset_flush();
+ if (flush_domains & I915_GEM_DOMAIN_GTT)
+ wmb();
+
if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
for (i = 0; i < I915_NUM_RINGS; i++)
- if (flush_rings & (1 << i))
- i915_gem_flush_ring(dev, &dev_priv->ring[i],
- invalidate_domains,
- flush_domains);
+ if (flush_rings & (1 << i)) {
+ ret = i915_gem_flush_ring(dev,
+ &dev_priv->ring[i],
+ invalidate_domains,
+ flush_domains);
+ if (ret)
+ return ret;
+ }
}
+
+ return 0;
}
static int
@@ -795,10 +820,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
cd.invalidate_domains,
cd.flush_domains);
#endif
- i915_gem_execbuffer_flush(ring->dev,
- cd.invalidate_domains,
- cd.flush_domains,
- cd.flush_rings);
+ ret = i915_gem_execbuffer_flush(ring->dev,
+ cd.invalidate_domains,
+ cd.flush_domains,
+ cd.flush_rings);
+ if (ret)
+ return ret;
}
list_for_each_entry(obj, objects, exec_list) {
@@ -921,7 +948,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
struct intel_ring_buffer *ring)
{
struct drm_i915_gem_request *request;
- u32 flush_domains;
+ u32 invalidate;
/*
* Ensure that the commands in the batch buffer are
@@ -929,11 +956,13 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
*
* The sampler always gets flushed on i965 (sigh).
*/
- flush_domains = 0;
+ invalidate = I915_GEM_DOMAIN_COMMAND;
if (INTEL_INFO(dev)->gen >= 4)
- flush_domains |= I915_GEM_DOMAIN_SAMPLER;
-
- ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
+ invalidate |= I915_GEM_DOMAIN_SAMPLER;
+ if (ring->flush(ring, invalidate, 0)) {
+ i915_gem_next_request_seqno(dev, ring);
+ return;
+ }
/* Add a breadcrumb for the completion of the batch buffer */
request = kzalloc(sizeof(*request), GFP_KERNEL);
@@ -1098,16 +1127,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
list_add_tail(&obj->exec_list, &objects);
obj->exec_handle = exec[i].handle;
+ obj->exec_entry = &exec[i];
eb_add_object(eb, obj);
}
+ /* take note of the batch buffer before we might reorder the lists */
+ batch_obj = list_entry(objects.prev,
+ struct drm_i915_gem_object,
+ exec_list);
+
/* Move the objects en-masse into the GTT, evicting if necessary. */
- ret = i915_gem_execbuffer_reserve(ring, file, &objects, exec);
+ ret = i915_gem_execbuffer_reserve(ring, file, &objects);
if (ret)
goto err;
/* The objects are in their final locations, apply the relocations. */
- ret = i915_gem_execbuffer_relocate(dev, eb, &objects, exec);
+ ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
if (ret) {
if (ret == -EFAULT) {
ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
@@ -1121,9 +1156,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
/* Set the pending read domains for the batch buffer to COMMAND */
- batch_obj = list_entry(objects.prev,
- struct drm_i915_gem_object,
- exec_list);
if (batch_obj->base.pending_write_domain) {
DRM_ERROR("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
@@ -1340,4 +1372,3 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
drm_free_large(exec2_list);
return ret;
}
-