From 419fa72a1973c9ba2fd2c2505dc889f54b857459 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 10:53:13 +0000 Subject: drm/i915: Mark a temporary allocation for copy-from-user as such The difference is that the kernel will then know that this memory will be reclaimable in the near future. Signed-off-by: Chris Wilson Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 163bb52bd3b3..986eb98f0d25 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1113,7 +1113,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, } exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, - GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); if (exec2_list == NULL) exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); -- cgit v1.2.3 From 3b96eff447b4ca34ca8ccd42e2651be2955f34b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 10:53:14 +0000 Subject: drm/i915: Take the handle idr spinlock once for looking up the exec objects Signed-off-by: Chris Wilson Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 86 ++++++++++++++++-------------- 1 file changed, 46 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 986eb98f0d25..da103c179e3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -69,6 +69,46 @@ eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) &eb->buckets[obj->exec_handle & eb->and]); } +static int +eb_lookup_objects(struct eb_objects *eb, + struct drm_i915_gem_exec_object2 *exec, + int count, + struct drm_file *file, + struct list_head *objects) +{ + int i; + + spin_lock(&file->table_lock); + for (i = 0; i < count; i++) { + struct drm_i915_gem_object *obj; + + obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); + if (obj == NULL) { + spin_unlock(&file->table_lock); + DRM_DEBUG("Invalid object handle %d at index %d\n", + exec[i].handle, i); + return -ENOENT; + } + + if (!list_empty(&obj->exec_list)) { + spin_unlock(&file->table_lock); + DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", + obj, exec[i].handle, i); + return -EINVAL; + } + + drm_gem_object_reference(&obj->base); + list_add_tail(&obj->exec_list, objects); + + obj->exec_handle = exec[i].handle; + obj->exec_entry = &exec[i]; + eb_add_object(eb, obj); + } + spin_unlock(&file->table_lock); + + return 0; +} + static struct drm_i915_gem_object * eb_get_object(struct eb_objects *eb, unsigned long handle) { @@ -550,21 +590,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, /* reacquire the objects */ eb_reset(eb); - for (i = 0; i < count; i++) { - obj = to_intel_bo(drm_gem_object_lookup(dev, file, - exec[i].handle)); - if (&obj->base == NULL) { - DRM_DEBUG("Invalid object handle %d at index %d\n", - exec[i].handle, i); - ret = -ENOENT; - goto err; - } - - list_add_tail(&obj->exec_list, objects); - obj->exec_handle = exec[i].handle; - obj->exec_entry = &exec[i]; - eb_add_object(eb, obj); - } + ret = 
eb_lookup_objects(eb, exec, count, file, objects); + if (ret) + goto err; ret = i915_gem_execbuffer_reserve(ring, file, objects); if (ret) @@ -872,31 +900,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* Look up object handles */ INIT_LIST_HEAD(&objects); - for (i = 0; i < args->buffer_count; i++) { - struct drm_i915_gem_object *obj; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, - exec[i].handle)); - if (&obj->base == NULL) { - DRM_DEBUG("Invalid object handle %d at index %d\n", - exec[i].handle, i); - /* prevent error path from reading uninitialized data */ - ret = -ENOENT; - goto err; - } - - if (!list_empty(&obj->exec_list)) { - DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", - obj, exec[i].handle, i); - ret = -EINVAL; - goto err; - } - - list_add_tail(&obj->exec_list, &objects); - obj->exec_handle = exec[i].handle; - obj->exec_entry = &exec[i]; - eb_add_object(eb, obj); - } + ret = eb_lookup_objects(eb, exec, args->buffer_count, file, &objects); + if (ret) + goto err; /* take note of the batch buffer before we might reorder the lists */ batch_obj = list_entry(objects.prev, -- cgit v1.2.3 From bcffc3faa692d6b2ef734e4f0c8f097175284db6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 10:53:15 +0000 Subject: drm/i915: Move the execbuffer objects list from the stack into the tracker Instead of passing around the eb-objects hashtable and a separate object list, we can include the object list into the eb-objects structure for convenience. Signed-off-by: Chris Wilson Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 58 ++++++++++++++---------------- 1 file changed, 27 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index da103c179e3f..386677f8fd38 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,6 +34,7 @@ #include struct eb_objects { + struct list_head objects; int and; struct hlist_head buckets[0]; }; @@ -53,6 +54,7 @@ eb_create(int size) return eb; eb->and = count - 1; + INIT_LIST_HEAD(&eb->objects); return eb; } @@ -73,8 +75,7 @@ static int eb_lookup_objects(struct eb_objects *eb, struct drm_i915_gem_exec_object2 *exec, int count, - struct drm_file *file, - struct list_head *objects) + struct drm_file *file) { int i; @@ -98,7 +99,7 @@ eb_lookup_objects(struct eb_objects *eb, } drm_gem_object_reference(&obj->base); - list_add_tail(&obj->exec_list, objects); + list_add_tail(&obj->exec_list, &eb->objects); obj->exec_handle = exec[i].handle; obj->exec_entry = &exec[i]; @@ -129,6 +130,15 @@ eb_get_object(struct eb_objects *eb, unsigned long handle) static void eb_destroy(struct eb_objects *eb) { + while (!list_empty(&eb->objects)) { + struct drm_i915_gem_object *obj; + + obj = list_first_entry(&eb->objects, + struct drm_i915_gem_object, + exec_list); + list_del_init(&obj->exec_list); + drm_gem_object_unreference(&obj->base); + } kfree(eb); } @@ -328,8 +338,7 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate(struct drm_device *dev, - struct eb_objects *eb, - struct list_head *objects) + struct eb_objects *eb) { struct drm_i915_gem_object *obj; int ret = 0; @@ -342,7 +351,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, * lockdep complains vehemently. 
*/ pagefault_disable(); - list_for_each_entry(obj, objects, exec_list) { + list_for_each_entry(obj, &eb->objects, exec_list) { ret = i915_gem_execbuffer_relocate_object(obj, eb); if (ret) break; @@ -531,7 +540,6 @@ static int i915_gem_execbuffer_relocate_slow(struct drm_device *dev, struct drm_file *file, struct intel_ring_buffer *ring, - struct list_head *objects, struct eb_objects *eb, struct drm_i915_gem_exec_object2 *exec, int count) @@ -542,8 +550,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, int i, total, ret; /* We may process another execbuffer during the unlock... */ - while (!list_empty(objects)) { - obj = list_first_entry(objects, + while (!list_empty(&eb->objects)) { + obj = list_first_entry(&eb->objects, struct drm_i915_gem_object, exec_list); list_del_init(&obj->exec_list); @@ -590,15 +598,15 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, /* reacquire the objects */ eb_reset(eb); - ret = eb_lookup_objects(eb, exec, count, file, objects); + ret = eb_lookup_objects(eb, exec, count, file); if (ret) goto err; - ret = i915_gem_execbuffer_reserve(ring, file, objects); + ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects); if (ret) goto err; - list_for_each_entry(obj, objects, exec_list) { + list_for_each_entry(obj, &eb->objects, exec_list) { int offset = obj->exec_entry - exec; ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, reloc + reloc_offset[offset]); @@ -756,7 +764,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_i915_gem_exec_object2 *exec) { drm_i915_private_t *dev_priv = dev->dev_private; - struct list_head objects; struct eb_objects *eb; struct drm_i915_gem_object *batch_obj; struct drm_clip_rect *cliprects = NULL; @@ -899,28 +906,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Look up object handles */ - INIT_LIST_HEAD(&objects); - ret = eb_lookup_objects(eb, exec, args->buffer_count, file, &objects); + ret = eb_lookup_objects(eb, exec, args->buffer_count, file); if (ret) goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = list_entry(objects.prev, + batch_obj = list_entry(eb->objects.prev, struct drm_i915_gem_object, exec_list); /* Move the objects en-masse into the GTT, evicting if necessary. */ - ret = i915_gem_execbuffer_reserve(ring, file, &objects); + ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects); if (ret) goto err; /* The objects are in their final locations, apply the relocations. 
*/ - ret = i915_gem_execbuffer_relocate(dev, eb, &objects); + ret = i915_gem_execbuffer_relocate(dev, eb); if (ret) { if (ret == -EFAULT) { ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, - &objects, eb, - exec, + eb, exec, args->buffer_count); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } @@ -943,7 +948,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); - ret = i915_gem_execbuffer_move_to_gpu(ring, &objects); + ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects); if (ret) goto err; @@ -997,20 +1002,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); - i915_gem_execbuffer_move_to_active(&objects, ring); + i915_gem_execbuffer_move_to_active(&eb->objects, ring); i915_gem_execbuffer_retire_commands(dev, file, ring); err: eb_destroy(eb); - while (!list_empty(&objects)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&objects, - struct drm_i915_gem_object, - exec_list); - list_del_init(&obj->exec_list); - drm_gem_object_unreference(&obj->base); - } mutex_unlock(&dev->struct_mutex); -- cgit v1.2.3 From ed5982e6ce5f106abcbf071f80730db344a6da42 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 17 Jan 2013 22:23:36 +0100 Subject: drm/i915: Allow userspace to hint that the relocations were known Userspace is able to hint to the kernel that its command stream and auxiliary state buffers already hold the correct presumed addresses and so the relocation process may be skipped if the kernel does not need to move any buffers in preparation for the execbuffer. Thus for the common case where the allotment of buffers is static between batches, we can avoid the overhead of individually checking the relocation entries. Note that this requires userspace to supply the domain tracking and requests for workarounds itself that would otherwise be computed based upon the relocation entries. Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 632000.0/sec. i3-330m: 748000.0/sec to 830000.0/sec. (measured relative to a baseline with neither optimisations applied). Signed-off-by: Chris Wilson Reviewed-by: Imre Deak [danvet: Fixup merge conflict in userspace header due to different baseline trees.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 68 ++++++++++++++++++++---------- include/uapi/drm/i915_drm.h | 14 ++++++ 3 files changed, 62 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 6d8a1dc74934..a6e047d533ec 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -992,6 +992,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_PINNED_BATCHES: value = 1; break; + case I915_PARAM_HAS_EXEC_NO_RELOC: + value = 1; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 386677f8fd38..34f6cdffa9f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -373,7 +373,8 @@ need_reloc_mappable(struct drm_i915_gem_object *obj) static int i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *ring) + struct intel_ring_buffer *ring, + bool *need_reloc) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; @@ -414,7 +415,20 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, obj->has_aliasing_ppgtt_mapping = 1; } - entry->offset = obj->gtt_offset; + if (entry->offset != obj->gtt_offset) { + entry->offset = obj->gtt_offset; + *need_reloc = true; + } + + if (entry->flags & EXEC_OBJECT_WRITE) { + obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER; + obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; + } + + if (entry->flags & EXEC_OBJECT_NEEDS_GTT && + !obj->has_global_gtt_mapping) + i915_gem_gtt_bind_object(obj, obj->cache_level); + return 0; } @@ -440,7 +454,8 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct drm_file *file, - struct list_head *objects) + struct list_head *objects, + bool *need_relocs) { struct drm_i915_gem_object *obj; struct list_head ordered_objects; @@ -468,7 +483,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, else list_move_tail(&obj->exec_list, &ordered_objects); - obj->base.pending_read_domains = 0; + obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; obj->pending_fenced_gpu_access = false; } @@ -508,7 +523,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, (need_mappable && !obj->map_and_fenceable)) ret = i915_gem_object_unbind(obj); else - ret = i915_gem_execbuffer_reserve_object(obj, ring); + ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs); if (ret) goto err; } @@ -518,7 +533,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, if (obj->gtt_space) continue; - ret = i915_gem_execbuffer_reserve_object(obj, ring); + ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs); if (ret) goto err; } @@ -538,16 +553,18 @@ err: /* Decrement pin count for bound objects */ static int i915_gem_execbuffer_relocate_slow(struct drm_device *dev, + struct drm_i915_gem_execbuffer2 *args, struct drm_file *file, struct intel_ring_buffer *ring, struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *exec, - int count) + struct drm_i915_gem_exec_object2 *exec) { struct 
drm_i915_gem_relocation_entry *reloc; struct drm_i915_gem_object *obj; + bool need_relocs; int *reloc_offset; int i, total, ret; + int count = args->buffer_count; /* We may process another execbuffer during the unlock... */ while (!list_empty(&eb->objects)) { @@ -602,7 +619,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, if (ret) goto err; - ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects); + need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; + ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs); if (ret) goto err; @@ -660,6 +678,9 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) { + if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS) + return false; + return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0; } @@ -673,6 +694,9 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr; int length; /* limited by fault_in_pages_readable() */ + if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) + return -EINVAL; + /* First check for malicious input causing overflow */ if (exec[i].relocation_count > INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) @@ -680,9 +704,6 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry); - if (!access_ok(VERIFY_READ, ptr, length)) - return -EFAULT; - /* we may also need to update the presumed offsets */ if (!access_ok(VERIFY_WRITE, ptr, length)) return -EFAULT; @@ -704,8 +725,10 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; - obj->base.read_domains = obj->base.pending_read_domains; obj->base.write_domain = obj->base.pending_write_domain; + if (obj->base.write_domain == 0) + obj->base.pending_read_domains |= obj->base.read_domains; + obj->base.read_domains = obj->base.pending_read_domains; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; i915_gem_object_move_to_active(obj, ring); @@ -770,14 +793,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct intel_ring_buffer *ring; u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; - u32 mask; - u32 flags; + u32 mask, flags; int ret, mode, i; + bool need_relocs; - if (!i915_gem_check_execbuffer(args)) { - DRM_DEBUG("execbuf with invalid offset/length\n"); + if (!i915_gem_check_execbuffer(args)) return -EINVAL; - } ret = validate_exec_list(exec, args->buffer_count); if (ret) @@ -916,17 +937,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, exec_list); /* Move the objects en-masse into the GTT, evicting if necessary. */ - ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects); + need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; + ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs); if (ret) goto err; /* The objects are in their final locations, apply the relocations. 
*/ - ret = i915_gem_execbuffer_relocate(dev, eb); + if (need_relocs) + ret = i915_gem_execbuffer_relocate(dev, eb); if (ret) { if (ret == -EFAULT) { - ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, - eb, exec, - args->buffer_count); + ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring, + eb, exec); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } if (ret) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c4d2e9c74002..2430b6ad6a85 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -308,6 +308,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 #define I915_PARAM_HAS_PINNED_BATCHES 24 +#define I915_PARAM_HAS_EXEC_NO_RELOC 25 typedef struct drm_i915_getparam { int param; @@ -628,7 +629,11 @@ struct drm_i915_gem_exec_object2 { __u64 offset; #define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) __u64 flags; + __u64 rsvd1; __u64 rsvd2; }; @@ -687,6 +692,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_IS_PINNED (1<<10) +/** Provide a hint to the kernel that the command stream and auxilliary + * state buffers already holds the correct presumed addresses and so the + * relocation process may be skipped if no buffers need to be moved in + * preparation for the execbuffer. + */ +#define I915_EXEC_NO_RELOC (1<<11) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK -- cgit v1.2.3 From eef90ccb8a4d50b219a95cc53878ebb007315b32 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 10:53:17 +0000 Subject: drm/i915: Use the reloc.handle as an index into the execbuffer array Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 623000.0/sec. i3-330m: 748000.0/sec to 789000.0/sec. (measured relative to a baseline with neither optimisations applied). 
Signed-off-by: Chris Wilson Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 100 ++++++++++++++++++----------- include/uapi/drm/i915_drm.h | 8 ++- 3 files changed, 71 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a6e047d533ec..442118293883 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -995,6 +995,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_NO_RELOC: value = 1; break; + case I915_PARAM_HAS_EXEC_HANDLE_LUT: + value = 1; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 34f6cdffa9f8..f5a11ecf5494 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -36,24 +36,40 @@ struct eb_objects { struct list_head objects; int and; - struct hlist_head buckets[0]; + union { + struct drm_i915_gem_object *lut[0]; + struct hlist_head buckets[0]; + }; }; static struct eb_objects * -eb_create(int size) +eb_create(struct drm_i915_gem_execbuffer2 *args) { - struct eb_objects *eb; - int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; - BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); - while (count > size) - count >>= 1; - eb = kzalloc(count*sizeof(struct hlist_head) + - sizeof(struct eb_objects), - GFP_KERNEL); - if (eb == NULL) - return eb; - - eb->and = count - 1; + struct eb_objects *eb = NULL; + + if (args->flags & I915_EXEC_HANDLE_LUT) { + int size = args->buffer_count; + size *= sizeof(struct drm_i915_gem_object *); + size += sizeof(struct eb_objects); + eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + } + + if (eb == NULL) { + int size = args->buffer_count; + int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; + BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); + while (count > 2*size) + count >>= 1; + eb = kzalloc(count*sizeof(struct hlist_head) + + sizeof(struct eb_objects), + GFP_TEMPORARY); + if (eb == NULL) + return eb; + + eb->and = count - 1; + } else + eb->and = -args->buffer_count; + INIT_LIST_HEAD(&eb->objects); return eb; } @@ -61,26 +77,20 @@ eb_create(int size) static void eb_reset(struct eb_objects *eb) { - memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); -} - -static void -eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) -{ - hlist_add_head(&obj->exec_node, - &eb->buckets[obj->exec_handle & eb->and]); + if (eb->and >= 0) + memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } static int eb_lookup_objects(struct eb_objects *eb, struct drm_i915_gem_exec_object2 *exec, - int count, + const struct drm_i915_gem_execbuffer2 *args, struct drm_file *file) { int i; spin_lock(&file->table_lock); - for (i = 0; i < count; i++) { + for (i = 0; i < args->buffer_count; i++) { struct drm_i915_gem_object *obj; obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); @@ -101,9 +111,15 @@ eb_lookup_objects(struct eb_objects *eb, drm_gem_object_reference(&obj->base); list_add_tail(&obj->exec_list, &eb->objects); - obj->exec_handle = exec[i].handle; obj->exec_entry = &exec[i]; - eb_add_object(eb, obj); + if (eb->and < 0) { + eb->lut[i] = obj; + } else { + uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? 
i : exec[i].handle; + obj->exec_handle = handle; + hlist_add_head(&obj->exec_node, + &eb->buckets[handle & eb->and]); + } } spin_unlock(&file->table_lock); @@ -113,18 +129,24 @@ eb_lookup_objects(struct eb_objects *eb, static struct drm_i915_gem_object * eb_get_object(struct eb_objects *eb, unsigned long handle) { - struct hlist_head *head; - struct hlist_node *node; - struct drm_i915_gem_object *obj; + if (eb->and < 0) { + if (handle >= -eb->and) + return NULL; + return eb->lut[handle]; + } else { + struct hlist_head *head; + struct hlist_node *node; - head = &eb->buckets[handle & eb->and]; - hlist_for_each(node, head) { - obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); - if (obj->exec_handle == handle) - return obj; - } + head = &eb->buckets[handle & eb->and]; + hlist_for_each(node, head) { + struct drm_i915_gem_object *obj; - return NULL; + obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); + if (obj->exec_handle == handle) + return obj; + } + return NULL; + } } static void @@ -615,7 +637,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, /* reacquire the objects */ eb_reset(eb); - ret = eb_lookup_objects(eb, exec, count, file); + ret = eb_lookup_objects(eb, exec, args, file); if (ret) goto err; @@ -919,7 +941,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - eb = eb_create(args->buffer_count); + eb = eb_create(args); if (eb == NULL) { mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; @@ -927,7 +949,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Look up object handles */ - ret = eb_lookup_objects(eb, exec, args->buffer_count, file); + ret = eb_lookup_objects(eb, exec, args, file); if (ret) goto err; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2430b6ad6a85..07d59419fe6b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_SECURE_BATCHES 23 #define I915_PARAM_HAS_PINNED_BATCHES 24 #define I915_PARAM_HAS_EXEC_NO_RELOC 25 +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 typedef struct drm_i915_getparam { int param; @@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_NO_RELOC (1<<11) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) +/** Use the reloc.handle as an index into the exec object array rather + * than as the per-file handle. + */ +#define I915_EXEC_HANDLE_LUT (1<<12) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3 From 5d4545aef561ad47f91bcf75814af20c104b5a9e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 17 Jan 2013 12:45:15 -0800 Subject: drm/i915: Create a gtt structure The purpose of the gtt structure is to help isolate our gtt specific properties from the rest of the code (in doing so it help us finish the isolation from the AGP connection). The following members are pulled out (and renamed): gtt_start gtt_total gtt_mappable_end gtt_mappable gtt_base_addr gsm The gtt structure will serve as a nice place to put gen specific gtt routines in upcoming patches. As far as what else I feel belongs in this structure: it is meant to encapsulate the GTT's physical properties. This is why I've not added fields which track various drm_mm properties, or things like gtt_mtrr (which is itself a pretty transient field). 
Reviewed-by: Rodrigo Vivi [Ben modified commit messages] Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_dma.c | 20 ++++++++++---------- drivers/gpu/drm/i915/i915_drv.h | 29 +++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 14 +++++++------- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fb.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 4 ++-- 12 files changed, 61 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 35d326d70fab..773b23ecc83b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -259,8 +259,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data) count, size); seq_printf(m, "%zu [%zu] gtt total\n", - dev_priv->mm.gtt_total, - dev_priv->mm.gtt_mappable_end - dev_priv->mm.gtt_start); + dev_priv->gtt.total, + dev_priv->gtt.mappable_end - dev_priv->gtt.start); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 442118293883..bb622135798a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1076,7 +1076,7 @@ static int i915_set_status_page(struct drm_device *dev, void *data, ring->status_page.gfx_addr = hws->addr & (0x1ffff<<12); dev_priv->dri1.gfx_hws_cpu_addr = - ioremap_wc(dev_priv->mm.gtt_base_addr + hws->addr, 4096); + ioremap_wc(dev_priv->gtt.mappable_base + hws->addr, 4096); if (dev_priv->dri1.gfx_hws_cpu_addr == NULL) { i915_dma_cleanup(dev); ring->status_page.gfx_addr = 0; @@ -1543,17 +1543,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) } aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; - dev_priv->mm.gtt_base_addr = dev_priv->mm.gtt->gma_bus_addr; + dev_priv->gtt.mappable_base = dev_priv->mm.gtt->gma_bus_addr; - dev_priv->mm.gtt_mapping = - io_mapping_create_wc(dev_priv->mm.gtt_base_addr, + dev_priv->gtt.mappable = + io_mapping_create_wc(dev_priv->gtt.mappable_base, aperture_size); - if (dev_priv->mm.gtt_mapping == NULL) { + if (dev_priv->gtt.mappable == NULL) { ret = -EIO; goto out_rmmap; } - i915_mtrr_setup(dev_priv, dev_priv->mm.gtt_base_addr, + i915_mtrr_setup(dev_priv, dev_priv->gtt.mappable_base, aperture_size); /* The i915 workqueue is primarily used for batched retirement of @@ -1658,11 +1658,11 @@ out_gem_unload: out_mtrrfree: if (dev_priv->mm.gtt_mtrr >= 0) { mtrr_del(dev_priv->mm.gtt_mtrr, - dev_priv->mm.gtt_base_addr, + dev_priv->gtt.mappable_base, aperture_size); dev_priv->mm.gtt_mtrr = -1; } - io_mapping_free(dev_priv->mm.gtt_mapping); + io_mapping_free(dev_priv->gtt.mappable); out_rmmap: pci_iounmap(dev->pdev, dev_priv->regs); put_gmch: @@ -1696,10 +1696,10 @@ int i915_driver_unload(struct drm_device *dev) /* Cancel the retire work handler, which should be idle now. 
*/ cancel_delayed_work_sync(&dev_priv->mm.retire_work); - io_mapping_free(dev_priv->mm.gtt_mapping); + io_mapping_free(dev_priv->gtt.mappable); if (dev_priv->mm.gtt_mtrr >= 0) { mtrr_del(dev_priv->mm.gtt_mtrr, - dev_priv->mm.gtt_base_addr, + dev_priv->gtt.mappable_base, dev_priv->mm.gtt->gtt_mappable_entries * PAGE_SIZE); dev_priv->mm.gtt_mtrr = -1; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6e2c10b65c8d..f3f2e5e1393f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -364,6 +364,25 @@ struct intel_device_info { u8 has_llc:1; }; +/* The Graphics Translation Table is the way in which GEN hardware translates a + * Graphics Virtual Address into a Physical Address. In addition to the normal + * collateral associated with any va->pa translations GEN hardware also has a + * portion of the GTT which can be mapped by the CPU and remain both coherent + * and correct (in cases like swizzling). That region is referred to as GMADR in + * the spec. + */ +struct i915_gtt { + unsigned long start; /* Start offset of used GTT */ + size_t total; /* Total size GTT can map */ + + unsigned long mappable_end; /* End offset that we can CPU map */ + struct io_mapping *mappable; /* Mapping to our CPU mappable region */ + phys_addr_t mappable_base; /* PA of our GMADR */ + + /** "Graphics Stolen Memory" holds the global PTEs */ + void __iomem *gsm; +}; + #define I915_PPGTT_PD_ENTRIES 512 #define I915_PPGTT_PT_ENTRIES 1024 struct i915_hw_ppgtt { @@ -781,6 +800,8 @@ typedef struct drm_i915_private { /* Register state */ bool modeset_on_lid; + struct i915_gtt gtt; + struct { /** Bridge to intel-gtt-ko */ struct intel_gtt *gtt; @@ -799,15 +820,8 @@ typedef struct drm_i915_private { struct list_head unbound_list; /** Usable portion of the GTT for GEM */ - unsigned long gtt_start; - unsigned long gtt_mappable_end; unsigned long stolen_base; /* limited to low memory (32-bit) */ - /** "Graphics Stolen Memory" holds the global PTEs */ - void __iomem *gsm; - - struct io_mapping *gtt_mapping; - phys_addr_t gtt_base_addr; int gtt_mtrr; /** PPGTT used for aliasing the PPGTT with the GTT */ @@ -885,7 +899,6 @@ typedef struct drm_i915_private { struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT]; /* accounting, useful for userland debugging */ - size_t gtt_total; size_t object_memory; u32 object_count; } mm; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2bb18914329..51fdf16181a7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -186,7 +186,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned += obj->gtt_space->size; mutex_unlock(&dev->struct_mutex); - args->aper_size = dev_priv->mm.gtt_total; + args->aper_size = dev_priv->gtt.total; args->aper_available_size = args->aper_size - pinned; return 0; @@ -637,7 +637,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, * source page isn't available. Return the error and we'll * retry in the slow path. 
*/ - if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, + if (fast_user_write(dev_priv->gtt.mappable, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; goto out_unpin; @@ -1362,7 +1362,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) obj->fault_mappable = true; - pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) + + pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) + page_offset; /* Finally, remap it using the new GTT offset */ @@ -1544,7 +1544,7 @@ i915_gem_mmap_gtt(struct drm_file *file, goto unlock; } - if (obj->base.size > dev_priv->mm.gtt_mappable_end) { + if (obj->base.size > dev_priv->gtt.mappable_end) { ret = -E2BIG; goto out; } @@ -2910,7 +2910,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, * before evicting everything in a vain attempt to find space. */ if (obj->base.size > - (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { + (map_and_fenceable ? dev_priv->gtt.mappable_end : dev_priv->gtt.total)) { DRM_ERROR("Attempting to bind an object larger than the aperture\n"); return -E2BIG; } @@ -2931,7 +2931,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, if (map_and_fenceable) ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, size, alignment, obj->cache_level, - 0, dev_priv->mm.gtt_mappable_end); + 0, dev_priv->gtt.mappable_end); else ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, size, alignment, obj->cache_level); @@ -2971,7 +2971,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, (node->start & (fence_alignment - 1)) == 0; mappable = - obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; + obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end; obj->map_and_fenceable = mappable && fenceable; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 776a3225184c..c86d5d9356fd 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -80,7 +80,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, if (mappable) drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space, min_size, alignment, cache_level, - 0, dev_priv->mm.gtt_mappable_end); + 0, dev_priv->gtt.mappable_end); else drm_mm_init_scan(&dev_priv->mm.gtt_space, min_size, alignment, cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f5a11ecf5494..27269103b621 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -281,7 +281,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Map the page containing the relocation we're going to perform. 
*/ reloc->offset += obj->gtt_offset; - reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, reloc->offset & PAGE_MASK); reloc_entry = (uint32_t __iomem *) (reloc_page + (reloc->offset & ~PAGE_MASK)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8c42ddd467b6..61bfb12e1016 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -290,7 +290,7 @@ void i915_gem_init_ppgtt(struct drm_device *dev) return; - pd_addr = (gtt_pte_t __iomem*)dev_priv->mm.gsm + ppgtt->pd_offset/sizeof(gtt_pte_t); + pd_addr = (gtt_pte_t __iomem*)dev_priv->gtt.gsm + ppgtt->pd_offset/sizeof(gtt_pte_t); for (i = 0; i < ppgtt->num_pd_entries; i++) { dma_addr_t pt_addr; @@ -367,7 +367,7 @@ static void i915_ggtt_clear_range(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; gtt_pte_t scratch_pte; - gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->mm.gsm + first_entry; + gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; int i; @@ -393,8 +393,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct drm_i915_gem_object *obj; /* First fill our portion of the GTT with scratch pages */ - i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE, - dev_priv->mm.gtt_total / PAGE_SIZE); + i915_ggtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE, + dev_priv->gtt.total / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) { i915_gem_clflush_object(obj); @@ -433,7 +433,7 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj, const int first_entry = obj->gtt_space->start >> PAGE_SHIFT; const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; gtt_pte_t __iomem *gtt_entries = - (gtt_pte_t __iomem *)dev_priv->mm.gsm + first_entry; + (gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; int unused, i = 0; unsigned int len, m = 0; dma_addr_t addr; @@ -556,9 +556,9 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, obj->has_global_gtt_mapping = 1; } - dev_priv->mm.gtt_start = start; - dev_priv->mm.gtt_mappable_end = mappable_end; - dev_priv->mm.gtt_total = end - start; + dev_priv->gtt.start = start; + dev_priv->gtt.mappable_end = mappable_end; + dev_priv->gtt.total = end - start; /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space, @@ -752,9 +752,9 @@ int i915_gem_gtt_init(struct drm_device *dev) goto err_out; } - dev_priv->mm.gsm = ioremap_wc(gtt_bus_addr, - dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); - if (!dev_priv->mm.gsm) { + dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, + dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); + if (!dev_priv->gtt.gsm) { DRM_ERROR("Failed to map the gtt page table\n"); teardown_scratch_page(dev); ret = -ENOMEM; @@ -778,7 +778,7 @@ err_out: void i915_gem_gtt_fini(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - iounmap(dev_priv->mm.gsm); + iounmap(dev_priv->gtt.gsm); teardown_scratch_page(dev); if (INTEL_INFO(dev)->gen < 6) intel_gmch_remove(); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index e76f0d8470a1..abcba2f5a788 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -357,7 +357,7 @@ i915_gem_set_tiling(struct drm_device *dev, void 
*data, obj->map_and_fenceable = obj->gtt_space == NULL || - (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && + (obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end && i915_gem_object_fence_ok(obj, args->tiling_mode)); /* Rebind if we need a change of alignment */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 202813128441..cc49a6ddc052 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -939,7 +939,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv, goto unwind; local_irq_save(flags); - if (reloc_offset < dev_priv->mm.gtt_mappable_end && + if (reloc_offset < dev_priv->gtt.mappable_end && src->has_global_gtt_mapping) { void __iomem *s; @@ -948,7 +948,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv, * captures what the GPU read. */ - s = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + s = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, reloc_offset); memcpy_fromio(d, s, PAGE_SIZE); io_mapping_unmap_atomic(s); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 40b6b5e9d6ef..e4c5067a54d3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8687,7 +8687,7 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; } - dev->mode_config.fb_base = dev_priv->mm.gtt_base_addr; + dev->mode_config.fb_base = dev_priv->gtt.mappable_base; DRM_DEBUG_KMS("%d display pipe%s available.\n", dev_priv->num_pipe, dev_priv->num_pipe > 1 ? "s" : ""); diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 71d55801c0d9..ce02af8ca96e 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -142,7 +142,7 @@ static int intelfb_create(struct intel_fbdev *ifbdev, info->fix.smem_len = size; info->screen_base = - ioremap_wc(dev_priv->mm.gtt_base_addr + obj->gtt_offset, + ioremap_wc(dev_priv->gtt.mappable_base + obj->gtt_offset, size); if (!info->screen_base) { ret = -ENOSPC; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index fabe0acf808d..ba978bf93a2e 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -195,7 +195,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) if (OVERLAY_NEEDS_PHYSICAL(overlay->dev)) regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_obj->handle->vaddr; else - regs = io_mapping_map_wc(dev_priv->mm.gtt_mapping, + regs = io_mapping_map_wc(dev_priv->gtt.mappable, overlay->reg_bo->gtt_offset); return regs; @@ -1434,7 +1434,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay) regs = (struct overlay_registers __iomem *) overlay->reg_bo->phys_obj->handle->vaddr; else - regs = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + regs = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, overlay->reg_bo->gtt_offset); return regs; -- cgit v1.2.3
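
Editor's note on the flag-validation idiom in the uapi additions above: both __EXEC_OBJECT_UNKNOWN_FLAGS and __I915_EXEC_UNKNOWN_FLAGS take the form -(LAST_FLAG<<1). In two's complement, negating the next free bit yields a mask of every bit above the defined flags, which is what validate_exec_list() and i915_gem_check_execbuffer() test against to reject unknown input. A standalone illustration (the demo program is invented; the three flag values are the ones from the i915_drm.h diff):

#include <stdio.h>

#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
#define EXEC_OBJECT_NEEDS_GTT   (1<<1)
#define EXEC_OBJECT_WRITE       (1<<2)
/* -(1<<3) == ~((1<<3) - 1): every bit except the three known flags */
#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_WRITE<<1))

int main(void)
{
	/* prints fffffff8 */
	printf("%x\n", (unsigned)__EXEC_OBJECT_UNKNOWN_FLAGS);
	return 0;
}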
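Editor's note on the userspace side: the series documents I915_EXEC_NO_RELOC and I915_EXEC_HANDLE_LUT only from the kernel's perspective. The sketch below shows how a client might opt in to both, assuming libdrm's drmIoctl() plus the uapi definitions added above; has_param(), submit(), the write_mask convention and the 16-entry cap are invented for illustration and are not part of any ABI, and relocation entries are omitted entirely for brevity.

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>    /* drmIoctl(), assumed from libdrm */
#include <i915_drm.h>   /* uapi definitions from the patches above */

/* Probe one of the getparam values introduced by this series, e.g.
 * I915_PARAM_HAS_EXEC_NO_RELOC or I915_PARAM_HAS_EXEC_HANDLE_LUT. */
static int has_param(int fd, int param)
{
	int value = 0;
	drm_i915_getparam_t gp = { .param = param, .value = &value };

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GETPARAM, &gp))
		return 0;
	return value;
}

/* Submit a batch whose buffer allotment is static between calls.
 * offsets[] caches the placements the kernel reported last time;
 * write_mask marks the entries the GPU will write; the last entry
 * must be the batch, since i915_gem_do_execbuffer() takes the batch
 * object from the tail of the list. */
static int submit(int fd, const uint32_t *handles, uint64_t *offsets,
		  unsigned int count, uint32_t write_mask,
		  uint32_t batch_len)
{
	struct drm_i915_gem_exec_object2 exec[16];
	struct drm_i915_gem_execbuffer2 execbuf;
	unsigned int i;

	if (count > 16)
		return -EINVAL;

	memset(exec, 0, sizeof(exec));
	for (i = 0; i < count; i++) {
		exec[i].handle = handles[i];
		/* Presumed address: while it matches the object's actual
		 * placement, the kernel can skip relocating it. */
		exec[i].offset = offsets[i];
		/* With NO_RELOC, domains are no longer derived from
		 * relocation entries, so writes must be flagged. */
		if (write_mask & (1u << i))
			exec[i].flags = EXEC_OBJECT_WRITE;
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)exec;
	execbuf.buffer_count = count;
	execbuf.batch_len = batch_len;	/* must stay 8-byte aligned */
	/* HANDLE_LUT makes reloc.target_handle an index into exec[]
	 * instead of a per-file GEM handle. */
	execbuf.flags = I915_EXEC_RENDER |
			I915_EXEC_NO_RELOC |
			I915_EXEC_HANDLE_LUT;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
		return -errno;

	/* The ioctl writes the final offsets back into exec[]; cache
	 * them so the next submission's presumed addresses hold. */
	for (i = 0; i < count; i++)
		offsets[i] = exec[i].offset;
	return 0;
}

The point of caching the offsets is that, once the allotment is stable, entry->offset equals obj->gtt_offset on every pass, need_reloc stays false, and the per-entry relocation walk is skipped entirely — which is where the copywinwin10 throughput gains quoted in the commit messages come from.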