diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 591 |
1 files changed, 363 insertions, 228 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cdfb9da0e4ce..76d3d1ab73c6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -41,6 +41,9 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *o static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, bool force); static __must_check int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly); +static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned alignment, @@ -61,8 +64,8 @@ static unsigned long i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc); static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc); -static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); -static long i915_gem_shrink_all(struct drm_i915_private *dev_priv); +static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target); +static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); static bool cpu_cache_is_coherent(struct drm_device *dev, @@ -258,7 +261,7 @@ i915_gem_dumb_create(struct drm_file *file, struct drm_mode_create_dumb *args) { /* have to work out size/pitch and return them */ - args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); + args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, args->size, &args->handle); @@ -432,11 +435,9 @@ i915_gem_shmem_pread(struct drm_device *dev, * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); - if (i915_gem_obj_bound_any(obj)) { - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; } ret = i915_gem_object_get_pages(obj); @@ -748,11 +749,9 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ needs_clflush_after = cpu_write_needs_clflush(obj); - if (i915_gem_obj_bound_any(obj)) { - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; } /* Same trick applies to invalidate partially written cachelines read * before writing. */ @@ -966,12 +965,31 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); ret = 0; - if (seqno == ring->outstanding_lazy_request) + if (seqno == ring->outstanding_lazy_seqno) ret = i915_add_request(ring, NULL); return ret; } +static void fake_irq(unsigned long data) +{ + wake_up_process((struct task_struct *)data); +} + +static bool missed_irq(struct drm_i915_private *dev_priv, + struct intel_ring_buffer *ring) +{ + return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); +} + +static bool can_wait_boost(struct drm_i915_file_private *file_priv) +{ + if (file_priv == NULL) + return true; + + return !atomic_xchg(&file_priv->rps_wait_boost, true); +} + /** * __wait_seqno - wait until execution of seqno has finished * @ring: the ring expected to report seqno @@ -992,13 +1010,14 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) */ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, unsigned reset_counter, - bool interruptible, struct timespec *timeout) + bool interruptible, + struct timespec *timeout, + struct drm_i915_file_private *file_priv) { drm_i915_private_t *dev_priv = ring->dev->dev_private; - struct timespec before, now, wait_time={1,0}; - unsigned long timeout_jiffies; - long end; - bool wait_forever = true; + struct timespec before, now; + DEFINE_WAIT(wait); + long timeout_jiffies; int ret; WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n"); @@ -1006,51 +1025,79 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) return 0; - trace_i915_gem_request_wait_begin(ring, seqno); + timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1; - if (timeout != NULL) { - wait_time = *timeout; - wait_forever = false; + if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) { + gen6_rps_boost(dev_priv); + if (file_priv) + mod_delayed_work(dev_priv->wq, + &file_priv->mm.idle_work, + msecs_to_jiffies(100)); } - timeout_jiffies = timespec_to_jiffies_timeout(&wait_time); - - if (WARN_ON(!ring->irq_get(ring))) + if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)) && + WARN_ON(!ring->irq_get(ring))) return -ENODEV; - /* Record current time in case interrupted by signal, or wedged * */ + /* Record current time in case interrupted by signal, or wedged */ + trace_i915_gem_request_wait_begin(ring, seqno); getrawmonotonic(&before); + for (;;) { + struct timer_list timer; + unsigned long expire; -#define EXIT_COND \ - (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ - i915_reset_in_progress(&dev_priv->gpu_error) || \ - reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) - do { - if (interruptible) - end = wait_event_interruptible_timeout(ring->irq_queue, - EXIT_COND, - timeout_jiffies); - else - end = wait_event_timeout(ring->irq_queue, EXIT_COND, - timeout_jiffies); + prepare_to_wait(&ring->irq_queue, &wait, + interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); /* We need to check whether any gpu reset happened in between * the caller grabbing the seqno and now ... */ - if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) - end = -EAGAIN; + if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { + /* ... but upgrade the -EAGAIN to an -EIO if the gpu + * is truely gone. */ + ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); + if (ret == 0) + ret = -EAGAIN; + break; + } - /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely - * gone. */ - ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); - if (ret) - end = ret; - } while (end == 0 && wait_forever); + if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) { + ret = 0; + break; + } + if (interruptible && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + if (timeout_jiffies <= 0) { + ret = -ETIME; + break; + } + + timer.function = NULL; + if (timeout || missed_irq(dev_priv, ring)) { + setup_timer_on_stack(&timer, fake_irq, (unsigned long)current); + expire = jiffies + (missed_irq(dev_priv, ring) ? 1: timeout_jiffies); + mod_timer(&timer, expire); + } + + io_schedule(); + + if (timeout) + timeout_jiffies = expire - jiffies; + + if (timer.function) { + del_singleshot_timer_sync(&timer); + destroy_timer_on_stack(&timer); + } + } getrawmonotonic(&now); + trace_i915_gem_request_wait_end(ring, seqno); ring->irq_put(ring); - trace_i915_gem_request_wait_end(ring, seqno); -#undef EXIT_COND + + finish_wait(&ring->irq_queue, &wait); if (timeout) { struct timespec sleep_time = timespec_sub(now, before); @@ -1059,17 +1106,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, set_normalized_timespec(timeout, 0, 0); } - switch (end) { - case -EIO: - case -EAGAIN: /* Wedged */ - case -ERESTARTSYS: /* Signal */ - return (int)end; - case 0: /* Timeout */ - return -ETIME; - default: /* Completed */ - WARN_ON(end < 0); /* We're not aware of other errors */ - return 0; - } + return ret; } /** @@ -1097,7 +1134,7 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) return __wait_seqno(ring, seqno, atomic_read(&dev_priv->gpu_error.reset_counter), - interruptible, NULL); + interruptible, NULL, NULL); } static int @@ -1147,6 +1184,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, */ static __must_check int i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, + struct drm_file *file, bool readonly) { struct drm_device *dev = obj->base.dev; @@ -1173,7 +1211,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); mutex_unlock(&dev->struct_mutex); - ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); + ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file->driver_priv); mutex_lock(&dev->struct_mutex); if (ret) return ret; @@ -1222,7 +1260,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); + ret = i915_gem_object_wait_rendering__nonblocking(obj, file, !write_domain); if (ret) goto unref; @@ -1690,13 +1728,13 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) return 0; } -static long +static unsigned long __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, bool purgeable_only) { struct list_head still_bound_list; struct drm_i915_gem_object *obj, *next; - long count = 0; + unsigned long count = 0; list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, @@ -1762,13 +1800,13 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, return count; } -static long +static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target) { return __i915_gem_shrink(dev_priv, target, true); } -static long +static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj, *next; @@ -1778,9 +1816,8 @@ i915_gem_shrink_all(struct drm_i915_private *dev_priv) list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, global_list) { - if (obj->pages_pin_count == 0) + if (i915_gem_object_put_pages(obj) == 0) freed += obj->base.size >> PAGE_SHIFT; - i915_gem_object_put_pages(obj); } return freed; } @@ -1865,6 +1902,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) sg->length += PAGE_SIZE; } last_pfn = page_to_pfn(page); + + /* Check that the i965g/gm workaround works. */ + WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } #ifdef CONFIG_SWIOTLB if (!swiotlb_nr_tbl()) @@ -1918,7 +1958,7 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) return 0; } -void +static void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring) { @@ -1957,6 +1997,13 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, } } +void i915_vma_move_to_active(struct i915_vma *vma, + struct intel_ring_buffer *ring) +{ + list_move_tail(&vma->mm_list, &vma->vm->active_list); + return i915_gem_object_move_to_active(vma->obj, ring); +} + static void i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) { @@ -2078,11 +2125,10 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (ret) return ret; - request = kmalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) return -ENOMEM; - /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the @@ -2091,17 +2137,13 @@ int __i915_add_request(struct intel_ring_buffer *ring, request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring); - if (ret) { - kfree(request); + if (ret) return ret; - } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; request->head = request_start; request->tail = request_ring_position; - request->ctx = ring->last_context; - request->batch_obj = obj; /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this @@ -2109,7 +2151,12 @@ int __i915_add_request(struct intel_ring_buffer *ring, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. */ + request->batch_obj = obj; + /* Hold a reference to the current context so that we can inspect + * it later in case a hangcheck error event fires. + */ + request->ctx = ring->last_context; if (request->ctx) i915_gem_context_reference(request->ctx); @@ -2129,12 +2176,14 @@ int __i915_add_request(struct intel_ring_buffer *ring, } trace_i915_gem_request_add(ring, request->seqno); - ring->outstanding_lazy_request = 0; + ring->outstanding_lazy_seqno = 0; + ring->preallocated_lazy_request = NULL; if (!dev_priv->ums.mm_suspended) { i915_queue_hangcheck(ring->dev); if (was_empty) { + cancel_delayed_work_sync(&dev_priv->mm.idle_work); queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); @@ -2156,10 +2205,8 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) return; spin_lock(&file_priv->mm.lock); - if (request->file_priv) { - list_del(&request->client_list); - request->file_priv = NULL; - } + list_del(&request->client_list); + request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); } @@ -2224,6 +2271,21 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request, return false; } +static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs) +{ + const unsigned long elapsed = get_seconds() - hs->guilty_ts; + + if (hs->banned) + return true; + + if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { + DRM_ERROR("context hanging too fast, declaring banned!\n"); + return true; + } + + return false; +} + static void i915_set_reset_status(struct intel_ring_buffer *ring, struct drm_i915_gem_request *request, u32 acthd) @@ -2260,10 +2322,13 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, hs = &request->file_priv->hang_stats; if (hs) { - if (guilty) + if (guilty) { + hs->banned = i915_context_is_banned(hs); hs->batch_active++; - else + hs->guilty_ts = get_seconds(); + } else { hs->batch_pending++; + } } } @@ -2278,15 +2343,24 @@ static void i915_gem_free_request(struct drm_i915_gem_request *request) kfree(request); } -static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, - struct intel_ring_buffer *ring) +static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, + struct intel_ring_buffer *ring) { - u32 completed_seqno; - u32 acthd; + u32 completed_seqno = ring->get_seqno(ring, false); + u32 acthd = intel_ring_get_active_head(ring); + struct drm_i915_gem_request *request; - acthd = intel_ring_get_active_head(ring); - completed_seqno = ring->get_seqno(ring, false); + list_for_each_entry(request, &ring->request_list, list) { + if (i915_seqno_passed(completed_seqno, request->seqno)) + continue; + + i915_set_reset_status(ring, request, acthd); + } +} +static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, + struct intel_ring_buffer *ring) +{ while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; @@ -2294,9 +2368,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, struct drm_i915_gem_request, list); - if (request->seqno > completed_seqno) - i915_set_reset_status(ring, request, acthd); - i915_gem_free_request(request); } @@ -2338,8 +2409,18 @@ void i915_gem_reset(struct drm_device *dev) struct intel_ring_buffer *ring; int i; + /* + * Before we free the objects from the requests, we need to inspect + * them for finding the guilty party. As the requests only borrow + * their reference to the objects, the inspection must be done first. + */ + for_each_ring(ring, dev_priv, i) + i915_gem_reset_ring_status(dev_priv, ring); + for_each_ring(ring, dev_priv, i) - i915_gem_reset_ring_lists(dev_priv, ring); + i915_gem_reset_ring_cleanup(dev_priv, ring); + + i915_gem_cleanup_ringbuffer(dev); i915_gem_restore_fences(dev); } @@ -2405,57 +2486,53 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) WARN_ON(i915_verify_lists(ring->dev)); } -void +bool i915_gem_retire_requests(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; + bool idle = true; int i; - for_each_ring(ring, dev_priv, i) + for_each_ring(ring, dev_priv, i) { i915_gem_retire_requests_ring(ring); + idle &= list_empty(&ring->request_list); + } + + if (idle) + mod_delayed_work(dev_priv->wq, + &dev_priv->mm.idle_work, + msecs_to_jiffies(100)); + + return idle; } static void i915_gem_retire_work_handler(struct work_struct *work) { - drm_i915_private_t *dev_priv; - struct drm_device *dev; - struct intel_ring_buffer *ring; + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), mm.retire_work.work); + struct drm_device *dev = dev_priv->dev; bool idle; - int i; - - dev_priv = container_of(work, drm_i915_private_t, - mm.retire_work.work); - dev = dev_priv->dev; /* Come back later if the device is busy... */ - if (!mutex_trylock(&dev->struct_mutex)) { - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, - round_jiffies_up_relative(HZ)); - return; - } - - i915_gem_retire_requests(dev); - - /* Send a periodic flush down the ring so we don't hold onto GEM - * objects indefinitely. - */ - idle = true; - for_each_ring(ring, dev_priv, i) { - if (ring->gpu_caches_dirty) - i915_add_request(ring, NULL); - - idle &= list_empty(&ring->request_list); + idle = false; + if (mutex_trylock(&dev->struct_mutex)) { + idle = i915_gem_retire_requests(dev); + mutex_unlock(&dev->struct_mutex); } - - if (!dev_priv->ums.mm_suspended && !idle) + if (!idle) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); - if (idle) - intel_mark_idle(dev); +} - mutex_unlock(&dev->struct_mutex); +static void +i915_gem_idle_work_handler(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), mm.idle_work.work); + + intel_mark_idle(dev_priv->dev); } /** @@ -2553,7 +2630,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); mutex_unlock(&dev->struct_mutex); - ret = __wait_seqno(ring, seqno, reset_counter, true, timeout); + ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv); if (timeout) args->timeout_ns = timespec_to_ns(timeout); return ret; @@ -2600,6 +2677,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, if (ret) return ret; + trace_i915_gem_ring_sync_to(from, to, seqno); ret = to->sync_to(to, from, seqno); if (!ret) /* We use last_read_seqno because sync_to() @@ -2641,11 +2719,17 @@ int i915_vma_unbind(struct i915_vma *vma) drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret; + /* For now we only ever use 1 vma per object */ + WARN_ON(!list_is_singular(&obj->vma_list)); + if (list_empty(&vma->vma_link)) return 0; - if (!drm_mm_node_allocated(&vma->node)) - goto destroy; + if (!drm_mm_node_allocated(&vma->node)) { + i915_gem_vma_destroy(vma); + + return 0; + } if (obj->pin_count) return -EBUSY; @@ -2685,13 +2769,10 @@ int i915_vma_unbind(struct i915_vma *vma) drm_mm_remove_node(&vma->node); -destroy: i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if - * no more VMAs exist. - * NB: Until we have real VMAs there will only ever be one */ - WARN_ON(!list_empty(&obj->vma_list)); + * no more VMAs exist. */ if (list_empty(&obj->vma_list)) list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); @@ -2887,6 +2968,7 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, obj->stride, obj->tiling_mode); switch (INTEL_INFO(dev)->gen) { + case 8: case 7: case 6: case 5: @@ -3389,8 +3471,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) /* And bump the LRU for this access */ if (i915_gem_object_is_inactive(obj)) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, - &dev_priv->gtt.base); + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); if (vma) list_move_tail(&vma->mm_list, &dev_priv->gtt.base.inactive_list); @@ -3761,7 +3842,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (seqno == 0) return 0; - ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); + ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL); if (ret == 0) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); @@ -3865,6 +3946,11 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, goto out; } + if (obj->user_pin_count == ULONG_MAX) { + ret = -EBUSY; + goto out; + } + if (obj->user_pin_count == 0) { ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false); if (ret) @@ -4015,7 +4101,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); - INIT_LIST_HEAD(&obj->exec_list); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); @@ -4087,13 +4172,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, return obj; } -int i915_gem_init_object(struct drm_gem_object *obj) -{ - BUG(); - - return 0; -} - void i915_gem_free_object(struct drm_gem_object *gem_obj) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); @@ -4147,9 +4225,20 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) i915_gem_object_free(obj); } -struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, +struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { + struct i915_vma *vma; + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm) + return vma; + + return NULL; +} + +static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -4169,76 +4258,103 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = __i915_gem_vma_create(obj, vm); + + return vma; +} + void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); + + /* Keep the vma as a placeholder in the execbuffer reservation lists */ + if (!list_empty(&vma->exec_list)) + return; + list_del(&vma->vma_link); + kfree(vma); } int -i915_gem_idle(struct drm_device *dev) +i915_gem_suspend(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - int ret; + int ret = 0; - if (dev_priv->ums.mm_suspended) { - mutex_unlock(&dev->struct_mutex); - return 0; - } + mutex_lock(&dev->struct_mutex); + if (dev_priv->ums.mm_suspended) + goto err; ret = i915_gpu_idle(dev); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return ret; - } + if (ret) + goto err; + i915_gem_retire_requests(dev); /* Under UMS, be paranoid and evict. */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) i915_gem_evict_everything(dev); - del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); - i915_kernel_lost_context(dev); i915_gem_cleanup_ringbuffer(dev); - /* Cancel the retire work handler, which should be idle now. */ + /* Hack! Don't let anybody do execbuf while we don't control the chip. + * We need to replace this with a semaphore, or something. + * And not confound ums.mm_suspended! + */ + dev_priv->ums.mm_suspended = !drm_core_check_feature(dev, + DRIVER_MODESET); + mutex_unlock(&dev->struct_mutex); + + del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); cancel_delayed_work_sync(&dev_priv->mm.retire_work); + cancel_delayed_work_sync(&dev_priv->mm.idle_work); return 0; + +err: + mutex_unlock(&dev->struct_mutex); + return ret; } -void i915_gem_l3_remap(struct drm_device *dev) +int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice) { + struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - u32 misccpctl; - int i; - - if (!HAS_L3_GPU_CACHE(dev)) - return; + u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); + u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; + int i, ret; - if (!dev_priv->l3_parity.remap_info) - return; + if (!HAS_L3_DPF(dev) || !remap_info) + return 0; - misccpctl = I915_READ(GEN7_MISCCPCTL); - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - POSTING_READ(GEN7_MISCCPCTL); + ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); + if (ret) + return ret; + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. + */ for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { - u32 remap = I915_READ(GEN7_L3LOG_BASE + i); - if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) - DRM_DEBUG("0x%x was already programmed to %x\n", - GEN7_L3LOG_BASE + i, remap); - if (remap && !dev_priv->l3_parity.remap_info[i/4]) - DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, reg_base + i); + intel_ring_emit(ring, remap_info[i/4]); } - /* Make sure all the writes land before disabling dop clock gating */ - POSTING_READ(GEN7_L3LOG_BASE); + intel_ring_advance(ring); - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + return ret; } void i915_gem_init_swizzling(struct drm_device *dev) @@ -4260,6 +4376,8 @@ void i915_gem_init_swizzling(struct drm_device *dev) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); else if (IS_GEN7(dev)) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); + else if (IS_GEN8(dev)) + I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); else BUG(); } @@ -4330,7 +4448,7 @@ int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - int ret; + int ret, i; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4338,20 +4456,25 @@ i915_gem_init_hw(struct drm_device *dev) if (dev_priv->ellc_size) I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); + if (IS_HASWELL(dev)) + I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? + LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); + if (HAS_PCH_NOP(dev)) { u32 temp = I915_READ(GEN7_MSG_CTL); temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); I915_WRITE(GEN7_MSG_CTL, temp); } - i915_gem_l3_remap(dev); - i915_gem_init_swizzling(dev); ret = i915_gem_init_rings(dev); if (ret) return ret; + for (i = 0; i < NUM_L3_SLICES(dev); i++) + i915_gem_l3_remap(&dev_priv->ring[RCS], i); + /* * XXX: There was some w/a described somewhere suggesting loading * contexts before PPGTT. @@ -4454,26 +4577,12 @@ int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - int ret; - if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; drm_irq_uninstall(dev); - mutex_lock(&dev->struct_mutex); - ret = i915_gem_idle(dev); - - /* Hack! Don't let anybody do execbuf while we don't control the chip. - * We need to replace this with a semaphore, or something. - * And not confound ums.mm_suspended! - */ - if (ret != 0) - dev_priv->ums.mm_suspended = 1; - mutex_unlock(&dev->struct_mutex); - - return ret; + return i915_gem_suspend(dev); } void @@ -4484,11 +4593,9 @@ i915_gem_lastclose(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) return; - mutex_lock(&dev->struct_mutex); - ret = i915_gem_idle(dev); + ret = i915_gem_suspend(dev); if (ret) DRM_ERROR("failed to idle hardware: %d\n", ret); - mutex_unlock(&dev->struct_mutex); } static void @@ -4523,6 +4630,7 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->vm_list); i915_init_vm(dev_priv, &dev_priv->gtt.base); + INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); @@ -4532,6 +4640,8 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); INIT_DELAYED_WORK(&dev_priv->mm.retire_work, i915_gem_retire_work_handler); + INIT_DELAYED_WORK(&dev_priv->mm.idle_work, + i915_gem_idle_work_handler); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ @@ -4582,7 +4692,7 @@ static int i915_gem_init_phys_object(struct drm_device *dev, if (dev_priv->mm.phys_objs[id - 1] || !size) return 0; - phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); + phys_obj = kzalloc(sizeof(*phys_obj), GFP_KERNEL); if (!phys_obj) return -ENOMEM; @@ -4756,6 +4866,8 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + cancel_delayed_work_sync(&file_priv->mm.idle_work); + /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our soon-to-be-gone * file_priv. @@ -4773,6 +4885,38 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) spin_unlock(&file_priv->mm.lock); } +static void +i915_gem_file_idle_work_handler(struct work_struct *work) +{ + struct drm_i915_file_private *file_priv = + container_of(work, typeof(*file_priv), mm.idle_work.work); + + atomic_set(&file_priv->rps_wait_boost, false); +} + +int i915_gem_open(struct drm_device *dev, struct drm_file *file) +{ + struct drm_i915_file_private *file_priv; + + DRM_DEBUG_DRIVER("\n"); + + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); + if (!file_priv) + return -ENOMEM; + + file->driver_priv = file_priv; + file_priv->dev_priv = dev->dev_private; + + spin_lock_init(&file_priv->mm.lock); + INIT_LIST_HEAD(&file_priv->mm.request_list); + INIT_DELAYED_WORK(&file_priv->mm.idle_work, + i915_gem_file_idle_work_handler); + + idr_init(&file_priv->context_idr); + + return 0; +} + static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) { if (!mutex_is_locked(mutex)) @@ -4823,6 +4967,7 @@ i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) if (unlock) mutex_unlock(&dev->struct_mutex); + return count; } @@ -4859,11 +5004,10 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o, bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) { - struct drm_i915_private *dev_priv = o->base.dev->dev_private; - struct i915_address_space *vm; + struct i915_vma *vma; - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - if (i915_gem_obj_bound(o, vm)) + list_for_each_entry(vma, &o->vma_list, vma_link) + if (drm_mm_node_allocated(&vma->node)) return true; return false; @@ -4895,7 +5039,6 @@ i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private, mm.inactive_shrinker); struct drm_device *dev = dev_priv->dev; - int nr_to_scan = sc->nr_to_scan; unsigned long freed; bool unlock = true; @@ -4909,38 +5052,30 @@ i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) unlock = false; } - freed = i915_gem_purge(dev_priv, nr_to_scan); - if (freed < nr_to_scan) - freed += __i915_gem_shrink(dev_priv, nr_to_scan, - false); - if (freed < nr_to_scan) + freed = i915_gem_purge(dev_priv, sc->nr_to_scan); + if (freed < sc->nr_to_scan) + freed += __i915_gem_shrink(dev_priv, + sc->nr_to_scan - freed, + false); + if (freed < sc->nr_to_scan) freed += i915_gem_shrink_all(dev_priv); if (unlock) mutex_unlock(&dev->struct_mutex); + return freed; } -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) +struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, vma_link) - if (vma->vm == vm) - return vma; - return NULL; -} - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; + if (WARN_ON(list_empty(&obj->vma_list))) + return NULL; - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = i915_gem_vma_create(obj, vm); + vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); + if (WARN_ON(vma->vm != obj_to_ggtt(obj))) + return NULL; return vma; } |