Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 593
1 file changed, 345 insertions, 248 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 17c5097721e8..fcc73a6ab503 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -65,7 +65,7 @@ insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range(&ggtt->base.mm, node, + return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, size, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); @@ -139,6 +139,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) static u32 __i915_gem_park(struct drm_i915_private *i915) { + GEM_TRACE("\n"); + lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(i915->gt.active_requests); GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); @@ -181,6 +183,8 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) void i915_gem_park(struct drm_i915_private *i915) { + GEM_TRACE("\n"); + lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(i915->gt.active_requests); @@ -193,6 +197,8 @@ void i915_gem_park(struct drm_i915_private *i915) void i915_gem_unpark(struct drm_i915_private *i915) { + GEM_TRACE("\n"); + lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!i915->gt.active_requests); @@ -243,17 +249,17 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_vma *vma; u64 pinned; - pinned = ggtt->base.reserved; + pinned = ggtt->vm.reserved; mutex_lock(&dev->struct_mutex); - list_for_each_entry(vma, &ggtt->base.active_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; - list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); - args->aper_size = ggtt->base.total; + args->aper_size = ggtt->vm.total; args->aper_available_size = args->aper_size - pinned; return 0; @@ -796,7 +802,7 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) * that was!). */ - wmb(); + i915_gem_chipset_flush(dev_priv); intel_runtime_pm_get(dev_priv); spin_lock_irq(&dev_priv->uncore.lock); @@ -831,6 +837,10 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) } break; + case I915_GEM_DOMAIN_WC: + wmb(); + break; + case I915_GEM_DOMAIN_CPU: i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); break; @@ -1217,9 +1227,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, page_length = remain < page_length ? remain : page_length; if (node.allocated) { wmb(); - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), - node.start, I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), + node.start, I915_CACHE_NONE, 0); wmb(); } else { page_base += offset & PAGE_MASK; @@ -1240,8 +1250,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, out_unpin: if (node.allocated) { wmb(); - ggtt->base.clear_range(&ggtt->base, - node.start, node.size); + ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); remove_mappable_node(&node); } else { i915_vma_unpin(vma); @@ -1420,9 +1429,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, page_length = remain < page_length ? 
remain : page_length; if (node.allocated) { wmb(); /* flush the write before we modify the GGTT */ - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), - node.start, I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), + node.start, I915_CACHE_NONE, 0); wmb(); /* flush modifications to the GGTT (insert_page) */ } else { page_base += offset & PAGE_MASK; @@ -1449,8 +1458,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, out_unpin: if (node.allocated) { wmb(); - ggtt->base.clear_range(&ggtt->base, - node.start, node.size); + ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); remove_mappable_node(&node); } else { i915_vma_unpin(vma); @@ -1619,6 +1627,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto err; } + /* Writes not allowed into this read-only object */ + if (i915_gem_object_is_readonly(obj)) { + ret = -EINVAL; + goto err; + } + trace_i915_gem_object_pwrite(obj, args->offset, args->size); ret = -ENODEV; @@ -1991,9 +2005,9 @@ compute_partial_view(struct drm_i915_gem_object *obj, * The current feature set supported by i915_gem_fault() and thus GTT mmaps * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). */ -int i915_gem_fault(struct vm_fault *vmf) +vm_fault_t i915_gem_fault(struct vm_fault *vmf) { -#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */ +#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) struct vm_area_struct *area = vmf->vma; struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); struct drm_device *dev = obj->base.dev; @@ -2004,6 +2018,10 @@ int i915_gem_fault(struct vm_fault *vmf) pgoff_t page_offset; int ret; + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) + return VM_FAULT_SIGBUS; + /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; @@ -2114,10 +2132,9 @@ err: * fail). But any other -EIO isn't ours (e.g. swap in failure) * and so needs to be reported. */ - if (!i915_terminally_wedged(&dev_priv->gpu_error)) { - ret = VM_FAULT_SIGBUS; - break; - } + if (!i915_terminally_wedged(&dev_priv->gpu_error)) + return VM_FAULT_SIGBUS; + /* else: fall through */ case -EAGAIN: /* * EAGAIN means the gpu is hung and we'll wait for the error @@ -2132,21 +2149,16 @@ err: * EBUSY is ok: this just means that another thread * already did the job. 
*/ - ret = VM_FAULT_NOPAGE; - break; + return VM_FAULT_NOPAGE; case -ENOMEM: - ret = VM_FAULT_OOM; - break; + return VM_FAULT_OOM; case -ENOSPC: case -EFAULT: - ret = VM_FAULT_SIGBUS; - break; + return VM_FAULT_SIGBUS; default: WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); - ret = VM_FAULT_SIGBUS; - break; + return VM_FAULT_SIGBUS; } - return ret; } static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) @@ -2265,7 +2277,9 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) /* Attempt to reap some mmap space from dead objects */ do { - err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); + err = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (err) break; @@ -2410,29 +2424,15 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass) +static struct sg_table * +__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; - if (i915_gem_object_has_pinned_pages(obj)) - return; - - GEM_BUG_ON(obj->bind_count); - if (!i915_gem_object_has_pages(obj)) - return; - - /* May be called by shrinker from within get_pages() (on another bo) */ - mutex_lock_nested(&obj->mm.lock, subclass); - if (unlikely(atomic_read(&obj->mm.pages_pin_count))) - goto unlock; - - /* ->put_pages might need to allocate memory for the bit17 swizzle - * array, hence protect them from being reaped by removing them from gtt - * lists early. */ pages = fetch_and_zero(&obj->mm.pages); - GEM_BUG_ON(!pages); + if (!pages) + return NULL; spin_lock(&i915->mm.obj_lock); list_del(&obj->mm.link); @@ -2451,12 +2451,37 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, } __i915_gem_object_reset_page_iter(obj); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + + return pages; +} + +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, + enum i915_mm_subclass subclass) +{ + struct sg_table *pages; + if (i915_gem_object_has_pinned_pages(obj)) + return; + + GEM_BUG_ON(obj->bind_count); + if (!i915_gem_object_has_pages(obj)) + return; + + /* May be called by shrinker from within get_pages() (on another bo) */ + mutex_lock_nested(&obj->mm.lock, subclass); + if (unlikely(atomic_read(&obj->mm.pages_pin_count))) + goto unlock; + + /* + * ->put_pages might need to allocate memory for the bit17 swizzle + * array, hence protect them from being reaped by removing them from gtt + * lists early. 
+ */ + pages = __i915_gem_object_unset_pages(obj); if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); - obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; - unlock: mutex_unlock(&obj->mm.lock); } @@ -2974,16 +2999,16 @@ static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; - DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n", - ctx->name, atomic_read(&ctx->guilty_count), - score, yesno(banned && bannable)); - /* Cool contexts don't accumulate client ban score */ if (!bannable) return; - if (banned) + if (banned) { + DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", + ctx->name, atomic_read(&ctx->guilty_count), + score); i915_gem_context_set_banned(ctx); + } if (!IS_ERR_OR_NULL(ctx->file_priv)) i915_gem_client_mark_guilty(ctx->file_priv, ctx); @@ -3031,7 +3056,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { - struct i915_request *request = NULL; + struct i915_request *request; /* * During the reset sequence, we must prevent the engine from @@ -3042,52 +3067,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) */ intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - /* - * Prevent the signaler thread from updating the request - * state (by calling dma_fence_signal) as we are processing - * the reset. The write from the GPU of the seqno is - * asynchronous and the signaler thread may see a different - * value to us and declare the request complete, even though - * the reset routine have picked that request as the active - * (incomplete) request. This conflict is not handled - * gracefully! - */ - kthread_park(engine->breadcrumbs.signaler); - - /* - * Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. - * Turning off the execlists->tasklet until the reset is over - * prevents the race. - * - * Note that this needs to be a single atomic operation on the - * tasklet (flush existing tasks, prevent new tasks) to prevent - * a race between reset and set-wedged. It is not, so we do the best - * we can atm and make sure we don't lock the machine up in the more - * common case of recursively being called from set-wedged from inside - * i915_reset. - */ - if (!atomic_read(&engine->execlists.tasklet.count)) - tasklet_kill(&engine->execlists.tasklet); - tasklet_disable(&engine->execlists.tasklet); - - /* - * We're using worker to queue preemption requests from the tasklet in - * GuC submission mode. - * Even though tasklet was disabled, we may still have a worker queued. - * Let's make sure that all workers scheduled before disabling the - * tasklet are completed before continuing with the reset. - */ - if (engine->i915->guc.preempt_wq) - flush_workqueue(engine->i915->guc.preempt_wq); - - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - - request = i915_gem_find_active_request(engine); + request = engine->reset.prepare(engine); if (request && request->fence.error == -EIO) request = ERR_PTR(-EIO); /* Previous reset failed! 
*/ @@ -3117,43 +3097,24 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) return err; } -static void skip_request(struct i915_request *request) -{ - void *vaddr = request->ring->vaddr; - u32 head; - - /* As this request likely depends on state from the lost - * context, clear out all the user operations leaving the - * breadcrumb at the end (so we get the fence notifications). - */ - head = request->head; - if (request->postfix < head) { - memset(vaddr + head, 0, request->ring->size - head); - head = 0; - } - memset(vaddr + head, 0, request->postfix - head); - - dma_fence_set_error(&request->fence, -EIO); -} - static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct i915_gem_context *hung_ctx = request->ctx; + struct i915_gem_context *hung_ctx = request->gem_context; struct i915_timeline *timeline = request->timeline; unsigned long flags; GEM_BUG_ON(timeline == &engine->timeline); spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&timeline->lock); list_for_each_entry_continue(request, &engine->timeline.requests, link) - if (request->ctx == hung_ctx) - skip_request(request); + if (request->gem_context == hung_ctx) + i915_request_skip(request, -EIO); list_for_each_entry(request, &timeline->requests, link) - skip_request(request); + i915_request_skip(request, -EIO); spin_unlock(&timeline->lock); spin_unlock_irqrestore(&engine->timeline.lock, flags); @@ -3195,11 +3156,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine, } if (stalled) { - i915_gem_context_mark_guilty(request->ctx); - skip_request(request); + i915_gem_context_mark_guilty(request->gem_context); + i915_request_skip(request, -EIO); /* If this context is now banned, skip all pending requests. 
*/ - if (i915_gem_context_is_banned(request->ctx)) + if (i915_gem_context_is_banned(request->gem_context)) engine_skip_context(request); } else { /* @@ -3209,15 +3170,17 @@ i915_gem_reset_request(struct intel_engine_cs *engine, */ request = i915_gem_find_active_request(engine); if (request) { - i915_gem_context_mark_innocent(request->ctx); + unsigned long flags; + + i915_gem_context_mark_innocent(request->gem_context); dma_fence_set_error(&request->fence, -EAGAIN); /* Rewind the engine to replay the incomplete rq */ - spin_lock_irq(&engine->timeline.lock); + spin_lock_irqsave(&engine->timeline.lock, flags); request = list_prev_entry(request, link); if (&request->link == &engine->timeline.requests) request = NULL; - spin_unlock_irq(&engine->timeline.lock); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } } @@ -3238,13 +3201,8 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, if (request) request = i915_gem_reset_request(engine, request, stalled); - if (request) { - DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", - engine->name, request->global_seqno); - } - /* Setup the CS to resume from the breadcrumb of the hung request */ - engine->reset_hw(engine, request); + engine->reset.reset(engine, request); } void i915_gem_reset(struct drm_i915_private *dev_priv, @@ -3258,14 +3216,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { - struct i915_gem_context *ctx; + struct intel_context *ce; i915_gem_reset_engine(engine, engine->hangcheck.active_request, stalled_mask & ENGINE_MASK(id)); - ctx = fetch_and_zero(&engine->last_retired_context); - if (ctx) - intel_context_unpin(ctx, engine); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); /* * Ostensibily, we always want a context loaded for powersaving, @@ -3283,7 +3241,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, rq = i915_request_alloc(engine, dev_priv->kernel_context); if (!IS_ERR(rq)) - __i915_request_add(rq, false); + i915_request_add(rq); } } @@ -3292,8 +3250,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { - tasklet_enable(&engine->execlists.tasklet); - kthread_unpark(engine->breadcrumbs.signaler); + engine->reset.finish(engine); intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } @@ -3571,6 +3528,22 @@ new_requests_since_last_retire(const struct drm_i915_private *i915) work_pending(&i915->gt.idle_work.work)); } +static void assert_kernel_context_is_current(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (i915_terminally_wedged(&i915->gpu_error)) + return; + + GEM_BUG_ON(i915->gt.active_requests); + for_each_engine(engine, i915, id) { + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); + GEM_BUG_ON(engine->last_retired_context != + to_intel_context(i915->kernel_context, engine)); + } +} + static void i915_gem_idle_work_handler(struct work_struct *work) { @@ -3582,6 +3555,24 @@ i915_gem_idle_work_handler(struct work_struct *work) if (!READ_ONCE(dev_priv->gt.awake)) return; + if (READ_ONCE(dev_priv->gt.active_requests)) + return; + + /* + * Flush out the last user context, leaving only the pinned + * kernel context resident. When we are idling on the kernel_context, + * no more new requests (with a context switch) are emitted and we + * can finally rest. 
A consequence is that the idle work handler is + * always called at least twice before idling (and if the system is + * idle that implies a round trip through the retire worker). + */ + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_switch_to_kernel_context(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", + READ_ONCE(dev_priv->gt.active_requests)); + /* * Wait for last execlists context complete, but bail out in case a * new request is submitted. As we don't trust the hardware, we @@ -3615,6 +3606,8 @@ i915_gem_idle_work_handler(struct work_struct *work) epoch = __i915_gem_park(dev_priv); + assert_kernel_context_is_current(dev_priv); + rearm_hangcheck = false; out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3761,9 +3754,31 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) +static long wait_for_timeline(struct i915_timeline *tl, + unsigned int flags, long timeout) { - return i915_gem_active_wait(&tl->last_request, flags); + struct i915_request *rq; + + rq = i915_gem_active_get_unlocked(&tl->last_request); + if (!rq) + return timeout; + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used a synchronous + * step prior to idling, e.g. in suspend for flushing all + * current operations to memory before sleeping. These we + * want to complete as quickly as possible to avoid prolonged + * stalls, so allow the gpu to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq, NULL); + + timeout = i915_request_wait(rq, flags, timeout); + i915_request_put(rq); + + return timeout; } static int wait_for_engines(struct drm_i915_private *i915) @@ -3779,8 +3794,13 @@ static int wait_for_engines(struct drm_i915_private *i915) return 0; } -int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) +int i915_gem_wait_for_idle(struct drm_i915_private *i915, + unsigned int flags, long timeout) { + GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", + flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", + timeout, timeout == MAX_SCHEDULE_TIMEOUT ? 
" (forever)" : ""); + /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) return 0; @@ -3792,26 +3812,31 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - err = wait_for_timeline(tl, flags); - if (err) - return err; + timeout = wait_for_timeline(tl, flags, timeout); + if (timeout < 0) + return timeout; } - i915_retire_requests(i915); - return wait_for_engines(i915); + err = wait_for_engines(i915); + if (err) + return err; + + i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); } else { struct intel_engine_cs *engine; enum intel_engine_id id; - int err; for_each_engine(engine, i915, id) { - err = wait_for_timeline(&engine->timeline, flags); - if (err) - return err; - } + struct i915_timeline *tl = &engine->timeline; - return 0; + timeout = wait_for_timeline(tl, flags, timeout); + if (timeout < 0) + return timeout; + } } + + return 0; } static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) @@ -4385,7 +4410,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 flags) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_address_space *vm = &dev_priv->ggtt.base; + struct i915_address_space *vm = &dev_priv->ggtt.vm; struct i915_vma *vma; int ret; @@ -4973,25 +4998,25 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) i915_gem_object_put(obj); } -static void assert_kernel_context_is_current(struct drm_i915_private *i915) +void i915_gem_sanitize(struct drm_i915_private *i915) { - struct i915_gem_context *kernel_context = i915->kernel_context; - struct intel_engine_cs *engine; - enum intel_engine_id id; + int err; - for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); - GEM_BUG_ON(engine->last_retired_context != kernel_context); - } -} + GEM_TRACE("\n"); -void i915_gem_sanitize(struct drm_i915_private *i915) -{ - if (i915_terminally_wedged(&i915->gpu_error)) { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&i915->drm.struct_mutex); + + intel_runtime_pm_get(i915); + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + + /* + * As we have just resumed the machine and woken the device up from + * deep PCI sleep (presumably D3_cold), assume the HW has been reset + * back to defaults, recovering from whatever wedged state we left it + * in and so worth trying to use the device once more. + */ + if (i915_terminally_wedged(&i915->gpu_error)) i915_gem_unset_wedged(i915); - mutex_unlock(&i915->drm.struct_mutex); - } /* * If we inherit context state from the BIOS or earlier occupants @@ -5001,60 +5026,94 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * it may impact the display and we are uncertain about the stability * of the reset, so this could be applied to even earlier gen. 
*/ + err = -ENODEV; if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) - WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); + err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); + if (!err) + intel_engines_sanitize(i915); + + intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); + intel_runtime_pm_put(i915); + + i915_gem_contexts_lost(i915); + mutex_unlock(&i915->drm.struct_mutex); } -int i915_gem_suspend(struct drm_i915_private *dev_priv) +int i915_gem_suspend(struct drm_i915_private *i915) { - struct drm_device *dev = &dev_priv->drm; int ret; - intel_runtime_pm_get(dev_priv); - intel_suspend_gt_powersave(dev_priv); + GEM_TRACE("\n"); - mutex_lock(&dev->struct_mutex); + intel_runtime_pm_get(i915); + intel_suspend_gt_powersave(i915); - /* We have to flush all the executing contexts to main memory so + mutex_lock(&i915->drm.struct_mutex); + + /* + * We have to flush all the executing contexts to main memory so * that they can saved in the hibernation image. To ensure the last * context image is coherent, we have to switch away from it. That - * leaves the dev_priv->kernel_context still active when + * leaves the i915->kernel_context still active when * we actually suspend, and its image in memory may not match the GPU * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - if (!i915_terminally_wedged(&dev_priv->gpu_error)) { - ret = i915_gem_switch_to_kernel_context(dev_priv); + if (!i915_terminally_wedged(&i915->gpu_error)) { + ret = i915_gem_switch_to_kernel_context(i915); if (ret) goto err_unlock; - ret = i915_gem_wait_for_idle(dev_priv, + ret = i915_gem_wait_for_idle(i915, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); if (ret && ret != -EIO) goto err_unlock; - assert_kernel_context_is_current(dev_priv); + assert_kernel_context_is_current(i915); } - i915_gem_contexts_lost(dev_priv); - mutex_unlock(&dev->struct_mutex); + i915_retire_requests(i915); /* ensure we flush after wedging */ + + mutex_unlock(&i915->drm.struct_mutex); - intel_uc_suspend(dev_priv); + intel_uc_suspend(i915); - cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - cancel_delayed_work_sync(&dev_priv->gt.retire_work); + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + cancel_delayed_work_sync(&i915->gt.retire_work); - /* As the idle_work is rearming if it detects a race, play safe and + /* + * As the idle_work is rearming if it detects a race, play safe and * repeat the flush until it is definitely idle. */ - drain_delayed_work(&dev_priv->gt.idle_work); + drain_delayed_work(&i915->gt.idle_work); - /* Assert that we sucessfully flushed all the work and + /* + * Assert that we successfully flushed all the work and * reset the GPU back to its idle, low power state. 
*/ - WARN_ON(dev_priv->gt.awake); - if (WARN_ON(!intel_engines_are_idle(dev_priv))) - i915_gem_set_wedged(dev_priv); /* no hope, discard everything */ + WARN_ON(i915->gt.awake); + if (WARN_ON(!intel_engines_are_idle(i915))) + i915_gem_set_wedged(i915); /* no hope, discard everything */ + + intel_runtime_pm_put(i915); + return 0; + +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); + intel_runtime_pm_put(i915); + return ret; +} + +void i915_gem_suspend_late(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct list_head *phases[] = { + &i915->mm.unbound_list, + &i915->mm.bound_list, + NULL + }, **phase; /* * Neither the BIOS, ourselves or any other kernel @@ -5075,20 +5134,22 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. */ - intel_uc_sanitize(dev_priv); - i915_gem_sanitize(dev_priv); - intel_runtime_pm_put(dev_priv); - return 0; + mutex_lock(&i915->drm.struct_mutex); + for (phase = phases; *phase; phase++) { + list_for_each_entry(obj, *phase, mm.link) + WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + } + mutex_unlock(&i915->drm.struct_mutex); -err_unlock: - mutex_unlock(&dev->struct_mutex); - intel_runtime_pm_put(dev_priv); - return ret; + intel_uc_sanitize(i915); + i915_gem_sanitize(i915); } void i915_gem_resume(struct drm_i915_private *i915) { + GEM_TRACE("\n"); + WARN_ON(i915->gt.awake); mutex_lock(&i915->drm.struct_mutex); @@ -5262,8 +5323,18 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) /* Only when the HW is re-initialised, can we replay the requests */ ret = __i915_gem_restart_engines(dev_priv); + if (ret) + goto cleanup_uc; + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + return 0; + +cleanup_uc: + intel_uc_fini_hw(dev_priv); out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + return ret; } @@ -5300,7 +5371,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (engine->init_context) err = engine->init_context(rq); - __i915_request_add(rq, true); + i915_request_add(rq); if (err) goto err_active; } @@ -5309,9 +5380,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (err) goto err_active; - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); - if (err) + if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { + i915_gem_set_wedged(i915); + err = -EIO; /* Caller will declare us wedged */ goto err_active; + } assert_kernel_context_is_current(i915); @@ -5374,7 +5447,9 @@ err_active: if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) goto out_ctx; - if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) + if (WARN_ON(i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT))) goto out_ctx; i915_gem_contexts_lost(i915); @@ -5385,12 +5460,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; - /* - * We need to fallback to 4K pages since gvt gtt handling doesn't - * support huge page entries - we will need to check either hypervisor - * mm can support huge guest page or just do emulation in gvt. - */ - if (intel_vgpu_active(dev_priv)) + /* We need to fallback to 4K pages if host doesn't support huge gtt. 
*/ + if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) mkwrite_device_info(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K; @@ -5408,13 +5479,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; - ret = intel_wopcm_init(&dev_priv->wopcm); + ret = intel_uc_init_misc(dev_priv); if (ret) return ret; - ret = intel_uc_init_misc(dev_priv); + ret = intel_wopcm_init(&dev_priv->wopcm); if (ret) - return ret; + goto err_uc_misc; /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs @@ -5490,8 +5561,14 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * driver doesn't explode during runtime. */ err_init_hw: - i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); - i915_gem_contexts_lost(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + WARN_ON(i915_gem_suspend(dev_priv)); + i915_gem_suspend_late(dev_priv); + + i915_gem_drain_workqueue(dev_priv); + + mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(dev_priv); err_uc_init: intel_uc_fini(dev_priv); @@ -5508,6 +5585,7 @@ err_unlock: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); +err_uc_misc: intel_uc_fini_misc(dev_priv); if (ret != -EIO) @@ -5520,7 +5598,8 @@ err_unlock: * for all other failure, such as an allocation failure, bail. */ if (!i915_terminally_wedged(&dev_priv->gpu_error)) { - DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); + i915_load_error(dev_priv, + "Failed to initialize GPU, declaring it wedged!\n"); i915_gem_set_wedged(dev_priv); } ret = 0; @@ -5530,6 +5609,28 @@ err_unlock: return ret; } +void i915_gem_fini(struct drm_i915_private *dev_priv) +{ + i915_gem_suspend_late(dev_priv); + + /* Flush any outstanding unpin_work. */ + i915_gem_drain_workqueue(dev_priv); + + mutex_lock(&dev_priv->drm.struct_mutex); + intel_uc_fini_hw(dev_priv); + intel_uc_fini(dev_priv); + i915_gem_cleanup_engines(dev_priv); + i915_gem_contexts_fini(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + intel_uc_fini_misc(dev_priv); + i915_gem_cleanup_userptr(dev_priv); + + i915_gem_drain_freed_objects(dev_priv); + + WARN_ON(!list_empty(&dev_priv->contexts.list)); +} + void i915_gem_init_mmio(struct drm_i915_private *i915) { i915_gem_sanitize(i915); @@ -5694,16 +5795,17 @@ int i915_gem_freeze(struct drm_i915_private *dev_priv) return 0; } -int i915_gem_freeze_late(struct drm_i915_private *dev_priv) +int i915_gem_freeze_late(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; struct list_head *phases[] = { - &dev_priv->mm.unbound_list, - &dev_priv->mm.bound_list, + &i915->mm.unbound_list, + &i915->mm.bound_list, NULL - }, **p; + }, **phase; - /* Called just before we write the hibernation image. + /* + * Called just before we write the hibernation image. 
* * We need to update the domain tracking to reflect that the CPU * will be accessing all the pages to create and restore from the @@ -5717,15 +5819,15 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * the objects as well, see i915_gem_freeze() */ - i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); - i915_gem_drain_freed_objects(dev_priv); + i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND); + i915_gem_drain_freed_objects(i915); - spin_lock(&dev_priv->mm.obj_lock); - for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, mm.link) - __start_cpu_write(obj); + mutex_lock(&i915->drm.struct_mutex); + for (phase = phases; *phase; phase++) { + list_for_each_entry(obj, *phase, mm.link) + WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); } - spin_unlock(&dev_priv->mm.obj_lock); + mutex_unlock(&i915->drm.struct_mutex); return 0; } @@ -6045,16 +6147,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) goto err_unlock; } - pages = fetch_and_zero(&obj->mm.pages); - if (pages) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - - __i915_gem_object_reset_page_iter(obj); - - spin_lock(&i915->mm.obj_lock); - list_del(&obj->mm.link); - spin_unlock(&i915->mm.obj_lock); - } + pages = __i915_gem_object_unset_pages(obj); obj->ops = &i915_gem_phys_ops; @@ -6072,7 +6165,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) err_xfer: obj->ops = &i915_gem_object_ops; - obj->mm.pages = pages; + if (!IS_ERR_OR_NULL(pages)) { + unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl); + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + } err_unlock: mutex_unlock(&obj->mm.lock); return err; |
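Usage note (not part of the patch): i915_gem_wait_for_idle() now takes an explicit timeout in jiffies alongside the wait flags, and the callers updated above pass either MAX_SCHEDULE_TIMEOUT (unbounded) or a short bound such as HZ / 5. A minimal sketch of the new calling convention, closely mirroring the i915_gem_suspend() path in this patch; the wrapper name example_quiesce_gpu is hypothetical and error handling is simplified:

/*
 * Illustrative only: mirrors the call sites in this patch rather than
 * introducing new API. Assumes struct_mutex is already held.
 */
static int example_quiesce_gpu(struct drm_i915_private *i915)
{
	int err;

	lockdep_assert_held(&i915->drm.struct_mutex);

	/* Flush the last user context before idling (see the idle work above). */
	err = i915_gem_switch_to_kernel_context(i915);
	if (err)
		return err;

	/* Wait (interruptibly, without bound) for all engines to go idle. */
	err = i915_gem_wait_for_idle(i915,
				     I915_WAIT_INTERRUPTIBLE |
				     I915_WAIT_LOCKED,
				     MAX_SCHEDULE_TIMEOUT);
	if (err)
		return err;

	/* Retire completed requests so active_requests drops to zero. */
	i915_retire_requests(i915);
	return 0;
}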