Diffstat (limited to 'drivers/gpu/drm/i915/gem'): 25 files changed, 936 insertions, 976 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 81366aa4812b..0598e5382a1d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -217,7 +217,7 @@ static void clear_pages_worker(struct work_struct *work) 0); out_request: if (unlikely(err)) { - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); err = 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 151a1e8ae36a..68326ad3b2e0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -67,14 +67,11 @@ #include <linux/log2.h> #include <linux/nospec.h> -#include <drm/i915_drm.h> - #include "gt/gen6_ppgtt.h" #include "gt/intel_context.h" +#include "gt/intel_context_param.h" #include "gt/intel_engine_heartbeat.h" -#include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" -#include "gt/intel_lrc_reg.h" #include "gt/intel_ring.h" #include "i915_gem_context.h" @@ -245,7 +242,6 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count) if (!e->engines[count]) continue; - RCU_INIT_POINTER(e->engines[count]->gem_context, NULL); intel_context_put(e->engines[count]); } kfree(e); @@ -258,7 +254,51 @@ static void free_engines(struct i915_gem_engines *e) static void free_engines_rcu(struct rcu_head *rcu) { - free_engines(container_of(rcu, struct i915_gem_engines, rcu)); + struct i915_gem_engines *engines = + container_of(rcu, struct i915_gem_engines, rcu); + + i915_sw_fence_fini(&engines->fence); + free_engines(engines); +} + +static int __i915_sw_fence_call +engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct i915_gem_engines *engines = + container_of(fence, typeof(*engines), fence); + + switch (state) { + case FENCE_COMPLETE: + if (!list_empty(&engines->link)) { + struct i915_gem_context *ctx = engines->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->stale.lock, flags); + list_del(&engines->link); + spin_unlock_irqrestore(&ctx->stale.lock, flags); + } + i915_gem_context_put(engines->ctx); + break; + + case FENCE_FREE: + init_rcu_head(&engines->rcu); + call_rcu(&engines->rcu, free_engines_rcu); + break; + } + + return NOTIFY_DONE; +} + +static struct i915_gem_engines *alloc_engines(unsigned int count) +{ + struct i915_gem_engines *e; + + e = kzalloc(struct_size(e, engines, count), GFP_KERNEL); + if (!e) + return NULL; + + i915_sw_fence_init(&e->fence, engines_notify); + return e; } static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) @@ -268,11 +308,10 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) struct i915_gem_engines *e; enum intel_engine_id id; - e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); + e = alloc_engines(I915_NUM_ENGINES); if (!e) return ERR_PTR(-ENOMEM); - init_rcu_head(&e->rcu); for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -306,7 +345,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) list_del(&ctx->link); spin_unlock(&ctx->i915->gem.contexts.lock); - free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) @@ -421,7 +459,7 @@ static struct intel_engine_cs *__active_engine(struct i915_request *rq) } engine = NULL; - if (i915_request_is_active(rq) && !rq->fence.error) + if (i915_request_is_active(rq) && rq->fence.error != -EIO) engine = rq->engine; 
spin_unlock_irq(&locked->active.lock); @@ -452,7 +490,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) return engine; } -static void kill_context(struct i915_gem_context *ctx) +static void kill_engines(struct i915_gem_engines *engines) { struct i915_gem_engines_iter it; struct intel_context *ce; @@ -464,7 +502,7 @@ static void kill_context(struct i915_gem_context *ctx) * However, we only care about pending requests, so only include * engines on which there are incomplete requests. */ - for_each_gem_engine(ce, __context_engines_static(ctx), it) { + for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; if (intel_context_set_banned(ce)) @@ -486,8 +524,82 @@ static void kill_context(struct i915_gem_context *ctx) * the context from the GPU, we have to resort to a full * reset. We hope the collateral damage is worth it. */ - __reset_context(ctx, engine); + __reset_context(engines->ctx, engine); + } +} + +static void kill_stale_engines(struct i915_gem_context *ctx) +{ + struct i915_gem_engines *pos, *next; + + spin_lock_irq(&ctx->stale.lock); + GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) { + if (!i915_sw_fence_await(&pos->fence)) { + list_del_init(&pos->link); + continue; + } + + spin_unlock_irq(&ctx->stale.lock); + + kill_engines(pos); + + spin_lock_irq(&ctx->stale.lock); + GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); + list_safe_reset_next(pos, next, link); + list_del_init(&pos->link); /* decouple from FENCE_COMPLETE */ + + i915_sw_fence_complete(&pos->fence); } + spin_unlock_irq(&ctx->stale.lock); +} + +static void kill_context(struct i915_gem_context *ctx) +{ + kill_stale_engines(ctx); +} + +static void engines_idle_release(struct i915_gem_context *ctx, + struct i915_gem_engines *engines) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + INIT_LIST_HEAD(&engines->link); + + engines->ctx = i915_gem_context_get(ctx); + + for_each_gem_engine(ce, engines, it) { + struct dma_fence *fence; + int err = 0; + + /* serialises with execbuf */ + set_bit(CONTEXT_CLOSED_BIT, &ce->flags); + if (!intel_context_pin_if_active(ce)) + continue; + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (fence) { + err = i915_sw_fence_await_dma_fence(&engines->fence, + fence, 0, + GFP_KERNEL); + dma_fence_put(fence); + } + intel_context_unpin(ce); + if (err < 0) + goto kill; + } + + spin_lock_irq(&ctx->stale.lock); + if (!i915_gem_context_is_closed(ctx)) + list_add_tail(&engines->link, &ctx->stale.engines); + spin_unlock_irq(&ctx->stale.lock); + +kill: + if (list_empty(&engines->link)) /* raced, already closed */ + kill_engines(engines); + + i915_sw_fence_commit(&engines->fence); } static void set_closed_name(struct i915_gem_context *ctx) @@ -511,11 +623,16 @@ static void context_close(struct i915_gem_context *ctx) { struct i915_address_space *vm; + /* Flush any concurrent set_engines() */ + mutex_lock(&ctx->engines_mutex); + engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1)); i915_gem_context_set_closed(ctx); - set_closed_name(ctx); + mutex_unlock(&ctx->engines_mutex); mutex_lock(&ctx->mutex); + set_closed_name(ctx); + vm = i915_gem_context_vm(ctx); if (vm) i915_vm_close(vm); @@ -604,6 +721,9 @@ __create_context(struct drm_i915_private *i915) ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); + spin_lock_init(&ctx->stale.lock); + INIT_LIST_HEAD(&ctx->stale.engines); + mutex_init(&ctx->engines_mutex); e = 
default_engines(ctx); if (IS_ERR(e)) { @@ -637,23 +757,30 @@ err_free: return ERR_PTR(err); } -static void +static int context_apply_all(struct i915_gem_context *ctx, - void (*fn)(struct intel_context *ce, void *data), + int (*fn)(struct intel_context *ce, void *data), void *data) { struct i915_gem_engines_iter it; struct intel_context *ce; + int err = 0; - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) - fn(ce, data); + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + err = fn(ce, data); + if (err) + break; + } i915_gem_context_unlock_engines(ctx); + + return err; } -static void __apply_ppgtt(struct intel_context *ce, void *vm) +static int __apply_ppgtt(struct intel_context *ce, void *vm) { i915_vm_put(ce->vm); ce->vm = i915_vm_get(vm); + return 0; } static struct i915_address_space * @@ -691,9 +818,10 @@ static void __set_timeline(struct intel_timeline **dst, intel_timeline_put(old); } -static void __apply_timeline(struct intel_context *ce, void *timeline) +static int __apply_timeline(struct intel_context *ce, void *timeline) { __set_timeline(&ce->timeline, timeline); + return 0; } static void __assign_timeline(struct i915_gem_context *ctx, @@ -724,8 +852,8 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) { - DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", - PTR_ERR(ppgtt)); + drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", + PTR_ERR(ppgtt)); context_close(ctx); return ERR_CAST(ppgtt); } @@ -767,20 +895,15 @@ static void init_contexts(struct i915_gem_contexts *gc) void i915_gem_init__contexts(struct drm_i915_private *i915) { init_contexts(&i915->gem.contexts); - DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(i915)->has_logical_contexts ? - "logical" : "fake"); + drm_dbg(&i915->drm, "%s context support initialized\n", + DRIVER_CAPS(i915)->has_logical_contexts ? 
+ "logical" : "fake"); } void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { flush_work(&i915->gem.contexts.free_work); -} - -static int vm_idr_cleanup(int id, void *p, void *data) -{ - i915_vm_put(p); - return 0; + rcu_barrier(); /* and flush the left over RCU frees */ } static int gem_context_register(struct i915_gem_context *ctx, @@ -820,8 +943,8 @@ int i915_gem_context_open(struct drm_i915_private *i915, xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); - mutex_init(&file_priv->vm_idr_lock); - idr_init_base(&file_priv->vm_idr, 1); + /* 0 reserved for invalid/unassigned ppgtt */ + xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) { @@ -839,9 +962,8 @@ int i915_gem_context_open(struct drm_i915_private *i915, err_ctx: context_close(ctx); err: - idr_destroy(&file_priv->vm_idr); + xa_destroy(&file_priv->vm_xa); xa_destroy(&file_priv->context_xa); - mutex_destroy(&file_priv->vm_idr_lock); return err; } @@ -849,6 +971,7 @@ void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_private *i915 = file_priv->dev_priv; + struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx; @@ -856,9 +979,9 @@ void i915_gem_context_close(struct drm_file *file) context_close(ctx); xa_destroy(&file_priv->context_xa); - idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); - idr_destroy(&file_priv->vm_idr); - mutex_destroy(&file_priv->vm_idr_lock); + xa_for_each(&file_priv->vm_xa, idx, vm) + i915_vm_put(vm); + xa_destroy(&file_priv->vm_xa); contexts_flush_free(&i915->gem.contexts); } @@ -870,6 +993,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_vm_control *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_ppgtt *ppgtt; + u32 id; int err; if (!HAS_FULL_PPGTT(i915)) @@ -892,23 +1016,15 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, goto err_put; } - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); + err = xa_alloc(&file_priv->vm_xa, &id, &ppgtt->vm, + xa_limit_32b, GFP_KERNEL); if (err) goto err_put; - err = idr_alloc(&file_priv->vm_idr, &ppgtt->vm, 0, 0, GFP_KERNEL); - if (err < 0) - goto err_unlock; - - GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ - - mutex_unlock(&file_priv->vm_idr_lock); - - args->vm_id = err; + GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */ + args->vm_id = id; return 0; -err_unlock: - mutex_unlock(&file_priv->vm_idr_lock); err_put: i915_vm_put(&ppgtt->vm); return err; @@ -920,8 +1036,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_vm_control *args = data; struct i915_address_space *vm; - int err; - u32 id; if (args->flags) return -EINVAL; @@ -929,17 +1043,7 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, if (args->extensions) return -EINVAL; - id = args->vm_id; - if (!id) - return -ENOENT; - - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - - vm = idr_remove(&file_priv->vm_idr, id); - - mutex_unlock(&file_priv->vm_idr_lock); + vm = xa_erase(&file_priv->vm_xa, args->vm_id); if (!vm) return -ENOENT; @@ -965,6 +1069,30 @@ static void cb_retire(struct i915_active *base) kfree(cb); } +static inline struct i915_gem_engines * +__context_engines_await(const struct i915_gem_context *ctx) +{ + struct i915_gem_engines 
*engines; + + rcu_read_lock(); + do { + engines = rcu_dereference(ctx->engines); + if (unlikely(!engines)) + break; + + if (unlikely(!i915_sw_fence_await(&engines->fence))) + continue; + + if (likely(engines == rcu_access_pointer(ctx->engines))) + break; + + i915_sw_fence_complete(&engines->fence); + } while (1); + rcu_read_unlock(); + + return engines; +} + I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); static int context_barrier_task(struct i915_gem_context *ctx, intel_engine_mask_t engines, @@ -975,6 +1103,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, { struct context_barrier_task *cb; struct i915_gem_engines_iter it; + struct i915_gem_engines *e; struct intel_context *ce; int err = 0; @@ -991,7 +1120,13 @@ static int context_barrier_task(struct i915_gem_context *ctx, return err; } - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + e = __context_engines_await(ctx); + if (!e) { + i915_active_release(&cb->base); + return -ENOENT; + } + + for_each_gem_engine(ce, e, it) { struct i915_request *rq; if (I915_SELFTEST_ONLY(context_barrier_inject_fault & @@ -1022,7 +1157,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (err) break; } - i915_gem_context_unlock_engines(ctx); + i915_sw_fence_complete(&e->fence); cb->task = err ? NULL : task; /* caller needs to unwind instead */ cb->data = data; @@ -1037,7 +1172,8 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct drm_i915_gem_context_param *args) { struct i915_address_space *vm; - int ret; + int err; + u32 id; if (!rcu_access_pointer(ctx->vm)) return -ENODEV; @@ -1045,27 +1181,22 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, rcu_read_lock(); vm = context_get_vm_rcu(ctx); rcu_read_unlock(); + if (!vm) + return -ENODEV; - ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (ret) + err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL); + if (err) goto err_put; - ret = idr_alloc(&file_priv->vm_idr, vm, 0, 0, GFP_KERNEL); - GEM_BUG_ON(!ret); - if (ret < 0) - goto err_unlock; - i915_vm_open(vm); + GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */ + args->value = id; args->size = 0; - args->value = ret; - ret = 0; -err_unlock: - mutex_unlock(&file_priv->vm_idr_lock); err_put: i915_vm_put(vm); - return ret; + return err; } static void set_ppgtt_barrier(void *data) @@ -1167,7 +1298,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, return -ENOENT; rcu_read_lock(); - vm = idr_find(&file_priv->vm_idr, args->value); + vm = xa_load(&file_priv->vm_xa, args->value); if (vm && !kref_get_unless_zero(&vm->ref)) vm = NULL; rcu_read_unlock(); @@ -1213,87 +1344,61 @@ out: return err; } -static int gen8_emit_rpcs_config(struct i915_request *rq, - struct intel_context *ce, - struct intel_sseu sseu) +static int __apply_ringsize(struct intel_context *ce, void *sz) { - u64 offset; - u32 *cs; + return intel_context_set_ring_size(ce, (unsigned long)sz); +} - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); +static int set_ringsize(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) + return -ENODEV; + + if (args->size) + return -EINVAL; - offset = i915_ggtt_offset(ce->state) + - LRC_STATE_PN * PAGE_SIZE + - CTX_R_PWR_CLK_STATE * 4; + if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE)) + return -EINVAL; - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = lower_32_bits(offset); - *cs++ = 
upper_32_bits(offset); - *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); + if (args->value < I915_GTT_PAGE_SIZE) + return -EINVAL; - intel_ring_advance(rq, cs); + if (args->value > 128 * I915_GTT_PAGE_SIZE) + return -EINVAL; - return 0; + return context_apply_all(ctx, + __apply_ringsize, + __intel_context_ring_size(args->value)); } -static int -gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) +static int __get_ringsize(struct intel_context *ce, void *arg) { - struct i915_request *rq; - int ret; - - lockdep_assert_held(&ce->pin_mutex); - - /* - * If the context is not idle, we have to submit an ordered request to - * modify its context image via the kernel context (writing to our own - * image, or into the registers directory, does not stick). Pristine - * and idle contexts will be configured on pinning. - */ - if (!intel_context_pin_if_active(ce)) - return 0; + long sz; - rq = intel_engine_create_kernel_request(ce->engine); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto out_unpin; - } - - /* Serialise with the remote context */ - ret = intel_context_prepare_remote_request(ce, rq); - if (ret == 0) - ret = gen8_emit_rpcs_config(rq, ce, sseu); + sz = intel_context_get_ring_size(ce); + GEM_BUG_ON(sz > INT_MAX); - i915_request_add(rq); -out_unpin: - intel_context_unpin(ce); - return ret; + return sz; /* stop on first engine */ } -static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) +static int get_ringsize(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) { - int ret; + int sz; - GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8); - - ret = intel_context_lock_pinned(ce); - if (ret) - return ret; + if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) + return -ENODEV; - /* Nothing to do if unmodified. 
*/ - if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) - goto unlock; + if (args->size) + return -EINVAL; - ret = gen8_modify_rpcs(ce, sseu); - if (!ret) - ce->sseu = sseu; + sz = context_apply_all(ctx, __get_ringsize, NULL); + if (sz < 0) + return sz; -unlock: - intel_context_unlock_pinned(ce); - return ret; + args->value = sz; + return 0; } static int @@ -1460,6 +1565,7 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) struct i915_context_engines_load_balance __user *ext = container_of_user(base, typeof(*ext), base); const struct set_engines *set = data; + struct drm_i915_private *i915 = set->ctx->i915; struct intel_engine_cs *stack[16]; struct intel_engine_cs **siblings; struct intel_context *ce; @@ -1467,24 +1573,25 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) unsigned int n; int err; - if (!HAS_EXECLISTS(set->ctx->i915)) + if (!HAS_EXECLISTS(i915)) return -ENODEV; - if (USES_GUC_SUBMISSION(set->ctx->i915)) + if (intel_uc_uses_guc_submission(&i915->gt.uc)) return -ENODEV; /* not implement yet */ if (get_user(idx, &ext->engine_index)) return -EFAULT; if (idx >= set->engines->num_engines) { - DRM_DEBUG("Invalid placement value, %d >= %d\n", - idx, set->engines->num_engines); + drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", + idx, set->engines->num_engines); return -EINVAL; } idx = array_index_nospec(idx, set->engines->num_engines); if (set->engines->engines[idx]) { - DRM_DEBUG("Invalid placement[%d], already occupied\n", idx); + drm_dbg(&i915->drm, + "Invalid placement[%d], already occupied\n", idx); return -EEXIST; } @@ -1516,12 +1623,13 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) goto out_siblings; } - siblings[n] = intel_engine_lookup_user(set->ctx->i915, + siblings[n] = intel_engine_lookup_user(i915, ci.engine_class, ci.engine_instance); if (!siblings[n]) { - DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n", - n, ci.engine_class, ci.engine_instance); + drm_dbg(&i915->drm, + "Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); err = -EINVAL; goto out_siblings; } @@ -1554,6 +1662,7 @@ set_engines__bond(struct i915_user_extension __user *base, void *data) struct i915_context_engines_bond __user *ext = container_of_user(base, typeof(*ext), base); const struct set_engines *set = data; + struct drm_i915_private *i915 = set->ctx->i915; struct i915_engine_class_instance ci; struct intel_engine_cs *virtual; struct intel_engine_cs *master; @@ -1564,14 +1673,15 @@ set_engines__bond(struct i915_user_extension __user *base, void *data) return -EFAULT; if (idx >= set->engines->num_engines) { - DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n", - idx, set->engines->num_engines); + drm_dbg(&i915->drm, + "Invalid index for virtual engine: %d >= %d\n", + idx, set->engines->num_engines); return -EINVAL; } idx = array_index_nospec(idx, set->engines->num_engines); if (!set->engines->engines[idx]) { - DRM_DEBUG("Invalid engine at %d\n", idx); + drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); return -EINVAL; } virtual = set->engines->engines[idx]->engine; @@ -1589,11 +1699,12 @@ set_engines__bond(struct i915_user_extension __user *base, void *data) if (copy_from_user(&ci, &ext->master, sizeof(ci))) return -EFAULT; - master = intel_engine_lookup_user(set->ctx->i915, + master = intel_engine_lookup_user(i915, ci.engine_class, ci.engine_instance); if (!master) { - DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n", - 
ci.engine_class, ci.engine_instance); + drm_dbg(&i915->drm, + "Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); return -EINVAL; } @@ -1606,12 +1717,13 @@ set_engines__bond(struct i915_user_extension __user *base, void *data) if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) return -EFAULT; - bond = intel_engine_lookup_user(set->ctx->i915, + bond = intel_engine_lookup_user(i915, ci.engine_class, ci.engine_instance); if (!bond) { - DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", - n, ci.engine_class, ci.engine_instance); + drm_dbg(&i915->drm, + "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); return -EINVAL; } @@ -1640,6 +1752,7 @@ static int set_engines(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = ctx->i915; struct i915_context_param_engines __user *user = u64_to_user_ptr(args->value); struct set_engines set = { .ctx = ctx }; @@ -1661,8 +1774,8 @@ set_engines(struct i915_gem_context *ctx, BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); if (args->size < sizeof(*user) || !IS_ALIGNED(args->size, sizeof(*user->engines))) { - DRM_DEBUG("Invalid size for engine array: %d\n", - args->size); + drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", + args->size); return -EINVAL; } @@ -1671,13 +1784,10 @@ set_engines(struct i915_gem_context *ctx, * first 64 engines defined here. */ num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); - - set.engines = kmalloc(struct_size(set.engines, engines, num_engines), - GFP_KERNEL); + set.engines = alloc_engines(num_engines); if (!set.engines) return -ENOMEM; - init_rcu_head(&set.engines->rcu); for (n = 0; n < num_engines; n++) { struct i915_engine_class_instance ci; struct intel_engine_cs *engine; @@ -1698,8 +1808,9 @@ set_engines(struct i915_gem_context *ctx, ci.engine_class, ci.engine_instance); if (!engine) { - DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", - n, ci.engine_class, ci.engine_instance); + drm_dbg(&i915->drm, + "Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); __free_engines(set.engines, n); return -ENOENT; } @@ -1729,6 +1840,11 @@ set_engines(struct i915_gem_context *ctx, replace: mutex_lock(&ctx->engines_mutex); + if (i915_gem_context_is_closed(ctx)) { + mutex_unlock(&ctx->engines_mutex); + free_engines(set.engines); + return -ENOENT; + } if (args->size) i915_gem_context_set_user_engines(ctx); else @@ -1736,7 +1852,8 @@ replace: set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1); mutex_unlock(&ctx->engines_mutex); - call_rcu(&set.engines->rcu, free_engines_rcu); + /* Keep track of old engine sets for kill_context() */ + engines_idle_release(ctx, set.engines); return 0; } @@ -1747,11 +1864,10 @@ __copy_engines(struct i915_gem_engines *e) struct i915_gem_engines *copy; unsigned int n; - copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + copy = alloc_engines(e->num_engines); if (!copy) return ERR_PTR(-ENOMEM); - init_rcu_head(©->rcu); for (n = 0; n < e->num_engines; n++) { if (e->engines[n]) copy->engines[n] = intel_context_get(e->engines[n]); @@ -1852,17 +1968,19 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static void __apply_priority(struct intel_context *ce, void *arg) +static int __apply_priority(struct intel_context *ce, void *arg) { struct 
i915_gem_context *ctx = arg; if (!intel_engine_has_semaphores(ce->engine)) - return; + return 0; if (ctx->sched.priority >= I915_PRIORITY_NORMAL) intel_context_set_use_semaphores(ce); else intel_context_clear_use_semaphores(ce); + + return 0; } static int set_priority(struct i915_gem_context *ctx, @@ -1955,6 +2073,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; + case I915_CONTEXT_PARAM_RINGSIZE: + ret = set_ringsize(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -1983,6 +2105,18 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) return ctx_setparam(arg->fpriv, arg->ctx, &local.param); } +static int copy_ring_size(struct intel_context *dst, + struct intel_context *src) +{ + long sz; + + sz = intel_context_get_ring_size(src); + if (sz < 0) + return sz; + + return intel_context_set_ring_size(dst, sz); +} + static int clone_engines(struct i915_gem_context *dst, struct i915_gem_context *src) { @@ -1991,11 +2125,10 @@ static int clone_engines(struct i915_gem_context *dst, bool user_engines; unsigned long n; - clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + clone = alloc_engines(e->num_engines); if (!clone) goto err_unlock; - init_rcu_head(&clone->rcu); for (n = 0; n < e->num_engines; n++) { struct intel_engine_cs *engine; @@ -2025,6 +2158,12 @@ static int clone_engines(struct i915_gem_context *dst, } intel_context_set_gem(clone->engines[n], dst); + + /* Copy across the preferred ringsize */ + if (copy_ring_size(clone->engines[n], e->engines[n])) { + __free_engines(clone, n + 1); + goto err_unlock; + } } clone->num_engines = n; @@ -2032,8 +2171,7 @@ static int clone_engines(struct i915_gem_context *dst, i915_gem_context_unlock_engines(src); /* Serialised by constructor */ - free_engines(__context_engines_static(dst)); - RCU_INIT_POINTER(dst->engines, clone); + engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); if (user_engines) i915_gem_context_set_user_engines(dst); else @@ -2213,8 +2351,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, ext_data.fpriv = file->driver_priv; if (client_is_banned(ext_data.fpriv)) { - DRM_DEBUG("client %s[%d] banned from creating ctx\n", - current->comm, task_pid_nr(current)); + drm_dbg(&i915->drm, + "client %s[%d] banned from creating ctx\n", + current->comm, task_pid_nr(current)); return -EIO; } @@ -2236,7 +2375,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, goto err_ctx; args->ctx_id = id; - DRM_DEBUG("HW context %d created\n", args->ctx_id); + drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); return 0; @@ -2386,6 +2525,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_persistent(ctx); break; + case I915_CONTEXT_PARAM_RINGSIZE: + ret = get_ringsize(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -2459,6 +2602,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) const struct i915_gem_engines *e = it->engines; struct intel_context *ctx; + if (unlikely(!e)) + return NULL; + do { if (it->idx >= e->num_engines) return NULL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 3ae61a355d87..f1d884d304bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -192,12 +192,16 @@ i915_gem_context_unlock_engines(struct 
i915_gem_context *ctx) static inline struct intel_context * i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) { - struct intel_context *ce = ERR_PTR(-EINVAL); + struct intel_context *ce; rcu_read_lock(); { struct i915_gem_engines *e = rcu_dereference(ctx->engines); - if (likely(idx < e->num_engines && e->engines[idx])) + if (unlikely(!e)) /* context was closed! */ + ce = ERR_PTR(-ENOENT); + else if (likely(idx < e->num_engines && e->engines[idx])) ce = intel_context_get(e->engines[idx]); + else + ce = ERR_PTR(-EINVAL); } rcu_read_unlock(); return ce; @@ -207,7 +211,6 @@ static inline void i915_gem_engines_iter_init(struct i915_gem_engines_iter *it, struct i915_gem_engines *engines) { - GEM_BUG_ON(!engines); it->engines = engines; it->idx = 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 017ca803ab47..28760bd03265 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -20,6 +20,7 @@ #include "gt/intel_context_types.h" #include "i915_scheduler.h" +#include "i915_sw_fence.h" struct pid; @@ -30,7 +31,12 @@ struct intel_timeline; struct intel_ring; struct i915_gem_engines { - struct rcu_head rcu; + union { + struct list_head link; + struct rcu_head rcu; + }; + struct i915_sw_fence fence; + struct i915_gem_context *ctx; unsigned int num_engines; struct intel_context *engines[]; }; @@ -173,6 +179,11 @@ struct i915_gem_context { * context in messages. */ char name[TASK_COMM_LEN + 8]; + + struct { + spinlock_t lock; + struct list_head engines; + } stale; }; #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 372b57ca0efc..7db5a793739d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -48,7 +48,9 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme src = sg_next(src); } - if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + if (!dma_map_sg_attrs(attachment->dev, + st->sgl, st->nents, dir, + DMA_ATTR_SKIP_CPU_SYNC)) { ret = -ENOMEM; goto err_free_sg; } @@ -71,7 +73,9 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, { struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir); + dma_unmap_sg_attrs(attachment->dev, + sg->sgl, sg->nents, dir, + DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sg); kfree(sg); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7643a30ba4cd..36d069504836 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -10,7 +10,6 @@ #include <linux/uaccess.h> #include <drm/drm_syncobj.h> -#include <drm/i915_drm.h> #include "display/intel_frontbuffer.h" @@ -28,6 +27,19 @@ #include "i915_sw_fence_work.h" #include "i915_trace.h" +struct eb_vma { + struct i915_vma *vma; + unsigned int flags; + + /** This vma's place in the execbuf reservation list */ + struct drm_i915_gem_exec_object2 *exec; + struct list_head bind_link; + struct list_head reloc_link; + + struct hlist_node node; + u32 handle; +}; + enum { FORCE_CPU_RELOC = 1, FORCE_GTT_RELOC, @@ -35,17 +47,15 @@ enum { #define DBG_FORCE_RELOC 0 /* choose one of the above! 
*/ }; -#define __EXEC_OBJECT_HAS_REF BIT(31) -#define __EXEC_OBJECT_HAS_PIN BIT(30) -#define __EXEC_OBJECT_HAS_FENCE BIT(29) -#define __EXEC_OBJECT_NEEDS_MAP BIT(28) -#define __EXEC_OBJECT_NEEDS_BIAS BIT(27) -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */ +#define __EXEC_OBJECT_HAS_PIN BIT(31) +#define __EXEC_OBJECT_HAS_FENCE BIT(30) +#define __EXEC_OBJECT_NEEDS_MAP BIT(29) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) -#define __EXEC_VALIDATED BIT(30) -#define __EXEC_INTERNAL_FLAGS (~0u << 30) +#define __EXEC_INTERNAL_FLAGS (~0u << 31) #define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) @@ -220,15 +230,14 @@ struct i915_execbuffer { struct drm_file *file; /** per-file lookup tables and limits */ struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */ struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ - struct i915_vma **vma; - unsigned int *flags; + struct eb_vma *vma; struct intel_engine_cs *engine; /** engine to queue the request to */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ struct i915_request *request; /** our request to build */ - struct i915_vma *batch; /** identity of the batch obj/vma */ + struct eb_vma *batch; /** identity of the batch obj/vma */ struct i915_vma *trampoline; /** trampoline used for chaining */ /** actual size of execobj[] as we may extend it for the cmdparser */ @@ -276,8 +285,6 @@ struct i915_execbuffer { struct hlist_head *buckets; /** ht for relocation handles */ }; -#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) - static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { return intel_engine_requires_cmd_parser(eb->engine) || @@ -364,9 +371,9 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, static inline bool eb_pin_vma(struct i915_execbuffer *eb, const struct drm_i915_gem_exec_object2 *entry, - struct i915_vma *vma) + struct eb_vma *ev) { - unsigned int exec_flags = *vma->exec_flags; + struct i915_vma *vma = ev->vma; u64 pin_flags; if (vma->node.size) @@ -375,24 +382,24 @@ eb_pin_vma(struct i915_execbuffer *eb, pin_flags = entry->offset & PIN_OFFSET_MASK; pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; - if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT)) + if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)) pin_flags |= PIN_GLOBAL; if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) return false; - if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { + if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { if (unlikely(i915_vma_pin_fence(vma))) { i915_vma_unpin(vma); return false; } if (vma->fence) - exec_flags |= __EXEC_OBJECT_HAS_FENCE; + ev->flags |= __EXEC_OBJECT_HAS_FENCE; } - *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; - return !eb_vma_misplaced(entry, vma, exec_flags); + ev->flags |= __EXEC_OBJECT_HAS_PIN; + return !eb_vma_misplaced(entry, vma, ev->flags); } static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) @@ -406,13 +413,13 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) } static inline void -eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags) +eb_unreserve_vma(struct eb_vma *ev) { - if (!(*flags & __EXEC_OBJECT_HAS_PIN)) + if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) return; - 
__eb_unreserve_vma(vma, *flags); - *flags &= ~__EXEC_OBJECT_RESERVED; + __eb_unreserve_vma(ev->vma, ev->flags); + ev->flags &= ~__EXEC_OBJECT_RESERVED; } static int @@ -442,13 +449,6 @@ eb_validate_vma(struct i915_execbuffer *eb, } else { entry->pad_to_size = 0; } - - if (unlikely(vma->exec_flags)) { - DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", - entry->handle, (int)(entry - eb->exec)); - return -EINVAL; - } - /* * From drm_mm perspective address space is continuous, * so from this point we're always using non-canonical @@ -471,41 +471,29 @@ eb_validate_vma(struct i915_execbuffer *eb, return 0; } -static int +static void eb_add_vma(struct i915_execbuffer *eb, unsigned int i, unsigned batch_idx, struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; - int err; + struct eb_vma *ev = &eb->vma[i]; GEM_BUG_ON(i915_vma_is_closed(vma)); - if (!(eb->args->flags & __EXEC_VALIDATED)) { - err = eb_validate_vma(eb, entry, vma); - if (unlikely(err)) - return err; - } + ev->vma = i915_vma_get(vma); + ev->exec = entry; + ev->flags = entry->flags; if (eb->lut_size > 0) { - vma->exec_handle = entry->handle; - hlist_add_head(&vma->exec_node, + ev->handle = entry->handle; + hlist_add_head(&ev->node, &eb->buckets[hash_32(entry->handle, eb->lut_size)]); } if (entry->relocation_count) - list_add_tail(&vma->reloc_link, &eb->relocs); - - /* - * Stash a pointer from the vma to execobj, so we can query its flags, - * size, alignment etc as provided by the user. Also we stash a pointer - * to the vma inside the execobj so that we can use a direct lookup - * to find the right target VMA when doing relocations. - */ - eb->vma[i] = vma; - eb->flags[i] = entry->flags; - vma->exec_flags = &eb->flags[i]; + list_add_tail(&ev->reloc_link, &eb->relocs); /* * SNA is doing fancy tricks with compressing batch buffers, which leads @@ -518,30 +506,23 @@ eb_add_vma(struct i915_execbuffer *eb, */ if (i == batch_idx) { if (entry->relocation_count && - !(eb->flags[i] & EXEC_OBJECT_PINNED)) - eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; + !(ev->flags & EXEC_OBJECT_PINNED)) + ev->flags |= __EXEC_OBJECT_NEEDS_BIAS; if (eb->reloc_cache.has_fence) - eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; + ev->flags |= EXEC_OBJECT_NEEDS_FENCE; - eb->batch = vma; + eb->batch = ev; } - err = 0; - if (eb_pin_vma(eb, entry, vma)) { + if (eb_pin_vma(eb, entry, ev)) { if (entry->offset != vma->node.start) { entry->offset = vma->node.start | UPDATE; eb->args->flags |= __EXEC_HAS_RELOC; } } else { - eb_unreserve_vma(vma, vma->exec_flags); - - list_add_tail(&vma->exec_link, &eb->unbound); - if (drm_mm_node_allocated(&vma->node)) - err = i915_vma_unbind(vma); - if (unlikely(err)) - vma->exec_flags = NULL; + eb_unreserve_vma(ev); + list_add_tail(&ev->bind_link, &eb->unbound); } - return err; } static inline int use_cpu_reloc(const struct reloc_cache *cache, @@ -562,14 +543,14 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache, } static int eb_reserve_vma(const struct i915_execbuffer *eb, - struct i915_vma *vma) + struct eb_vma *ev, + u64 pin_flags) { - struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); - unsigned int exec_flags = *vma->exec_flags; - u64 pin_flags; + struct drm_i915_gem_exec_object2 *entry = ev->exec; + unsigned int exec_flags = ev->flags; + struct i915_vma *vma = ev->vma; int err; - pin_flags = PIN_USER | PIN_NONBLOCK; if (exec_flags & EXEC_OBJECT_NEEDS_GTT) pin_flags |= PIN_GLOBAL; @@ -583,11 +564,16 @@ static int eb_reserve_vma(const struct i915_execbuffer 
*eb, if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) pin_flags |= PIN_MAPPABLE; - if (exec_flags & EXEC_OBJECT_PINNED) { + if (exec_flags & EXEC_OBJECT_PINNED) pin_flags |= entry->offset | PIN_OFFSET_FIXED; - pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */ - } else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) { + else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + + if (drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(entry, vma, ev->flags)) { + err = i915_vma_unbind(vma); + if (err) + return err; } err = i915_vma_pin(vma, @@ -612,8 +598,8 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, exec_flags |= __EXEC_OBJECT_HAS_FENCE; } - *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; - GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags)); + ev->flags = exec_flags | __EXEC_OBJECT_HAS_PIN; + GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags)); return 0; } @@ -621,10 +607,11 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, static int eb_reserve(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; + unsigned int pin_flags = PIN_USER | PIN_NONBLOCK; struct list_head last; - struct i915_vma *vma; + struct eb_vma *ev; unsigned int i, pass; - int err; + int err = 0; /* * Attempt to pin all of the buffers into the GTT. @@ -640,44 +627,54 @@ static int eb_reserve(struct i915_execbuffer *eb) * room for the earlier objects *unless* we need to defragment. */ + if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex)) + return -EINTR; + pass = 0; - err = 0; do { - list_for_each_entry(vma, &eb->unbound, exec_link) { - err = eb_reserve_vma(eb, vma); + list_for_each_entry(ev, &eb->unbound, bind_link) { + err = eb_reserve_vma(eb, ev, pin_flags); if (err) break; } - if (err != -ENOSPC) - return err; + if (!(err == -ENOSPC || err == -EAGAIN)) + break; /* Resort *all* the objects into priority order */ INIT_LIST_HEAD(&eb->unbound); INIT_LIST_HEAD(&last); for (i = 0; i < count; i++) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; + unsigned int flags; + ev = &eb->vma[i]; + flags = ev->flags; if (flags & EXEC_OBJECT_PINNED && flags & __EXEC_OBJECT_HAS_PIN) continue; - eb_unreserve_vma(vma, &eb->flags[i]); + eb_unreserve_vma(ev); if (flags & EXEC_OBJECT_PINNED) /* Pinned must have their slot */ - list_add(&vma->exec_link, &eb->unbound); + list_add(&ev->bind_link, &eb->unbound); else if (flags & __EXEC_OBJECT_NEEDS_MAP) /* Map require the lowest 256MiB (aperture) */ - list_add_tail(&vma->exec_link, &eb->unbound); + list_add_tail(&ev->bind_link, &eb->unbound); else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) /* Prioritise 4GiB region for restricted bo */ - list_add(&vma->exec_link, &last); + list_add(&ev->bind_link, &last); else - list_add_tail(&vma->exec_link, &last); + list_add_tail(&ev->bind_link, &last); } list_splice_tail(&last, &eb->unbound); + if (err == -EAGAIN) { + mutex_unlock(&eb->i915->drm.struct_mutex); + flush_workqueue(eb->i915->mm.userptr_wq); + mutex_lock(&eb->i915->drm.struct_mutex); + continue; + } + switch (pass++) { case 0: break; @@ -688,13 +685,20 @@ static int eb_reserve(struct i915_execbuffer *eb) err = i915_gem_evict_vm(eb->context->vm); mutex_unlock(&eb->context->vm->mutex); if (err) - return err; + goto unlock; break; default: - return -ENOSPC; + err = -ENOSPC; + goto unlock; } + + pin_flags = PIN_USER; } while (1); + +unlock: + mutex_unlock(&eb->i915->drm.struct_mutex); + return err; } static unsigned int eb_batch_index(const struct 
i915_execbuffer *eb) @@ -731,17 +735,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) unsigned int i, batch; int err; + if (unlikely(i915_gem_context_is_closed(eb->gem_context))) + return -ENOENT; + INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); batch = eb_batch_index(eb); - mutex_lock(&eb->gem_context->mutex); - if (unlikely(i915_gem_context_is_closed(eb->gem_context))) { - err = -ENOENT; - goto err_ctx; - } - for (i = 0; i < eb->buffer_count; i++) { u32 handle = eb->exec[i].handle; struct i915_lut_handle *lut; @@ -786,45 +787,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) i915_gem_object_unlock(obj); add_vma: - err = eb_add_vma(eb, i, batch, vma); + err = eb_validate_vma(eb, &eb->exec[i], vma); if (unlikely(err)) goto err_vma; - GEM_BUG_ON(vma != eb->vma[i]); - GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); - GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && - eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i])); + eb_add_vma(eb, i, batch, vma); } - mutex_unlock(&eb->gem_context->mutex); - - eb->args->flags |= __EXEC_VALIDATED; - return eb_reserve(eb); + return 0; err_obj: i915_gem_object_put(obj); err_vma: - eb->vma[i] = NULL; -err_ctx: - mutex_unlock(&eb->gem_context->mutex); + eb->vma[i].vma = NULL; return err; } -static struct i915_vma * +static struct eb_vma * eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) { if (eb->lut_size < 0) { if (handle >= -eb->lut_size) return NULL; - return eb->vma[handle]; + return &eb->vma[handle]; } else { struct hlist_head *head; - struct i915_vma *vma; + struct eb_vma *ev; head = &eb->buckets[hash_32(handle, eb->lut_size)]; - hlist_for_each_entry(vma, head, exec_node) { - if (vma->exec_handle == handle) - return vma; + hlist_for_each_entry(ev, head, node) { + if (ev->handle == handle) + return ev; } return NULL; } @@ -836,32 +829,21 @@ static void eb_release_vmas(const struct i915_execbuffer *eb) unsigned int i; for (i = 0; i < count; i++) { - struct i915_vma *vma = eb->vma[i]; - unsigned int flags = eb->flags[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; if (!vma) break; - GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); - vma->exec_flags = NULL; - eb->vma[i] = NULL; + eb->vma[i].vma = NULL; - if (flags & __EXEC_OBJECT_HAS_PIN) - __eb_unreserve_vma(vma, flags); + if (ev->flags & __EXEC_OBJECT_HAS_PIN) + __eb_unreserve_vma(vma, ev->flags); - if (flags & __EXEC_OBJECT_HAS_REF) - i915_vma_put(vma); + i915_vma_put(vma); } } -static void eb_reset_vmas(const struct i915_execbuffer *eb) -{ - eb_release_vmas(eb); - if (eb->lut_size > 0) - memset(eb->buckets, 0, - sizeof(struct hlist_head) << eb->lut_size); -} - static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); @@ -1197,7 +1179,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto out_pool; skip_request: - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); err_request: i915_request_add(rq); err_unpin: @@ -1328,10 +1310,11 @@ out: static u64 eb_relocate_entry(struct i915_execbuffer *eb, - struct i915_vma *vma, + struct eb_vma *ev, const struct drm_i915_gem_relocation_entry *reloc) { - struct i915_vma *target; + struct drm_i915_private *i915 = eb->i915; + struct eb_vma *target; int err; /* we've already hold a reference to all valid objects */ @@ -1341,7 +1324,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, /* Validate that the target is in a valid r/w GPU domain */ if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { - DRM_DEBUG("reloc with multiple 
write domains: " + drm_dbg(&i915->drm, "reloc with multiple write domains: " "target %d offset %d " "read %08x write %08x", reloc->target_handle, @@ -1352,7 +1335,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, } if (unlikely((reloc->write_domain | reloc->read_domains) & ~I915_GEM_GPU_DOMAINS)) { - DRM_DEBUG("reloc with read/write non-GPU domains: " + drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: " "target %d offset %d " "read %08x write %08x", reloc->target_handle, @@ -1363,7 +1346,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, } if (reloc->write_domain) { - *target->exec_flags |= EXEC_OBJECT_WRITE; + target->flags |= EXEC_OBJECT_WRITE; /* * Sandybridge PPGTT errata: We need a global gtt mapping @@ -1373,7 +1356,8 @@ eb_relocate_entry(struct i915_execbuffer *eb, */ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && IS_GEN(eb->i915, 6)) { - err = i915_vma_bind(target, target->obj->cache_level, + err = i915_vma_bind(target->vma, + target->vma->obj->cache_level, PIN_GLOBAL, NULL); if (WARN_ONCE(err, "Unexpected failure to bind target VMA!")) @@ -1386,21 +1370,21 @@ eb_relocate_entry(struct i915_execbuffer *eb, * more work needs to be done. */ if (!DBG_FORCE_RELOC && - gen8_canonical_addr(target->node.start) == reloc->presumed_offset) + gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { - DRM_DEBUG("Relocation beyond object bounds: " + ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { + drm_dbg(&i915->drm, "Relocation beyond object bounds: " "target %d offset %d size %d.\n", reloc->target_handle, (int)reloc->offset, - (int)vma->size); + (int)ev->vma->size); return -EINVAL; } if (unlikely(reloc->offset & 3)) { - DRM_DEBUG("Relocation not 4-byte aligned: " + drm_dbg(&i915->drm, "Relocation not 4-byte aligned: " "target %d offset %d.\n", reloc->target_handle, (int)reloc->offset); @@ -1415,18 +1399,18 @@ eb_relocate_entry(struct i915_execbuffer *eb, * do relocations we are already stalling, disable the user's opt * out of our synchronisation. */ - *vma->exec_flags &= ~EXEC_OBJECT_ASYNC; + ev->flags &= ~EXEC_OBJECT_ASYNC; /* and update the user's relocation entry */ - return relocate_entry(vma, reloc, eb, target); + return relocate_entry(ev->vma, reloc, eb, target->vma); } -static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) +static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *urelocs; - const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); + const struct drm_i915_gem_exec_object2 *entry = ev->exec; unsigned int remain; urelocs = u64_to_user_ptr(entry->relocs_ptr); @@ -1456,9 +1440,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) * we would try to acquire the struct mutex again. Obviously * this is bad and so lockdep complains vehemently. 
*/ - pagefault_disable(); - copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); - pagefault_enable(); + copied = __copy_from_user(r, urelocs, count * sizeof(r[0])); if (unlikely(copied)) { remain = -EFAULT; goto out; @@ -1466,7 +1448,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) remain -= count; do { - u64 offset = eb_relocate_entry(eb, vma, r); + u64 offset = eb_relocate_entry(eb, ev, r); if (likely(offset == 0)) { } else if ((s64)offset < 0) { @@ -1508,281 +1490,34 @@ out: return remain; } -static int -eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) -{ - const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); - struct drm_i915_gem_relocation_entry *relocs = - u64_to_ptr(typeof(*relocs), entry->relocs_ptr); - unsigned int i; - int err; - - for (i = 0; i < entry->relocation_count; i++) { - u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); - - if ((s64)offset < 0) { - err = (int)offset; - goto err; - } - } - err = 0; -err: - reloc_cache_reset(&eb->reloc_cache); - return err; -} - -static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) -{ - const char __user *addr, *end; - unsigned long size; - char __maybe_unused c; - - size = entry->relocation_count; - if (size == 0) - return 0; - - if (size > N_RELOC(ULONG_MAX)) - return -EINVAL; - - addr = u64_to_user_ptr(entry->relocs_ptr); - size *= sizeof(struct drm_i915_gem_relocation_entry); - if (!access_ok(addr, size)) - return -EFAULT; - - end = addr + size; - for (; addr < end; addr += PAGE_SIZE) { - int err = __get_user(c, addr); - if (err) - return err; - } - return __get_user(c, end - 1); -} - -static int eb_copy_relocations(const struct i915_execbuffer *eb) +static int eb_relocate(struct i915_execbuffer *eb) { - struct drm_i915_gem_relocation_entry *relocs; - const unsigned int count = eb->buffer_count; - unsigned int i; int err; - for (i = 0; i < count; i++) { - const unsigned int nreloc = eb->exec[i].relocation_count; - struct drm_i915_gem_relocation_entry __user *urelocs; - unsigned long size; - unsigned long copied; - - if (nreloc == 0) - continue; - - err = check_relocations(&eb->exec[i]); - if (err) - goto err; - - urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); - size = nreloc * sizeof(*relocs); - - relocs = kvmalloc_array(size, 1, GFP_KERNEL); - if (!relocs) { - err = -ENOMEM; - goto err; - } - - /* copy_from_user is limited to < 4GiB */ - copied = 0; - do { - unsigned int len = - min_t(u64, BIT_ULL(31), size - copied); - - if (__copy_from_user((char *)relocs + copied, - (char __user *)urelocs + copied, - len)) - goto end; - - copied += len; - } while (copied < size); - - /* - * As we do not update the known relocation offsets after - * relocating (due to the complexities in lock handling), - * we need to mark them as invalid now so that we force the - * relocation processing next time. Just in case the target - * object is evicted and then rebound into its old - * presumed_offset before the next execbuffer - if that - * happened we would make the mistake of assuming that the - * relocations were valid. 
- */ - if (!user_access_begin(urelocs, size)) - goto end; - - for (copied = 0; copied < nreloc; copied++) - unsafe_put_user(-1, - &urelocs[copied].presumed_offset, - end_user); - user_access_end(); - - eb->exec[i].relocs_ptr = (uintptr_t)relocs; - } - - return 0; - -end_user: - user_access_end(); -end: - kvfree(relocs); - err = -EFAULT; -err: - while (i--) { - relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); - if (eb->exec[i].relocation_count) - kvfree(relocs); - } - return err; -} - -static int eb_prefault_relocations(const struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - - if (unlikely(i915_modparams.prefault_disable)) - return 0; - - for (i = 0; i < count; i++) { - int err; - - err = check_relocations(&eb->exec[i]); - if (err) - return err; - } - - return 0; -} - -static noinline int eb_relocate_slow(struct i915_execbuffer *eb) -{ - struct drm_device *dev = &eb->i915->drm; - bool have_copy = false; - struct i915_vma *vma; - int err = 0; - -repeat: - if (signal_pending(current)) { - err = -ERESTARTSYS; - goto out; - } - - /* We may process another execbuffer during the unlock... */ - eb_reset_vmas(eb); - mutex_unlock(&dev->struct_mutex); - - /* - * We take 3 passes through the slowpatch. - * - * 1 - we try to just prefault all the user relocation entries and - * then attempt to reuse the atomic pagefault disabled fast path again. - * - * 2 - we copy the user entries to a local buffer here outside of the - * local and allow ourselves to wait upon any rendering before - * relocations - * - * 3 - we already have a local copy of the relocation entries, but - * were interrupted (EAGAIN) whilst waiting for the objects, try again. - */ - if (!err) { - err = eb_prefault_relocations(eb); - } else if (!have_copy) { - err = eb_copy_relocations(eb); - have_copy = err == 0; - } else { - cond_resched(); - err = 0; - } - if (err) { - mutex_lock(&dev->struct_mutex); - goto out; - } - - /* A frequent cause for EAGAIN are currently unavailable client pages */ - flush_workqueue(eb->i915->mm.userptr_wq); - - err = i915_mutex_lock_interruptible(dev); - if (err) { - mutex_lock(&dev->struct_mutex); - goto out; - } - - /* reacquire the objects */ + mutex_lock(&eb->gem_context->mutex); err = eb_lookup_vmas(eb); + mutex_unlock(&eb->gem_context->mutex); if (err) - goto err; - - GEM_BUG_ON(!eb->batch); - - list_for_each_entry(vma, &eb->relocs, reloc_link) { - if (!have_copy) { - pagefault_disable(); - err = eb_relocate_vma(eb, vma); - pagefault_enable(); - if (err) - goto repeat; - } else { - err = eb_relocate_vma_slow(eb, vma); - if (err) - goto err; - } - } - - /* - * Leave the user relocations as are, this is the painfully slow path, - * and we want to avoid the complication of dropping the lock whilst - * having buffers reserved in the aperture and so causing spurious - * ENOSPC for random operations. 
- */ - -err: - if (err == -EAGAIN) - goto repeat; - -out: - if (have_copy) { - const unsigned int count = eb->buffer_count; - unsigned int i; - - for (i = 0; i < count; i++) { - const struct drm_i915_gem_exec_object2 *entry = - &eb->exec[i]; - struct drm_i915_gem_relocation_entry *relocs; - - if (!entry->relocation_count) - continue; + return err; - relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); - kvfree(relocs); - } + if (!list_empty(&eb->unbound)) { + err = eb_reserve(eb); + if (err) + return err; } - return err; -} - -static int eb_relocate(struct i915_execbuffer *eb) -{ - if (eb_lookup_vmas(eb)) - goto slow; - /* The objects are in their final locations, apply the relocations. */ if (eb->args->flags & __EXEC_HAS_RELOC) { - struct i915_vma *vma; + struct eb_vma *ev; - list_for_each_entry(vma, &eb->relocs, reloc_link) { - if (eb_relocate_vma(eb, vma)) - goto slow; + list_for_each_entry(ev, &eb->relocs, reloc_link) { + err = eb_relocate_vma(eb, ev); + if (err) + return err; } } return 0; - -slow: - return eb_relocate_slow(eb); } static int eb_move_to_gpu(struct i915_execbuffer *eb) @@ -1795,27 +1530,19 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) ww_acquire_init(&acquire, &reservation_ww_class); for (i = 0; i < count; i++) { - struct i915_vma *vma = eb->vma[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); - if (!err) - continue; - - GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */ - if (err == -EDEADLK) { GEM_BUG_ON(i == 0); do { int j = i - 1; - ww_mutex_unlock(&eb->vma[j]->resv->lock); + ww_mutex_unlock(&eb->vma[j].vma->resv->lock); - swap(eb->flags[i], eb->flags[j]); swap(eb->vma[i], eb->vma[j]); - eb->vma[i]->exec_flags = &eb->flags[i]; } while (--i); - GEM_BUG_ON(vma != eb->vma[0]); - vma->exec_flags = &eb->flags[0]; err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, &acquire); @@ -1826,8 +1553,9 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) ww_acquire_done(&acquire); while (i--) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + unsigned int flags = ev->flags; struct drm_i915_gem_object *obj = vma->obj; assert_vma_held(vma); @@ -1871,10 +1599,9 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) i915_vma_unlock(vma); __eb_unreserve_vma(vma, flags); - vma->exec_flags = NULL; + i915_vma_put(vma); - if (unlikely(flags & __EXEC_OBJECT_HAS_REF)) - i915_vma_put(vma); + ev->vma = NULL; } ww_acquire_fini(&acquire); @@ -1888,7 +1615,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) return 0; err_skip: - i915_request_skip(eb->request, err); + i915_request_set_error_once(eb->request, err); return err; } @@ -1922,7 +1649,7 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) int i; if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { - DRM_DEBUG("sol reset is gen7/rcs only\n"); + drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -2009,7 +1736,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, if (!pw) return -ENOMEM; - err = i915_active_acquire(&eb->batch->active); + err = i915_active_acquire(&eb->batch->vma->active); if (err) goto err_free; @@ -2026,7 +1753,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, dma_fence_work_init(&pw->base, &eb_parse_ops); pw->engine = eb->engine; - pw->batch = eb->batch; + pw->batch = eb->batch->vma; pw->batch_offset = eb->batch_start_offset; pw->batch_length = 
eb->batch_len; pw->shadow = shadow; @@ -2068,7 +1795,7 @@ err_trampoline: err_shadow: i915_active_release(&shadow->active); err_batch: - i915_active_release(&eb->batch->active); + i915_active_release(&eb->batch->vma->active); err_free: kfree(pw); return err; @@ -2076,6 +1803,7 @@ err_free: static int eb_parse(struct i915_execbuffer *eb) { + struct drm_i915_private *i915 = eb->i915; struct intel_engine_pool_node *pool; struct i915_vma *shadow, *trampoline; unsigned int len; @@ -2091,7 +1819,8 @@ static int eb_parse(struct i915_execbuffer *eb) * post-scan tampering */ if (!eb->context->vm->has_read_only) { - DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + drm_dbg(&i915->drm, + "Cannot prevent post-scan tampering without RO capable vm\n"); return -EINVAL; } } else { @@ -2129,15 +1858,12 @@ static int eb_parse(struct i915_execbuffer *eb) if (err) goto err_trampoline; - eb->vma[eb->buffer_count] = i915_vma_get(shadow); - eb->flags[eb->buffer_count] = - __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; - shadow->exec_flags = &eb->flags[eb->buffer_count]; - eb->buffer_count++; + eb->vma[eb->buffer_count].vma = i915_vma_get(shadow); + eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN; + eb->batch = &eb->vma[eb->buffer_count++]; eb->trampoline = trampoline; eb->batch_start_offset = 0; - eb->batch = shadow; shadow->private = pool; return 0; @@ -2164,7 +1890,7 @@ add_to_client(struct i915_request *rq, struct drm_file *file) spin_unlock(&file_priv->mm.lock); } -static int eb_submit(struct i915_execbuffer *eb) +static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) { int err; @@ -2191,7 +1917,7 @@ static int eb_submit(struct i915_execbuffer *eb) } err = eb->engine->emit_bb_start(eb->request, - eb->batch->node.start + + batch->node.start + eb->batch_start_offset, eb->batch_len, eb->batch_flags); @@ -2326,15 +2052,22 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) intel_context_timeline_unlock(tl); if (rq) { - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) { - i915_request_put(rq); - err = -EINTR; - goto err_exit; - } + bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; + long timeout; + timeout = MAX_SCHEDULE_TIMEOUT; + if (nonblock) + timeout = 0; + + timeout = i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + timeout); i915_request_put(rq); + + if (timeout < 0) { + err = nonblock ? 
-EWOULDBLOCK : timeout; + goto err_exit; + } } eb->engine = ce->engine; @@ -2372,8 +2105,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, if (user_ring_id != I915_EXEC_BSD && (args->flags & I915_EXEC_BSD_MASK)) { - DRM_DEBUG("execbuf with non bsd ring but with invalid " - "bsd dispatch flags: %d\n", (int)(args->flags)); + drm_dbg(&i915->drm, + "execbuf with non bsd ring but with invalid " + "bsd dispatch flags: %d\n", (int)(args->flags)); return -1; } @@ -2387,8 +2121,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, bsd_idx >>= I915_EXEC_BSD_SHIFT; bsd_idx--; } else { - DRM_DEBUG("execbuf with unknown bsd ring: %u\n", - bsd_idx); + drm_dbg(&i915->drm, + "execbuf with unknown bsd ring: %u\n", + bsd_idx); return -1; } @@ -2396,7 +2131,8 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); + drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n", + user_ring_id); return -1; } @@ -2556,6 +2292,73 @@ signal_fence_array(struct i915_execbuffer *eb, } } +static void retire_requests(struct intel_timeline *tl, struct i915_request *end) +{ + struct i915_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (rq == end || !i915_request_retire(rq)) + break; +} + +static void eb_request_add(struct i915_execbuffer *eb) +{ + struct i915_request *rq = eb->request; + struct intel_timeline * const tl = i915_request_timeline(rq); + struct i915_sched_attr attr = {}; + struct i915_request *prev; + + lockdep_assert_held(&tl->mutex); + lockdep_unpin_lock(&tl->mutex, rq->cookie); + + trace_i915_request_add(rq); + + prev = __i915_request_commit(rq); + + /* Check that the context wasn't destroyed before submission */ + if (likely(!intel_context_is_closed(eb->context))) { + attr = eb->gem_context->sched; + + /* + * Boost actual workloads past semaphores! + * + * With semaphores we spin on one engine waiting for another, + * simply to reduce the latency of starting our work when + * the signaler completes. However, if there is any other + * work that we could be doing on this engine instead, that + * is better utilisation and will reduce the overall duration + * of the current work. To avoid PI boosting a semaphore + * far in the distance past over useful work, we keep a history + * of any semaphore use along our dependency chain. + */ + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) + attr.priority |= I915_PRIORITY_NOSEMAPHORE; + + /* + * Boost priorities to new clients (new request flows). + * + * Allow interactive/synchronous clients to jump ahead of + * the bulk clients. 
(FQ_CODEL) + */ + if (list_empty(&rq->sched.signalers_list)) + attr.priority |= I915_PRIORITY_WAIT; + } else { + /* Serialise with context_close via the add_to_timeline */ + i915_request_set_error_once(rq, -ENOENT); + __i915_request_skip(rq); + } + + local_bh_disable(); + __i915_request_queue(rq, &attr); + local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ + + /* Try to clean up the client's timeline after submitting the request */ + if (prev) + retire_requests(tl, prev); + + mutex_unlock(&tl->mutex); +} + static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, @@ -2568,6 +2371,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; struct sync_file *out_fence = NULL; + struct i915_vma *batch; int out_fence_fd = -1; int err; @@ -2582,9 +2386,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; - eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1); - eb.vma[0] = NULL; - eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1); + eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); + eb.vma[0].vma = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -2652,10 +2455,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_context; - err = i915_mutex_lock_interruptible(dev); - if (err) - goto err_engine; - err = eb_relocate(&eb); if (err) { /* @@ -2669,20 +2468,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) { - DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); + if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) { + drm_dbg(&i915->drm, + "Attempting to use self-modifying batch buffer\n"); err = -EINVAL; goto err_vma; } - if (eb.batch_start_offset > eb.batch->size || - eb.batch_len > eb.batch->size - eb.batch_start_offset) { - DRM_DEBUG("Attempting to use out-of-bounds batch\n"); + + if (range_overflows_t(u64, + eb.batch_start_offset, eb.batch_len, + eb.batch->vma->size)) { + drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); err = -EINVAL; goto err_vma; } if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; + eb.batch_len = eb.batch->vma->size - eb.batch_start_offset; err = eb_parse(&eb); if (err) @@ -2692,6 +2494,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ + batch = eb.batch->vma; if (eb.batch_flags & I915_DISPATCH_SECURE) { struct i915_vma *vma; @@ -2705,13 +2508,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, * fitting due to fragmentation. * So this is actually safe. */ - vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err_vma; + goto err_parse; } - eb.batch = vma; + batch = vma; } /* All GPU relocation batches must be submitted prior to the user rq */ @@ -2758,16 +2561,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. 
*/ - eb.request->batch = eb.batch; - if (eb.batch->private) - intel_engine_pool_mark_active(eb.batch->private, eb.request); + eb.request->batch = batch; + if (batch->private) + intel_engine_pool_mark_active(batch->private, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); - err = eb_submit(&eb); + err = eb_submit(&eb, batch); err_request: add_to_client(eb.request, file); i915_request_get(eb.request); - i915_request_add(eb.request); + eb_request_add(&eb); if (fences) signal_fence_array(&eb, fences); @@ -2786,16 +2589,15 @@ err_request: err_batch_unpin: if (eb.batch_flags & I915_DISPATCH_SECURE) - i915_vma_unpin(eb.batch); - if (eb.batch->private) - intel_engine_pool_put(eb.batch->private); + i915_vma_unpin(batch); +err_parse: + if (batch->private) + intel_engine_pool_put(batch->private); err_vma: if (eb.exec) eb_release_vmas(&eb); if (eb.trampoline) i915_vma_unpin(eb.trampoline); - mutex_unlock(&dev->struct_mutex); -err_engine: eb_unpin_engine(&eb); err_context: i915_gem_context_put(eb.gem_context); @@ -2813,9 +2615,7 @@ err_in_fence: static size_t eb_element_size(void) { - return (sizeof(struct drm_i915_gem_exec_object2) + - sizeof(struct i915_vma *) + - sizeof(unsigned int)); + return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); } static bool check_buffer_count(size_t count) @@ -2839,6 +2639,7 @@ int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; @@ -2848,7 +2649,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, int err; if (!check_buffer_count(count)) { - DRM_DEBUG("execbuf2 with %zd buffers\n", count); + drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); return -EINVAL; } @@ -2873,8 +2674,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2_list = kvmalloc_array(count + 1, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %d buffers\n", - args->buffer_count); + drm_dbg(&i915->drm, + "Failed to allocate exec list for %d buffers\n", + args->buffer_count); kvfree(exec_list); kvfree(exec2_list); return -ENOMEM; @@ -2883,8 +2685,8 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, u64_to_user_ptr(args->buffers_ptr), sizeof(*exec_list) * count); if (err) { - DRM_DEBUG("copy %d exec entries failed %d\n", - args->buffer_count, err); + drm_dbg(&i915->drm, "copy %d exec entries failed %d\n", + args->buffer_count, err); kvfree(exec_list); kvfree(exec2_list); return -EFAULT; @@ -2931,6 +2733,7 @@ int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; struct drm_syncobj **fences = NULL; @@ -2938,7 +2741,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, int err; if (!check_buffer_count(count)) { - DRM_DEBUG("execbuf2 with %zd buffers\n", count); + drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); return -EINVAL; } @@ -2950,14 +2753,14 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, exec2_list = kvmalloc_array(count + 1, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", - count); + 
drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n", + count); return -ENOMEM; } if (copy_from_user(exec2_list, u64_to_user_ptr(args->buffers_ptr), sizeof(*exec2_list) * count)) { - DRM_DEBUG("copy %zd exec entries failed\n", count); + drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count); kvfree(exec2_list); return -EFAULT; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 9cfb0e41ff06..cbbff81aa0af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -8,8 +8,6 @@ #include <linux/slab.h> #include <linux/swiotlb.h> -#include <drm/i915_drm.h> - #include "i915_drv.h" #include "i915_gem.h" #include "i915_gem_object.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 0b6a442108de..b39c24dae64e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -613,8 +613,7 @@ __assign_mmap_offset(struct drm_file *file, if (!obj) return -ENOENT; - if (mmap_type == I915_MMAP_TYPE_GTT && - i915_gem_object_never_bind_ggtt(obj)) { + if (i915_gem_object_never_mmap(obj)) { err = -ENODEV; goto out; } @@ -776,7 +775,7 @@ static struct file *mmap_singleton(struct drm_i915_private *i915) struct file *file; rcu_read_lock(); - file = i915->gem.mmap_singleton; + file = READ_ONCE(i915->gem.mmap_singleton); if (file && !get_file_rcu(file)) file = NULL; rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 9c86f2dea947..2faa481cc18f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -11,8 +11,6 @@ #include <drm/drm_file.h> #include <drm/drm_device.h> -#include <drm/i915_drm.h> - #include "display/intel_frontbuffer.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" @@ -194,9 +192,9 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) } static inline bool -i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) +i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj) { - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT); + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_MMAP); } static inline bool diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 70809d8897cd..e00792158f13 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -186,7 +186,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, 0); out_request: if (unlikely(err)) - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); i915_request_add(rq); out_batch: @@ -196,6 +196,17 @@ out_unpin: return err; } +/* Wa_1209644611:icl,ehl */ +static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) +{ + u32 height = size >> PAGE_SHIFT; + + if (!IS_GEN(i915, 11)) + return false; + + return height % 4 == 3 && height <= 8; +} + struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, struct i915_vma *src, struct i915_vma *dst) @@ -237,7 +248,8 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, size = min_t(u64, rem, block_size); GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - if (INTEL_GEN(i915) >= 9) { + if (INTEL_GEN(i915) >= 9 && + !wa_1209644611_applies(i915, size)) { *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); *cmd++ = BLT_DEPTH_32 | 
PAGE_SIZE; *cmd++ = 0; @@ -385,7 +397,7 @@ out_unlock: drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire); out_request: if (unlikely(err)) - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); i915_request_add(rq); out_batch: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index c2174da35bb0..a0b10bcd8d8a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -34,7 +34,7 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_HAS_IOMEM BIT(1) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) #define I915_GEM_OBJECT_IS_PROXY BIT(3) -#define I915_GEM_OBJECT_NO_GGTT BIT(4) +#define I915_GEM_OBJECT_NO_MMAP BIT(4) #define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) /* Interface between the GEM object and its backing storage. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 54aca5c9101e..24f4cadea114 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -83,10 +83,12 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); int err; if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { - DRM_DEBUG("Attempting to obtain a purgeable object\n"); + drm_dbg(&i915->drm, + "Attempting to obtain a purgeable object\n"); return -EFAULT; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index b07bb40edd5a..698e22420dc5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -194,10 +194,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) /* Perma-pin (until release) the physical set of pages */ __i915_gem_object_pin_pages(obj); - if (!IS_ERR_OR_NULL(pages)) { + if (!IS_ERR_OR_NULL(pages)) i915_gem_shmem_ops.put_pages(obj, pages); - i915_gem_object_release_memory_region(obj); - } + + i915_gem_object_release_memory_region(obj); + mutex_unlock(&obj->mm.lock); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index c8264eb036bf..3d215164dd5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -85,7 +85,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); i915_gem_object_lock(obj); - WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + drm_WARN_ON(&i915->drm, + i915_gem_object_set_to_gtt_domain(obj, false)); i915_gem_object_unlock(obj); i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index a2a980d9d241..5d5d7eef3f43 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -148,7 +148,8 @@ rebuild_st: last_pfn = page_to_pfn(page); /* Check that the i965g/gm workaround works. 
*/ - WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); + drm_WARN_ON(&i915->drm, + (gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } if (sg) { /* loop terminated early; short sg table */ sg_page_sizes |= sg->length; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 59b387ade49c..03e5eb4c99d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -12,7 +12,6 @@ #include <linux/pci.h> #include <linux/dma-buf.h> #include <linux/vmalloc.h> -#include <drm/i915_drm.h> #include "i915_trace.h" @@ -401,19 +400,22 @@ void i915_gem_driver_register__shrinker(struct drm_i915_private *i915) i915->mm.shrinker.count_objects = i915_gem_shrinker_count; i915->mm.shrinker.seeks = DEFAULT_SEEKS; i915->mm.shrinker.batch = 4096; - WARN_ON(register_shrinker(&i915->mm.shrinker)); + drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker)); i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; - WARN_ON(register_oom_notifier(&i915->mm.oom_notifier)); + drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier)); i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; - WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); + drm_WARN_ON(&i915->drm, + register_vmap_purge_notifier(&i915->mm.vmap_notifier)); } void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915) { - WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); - WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier)); + drm_WARN_ON(&i915->drm, + unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); + drm_WARN_ON(&i915->drm, + unregister_oom_notifier(&i915->mm.oom_notifier)); unregister_shrinker(&i915->mm.shrinker); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 451f3078d60d..5557dfa83a7b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -13,6 +13,7 @@ #include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_gem_stolen.h" +#include "i915_vgpu.h" /* * The BIOS typically reserves some of the system's memory for the exclusive @@ -110,8 +111,11 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res); - DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm); + drm_dbg(&i915->drm, + "GTT within stolen memory at %pR\n", + &ggtt_res); + drm_dbg(&i915->drm, "Stolen memory adjusted to %pR\n", + dsm); } } @@ -142,8 +146,9 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, * range. Apparently this works. */ if (!r && !IS_GEN(i915, 3)) { - DRM_ERROR("conflict detected with stolen region: %pR\n", - dsm); + drm_err(&i915->drm, + "conflict detected with stolen region: %pR\n", + dsm); return -EBUSY; } @@ -171,8 +176,8 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *i915, ELK_STOLEN_RESERVED); resource_size_t stolen_top = i915->dsm.end + 1; - DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", - IS_GM45(i915) ? "CTG" : "ELK", reg_val); + drm_dbg(&i915->drm, "%s_STOLEN_RESERVED = %08x\n", + IS_GM45(i915) ? "CTG" : "ELK", reg_val); if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) return; @@ -181,14 +186,16 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *i915, * Whether ILK really reuses the ELK register for this is unclear. 
* Let's see if we catch anyone with this supposedly enabled on ILK. */ - WARN(IS_GEN(i915, 5), "ILK stolen reserved found? 0x%08x\n", - reg_val); + drm_WARN(&i915->drm, IS_GEN(i915, 5), + "ILK stolen reserved found? 0x%08x\n", + reg_val); if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) return; *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; - WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); + drm_WARN_ON(&i915->drm, + (reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); *size = stolen_top - *base; } @@ -200,7 +207,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *i915, { u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val); if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; @@ -234,7 +241,7 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *i915, u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); resource_size_t stolen_top = i915->dsm.end + 1; - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val); if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; @@ -262,7 +269,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *i915, { u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val); if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; @@ -289,7 +296,7 @@ static void chv_get_stolen_reserved(struct drm_i915_private *i915, { u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val); if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; @@ -323,7 +330,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *i915, u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); resource_size_t stolen_top = i915->dsm.end + 1; - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val); if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; @@ -342,7 +349,7 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, { u64 reg_val = intel_uncore_read64(uncore, GEN6_STOLEN_RESERVED); - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); + drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; @@ -453,8 +460,9 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) * it likely means we failed to read the registers correctly. 
*/ if (!reserved_base) { - DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n", - &reserved_base, &reserved_size); + drm_err(&i915->drm, + "inconsistent reservation %pa + %pa; ignoring\n", + &reserved_base, &reserved_size); reserved_base = stolen_top; reserved_size = 0; } @@ -463,8 +471,9 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) (struct resource)DEFINE_RES_MEM(reserved_base, reserved_size); if (!resource_contains(&i915->dsm, &i915->dsm_reserved)) { - DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", - &i915->dsm_reserved, &i915->dsm); + drm_err(&i915->drm, + "Stolen reserved area %pR outside stolen memory %pR\n", + &i915->dsm_reserved, &i915->dsm); return 0; } @@ -472,9 +481,10 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; - DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&i915->dsm) >> 10, - ((u64)resource_size(&i915->dsm) - reserved_total) >> 10); + drm_dbg(&i915->drm, + "Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&i915->dsm) >> 10, + ((u64)resource_size(&i915->dsm) - reserved_total) >> 10); i915->stolen_usable_size = resource_size(&i915->dsm) - reserved_total; @@ -677,26 +687,24 @@ struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, resource_size_t stolen_offset, - resource_size_t gtt_offset, resource_size_t size) { struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN]; - struct i915_ggtt *ggtt = &i915->ggtt; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; - struct i915_vma *vma; int ret; if (!drm_mm_initialized(&i915->mm.stolen)) return ERR_PTR(-ENODEV); - DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", - &stolen_offset, >t_offset, &size); + drm_dbg(&i915->drm, + "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n", + &stolen_offset, &size); /* KISS and expect everything to be page-aligned */ - if (WARN_ON(size == 0) || - WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || - WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) + if (GEM_WARN_ON(size == 0) || + GEM_WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || + GEM_WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); @@ -709,68 +717,20 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); mutex_unlock(&i915->mm.stolen_lock); if (ret) { - DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); - kfree(stolen); - return ERR_PTR(ret); + obj = ERR_PTR(ret); + goto err_free; } obj = __i915_gem_object_create_stolen(mem, stolen); - if (IS_ERR(obj)) { - DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); - i915_gem_stolen_remove_node(i915, stolen); - kfree(stolen); - return obj; - } - - /* Some objects just need physical mem from stolen space */ - if (gtt_offset == I915_GTT_OFFSET_NONE) - return obj; - - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto err; - - vma = i915_vma_instance(obj, &ggtt->vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_pages; - } - - /* To simplify the initialisation sequence between KMS and GTT, - * we allow construction of the stolen object prior 
to - * setting up the GTT space. The actual reservation will occur - * later. - */ - mutex_lock(&ggtt->vm.mutex); - ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, - size, gtt_offset, obj->cache_level, - 0); - if (ret) { - DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); - mutex_unlock(&ggtt->vm.mutex); - goto err_pages; - } - - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - - GEM_BUG_ON(vma->pages); - vma->pages = obj->mm.pages; - atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); - - set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); - __i915_vma_set_map_and_fenceable(vma); - - list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); - mutex_unlock(&ggtt->vm.mutex); - - GEM_BUG_ON(i915_gem_object_is_shrinkable(obj)); - atomic_inc(&obj->bind_count); + if (IS_ERR(obj)) + goto err_stolen; + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); return obj; -err_pages: - i915_gem_object_unpin_pages(obj); -err: - i915_gem_object_put(obj); - return ERR_PTR(ret); +err_stolen: + i915_gem_stolen_remove_node(i915, stolen); +err_free: + kfree(stolen); + return obj; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index c1040627fbf3..e15c0adad8af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -28,7 +28,6 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, resource_size_t stolen_offset, - resource_size_t gtt_offset, resource_size_t size); #endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index 6c7825a2dc2a..37f77aee1212 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -6,7 +6,6 @@ #include <linux/string.h> #include <linux/bitops.h> -#include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_gem.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 580319b7bf1a..7ffd7afeb7a5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -10,8 +10,6 @@ #include <linux/swap.h> #include <linux/sched/mm.h> -#include <drm/i915_drm.h> - #include "i915_drv.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -704,7 +702,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | - I915_GEM_OBJECT_NO_GGTT | + I915_GEM_OBJECT_NO_MMAP | I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, @@ -770,6 +768,23 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + /* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! 
+ * + * Aside from our own locals (for which we have no excuse!): + * - sg_table embeds unsigned int for num_pages + * - get_user_pages*() mixed ints with longs + */ + + if (args->user_size >> PAGE_SHIFT > INT_MAX) + return -E2BIG; + + if (overflows_type(args->user_size, obj->base.size)) + return -E2BIG; + if (!args->user_size) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 9311250d7d6f..2d0fd50c5312 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1208,107 +1208,6 @@ static int igt_write_huge(struct i915_gem_context *ctx, return err; } -static int igt_ppgtt_exhaust_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - unsigned long supported = INTEL_INFO(i915)->page_sizes; - static unsigned int pages[ARRAY_SIZE(page_sizes)]; - struct drm_i915_gem_object *obj; - unsigned int size_mask; - unsigned int page_mask; - int n, i; - int err = -ENODEV; - - if (supported == I915_GTT_PAGE_SIZE_4K) - return 0; - - /* - * Sanity check creating objects with a varying mix of page sizes -- - * ensuring that our writes lands in the right place. - */ - - n = 0; - for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) - pages[n++] = BIT(i); - - for (size_mask = 2; size_mask < BIT(n); size_mask++) { - unsigned int size = 0; - - for (i = 0; i < n; i++) { - if (size_mask & BIT(i)) - size |= pages[i]; - } - - /* - * For our page mask we want to enumerate all the page-size - * combinations which will fit into our chosen object size. - */ - for (page_mask = 2; page_mask <= size_mask; page_mask++) { - unsigned int page_sizes = 0; - - for (i = 0; i < n; i++) { - if (page_mask & BIT(i)) - page_sizes |= pages[i]; - } - - /* - * Ensure that we can actually fill the given object - * with our chosen page mask. 
- */ - if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) - continue; - - obj = huge_pages_object(i915, size, page_sizes); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_device; - } - - err = i915_gem_object_pin_pages(obj); - if (err) { - i915_gem_object_put(obj); - - if (err == -ENOMEM) { - pr_info("unable to get pages, size=%u, pages=%u\n", - size, page_sizes); - err = 0; - break; - } - - pr_err("pin_pages failed, size=%u, pages=%u\n", - size_mask, page_mask); - - goto out_device; - } - - /* Force the page-size for the gtt insertion */ - obj->mm.page_sizes.sg = page_sizes; - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("exhaust write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj); - i915_gem_object_put(obj); - } - } - - goto out_device; - -out_unpin: - i915_gem_object_unpin_pages(obj); - i915_gem_object_put(obj); -out_device: - mkwrite_device_info(i915)->page_sizes = supported; - - return err; -} - typedef struct drm_i915_gem_object * (*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); @@ -1900,7 +1799,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_shrink_thp), SUBTEST(igt_ppgtt_pin_update), SUBTEST(igt_tmpfs_fallback), - SUBTEST(igt_ppgtt_exhaust_huge), SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 7fc46861a54d..54b86cf7f5d2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1004,7 +1004,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, return 0; skip_request: - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); err_request: i915_request_add(rq); err_batch: @@ -1465,9 +1465,12 @@ out_file: static int check_scratch(struct i915_address_space *vm, u64 offset) { - struct drm_mm_node *node = - __drm_mm_interval_first(&vm->mm, - offset, offset + sizeof(u32) - 1); + struct drm_mm_node *node; + + mutex_lock(&vm->mutex); + node = __drm_mm_interval_first(&vm->mm, + offset, offset + sizeof(u32) - 1); + mutex_unlock(&vm->mutex); if (!node || node->start > offset) return 0; @@ -1492,6 +1495,10 @@ static int write_to_scratch(struct i915_gem_context *ctx, GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); + err = check_scratch(ctx_vm(ctx), offset); + if (err) + return err; + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1528,10 +1535,6 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto out_vm; - err = check_scratch(vm, offset); - if (err) - goto err_unpin; - rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); @@ -1556,7 +1559,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto out_vm; skip_request: - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); err_request: i915_request_add(rq); err_unpin: @@ -1575,64 +1578,95 @@ static int read_from_scratch(struct i915_gem_context *ctx, struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; struct i915_address_space *vm; - const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! 
*/ const u32 result = 0x100; struct i915_request *rq; struct i915_vma *vma; + unsigned int flags; u32 *cmd; int err; GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); + err = check_scratch(ctx_vm(ctx), offset); + if (err) + return err; + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out; - } - - memset(cmd, POISON_INUSE, PAGE_SIZE); if (INTEL_GEN(i915) >= 8) { + const u32 GPR0 = engine->mmio_base + 0x600; + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_vm; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); + if (err) + goto out_vm; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out; + } + + memset(cmd, POISON_INUSE, PAGE_SIZE); *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; - *cmd++ = RCS_GPR0; + *cmd++ = GPR0; *cmd++ = lower_32_bits(offset); *cmd++ = upper_32_bits(offset); *cmd++ = MI_STORE_REGISTER_MEM_GEN8; - *cmd++ = RCS_GPR0; + *cmd++ = GPR0; *cmd++ = result; *cmd++ = 0; + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + flags = 0; } else { + const u32 reg = engine->mmio_base + 0x420; + + /* hsw: register access even to 3DPRIM! is protected */ + vm = i915_vm_get(&engine->gt->ggtt->vm); + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_vm; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_vm; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out; + } + + memset(cmd, POISON_INUSE, PAGE_SIZE); *cmd++ = MI_LOAD_REGISTER_MEM; - *cmd++ = RCS_GPR0; + *cmd++ = reg; *cmd++ = offset; - *cmd++ = MI_STORE_REGISTER_MEM; - *cmd++ = RCS_GPR0; - *cmd++ = result; - } - *cmd = MI_BATCH_BUFFER_END; + *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; + *cmd++ = reg; + *cmd++ = vma->node.start + result; + *cmd = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); - intel_gt_chipset_flush(engine->gt); - - vm = i915_gem_context_get_vm_rcu(ctx); - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_vm; + flags = I915_DISPATCH_SECURE; } - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); - if (err) - goto out_vm; - - err = check_scratch(vm, offset); - if (err) - goto err_unpin; + intel_gt_chipset_flush(engine->gt); rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { @@ -1640,7 +1674,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_unpin; } - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); if (err) goto err_request; @@ -1674,7 +1708,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto out_vm; skip_request: - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); err_request: i915_request_add(rq); err_unpin: @@ -1686,6 +1720,39 @@ out: return err; } +static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) +{ + struct i915_address_space *vm; + struct page *page; + u32 *vaddr; + int err = 0; + + vm = ctx_vm(ctx); + if (!vm) + return -ENODEV; + + page = vm->scratch[0].base.page; + if (!page) { + pr_err("No scratch page!\n"); + return -EINVAL; 
+ } + + vaddr = kmap(page); + if (!vaddr) { + pr_err("No (mappable) scratch page!\n"); + return -EINVAL; + } + + memcpy(out, vaddr, sizeof(*out)); + if (memchr_inv(vaddr, *out, PAGE_SIZE)) { + pr_err("Inconsistent initial state of scratch page!\n"); + err = -EINVAL; + } + kunmap(page); + + return err; +} + static int igt_vm_isolation(void *arg) { struct drm_i915_private *i915 = arg; @@ -1696,6 +1763,7 @@ static int igt_vm_isolation(void *arg) I915_RND_STATE(prng); struct file *file; u64 vm_total; + u32 expected; int err; if (INTEL_GEN(i915) < 7) @@ -1730,9 +1798,17 @@ static int igt_vm_isolation(void *arg) if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) goto out_file; + /* Read the initial state of the scratch page */ + err = check_scratch_page(ctx_a, &expected); + if (err) + goto out_file; + + err = check_scratch_page(ctx_b, &expected); + if (err) + goto out_file; + vm_total = ctx_vm(ctx_a)->total; GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); - vm_total -= I915_GTT_PAGE_SIZE; count = 0; num_engines = 0; @@ -1743,14 +1819,18 @@ static int igt_vm_isolation(void *arg) if (!intel_engine_can_store_dword(engine)) continue; + /* Not all engines have their own GPR! */ + if (INTEL_GEN(i915) < 8 && engine->class != RENDER_CLASS) + continue; + while (!__igt_timeout(end_time, NULL)) { u32 value = 0xc5c5c5c5; u64 offset; - div64_u64_rem(i915_prandom_u64_state(&prng), - vm_total, &offset); - offset = round_down(offset, alignof_dword); - offset += I915_GTT_PAGE_SIZE; + /* Leave enough space at offset 0 for the batch */ + offset = igt_random_offset(&prng, + I915_GTT_PAGE_SIZE, vm_total, + sizeof(u32), alignof_dword); err = write_to_scratch(ctx_a, engine, offset, 0xdeadbeef); @@ -1760,7 +1840,7 @@ static int igt_vm_isolation(void *arg) if (err) goto out_file; - if (value) { + if (value != expected) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", engine->name, value, upper_32_bits(offset), diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 62077fe46715..31549ad83fa6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -210,6 +210,7 @@ static int igt_fill_blt_thread(void *arg) struct intel_context *ce; unsigned int prio; IGT_TIMEOUT(end); + u64 total, max; int err; ctx = thread->ctx; @@ -225,27 +226,32 @@ static int igt_fill_blt_thread(void *arg) ce = i915_gem_context_get_engine(ctx, BCS0); GEM_BUG_ON(IS_ERR(ce)); + /* + * If we have a tiny shared address space, like for the GGTT + * then we can't be too greedy. + */ + max = ce->vm->total; + if (i915_is_ggtt(ce->vm) || thread->ctx) + max = div_u64(max, thread->n_cpus); + max >>= 4; + + total = PAGE_SIZE; do { - const u32 max_block_size = S16_MAX * PAGE_SIZE; + /* Aim to keep the runtime under reasonable bounds! */ + const u32 max_phys_size = SZ_64K; u32 val = prandom_u32_state(prng); - u64 total = ce->vm->total; u32 phys_sz; u32 sz; u32 *vaddr; u32 i; - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. 
- */ - if (i915_is_ggtt(ce->vm)) - total = div64_u64(total, thread->n_cpus); - - sz = min_t(u64, total >> 4, prandom_u32_state(prng)); - phys_sz = sz % (max_block_size + 1); + total = min(total, max); + sz = i915_prandom_u32_max_state(total, prng) + 1; + phys_sz = sz % max_phys_size + 1; sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); + phys_sz = min(phys_sz, sz); pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, phys_sz, sz, val); @@ -276,13 +282,14 @@ static int igt_fill_blt_thread(void *arg) if (err) goto err_unpin; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_cpu_domain(obj, false); - i915_gem_object_unlock(obj); + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); if (err) goto err_unpin; - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) { + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); + if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -293,6 +300,8 @@ static int igt_fill_blt_thread(void *arg) i915_gem_object_unpin_map(obj); i915_gem_object_put(obj); + + total <<= 1; } while (!time_after(jiffies, end)); goto err_flush; @@ -319,6 +328,7 @@ static int igt_copy_blt_thread(void *arg) struct intel_context *ce; unsigned int prio; IGT_TIMEOUT(end); + u64 total, max; int err; ctx = thread->ctx; @@ -334,23 +344,32 @@ static int igt_copy_blt_thread(void *arg) ce = i915_gem_context_get_engine(ctx, BCS0); GEM_BUG_ON(IS_ERR(ce)); + /* + * If we have a tiny shared address space, like for the GGTT + * then we can't be too greedy. + */ + max = ce->vm->total; + if (i915_is_ggtt(ce->vm) || thread->ctx) + max = div_u64(max, thread->n_cpus); + max >>= 4; + + total = PAGE_SIZE; do { - const u32 max_block_size = S16_MAX * PAGE_SIZE; + /* Aim to keep the runtime under reasonable bounds! 
*/ + const u32 max_phys_size = SZ_64K; u32 val = prandom_u32_state(prng); - u64 total = ce->vm->total; u32 phys_sz; u32 sz; u32 *vaddr; u32 i; - if (i915_is_ggtt(ce->vm)) - total = div64_u64(total, thread->n_cpus); - - sz = min_t(u64, total >> 4, prandom_u32_state(prng)); - phys_sz = sz % (max_block_size + 1); + total = min(total, max); + sz = i915_prandom_u32_max_state(total, prng) + 1; + phys_sz = sz % max_phys_size + 1; sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); + phys_sz = min(phys_sz, sz); pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, phys_sz, sz, val); @@ -397,13 +416,14 @@ static int igt_copy_blt_thread(void *arg) if (err) goto err_unpin; - i915_gem_object_lock(dst); - err = i915_gem_object_set_to_cpu_domain(dst, false); - i915_gem_object_unlock(dst); + err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT); if (err) goto err_unpin; - for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) { + if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); + if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -416,6 +436,8 @@ static int igt_copy_blt_thread(void *arg) i915_gem_object_put(src); i915_gem_object_put(dst); + + total <<= 1; } while (!time_after(jiffies, end)); goto err_flush; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 6718da20f35d..772d8cba7da9 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -159,7 +159,7 @@ int igt_gpu_fill_dw(struct intel_context *ce, return 0; skip_request: - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); err_request: i915_request_add(rq); err_batch: diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 384143aa7776..e7e3c620f542 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,9 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + spin_lock_init(&ctx->stale.lock); + INIT_LIST_HEAD(&ctx->stale.engines); + i915_gem_context_set_persistence(ctx); mutex_init(&ctx->engines_mutex); @@ -37,7 +40,7 @@ mock_context(struct drm_i915_private *i915, if (name) { struct i915_ppgtt *ppgtt; - strncpy(ctx->name, name, sizeof(ctx->name)); + strncpy(ctx->name, name, sizeof(ctx->name) - 1); ppgtt = mock_ppgtt(i915, name); if (!ppgtt) @@ -83,6 +86,8 @@ live_context(struct drm_i915_private *i915, struct file *file) if (IS_ERR(ctx)) return ctx; + i915_gem_context_set_no_error_capture(ctx); + err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); if (err < 0) goto err_ctx; @@ -105,6 +110,7 @@ kernel_context(struct drm_i915_private *i915) i915_gem_context_clear_bannable(ctx); i915_gem_context_set_persistence(ctx); + i915_gem_context_set_no_error_capture(ctx); return ctx; } |
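A note on the engine-pinning change in i915_gem_execbuffer.c above: __eb_pin_engine() now honours O_NONBLOCK on the DRM file instead of always blocking on the timeline's barrier request. A minimal sketch of that pattern, assuming only the i915_request_wait()/i915_request_put() calls visible in the hunk (the helper name below is hypothetical, not part of the patch):

/* Hypothetical helper: wait for a barrier request, O_NONBLOCK-aware. */
static int wait_for_barrier(struct i915_request *rq, struct file *filp)
{
	const bool nonblock = filp->f_flags & O_NONBLOCK;
	long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT;

	/* i915_request_wait() returns remaining jiffies or a negative errno. */
	timeout = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, timeout);
	i915_request_put(rq);

	if (timeout < 0)
		return nonblock ? -EWOULDBLOCK : timeout;

	return 0;
}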
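On the execbuffer array layout: eb_element_size() now covers one drm_i915_gem_exec_object2 plus one struct eb_vma, and both ioctls allocate count + 1 such elements in a single kvmalloc_array() call, with eb.vma pointing just past the user exec objects. The extra element appears to leave room for the shadow-batch entry that eb_parse() appends at eb->vma[eb->buffer_count]. A rough sketch of the layout (illustrative only, not taken from the patch):

	exec2_list:        exec[0] ... exec[count]      (count + 1 user entries)
	exec + count + 1:  eb.vma[0] ... eb.vma[count]  (driver-side struct eb_vma)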
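The new wa_1209644611_applies() check in i915_gem_object_blt.c gates GEN9_XY_FAST_COPY_BLT_CMD on gen11: the copy height in pages (size >> PAGE_SHIFT) must not be congruent to 3 mod 4 while also being at most 8. Worked example: a 28 KiB block is 7 pages (7 % 4 == 3 and 7 <= 8), so on gen11 that chunk would skip the fast-copy command and fall back to the older blit command emitted by the other branch (not shown in this hunk).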
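The added userptr size checks are worth working through with the common 4 KiB page size: (args->user_size >> PAGE_SHIFT) > INT_MAX means more than 2^31 - 1 pages, i.e. at least 2^31 * 2^12 bytes = 2^43 bytes = 8 TiB, so such a request now fails early with -E2BIG instead of producing a page count that no longer fits the signed 32-bit locals the new comment warns about. The overflows_type() check covers the remaining case where the 64-bit user_size does not fit the size_t-sized obj->base.size (for example on 32-bit kernels).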