From 112ed2d31a46f4704085ad925435b77e62b8abee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 18:48:39 +0100 Subject: drm/i915: Move GraphicsTechnology files under gt/ Start partitioning off the code that talks to the hardware (GT) from the uapi layers and move the device facing code under gt/ One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to subdivide that header and body further (and split out the submission code from the ringbuffer and logical context handling). This patch aims to be simple motion so git can fixup inflight patches with little mess. Signed-off-by: Chris Wilson Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190424174839.7141-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 37f60cb8e9e1..1b6d6403ee92 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -25,8 +25,9 @@ #include #include +#include "gt/intel_lrc_reg.h" + #include "intel_guc_submission.h" -#include "intel_lrc_reg.h" #include "i915_drv.h" #define GUC_PREEMPT_FINISHED 0x1 -- cgit v1.2.3 From 5e2a0419ef7cb25d0f9a5fd6a62372bb47ce948d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 17:33:34 +0100 Subject: drm/i915: Switch back to an array of logical per-engine HW contexts We switched to a tree of per-engine HW context to accommodate the introduction of virtual engines. However, we plan to also support multiple instances of the same engine within the GEM context, defeating our use of the engine as a key to looking up the HW context. Just allocate a logical per-engine instance and always use an index into the ctx->engines[]. Later on, this ctx->engines[] may be replaced by a user specified map. v2: Add for_each_gem_engine() helper to iterator within the engines lock v3: intel_context_create_request() helper v4: s/unsigned long/unsigned int/ 4 billion engines is quite enough. 
v5: Push iterator locking to caller Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190426163336.15906-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.c | 112 ++++------------------ drivers/gpu/drm/i915/gt/intel_context.h | 27 +----- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 - drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/mock_engine.c | 3 +- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 24 ++--- drivers/gpu/drm/i915/i915_gem_context.c | 96 ++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_context.h | 58 +++++++++++ drivers/gpu/drm/i915/i915_gem_context_types.h | 40 +++++++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 70 +++++++------- drivers/gpu/drm/i915/i915_perf.c | 80 +++++++++------- drivers/gpu/drm/i915/i915_request.c | 15 +-- drivers/gpu/drm/i915/intel_guc_submission.c | 22 +++-- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/selftests/mock_context.c | 14 ++- 16 files changed, 328 insertions(+), 241 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 15ac99c5dd4a..5e506e648454 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -17,7 +17,7 @@ static struct i915_global_context { struct kmem_cache *slab_ce; } global; -struct intel_context *intel_context_alloc(void) +static struct intel_context *intel_context_alloc(void) { return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); } @@ -28,104 +28,17 @@ void intel_context_free(struct intel_context *ce) } struct intel_context * -intel_context_lookup(struct i915_gem_context *ctx, +intel_context_create(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = NULL; - struct rb_node *p; - - spin_lock(&ctx->hw_contexts_lock); - p = ctx->hw_contexts.rb_node; - while (p) { - struct intel_context *this = - rb_entry(p, struct intel_context, node); - - if (this->engine == engine) { - GEM_BUG_ON(this->gem_context != ctx); - ce = this; - break; - } - - if (this->engine < engine) - p = p->rb_right; - else - p = p->rb_left; - } - spin_unlock(&ctx->hw_contexts_lock); - - return ce; -} - -struct intel_context * -__intel_context_insert(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context *ce) -{ - struct rb_node **p, *parent; - int err = 0; - - spin_lock(&ctx->hw_contexts_lock); - - parent = NULL; - p = &ctx->hw_contexts.rb_node; - while (*p) { - struct intel_context *this; - - parent = *p; - this = rb_entry(parent, struct intel_context, node); - - if (this->engine == engine) { - err = -EEXIST; - ce = this; - break; - } - - if (this->engine < engine) - p = &parent->rb_right; - else - p = &parent->rb_left; - } - if (!err) { - rb_link_node(&ce->node, parent, p); - rb_insert_color(&ce->node, &ctx->hw_contexts); - } - - spin_unlock(&ctx->hw_contexts_lock); - - return ce; -} - -void __intel_context_remove(struct intel_context *ce) -{ - struct i915_gem_context *ctx = ce->gem_context; - - spin_lock(&ctx->hw_contexts_lock); - rb_erase(&ce->node, &ctx->hw_contexts); - spin_unlock(&ctx->hw_contexts_lock); -} - -struct intel_context * -intel_context_instance(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct intel_context *ce, *pos; - - ce = intel_context_lookup(ctx, engine); - if (likely(ce)) - return 
intel_context_get(ce); + struct intel_context *ce; ce = intel_context_alloc(); if (!ce) return ERR_PTR(-ENOMEM); intel_context_init(ce, ctx, engine); - - pos = __intel_context_insert(ctx, engine, ce); - if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */ - intel_context_free(ce); - - GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos); - return intel_context_get(pos); + return ce; } int __intel_context_do_pin(struct intel_context *ce) @@ -204,6 +117,8 @@ intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + GEM_BUG_ON(!engine->cops); + kref_init(&ce->ref); ce->gem_context = ctx; @@ -254,3 +169,18 @@ void intel_context_exit_engine(struct intel_context *ce) { intel_engine_pm_put(ce->engine); } + +struct i915_request *intel_context_create_request(struct intel_context *ce) +{ + struct i915_request *rq; + int err; + + err = intel_context_pin(ce); + if (unlikely(err)) + return ERR_PTR(err); + + rq = i915_request_create(ce); + intel_context_unpin(ce); + + return rq; +} diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index b746add6b71d..63392c88cd98 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,24 +12,16 @@ #include "intel_context_types.h" #include "intel_engine_types.h" -struct intel_context *intel_context_alloc(void); -void intel_context_free(struct intel_context *ce); - void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine); -/** - * intel_context_lookup - Find the matching HW context for this (ctx, engine) - * @ctx - the parent GEM context - * @engine - the target HW engine - * - * May return NULL if the HW context hasn't been instantiated (i.e. unused). 
- */ struct intel_context * -intel_context_lookup(struct i915_gem_context *ctx, +intel_context_create(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +void intel_context_free(struct intel_context *ce); + /** * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context * @ce - the context @@ -71,17 +63,6 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce) mutex_unlock(&ce->pin_mutex); } -struct intel_context * -__intel_context_insert(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context *ce); -void -__intel_context_remove(struct intel_context *ce); - -struct intel_context * -intel_context_instance(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - int __intel_context_do_pin(struct intel_context *ce); static inline int intel_context_pin(struct intel_context *ce) @@ -144,4 +125,6 @@ static inline void intel_context_timeline_unlock(struct intel_context *ce) mutex_unlock(&ce->ring->timeline->mutex); } +struct i915_request *intel_context_create_request(struct intel_context *ce); + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index f02d27734e3b..3579c2708321 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "i915_active_types.h" @@ -61,7 +60,6 @@ struct intel_context { struct i915_active_request active_tracker; const struct intel_context_ops *ops; - struct rb_node node; /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7682f16fa567..f7308479d511 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -732,7 +732,7 @@ static int pin_context(struct i915_gem_context *ctx, struct intel_context *ce; int err; - ce = intel_context_instance(ctx, engine); + ce = i915_gem_context_get_engine(ctx, engine->id); if (IS_ERR(ce)) return PTR_ERR(ce); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 85cdbfe1d989..2941916b37bf 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -23,6 +23,7 @@ */ #include "i915_drv.h" +#include "i915_gem_context.h" #include "intel_context.h" #include "intel_engine_pm.h" @@ -286,7 +287,7 @@ int mock_engine_init(struct intel_engine_cs *engine) i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); engine->kernel_context = - intel_context_instance(i915->kernel_context, engine); + i915_gem_context_get_engine(i915->kernel_context, engine->id); if (IS_ERR(engine->kernel_context)) goto err_timeline; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index da6b52de5b16..7ae42f2ebfe8 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1183,7 +1183,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&s->workload_q_head[i]); s->shadow[i] = ERR_PTR(-EINVAL); - ce = intel_context_instance(ctx, engine); + ce = i915_gem_context_get_engine(ctx, i); if (IS_ERR(ce)) { ret = PTR_ERR(ce); goto out_shadow_ctx; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 08c66e76d712..4c1793b1012e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ 
b/drivers/gpu/drm/i915/i915_gem.c @@ -4289,8 +4289,9 @@ out: static int __intel_engines_record_defaults(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct i915_gem_engines *e; enum intel_engine_id id; int err = 0; @@ -4307,18 +4308,21 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (IS_ERR(ctx)) return PTR_ERR(ctx); + e = i915_gem_context_lock_engines(ctx); + for_each_engine(engine, i915, id) { + struct intel_context *ce = e->engines[id]; struct i915_request *rq; - rq = i915_request_alloc(engine, ctx); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_ctx; + goto err_active; } err = 0; - if (engine->init_context) - err = engine->init_context(rq); + if (rq->engine->init_context) + err = rq->engine->init_context(rq); i915_request_add(rq); if (err) @@ -4332,15 +4336,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } for_each_engine(engine, i915, id) { - struct intel_context *ce; - struct i915_vma *state; + struct intel_context *ce = e->engines[id]; + struct i915_vma *state = ce->state; void *vaddr; - ce = intel_context_lookup(ctx, engine); - if (!ce) - continue; - - state = ce->state; if (!state) continue; @@ -4396,6 +4395,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } out_ctx: + i915_gem_context_unlock_engines(ctx); i915_gem_context_set_closed(ctx); i915_gem_context_put(ctx); return err; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d9db3fea151c..3ea199ca834b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -150,7 +150,7 @@ lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance) if (!engine) return ERR_PTR(-EINVAL); - return intel_context_instance(ctx, engine); + return i915_gem_context_get_engine(ctx, engine->id); } static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) @@ -242,10 +242,51 @@ static void release_hw_id(struct i915_gem_context *ctx) mutex_unlock(&i915->contexts.mutex); } -static void i915_gem_context_free(struct i915_gem_context *ctx) +static void __free_engines(struct i915_gem_engines *e, unsigned int count) { - struct intel_context *it, *n; + while (count--) { + if (!e->engines[count]) + continue; + + intel_context_put(e->engines[count]); + } + kfree(e); +} + +static void free_engines(struct i915_gem_engines *e) +{ + __free_engines(e, e->num_engines); +} + +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +{ + struct intel_engine_cs *engine; + struct i915_gem_engines *e; + enum intel_engine_id id; + + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + e->i915 = ctx->i915; + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce; + + ce = intel_context_create(ctx, engine); + if (IS_ERR(ce)) { + __free_engines(e, id); + return ERR_CAST(ce); + } + e->engines[id] = ce; + } + e->num_engines = id; + + return e; +} + +static void i915_gem_context_free(struct i915_gem_context *ctx) +{ lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); GEM_BUG_ON(!list_empty(&ctx->active_engines)); @@ -253,8 +294,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) release_hw_id(ctx); i915_ppgtt_put(ctx->ppgtt); - rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node) - 
intel_context_put(it); + free_engines(rcu_access_pointer(ctx->engines)); + mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) i915_timeline_put(ctx->timeline); @@ -363,6 +404,8 @@ static struct i915_gem_context * __create_context(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; + struct i915_gem_engines *e; + int err; int i; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -376,8 +419,13 @@ __create_context(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&ctx->active_engines); mutex_init(&ctx->mutex); - ctx->hw_contexts = RB_ROOT; - spin_lock_init(&ctx->hw_contexts_lock); + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_free; + } + RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -399,6 +447,10 @@ __create_context(struct drm_i915_private *dev_priv) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; return ctx; + +err_free: + kfree(ctx); + return ERR_PTR(err); } static struct i915_hw_ppgtt * @@ -857,7 +909,8 @@ static int context_barrier_task(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; - struct intel_context *ce, *next; + struct i915_gem_engines_iter it; + struct intel_context *ce; int err = 0; lockdep_assert_held(&i915->drm.struct_mutex); @@ -870,20 +923,19 @@ static int context_barrier_task(struct i915_gem_context *ctx, i915_active_init(i915, &cb->base, cb_retire); i915_active_acquire(&cb->base); - rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { - struct intel_engine_cs *engine = ce->engine; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; - if (!(engine->mask & engines)) + if (!(ce->engine->mask & engines)) continue; if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - engine->mask)) { + ce->engine->mask)) { err = -ENXIO; break; } - rq = i915_request_alloc(engine, ctx); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -899,6 +951,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (err) break; } + i915_gem_context_unlock_engines(ctx); cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ cb->data = data; @@ -1729,6 +1782,23 @@ out_unlock: return err; } +/* GEM context-engines iterator: for_each_gem_engine() */ +struct intel_context * +i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) +{ + const struct i915_gem_engines *e = it->engines; + struct intel_context *ctx; + + do { + if (it->idx >= e->num_engines) + return NULL; + + ctx = e->engines[it->idx++]; + } while (!ctx); + + return ctx; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_context.c" #include "selftests/i915_gem_context.c" diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 5a8e080499fb..272e183ebc0c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -176,6 +176,64 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_gem_engines * +i915_gem_context_engines(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->engines, + lockdep_is_held(&ctx->engines_mutex)); +} + +static inline struct i915_gem_engines * +i915_gem_context_lock_engines(struct i915_gem_context *ctx) + __acquires(&ctx->engines_mutex) +{ + mutex_lock(&ctx->engines_mutex); + return i915_gem_context_engines(ctx); +} + +static inline void +i915_gem_context_unlock_engines(struct i915_gem_context *ctx) + __releases(&ctx->engines_mutex) +{ + mutex_unlock(&ctx->engines_mutex); +} + +static inline struct intel_context * +i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) +{ + return i915_gem_context_engines(ctx)->engines[idx]; +} + +static inline struct intel_context * +i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) +{ + struct intel_context *ce = ERR_PTR(-EINVAL); + + rcu_read_lock(); { + struct i915_gem_engines *e = rcu_dereference(ctx->engines); + if (likely(idx < e->num_engines && e->engines[idx])) + ce = intel_context_get(e->engines[idx]); + } rcu_read_unlock(); + + return ce; +} + +static inline void +i915_gem_engines_iter_init(struct i915_gem_engines_iter *it, + struct i915_gem_engines *engines) +{ + GEM_BUG_ON(!engines); + it->engines = engines; + it->idx = 0; +} + +struct intel_context * +i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); + +#define for_each_gem_engine(ce, engines, it) \ + for (i915_gem_engines_iter_init(&(it), (engines)); \ + ((ce) = i915_gem_engines_iter_next(&(it)));) + struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h index d282a6ab3b9f..5f84618cf7db 100644 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -29,6 +29,18 @@ struct i915_hw_ppgtt; struct i915_timeline; struct intel_ring; +struct i915_gem_engines { + struct rcu_work rcu; + struct drm_i915_private *i915; + unsigned int num_engines; + struct intel_context *engines[]; +}; + +struct i915_gem_engines_iter { + unsigned int idx; + const struct i915_gem_engines *engines; +}; + /** * struct i915_gem_context - client state * @@ -42,6 +54,30 @@ struct i915_gem_context { /** file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; + /** + * @engines: User defined engines for this context + * + * Various uAPI offer the ability to lookup up an + * index from this array to select an 
engine operate on. + * + * Multiple logically distinct instances of the same engine + * may be defined in the array, as well as composite virtual + * engines. + * + * Execbuf uses the I915_EXEC_RING_MASK as an index into this + * array to select which HW context + engine to execute on. For + * the default array, the user_ring_map[] is used to translate + * the legacy uABI onto the approprate index (e.g. both + * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same + * context, and I915_EXEC_BSD is weird). For a use defined + * array, execbuf uses I915_EXEC_RING_MASK as a plain index. + * + * User defined by I915_CONTEXT_PARAM_ENGINE (when the + * CONTEXT_USER_ENGINES flag is set). + */ + struct i915_gem_engines __rcu *engines; + struct mutex engines_mutex; /* guards writes to engines */ + struct i915_timeline *timeline; /** @@ -134,10 +170,6 @@ struct i915_gem_context { struct i915_sched_attr sched; - /** hw_contexts: per-engine logical HW state */ - struct rb_root hw_contexts; - spinlock_t hw_contexts_lock; - /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; /** desc_template: invariant fields for the HW context descriptor */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 166a33c0d3ed..679f7c1561ba 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2076,9 +2076,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, return file_priv->bsd_engine; } -#define I915_USER_RINGS (4) - -static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { +static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_DEFAULT] = RCS0, [I915_EXEC_RENDER] = RCS0, [I915_EXEC_BLT] = BCS0, @@ -2086,10 +2084,8 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS0 }; -static int eb_pin_context(struct i915_execbuffer *eb, - struct intel_engine_cs *engine) +static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) { - struct intel_context *ce; int err; /* @@ -2100,21 +2096,16 @@ static int eb_pin_context(struct i915_execbuffer *eb, if (err) return err; - ce = intel_context_instance(eb->gem_context, engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for * ourselves. 
*/ err = intel_context_pin(ce); - intel_context_put(ce); if (err) return err; - eb->engine = engine; + eb->engine = ce->engine; eb->context = ce; return 0; } @@ -2124,25 +2115,19 @@ static void eb_unpin_context(struct i915_execbuffer *eb) intel_context_unpin(eb->context); } -static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +static unsigned int +eb_select_legacy_ring(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { struct drm_i915_private *i915 = eb->i915; unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; - struct intel_engine_cs *engine; - - if (user_ring_id > I915_USER_RINGS) { - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -EINVAL; - } - if ((user_ring_id != I915_EXEC_BSD) && - ((args->flags & I915_EXEC_BSD_MASK) != 0)) { + if (user_ring_id != I915_EXEC_BSD && + (args->flags & I915_EXEC_BSD_MASK)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return -EINVAL; + return -1; } if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { @@ -2157,20 +2142,39 @@ eb_select_engine(struct i915_execbuffer *eb, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return -EINVAL; + return -1; } - engine = i915->engine[_VCS(bsd_idx)]; - } else { - engine = i915->engine[user_ring_map[user_ring_id]]; + return _VCS(bsd_idx); } - if (!engine) { - DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return -EINVAL; + if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { + DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); + return -1; } - return eb_pin_context(eb, engine); + return user_ring_map[user_ring_id]; +} + +static int +eb_select_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct intel_context *ce; + unsigned int idx; + int err; + + idx = eb_select_legacy_ring(eb, file, args); + + ce = i915_gem_context_get_engine(eb->gem_context, idx); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = eb_pin_context(eb, ce); + intel_context_put(ce); + + return err; } static void diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index afaeabe5e531..c4995d5a16d2 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1203,35 +1203,35 @@ static int i915_oa_read(struct i915_perf_stream *stream, static struct intel_context *oa_pin_context(struct drm_i915_private *i915, struct i915_gem_context *ctx) { - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct i915_gem_engines_iter it; struct intel_context *ce; int err; - ce = intel_context_instance(ctx, engine); - if (IS_ERR(ce)) - return ce; - err = i915_mutex_lock_interruptible(&i915->drm); - if (err) { - intel_context_put(ce); + if (err) return ERR_PTR(err); + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (ce->engine->class != RENDER_CLASS) + continue; + + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. + */ + err = intel_context_pin(ce); + if (err == 0) { + i915->perf.oa.pinned_ctx = ce; + break; + } } + i915_gem_context_unlock_engines(ctx); - /* - * As the ID is the gtt offset of the context's vma we - * pin the vma to ensure the ID remains fixed. - * - * NB: implied RCS engine... 
- */ - err = intel_context_pin(ce); mutex_unlock(&i915->drm.struct_mutex); - intel_context_put(ce); if (err) return ERR_PTR(err); - i915->perf.oa.pinned_ctx = ce; - - return ce; + return i915->perf.oa.pinned_ctx; } /** @@ -1717,7 +1717,6 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { - struct intel_engine_cs *engine = dev_priv->engine[RCS0]; unsigned int map_type = i915_coherent_map_type(dev_priv); struct i915_gem_context *ctx; struct i915_request *rq; @@ -1746,30 +1745,43 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = intel_context_lookup(ctx, engine); - u32 *regs; - - /* OA settings will be set upon first use */ - if (!ce || !ce->state) - continue; - - regs = i915_gem_object_pin_map(ce->state->obj, map_type); - if (IS_ERR(regs)) - return PTR_ERR(regs); + struct i915_gem_engines_iter it; + struct intel_context *ce; + + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), + it) { + u32 *regs; + + if (ce->engine->class != RENDER_CLASS) + continue; + + /* OA settings will be set upon first use */ + if (!ce->state) + continue; + + regs = i915_gem_object_pin_map(ce->state->obj, + map_type); + if (IS_ERR(regs)) { + i915_gem_context_unlock_engines(ctx); + return PTR_ERR(regs); + } - ce->state->obj->mm.dirty = true; - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); + ce->state->obj->mm.dirty = true; + regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); - gen8_update_reg_state_unlocked(ce, regs, oa_config); + gen8_update_reg_state_unlocked(ce, regs, oa_config); - i915_gem_object_unpin_map(ce->state->obj); + i915_gem_object_unpin_map(ce->state->obj); + } + i915_gem_context_unlock_engines(ctx); } /* * Apply the configuration by doing one context restore of the edited * context image. */ - rq = i915_request_create(engine->kernel_context); + rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1a03ebcaf52e..7638a5e5ec9e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -785,7 +785,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) struct drm_i915_private *i915 = engine->i915; struct intel_context *ce; struct i915_request *rq; - int err; /* * Preempt contexts are reserved for exclusive use to inject a @@ -799,21 +798,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. 
*/ - ce = intel_context_instance(ctx, engine); + ce = i915_gem_context_get_engine(ctx, engine->id); if (IS_ERR(ce)) return ERR_CAST(ce); - err = intel_context_pin(ce); - if (err) { - rq = ERR_PTR(err); - goto err_put; - } - - rq = i915_request_create(ce); - intel_context_unpin(ce); - -err_put: + rq = intel_context_create_request(ce); intel_context_put(ce); + return rq; } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 1b6d6403ee92..4c814344809c 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -364,11 +364,10 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) static void guc_stage_desc_init(struct intel_guc_client *client) { struct intel_guc *guc = client->guc; - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; struct i915_gem_context *ctx = client->owner; + struct i915_gem_engines_iter it; struct guc_stage_desc *desc; - unsigned int tmp; + struct intel_context *ce; u32 gfx_addr; desc = __get_stage_desc(client); @@ -382,10 +381,11 @@ static void guc_stage_desc_init(struct intel_guc_client *client) desc->priority = client->priority; desc->db_id = client->doorbell_id; - for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = intel_context_lookup(ctx, engine); - u32 guc_engine_id = engine->guc_id; - struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + struct guc_execlist_context *lrc; + + if (!(ce->engine->mask & client->engines)) + continue; /* TODO: We have a design issue to be solved here. Only when we * receive the first batch, we know which engine is used by the @@ -394,7 +394,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * for now who owns a GuC client. But for future owner of GuC * client, need to make sure lrc is pinned prior to enter here. */ - if (!ce || !ce->state) + if (!ce->state) break; /* XXX: continue? */ /* @@ -404,6 +404,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * Instead, the GuC uses the LRCA of the user mode context (see * guc_add_request below). */ + lrc = &desc->lrc[ce->engine->guc_id]; lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ @@ -414,15 +415,16 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * here. 
In proxy submission, it wants the stage id */ lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | - (guc_engine_id << GUC_ELC_ENGINE_OFFSET); + (ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET); lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma); lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; - desc->engines_used |= (1 << guc_engine_id); + desc->engines_used |= BIT(ce->engine->guc_id); } + i915_gem_context_unlock_engines(ctx); DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", client->engines, desc->engines_used); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 214d1fd2f4dc..7fd224a4ca4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1094,7 +1094,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(i915); - ce = intel_context_instance(ctx, i915->engine[RCS0]); + ce = i915_gem_context_get_engine(ctx, RCS0); if (IS_ERR(ce)) { ret = PTR_ERR(ce); goto out_rpm; diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 0426093bf1d9..71c750693585 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915, const char *name) { struct i915_gem_context *ctx; + struct i915_gem_engines *e; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -40,8 +41,11 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; - ctx->hw_contexts = RB_ROOT; - spin_lock_init(&ctx->hw_contexts_lock); + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) + goto err_free; + RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -51,7 +55,7 @@ mock_context(struct drm_i915_private *i915, ret = i915_gem_context_pin_hw_id(ctx); if (ret < 0) - goto err_handles; + goto err_engines; if (name) { struct i915_hw_ppgtt *ppgtt; @@ -69,7 +73,9 @@ mock_context(struct drm_i915_private *i915, return ctx; -err_handles: +err_engines: + free_engines(rcu_access_pointer(ctx->engines)); +err_free: kfree(ctx); return NULL; -- cgit v1.2.3 From c34c5bca331adc6dcdff81577383df463b4c475c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 May 2019 09:09:42 +0100 Subject: drm/i915/execlists: Flush the tasklet on parking Tidy up the cleanup sequence by always ensuring that the tasklet is flushed on parking (before we clean up). The parking provides a convenient point to ensure that the backend is truly idle. v2: Do the full check for idleness before parking, to be sure we flush any residual interrupt.
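For reference, the flush that intel_engine_is_idle() performs before parking
boils down to the idiom below. This is a simplified sketch of the driver code
in the diff that follows, not a verbatim copy; it assumes the engine and
tasklet fields shown there and the func/data tasklet API of this era:

	struct tasklet_struct *t = &engine->execlists.tasklet;

	/* Quiesce the interrupt handler so it can no longer schedule the
	 * tasklet behind our back... */
	synchronize_hardirq(engine->i915->drm.irq);

	local_bh_disable();
	if (tasklet_trylock(t)) {
		/* ...then run any already-pending instance inline. */
		if (!atomic_read(&t->count)) /* tasklet still enabled? */
			t->func(t->data);
		tasklet_unlock(t);
	}
	local_bh_enable();

With the interrupt synchronised and the tasklet drained, the idleness check
made by intel_engine_park() below is meaningful.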
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190503080942.30151-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 27 ++++++++++++++++++++++----- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++++--------- drivers/gpu/drm/i915/intel_guc_submission.c | 2 ++ 5 files changed, 35 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e63bf5032a1d..5907a9613641 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1094,6 +1094,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (READ_ONCE(engine->execlists.active)) { struct tasklet_struct *t = &engine->execlists.tasklet; + synchronize_hardirq(engine->i915->drm.irq); + local_bh_disable(); if (tasklet_trylock(t)) { /* Must wait for any GPU reset in progress. */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 3976aea3c1d1..ccf034764741 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -10,7 +10,7 @@ #include "intel_engine_pm.h" #include "intel_gt_pm.h" -static int intel_engine_unpark(struct intel_wakeref *wf) +static int __engine_unpark(struct intel_wakeref *wf) { struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); @@ -37,7 +37,24 @@ static int intel_engine_unpark(struct intel_wakeref *wf) void intel_engine_pm_get(struct intel_engine_cs *engine) { - intel_wakeref_get(engine->i915, &engine->wakeref, intel_engine_unpark); + intel_wakeref_get(engine->i915, &engine->wakeref, __engine_unpark); +} + +void intel_engine_park(struct intel_engine_cs *engine) +{ + /* + * We are committed now to parking this engine, make sure there + * will be no more interrupts arriving later and the engine + * is truly idle. + */ + if (wait_for(intel_engine_is_idle(engine), 10)) { + struct drm_printer p = drm_debug_printer(__func__); + + dev_err(engine->i915->drm.dev, + "%s is not idle before parking\n", + engine->name); + intel_engine_dump(engine, &p, NULL); + } } static bool switch_to_kernel_context(struct intel_engine_cs *engine) @@ -56,7 +73,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * Note, we do this without taking the timeline->mutex. We cannot * as we may be called while retiring the kernel context and so * already underneath the timeline->mutex. Instead we rely on the - * exclusive property of the intel_engine_park that prevents anyone + * exclusive property of the __engine_park that prevents anyone * else from creating a request on this engine. This also requires * that the ring is empty and we avoid any waits while constructing * the context, as they assume protection by the timeline->mutex. 
@@ -76,7 +93,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) return false; } -static int intel_engine_park(struct intel_wakeref *wf) +static int __engine_park(struct intel_wakeref *wf) { struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); @@ -114,7 +131,7 @@ static int intel_engine_park(struct intel_wakeref *wf) void intel_engine_pm_put(struct intel_engine_cs *engine) { - intel_wakeref_put(engine->i915, &engine->wakeref, intel_engine_park); + intel_wakeref_put(engine->i915, &engine->wakeref, __engine_park); } void intel_engine_init__pm(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 143ac90ba117..b326cd993d60 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -13,6 +13,8 @@ struct intel_engine_cs; void intel_engine_pm_get(struct intel_engine_cs *engine); void intel_engine_pm_put(struct intel_engine_cs *engine); +void intel_engine_park(struct intel_engine_cs *engine); + void intel_engine_init__pm(struct intel_engine_cs *engine); int intel_engines_resume(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 851e62ddcb87..7d69d07490e8 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -136,6 +136,7 @@ #include "i915_drv.h" #include "i915_gem_render_state.h" #include "i915_vgpu.h" +#include "intel_engine_pm.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -2331,6 +2332,11 @@ static int gen8_init_rcs_context(struct i915_request *rq) return i915_gem_render_state_emit(rq); } +static void execlists_park(struct intel_engine_cs *engine) +{ + intel_engine_park(engine); +} + void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; @@ -2342,7 +2348,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->reset.reset = execlists_reset; engine->reset.finish = execlists_reset_finish; - engine->park = NULL; + engine->park = execlists_park; engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; @@ -2355,14 +2361,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) static void execlists_destroy(struct intel_engine_cs *engine) { - /* - * Tasklet cannot be active at this point due intel_mark_active/idle - * so this is just for documentation. 
- */ - if (GEM_DEBUG_WARN_ON(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state))) - tasklet_kill(&engine->execlists.tasklet); - intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); kfree(engine); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4c814344809c..57ed1dd4ae41 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -25,6 +25,7 @@ #include #include +#include "gt/intel_engine_pm.h" #include "gt/intel_lrc_reg.h" #include "intel_guc_submission.h" @@ -1363,6 +1364,7 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) static void guc_submission_park(struct intel_engine_cs *engine) { + intel_engine_park(engine); intel_engine_unpin_breadcrumbs_irq(engine); engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; } -- cgit v1.2.3 From 25d851adbf4c4faef5b1acbe81a92720d1bd173e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 May 2019 13:25:44 +0100 Subject: drm/i915: Only reschedule the submission tasklet if preemption is possible If we couple the scheduler more tightly with the execlists policy, we can apply the preemption policy to the question of whether we need to kick the tasklet at all for this priority bump. v2: Rephrase it as a core i915 policy and not an execlists foible. v3: Pull the kick together. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190507122544.12698-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine.h | 18 --------------- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 7 +++++- drivers/gpu/drm/i915/i915_request.c | 2 -- drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++------------- drivers/gpu/drm/i915/i915_scheduler.h | 18 +++++++++++++++ drivers/gpu/drm/i915/intel_guc_submission.c | 3 ++- 7 files changed, 47 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index f5b0f27cecb6..06d785533502 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); -static inline bool __execlists_need_preempt(int prio, int last) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. - */ - return prio > max(I915_PRIORITY_NORMAL - 1, last); -} - static inline void execlists_set_active(struct intel_engine_execlists *execlists, unsigned int bit) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 5580b6f1aa0c..636df21983dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -252,8 +252,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * ourselves, ignore the request. 
*/ last_prio = effective_prio(rq); - if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) + if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint, + last_prio)) return false; /* diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 84538f69185b..4b042893dc0e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) GEM_BUG_ON(i915_request_completed(rq)); i915_sw_fence_init(&rq->submit, dummy_notify); - i915_sw_fence_commit(&rq->submit); + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); return rq; } static void dummy_request_free(struct i915_request *dummy) { + /* We have to fake the CS interrupt to kick the next request */ + i915_sw_fence_commit(&dummy->submit); + i915_request_mark_complete(dummy); + dma_fence_signal(&dummy->fence); + i915_sched_node_fini(&dummy->sched); i915_sw_fence_fini(&dummy->submit); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e0be00c07c24..fa955b7b6def 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1415,9 +1415,7 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) gen6_rps_boost(rq); - local_bh_disable(); /* suspend tasklets for reprioritisation */ i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); - local_bh_enable(); /* kick tasklets en masse */ } wait.tsk = current; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 39bc4f54e272..ec22c3fe7360 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -261,16 +261,27 @@ sched_lock_engine(const struct i915_sched_node *node, return engine; } -static bool inflight(const struct i915_request *rq, - const struct intel_engine_cs *engine) +static inline int rq_prio(const struct i915_request *rq) { - const struct i915_request *active; + return rq->sched.attr.priority | __NO_PREEMPTION; +} + +static void kick_submission(struct intel_engine_cs *engine, int prio) +{ + const struct i915_request *inflight = + port_request(engine->execlists.port); - if (!i915_request_is_active(rq)) - return false; + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves, or if + * we expect nothing to change as a result of running the + * tasklet, i.e. we have not change the priority queue + * sufficiently to oust the running context. + */ + if (inflight && !i915_scheduler_need_preempt(prio, rq_prio(inflight))) + return; - active = port_request(engine->execlists.port); - return active->hw_context == rq->hw_context; + tasklet_hi_schedule(&engine->execlists.tasklet); } static void __i915_schedule(struct i915_request *rq, @@ -396,15 +407,8 @@ static void __i915_schedule(struct i915_request *rq, engine->execlists.queue_priority_hint = prio; - /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. - */ - if (inflight(node_to_request(node), engine)) - continue; - /* Defer (tasklet) submission until after all of our updates. 
*/ - tasklet_hi_schedule(&engine->execlists.tasklet); + kick_submission(engine, prio); } spin_unlock(&engine->timeline.lock); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 07d243acf553..7eefccff39bf 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -52,4 +52,22 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +static inline bool i915_scheduler_need_preempt(int prio, int active) +{ + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. + */ + return prio > max(I915_PRIORITY_NORMAL - 1, active); +} + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 57ed1dd4ae41..380d83a2bfb6 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -747,7 +747,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) &engine->i915->guc.preempt_work[engine->id]; int prio = execlists->queue_priority_hint; - if (__execlists_need_preempt(prio, port_prio(port))) { + if (i915_scheduler_need_preempt(prio, + port_prio(port))) { execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); queue_work(engine->i915->guc.preempt_wq, -- cgit v1.2.3 From 0152b3b3f49b36b0f1a1bf9f0353dc636f41d8f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 May 2019 12:24:52 +0100 Subject: drm/i915: Seal races between async GPU cancellation, retirement and signaling Currently there is an underlying assumption that i915_request_unsubmit() is synchronous wrt the GPU -- that is the request is no longer in flight as we remove it. In the near future that may change, and this may upset our signaling as we can process an interrupt for that request while it is no longer in flight. CPU0 CPU1 intel_engine_breadcrumbs_irq (queue request completion) i915_request_cancel_signaling ... ... i915_request_enable_signaling dma_fence_signal Hence in the time it took us to drop the lock to signal the request, a preemption event may have occurred and re-queued the request. In the process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and so reused the rq->signal_link that was in use on CPU0, leading to bad pointer chasing in intel_engine_breadcrumbs_irq. A related issue was that if someone started listening for a signal on a completed but no longer in-flight request, we missed the opportunity to immediately signal that request. Furthermore, as intel_contexts may be immediately released during request retirement, in order to be entirely sure that intel_engine_breadcrumbs_irq may no longer dereference the intel_context (ce->signals and ce->signal_link), we must wait for irq spinlock. In order to prevent the race, we use a bit in the fence.flags to signal the transfer onto the signal list inside intel_engine_breadcrumbs_irq. For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then quickly signals to any outside observer that the fence is indeed signaled. 
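Reduced to its essence, the single-shot delivery works by atomically claiming
the signaled bit. A sketch of the idiom, using the __dma_fence_signal*()
helpers added in this patch (it assumes interrupts are already disabled, as
they are in the breadcrumbs irq handler):

	if (__dma_fence_signal(&rq->fence)) {
		/* We won the test_and_set_bit() race inside
		 * __dma_fence_signal(): nobody else may deliver it. */
		__dma_fence_signal__timestamp(&rq->fence, ktime_get());

		spin_lock(&rq->lock);
		/* Invoke and detach each dma_fence_cb exactly once. */
		__dma_fence_signal__notify(&rq->fence);
		spin_unlock(&rq->lock);
	}

A concurrent path that loses the test_and_set_bit() simply backs off, so a
request re-queued by preemption can no longer double-use rq->signal_link.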
v2: Sketch out potential dma-fence API for manual signaling v3: And the test_and_set_bit() Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190508112452.18942-1-chris@chris-wilson.co.uk --- drivers/dma-buf/dma-fence.c | 1 + drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 78 +++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/i915/intel_guc_submission.c | 1 - 4 files changed, 59 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 3aa8733f832a..9bf06042619a 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -29,6 +29,7 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); static DEFINE_SPINLOCK(dma_fence_stub_lock); static struct dma_fence dma_fence_stub; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index fe455f01aa65..c092bdf5f0bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -23,6 +23,7 @@ */ #include +#include #include #include "i915_drv.h" @@ -96,9 +97,39 @@ check_signal_order(struct intel_context *ce, struct i915_request *rq) return true; } +static bool +__dma_fence_signal(struct dma_fence *fence) +{ + return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); +} + +static void +__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) +{ + fence->timestamp = timestamp; + set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); + trace_dma_fence_signaled(fence); +} + +static void +__dma_fence_signal__notify(struct dma_fence *fence) +{ + struct dma_fence_cb *cur, *tmp; + + lockdep_assert_held(fence->lock); + lockdep_assert_irqs_disabled(); + + list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + INIT_LIST_HEAD(&cur->node); + cur->func(fence, cur); + } + INIT_LIST_HEAD(&fence->cb_list); +} + void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; LIST_HEAD(signal); @@ -122,6 +153,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + if (!__dma_fence_signal(&rq->fence)) + continue; /* * Queue for execution after dropping the signaling @@ -129,14 +164,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) * more signalers to the same context or engine. */ i915_request_get(rq); - - /* - * We may race with direct invocation of - * dma_fence_signal(), e.g. i915_request_retire(), - * so we need to acquire our reference to the request - * before we cancel the breadcrumb. 
- */ - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); list_add_tail(&rq->signal_link, &signal); } @@ -159,7 +186,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); - dma_fence_signal(&rq->fence); + __dma_fence_signal__timestamp(&rq->fence, timestamp); + + spin_lock(&rq->lock); + __dma_fence_signal__notify(&rq->fence); + spin_unlock(&rq->lock); + i915_request_put(rq); } } @@ -261,19 +293,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) bool i915_request_enable_breadcrumb(struct i915_request *rq) { - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + lockdep_assert_held(&rq->lock); + lockdep_assert_irqs_disabled(); - if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) - return true; - - spin_lock(&b->irq_lock); - if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && - !__request_completed(rq)) { + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; struct intel_context *ce = rq->hw_context; struct list_head *pos; + spin_lock(&b->irq_lock); + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + __intel_breadcrumbs_arm_irq(b); /* @@ -303,8 +333,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) GEM_BUG_ON(!check_signal_order(ce, rq)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + spin_unlock(&b->irq_lock); } - spin_unlock(&b->irq_lock); return !__request_completed(rq); } @@ -313,9 +343,15 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) - return; + lockdep_assert_held(&rq->lock); + lockdep_assert_irqs_disabled(); + /* + * We must wait for b->irq_lock so that we know the interrupt handler + * has released its reference to the intel_context and has completed + * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if + * required). + */ spin_lock(&b->irq_lock); if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { struct intel_context *ce = rq->hw_context; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index fa955b7b6def..bed213148cbb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -437,6 +437,7 @@ void __i915_request_submit(struct i915_request *request) set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) intel_engine_queue_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 380d83a2bfb6..ea0e3734d37c 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -23,7 +23,6 @@ */ #include -#include #include "gt/intel_engine_pm.h" #include "gt/intel_lrc_reg.h" -- cgit v1.2.3 From 78577e294bf2b013470d2f9e1e5b5f531440683b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 22 May 2019 19:31:58 +0000 Subject: drm/i915/guc: Rename intel_guc_is_alive to intel_guc_is_loaded This function just check our software flag, while 'is_alive' may suggest that we are checking runtime firmware status. 
Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190522193203.23932-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_guc.h | 10 +++++----- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 2494e84831a2..d4b015ab8a36 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -96,11 +96,6 @@ struct intel_guc { void (*notify)(struct intel_guc *guc); }; -static inline bool intel_guc_is_alive(struct intel_guc *guc) -{ - return intel_uc_fw_is_loaded(&guc->fw); -} - static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { @@ -176,6 +171,11 @@ u32 intel_guc_reserved_gtt_size(struct intel_guc *guc); int intel_guc_reserve_ggtt_top(struct intel_guc *guc); void intel_guc_release_ggtt_top(struct intel_guc *guc); +static inline bool intel_guc_is_loaded(struct intel_guc *guc) +{ + return intel_uc_fw_is_loaded(&guc->fw); +} + static inline int intel_guc_sanitize(struct intel_guc *guc) { intel_uc_fw_sanitize(&guc->fw); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ea0e3734d37c..987ff586d7f9 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1199,7 +1199,7 @@ static void __guc_client_disable(struct intel_guc_client *client) * the case, instead of trying (in vain) to communicate with it, let's * just cleanup the doorbell HW and our internal state. */ - if (intel_guc_is_alive(client->guc)) + if (intel_guc_is_loaded(client->guc)) destroy_doorbell(client); else __fini_doorbell(client); -- cgit v1.2.3 From e958cc908792d0a433c9abf9bf47e97648e4c204 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 27 May 2019 18:36:01 +0000 Subject: drm/i915/guc: Always ask GuC to update power domain states With newer GuC firmware it is always ok to ask GuC to update power domain states. Make it an unconditional initialization step. 
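For illustration, a compact sketch of the init shape this change settles on (all names hypothetical): every step runs unconditionally in sequence, and failures funnel through a single unwind label instead of one step hiding behind a submission-mode check.

#include <stdbool.h>

struct uc { bool uses_submission; };

static int enable_communication(struct uc *uc)   { (void)uc; return 0; }
static int sample_forcewake(struct uc *uc)       { (void)uc; return 0; }
static int submission_enable(struct uc *uc)      { (void)uc; return 0; }
static void disable_communication(struct uc *uc) { (void)uc; }

static int uc_init_hw(struct uc *uc)
{
        int ret;

        ret = enable_communication(uc);
        if (ret)
                return ret;

        ret = sample_forcewake(uc);             /* now always performed */
        if (ret)
                goto err_communication;

        if (uc->uses_submission) {
                ret = submission_enable(uc);
                if (ret)
                        goto err_communication;
        }

        return 0;

err_communication:
        disable_communication(uc);
        return ret;
}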
Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: John Spotswood Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: John Spotswood Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190527183613.17076-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_guc_submission.c | 4 ---- drivers/gpu/drm/i915/intel_uc.c | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 987ff586d7f9..ffdab22db2b0 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1426,10 +1426,6 @@ int intel_guc_submission_enable(struct intel_guc *guc) GEM_BUG_ON(!guc->execbuf_client); - err = intel_guc_sample_forcewake(guc); - if (err) - return err; - err = guc_clients_enable(guc); if (err) return err; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 082036164c0c..3eb4f4320667 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -439,14 +439,14 @@ int intel_uc_init_hw(struct drm_i915_private *i915) goto err_communication; } + ret = intel_guc_sample_forcewake(guc); + if (ret) + goto err_communication; + if (USES_GUC_SUBMISSION(i915)) { ret = intel_guc_submission_enable(guc); if (ret) goto err_communication; - } else if (INTEL_GEN(i915) < 11) { - ret = intel_guc_sample_forcewake(guc); - if (ret) - goto err_communication; } dev_info(i915->drm.dev, "GuC firmware version %u.%u\n", -- cgit v1.2.3 From 10be98a77c558f8cfb823cd2777171fbb35040f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 May 2019 10:29:49 +0100 Subject: drm/i915: Move more GEM objects under gem/ Continuing the theme of separating out the GEM clutter. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190528092956.14910-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 26 +- drivers/gpu/drm/i915/Makefile.header-test | 2 - drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 160 ++ drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 20 + drivers/gpu/drm/i915/gem/i915_gem_context.c | 2453 +++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_context.h | 240 ++ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 208 ++ drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 318 +++ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2768 +++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_internal.c | 197 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 251 ++ drivers/gpu/drm/i915/gem/i915_gem_pm.h | 25 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 555 ++++ drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 704 +++++ drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 440 +++ drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 832 ++++++ drivers/gpu/drm/i915/gem/i915_gemfs.c | 57 + drivers/gpu/drm/i915/gem/i915_gemfs.h | 16 + .../gpu/drm/i915/gem/selftests/huge_gem_object.c | 121 + .../gpu/drm/i915/gem/selftests/huge_gem_object.h | 27 + drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 1780 +++++++++++++ .../drm/i915/gem/selftests/i915_gem_coherency.c | 379 +++ .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 1736 ++++++++++++ .../gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 386 +++ drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 +- .../gpu/drm/i915/gem/selftests/i915_gem_object.c | 99 + drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 34 + drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h | 17 + drivers/gpu/drm/i915/gem/selftests/mock_context.c | 111 + drivers/gpu/drm/i915/gem/selftests/mock_context.h | 24 + drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c | 144 + drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h | 22 + .../gpu/drm/i915/gem/selftests/mock_gem_object.h | 14 + drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 + drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + drivers/gpu/drm/i915/gt/intel_lrc.h | 14 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 + drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 3 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + drivers/gpu/drm/i915/gt/mock_engine.c | 3 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 7 +- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 6 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 1 + drivers/gpu/drm/i915/gvt/scheduler.c | 5 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 11 +- drivers/gpu/drm/i915/i915_gem_clflush.c | 178 -- drivers/gpu/drm/i915/i915_gem_clflush.h | 36 - drivers/gpu/drm/i915/i915_gem_context.c | 2474 ----------------- drivers/gpu/drm/i915/i915_gem_context.h | 258 -- drivers/gpu/drm/i915/i915_gem_context_types.h | 208 -- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 337 --- drivers/gpu/drm/i915/i915_gem_evict.c | 2 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2788 -------------------- drivers/gpu/drm/i915/i915_gem_internal.c | 207 -- drivers/gpu/drm/i915/i915_gem_pm.c | 251 -- drivers/gpu/drm/i915/i915_gem_pm.h | 25 - drivers/gpu/drm/i915/i915_gem_shrinker.c | 574 ---- drivers/gpu/drm/i915/i915_gem_stolen.c | 721 ----- drivers/gpu/drm/i915/i915_gem_tiling.c | 460 ---- 
drivers/gpu/drm/i915/i915_gem_userptr.c | 851 ------ drivers/gpu/drm/i915/i915_gemfs.c | 75 - drivers/gpu/drm/i915/i915_gemfs.h | 34 - drivers/gpu/drm/i915/i915_globals.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 + drivers/gpu/drm/i915/i915_perf.c | 2 + drivers/gpu/drm/i915/i915_request.c | 3 + drivers/gpu/drm/i915/intel_display.c | 1 - drivers/gpu/drm/i915/intel_guc_submission.c | 2 + drivers/gpu/drm/i915/intel_overlay.c | 2 + drivers/gpu/drm/i915/selftests/huge_gem_object.c | 139 - drivers/gpu/drm/i915/selftests/huge_gem_object.h | 45 - drivers/gpu/drm/i915/selftests/huge_pages.c | 1793 ------------- drivers/gpu/drm/i915/selftests/i915_active.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 8 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 397 --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 1752 ------------ drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 404 --- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 5 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 117 - drivers/gpu/drm/i915/selftests/i915_request.c | 6 +- drivers/gpu/drm/i915/selftests/i915_timeline.c | 4 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 5 +- drivers/gpu/drm/i915/selftests/igt_flush_test.c | 6 +- drivers/gpu/drm/i915/selftests/igt_gem_utils.c | 34 - drivers/gpu/drm/i915/selftests/igt_gem_utils.h | 17 - drivers/gpu/drm/i915/selftests/igt_spinner.c | 3 +- drivers/gpu/drm/i915/selftests/igt_spinner.h | 9 +- drivers/gpu/drm/i915/selftests/intel_guc.c | 3 +- drivers/gpu/drm/i915/selftests/mock_context.c | 129 - drivers/gpu/drm/i915/selftests/mock_context.h | 42 - drivers/gpu/drm/i915/selftests/mock_dmabuf.c | 162 -- drivers/gpu/drm/i915/selftests/mock_dmabuf.h | 41 - drivers/gpu/drm/i915/selftests/mock_gem_device.c | 5 +- drivers/gpu/drm/i915/selftests/mock_gem_object.h | 9 - drivers/gpu/drm/i915/selftests/mock_request.c | 2 +- 102 files changed, 14267 insertions(+), 14626 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_context.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_context.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_context_types.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_internal.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pm.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pm.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_stolen.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_tiling.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_userptr.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_pages.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c create 
mode 100644 drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_context.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_context.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_context.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_context.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_context_types.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_dmabuf.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_execbuffer.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_pm.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_pm.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_shrinker.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_stolen.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_tiling.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c delete mode 100644 drivers/gpu/drm/i915/i915_gemfs.c delete mode 100644 drivers/gpu/drm/i915/i915_gemfs.h delete mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.c delete mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.h delete mode 100644 drivers/gpu/drm/i915/selftests/huge_pages.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_coherency.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_context.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_object.c delete mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.c delete mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.h delete mode 100644 drivers/gpu/drm/i915/selftests/mock_context.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_context.h delete mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.h delete mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_object.h (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5ffd7e9b19ad..3f3d378f467d 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -87,33 +87,33 @@ i915-y += $(gt-y) # GEM (Graphics Execution Management) code obj-y += gem/ gem-y += \ + gem/i915_gem_clflush.o \ + gem/i915_gem_context.o \ + gem/i915_gem_dmabuf.o \ gem/i915_gem_domain.o \ + gem/i915_gem_execbuffer.o \ + gem/i915_gem_internal.o \ gem/i915_gem_object.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ - gem/i915_gem_shmem.o + gem/i915_gem_pm.o \ + gem/i915_gem_shmem.o \ + gem/i915_gem_shrinker.o \ + gem/i915_gem_stolen.o \ + gem/i915_gem_tiling.o \ + gem/i915_gem_userptr.o \ + gem/i915_gemfs.o i915-y += \ $(gem-y) \ i915_active.o \ i915_cmd_parser.o \ i915_gem_batch_pool.o \ - i915_gem_clflush.o \ - i915_gem_context.o \ - i915_gem_dmabuf.o \ i915_gem_evict.o \ - i915_gem_execbuffer.o \ i915_gem_fence_reg.o \ i915_gem_gtt.o \ - i915_gem_internal.o \ i915_gem.o \ - i915_gem_pm.o \ i915_gem_render_state.o \ - i915_gem_shrinker.o \ - i915_gem_stolen.o \ - i915_gem_tiling.o \ - i915_gem_userptr.o \ - i915_gemfs.o \ i915_globals.o \ i915_query.o \ i915_request.o \ @@ 
-199,10 +199,10 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ + gem/selftests/igt_gem_utils.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_flush_test.o \ - selftests/igt_gem_utils.o \ selftests/igt_live_test.o \ selftests/igt_reset.o \ selftests/igt_spinner.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 3a9663002d4a..e01cd91dc1c8 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -6,8 +6,6 @@ header_test := \ i915_active_types.h \ i915_debugfs.h \ i915_drv.h \ - i915_gem_context_types.h \ - i915_gem_pm.h \ i915_irq.h \ i915_params.h \ i915_priolist_types.h \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c new file mode 100644 index 000000000000..45d238d784fc --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "intel_frontbuffer.h" + +static DEFINE_SPINLOCK(clflush_lock); + +struct clflush { + struct dma_fence dma; /* Must be first for dma_fence_free() */ + struct i915_sw_fence wait; + struct work_struct work; + struct drm_i915_gem_object *obj; +}; + +static const char *i915_clflush_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) +{ + return "clflush"; +} + +static void i915_clflush_release(struct dma_fence *fence) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), dma); + + i915_sw_fence_fini(&clflush->wait); + + BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); + dma_fence_free(&clflush->dma); +} + +static const struct dma_fence_ops i915_clflush_ops = { + .get_driver_name = i915_clflush_get_driver_name, + .get_timeline_name = i915_clflush_get_timeline_name, + .release = i915_clflush_release, +}; + +static void __i915_do_clflush(struct drm_i915_gem_object *obj) +{ + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + drm_clflush_sg(obj->mm.pages); + intel_fb_obj_flush(obj, ORIGIN_CPU); +} + +static void i915_clflush_work(struct work_struct *work) +{ + struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct drm_i915_gem_object *obj = clflush->obj; + + if (i915_gem_object_pin_pages(obj)) { + DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); + goto out; + } + + __i915_do_clflush(obj); + + i915_gem_object_unpin_pages(obj); + +out: + i915_gem_object_put(obj); + + dma_fence_signal(&clflush->dma); + dma_fence_put(&clflush->dma); +} + +static int __i915_sw_fence_call +i915_clflush_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&clflush->work); + break; + + case FENCE_FREE: + dma_fence_put(&clflush->dma); + break; + } + + return NOTIFY_DONE; +} + +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct clflush *clflush; + + /* + * Stolen memory is always coherent with the GPU as it is explicitly + * marked as wc by the system, or the system is cache-coherent. 
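/*
 * [Illustrative condensation of the coherency rules described in this
 * comment, with invented parameter names: objects without struct pages
 * are treated as always coherent, and coherent-for-read objects are
 * skipped unless the caller forces the flush.]
 */
#include <stdbool.h>

static bool needs_clflush(bool has_struct_page,
                          bool coherent_for_read,
                          bool force)
{
        if (!has_struct_page)   /* nothing for the CPU cache to hold */
                return false;

        return force || !coherent_for_read;
}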
+ * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. + */ + if (!i915_gem_object_has_struct_page(obj)) { + obj->cache_dirty = false; + return false; + } + + /* If the GPU is snooping the contents of the CPU cache, + * we do not need to manually clear the CPU cache lines. However, + * the caches are only snooped when the render cache is + * flushed/invalidated. As we always have to emit invalidations + * and flushes when moving into and out of the RENDER domain, correct + * snooping behaviour occurs naturally as the result of our domain + * tracking. + */ + if (!(flags & I915_CLFLUSH_FORCE) && + obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) + return false; + + trace_i915_gem_object_clflush(obj); + + clflush = NULL; + if (!(flags & I915_CLFLUSH_SYNC)) + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (clflush) { + GEM_BUG_ON(!obj->cache_dirty); + + dma_fence_init(&clflush->dma, + &i915_clflush_ops, + &clflush_lock, + to_i915(obj->base.dev)->mm.unordered_timeline, + 0); + i915_sw_fence_init(&clflush->wait, i915_clflush_notify); + + clflush->obj = i915_gem_object_get(obj); + INIT_WORK(&clflush->work, i915_clflush_work); + + dma_fence_get(&clflush->dma); + + i915_sw_fence_await_reservation(&clflush->wait, + obj->resv, NULL, + true, I915_FENCE_TIMEOUT, + I915_FENCE_GFP); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &clflush->dma); + reservation_object_unlock(obj->resv); + + i915_sw_fence_commit(&clflush->wait); + } else if (obj->mm.pages) { + __i915_do_clflush(obj); + } else { + GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); + } + + obj->cache_dirty = false; + return true; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h new file mode 100644 index 000000000000..e6c382973129 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_GEM_CLFLUSH_H__ +#define __I915_GEM_CLFLUSH_H__ + +#include + +struct drm_i915_private; +struct drm_i915_gem_object; + +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC BIT(1) + +#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c new file mode 100644 index 000000000000..5dcdf6540f43 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -0,0 +1,2453 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2011-2012 Intel Corporation + */ + +/* + * This file implements HW context support. On gen5+ a HW context consists of an + * opaque GPU object which is referenced at times of context saves and restores. + * With RC6 enabled, the context is also referenced as the GPU enters and exists + * from RC6 (GPU has it's own internal power context, except on gen5). Though + * something like a context does exist for the media ring, the code only + * supports contexts for the render ring. + * + * In software, there is a distinction between contexts created by the user, + * and the default HW context. The default HW context is used by GPU clients + * that do not request setup of their own hardware context. The default + * context's state is never restored to help prevent programming errors. 
This + * would happen if a client ran and piggy-backed off another clients GPU state. + * The default context only exists to give the GPU some offset to load as the + * current to invoke a save of the context we actually care about. In fact, the + * code could likely be constructed, albeit in a more complicated fashion, to + * never use the default context, though that limits the driver's ability to + * swap out, and/or destroy other contexts. + * + * All other contexts are created as a request by the GPU client. These contexts + * store GPU state, and thus allow GPU clients to not re-emit state (and + * potentially query certain state) at any time. The kernel driver makes + * certain that the appropriate commands are inserted. + * + * The context life cycle is semi-complicated in that context BOs may live + * longer than the context itself because of the way the hardware, and object + * tracking works. Below is a very crude representation of the state machine + * describing the context life. + * refcount pincount active + * S0: initial state 0 0 0 + * S1: context created 1 0 0 + * S2: context is currently running 2 1 X + * S3: GPU referenced, but not current 2 0 1 + * S4: context is current, but destroyed 1 1 0 + * S5: like S3, but destroyed 1 0 1 + * + * The most common (but not all) transitions: + * S0->S1: client creates a context + * S1->S2: client submits execbuf with context + * S2->S3: other clients submits execbuf with context + * S3->S1: context object was retired + * S3->S2: clients submits another execbuf + * S2->S4: context destroy called with current context + * S3->S5->S0: destroy path + * S4->S5->S0: destroy path on current context + * + * There are two confusing terms used above: + * The "current context" means the context which is currently running on the + * GPU. The GPU has loaded its state already and has stored away the gtt + * offset of the BO. The GPU is not actively referencing the data at this + * offset, but it will on the next context switch. The only way to avoid this + * is to do a GPU reset. + * + * An "active context' is one which was previously the "current context" and is + * on the active list waiting for the next context switch to occur. Until this + * happens, the object must remain at the same gtt offset. It is therefore + * possible to destroy a context, but it is still active. 
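/*
 * [Sketch of the bookkeeping triple tabulated above, with invented types:
 * a context is only freeable once refcount, pincount and active have all
 * dropped to zero (state S0), which is why a destroyed context can
 * outlive its destroy call in S4/S5.]
 */
#include <stdbool.h>

struct ctx_state {
        unsigned int refcount;  /* user + GPU references */
        unsigned int pincount;  /* currently programmed on the GPU */
        bool active;            /* still awaiting retirement */
};

static bool ctx_can_be_freed(const struct ctx_state *s)
{
        return s->refcount == 0 && s->pincount == 0 && !s->active;
}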
+ * + */ + +#include +#include + +#include + +#include "gt/intel_lrc_reg.h" + +#include "i915_gem_context.h" +#include "i915_globals.h" +#include "i915_trace.h" +#include "i915_user_extensions.h" + +#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 + +static struct i915_global_gem_context { + struct i915_global base; + struct kmem_cache *slab_luts; +} global; + +struct i915_lut_handle *i915_lut_handle_alloc(void) +{ + return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); +} + +void i915_lut_handle_free(struct i915_lut_handle *lut) +{ + return kmem_cache_free(global.slab_luts, lut); +} + +static void lut_close(struct i915_gem_context *ctx) +{ + struct i915_lut_handle *lut, *ln; + struct radix_tree_iter iter; + void __rcu **slot; + + list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) { + list_del(&lut->obj_link); + i915_lut_handle_free(lut); + } + INIT_LIST_HEAD(&ctx->handles_list); + + rcu_read_lock(); + radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { + struct i915_vma *vma = rcu_dereference_raw(*slot); + + radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); + + vma->open_count--; + __i915_gem_object_release_unless_active(vma->obj); + } + rcu_read_unlock(); +} + +static struct intel_context * +lookup_user_engine(struct i915_gem_context *ctx, + unsigned long flags, + const struct i915_engine_class_instance *ci) +#define LOOKUP_USER_INDEX BIT(0) +{ + int idx; + + if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx)) + return ERR_PTR(-EINVAL); + + if (!i915_gem_context_user_engines(ctx)) { + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(ctx->i915, + ci->engine_class, + ci->engine_instance); + if (!engine) + return ERR_PTR(-EINVAL); + + idx = engine->id; + } else { + idx = ci->engine_instance; + } + + return i915_gem_context_get_engine(ctx, idx); +} + +static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) +{ + unsigned int max; + + lockdep_assert_held(&i915->contexts.mutex); + + if (INTEL_GEN(i915) >= 11) + max = GEN11_MAX_CONTEXT_HW_ID; + else if (USES_GUC_SUBMISSION(i915)) + /* + * When using GuC in proxy submission, GuC consumes the + * highest bit in the context id to indicate proxy submission. + */ + max = MAX_GUC_CONTEXT_HW_ID; + else + max = MAX_CONTEXT_HW_ID; + + return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); +} + +static int steal_hw_id(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx, *cn; + LIST_HEAD(pinned); + int id = -ENOSPC; + + lockdep_assert_held(&i915->contexts.mutex); + + list_for_each_entry_safe(ctx, cn, + &i915->contexts.hw_id_list, hw_id_link) { + if (atomic_read(&ctx->hw_id_pin_count)) { + list_move_tail(&ctx->hw_id_link, &pinned); + continue; + } + + GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ + list_del_init(&ctx->hw_id_link); + id = ctx->hw_id; + break; + } + + /* + * Remember how far we got up on the last repossesion scan, so the + * list is kept in a "least recently scanned" order. + */ + list_splice_tail(&pinned, &i915->contexts.hw_id_list); + return id; +} + +static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) +{ + int ret; + + lockdep_assert_held(&i915->contexts.mutex); + + /* + * We prefer to steal/stall ourselves and our users over that of the + * entire system. That may be a little unfair to our users, and + * even hurt high priority clients. The choice is whether to oomkill + * something else, or steal a context id. 
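/*
 * [The id policy above restated as a stand-alone sketch, all helpers
 * hypothetical: try a non-aggressive allocation first, fall back to
 * repossessing an id from an unpinned context, then retry the allocator
 * once more so the caller sees the allocator's errno.]
 */
struct id_pool;

int pool_alloc_mayfail(struct id_pool *pool);  /* __GFP_RETRY_MAYFAIL analogue */
int pool_steal_unpinned(struct id_pool *pool);
int pool_alloc(struct id_pool *pool);

static int assign_id(struct id_pool *pool, unsigned int *out)
{
        int id;

        id = pool_alloc_mayfail(pool);
        if (id < 0) {
                id = pool_steal_unpinned(pool);
                if (id < 0)
                        id = pool_alloc(pool);  /* for the correct errno */
                if (id < 0)
                        return id;
        }

        *out = id;
        return 0;
}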
+ */ + ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(ret < 0)) { + ret = steal_hw_id(i915); + if (ret < 0) /* once again for the correct errno code */ + ret = new_hw_id(i915, GFP_KERNEL); + if (ret < 0) + return ret; + } + + *out = ret; + return 0; +} + +static void release_hw_id(struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = ctx->i915; + + if (list_empty(&ctx->hw_id_link)) + return; + + mutex_lock(&i915->contexts.mutex); + if (!list_empty(&ctx->hw_id_link)) { + ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); + list_del_init(&ctx->hw_id_link); + } + mutex_unlock(&i915->contexts.mutex); +} + +static void __free_engines(struct i915_gem_engines *e, unsigned int count) +{ + while (count--) { + if (!e->engines[count]) + continue; + + intel_context_put(e->engines[count]); + } + kfree(e); +} + +static void free_engines(struct i915_gem_engines *e) +{ + __free_engines(e, e->num_engines); +} + +static void free_engines_rcu(struct work_struct *wrk) +{ + struct i915_gem_engines *e = + container_of(wrk, struct i915_gem_engines, rcu.work); + struct drm_i915_private *i915 = e->i915; + + mutex_lock(&i915->drm.struct_mutex); + free_engines(e); + mutex_unlock(&i915->drm.struct_mutex); +} + +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +{ + struct intel_engine_cs *engine; + struct i915_gem_engines *e; + enum intel_engine_id id; + + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + e->i915 = ctx->i915; + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce; + + ce = intel_context_create(ctx, engine); + if (IS_ERR(ce)) { + __free_engines(e, id); + return ERR_CAST(ce); + } + + e->engines[id] = ce; + } + e->num_engines = id; + + return e; +} + +static void i915_gem_context_free(struct i915_gem_context *ctx) +{ + lockdep_assert_held(&ctx->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + + release_hw_id(ctx); + i915_ppgtt_put(ctx->ppgtt); + + free_engines(rcu_access_pointer(ctx->engines)); + mutex_destroy(&ctx->engines_mutex); + + if (ctx->timeline) + i915_timeline_put(ctx->timeline); + + kfree(ctx->name); + put_pid(ctx->pid); + + list_del(&ctx->link); + mutex_destroy(&ctx->mutex); + + kfree_rcu(ctx, rcu); +} + +static void contexts_free(struct drm_i915_private *i915) +{ + struct llist_node *freed = llist_del_all(&i915->contexts.free_list); + struct i915_gem_context *ctx, *cn; + + lockdep_assert_held(&i915->drm.struct_mutex); + + llist_for_each_entry_safe(ctx, cn, freed, free_link) + i915_gem_context_free(ctx); +} + +static void contexts_free_first(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx; + struct llist_node *freed; + + lockdep_assert_held(&i915->drm.struct_mutex); + + freed = llist_del_first(&i915->contexts.free_list); + if (!freed) + return; + + ctx = container_of(freed, typeof(*ctx), free_link); + i915_gem_context_free(ctx); +} + +static void contexts_free_worker(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), contexts.free_work); + + mutex_lock(&i915->drm.struct_mutex); + contexts_free(i915); + mutex_unlock(&i915->drm.struct_mutex); +} + +void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct drm_i915_private *i915 = ctx->i915; + + trace_i915_context_free(ctx); + if (llist_add(&ctx->free_link, &i915->contexts.free_list)) + queue_work(i915->wq, 
&i915->contexts.free_work); +} + +static void context_close(struct i915_gem_context *ctx) +{ + i915_gem_context_set_closed(ctx); + + /* + * This context will never again be assinged to HW, so we can + * reuse its ID for the next context. + */ + release_hw_id(ctx); + + /* + * The LUT uses the VMA as a backpointer to unref the object, + * so we need to clear the LUT before we close all the VMA (inside + * the ppgtt). + */ + lut_close(ctx); + + ctx->file_priv = ERR_PTR(-EBADF); + i915_gem_context_put(ctx); +} + +static u32 default_desc_template(const struct drm_i915_private *i915, + const struct i915_hw_ppgtt *ppgtt) +{ + u32 address_mode; + u32 desc; + + desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + + address_mode = INTEL_LEGACY_32B_CONTEXT; + if (ppgtt && i915_vm_is_4lvl(&ppgtt->vm)) + address_mode = INTEL_LEGACY_64B_CONTEXT; + desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN(i915, 8)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers + * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + */ + + return desc; +} + +static struct i915_gem_context * +__create_context(struct drm_i915_private *dev_priv) +{ + struct i915_gem_context *ctx; + struct i915_gem_engines *e; + int err; + int i; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + kref_init(&ctx->ref); + list_add_tail(&ctx->link, &dev_priv->contexts.list); + ctx->i915 = dev_priv; + ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); + mutex_init(&ctx->mutex); + + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_free; + } + RCU_INIT_POINTER(ctx->engines, e); + + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + INIT_LIST_HEAD(&ctx->handles_list); + INIT_LIST_HEAD(&ctx->hw_id_link); + + /* NB: Mark all slices as needing a remap so that when the context first + * loads it will restore whatever remap state already exists. If there + * is no remap info, it will be a NOP. 
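/*
 * [Aside on the ALL_L3_SLICES() idiom used for ctx->remap_slice below:
 * (1 << n) - 1 sets the low n bits, i.e. "every slice still needs a
 * remap". A runnable check, with an assumed two-slice part:]
 */
#include <assert.h>

int main(void)
{
        unsigned int num_slices = 2;                    /* assumed */
        unsigned int all_slices = (1u << num_slices) - 1;

        assert(all_slices == 0x3);      /* slices 0 and 1 flagged */
        return 0;
}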
*/ + ctx->remap_slice = ALL_L3_SLICES(dev_priv); + + i915_gem_context_set_bannable(ctx); + i915_gem_context_set_recoverable(ctx); + + ctx->ring_size = 4 * PAGE_SIZE; + ctx->desc_template = + default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); + + for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) + ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + + return ctx; + +err_free: + kfree(ctx); + return ERR_PTR(err); +} + +static struct i915_hw_ppgtt * +__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt) +{ + struct i915_hw_ppgtt *old = ctx->ppgtt; + + ctx->ppgtt = i915_ppgtt_get(ppgtt); + ctx->desc_template = default_desc_template(ctx->i915, ppgtt); + + return old; +} + +static void __assign_ppgtt(struct i915_gem_context *ctx, + struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt == ctx->ppgtt) + return; + + ppgtt = __set_ppgtt(ctx, ppgtt); + if (ppgtt) + i915_ppgtt_put(ppgtt); +} + +static struct i915_gem_context * +i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +{ + struct i915_gem_context *ctx; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && + !HAS_EXECLISTS(dev_priv)) + return ERR_PTR(-EINVAL); + + /* Reap the most stale context */ + contexts_free_first(dev_priv); + + ctx = __create_context(dev_priv); + if (IS_ERR(ctx)) + return ctx; + + if (HAS_FULL_PPGTT(dev_priv)) { + struct i915_hw_ppgtt *ppgtt; + + ppgtt = i915_ppgtt_create(dev_priv); + if (IS_ERR(ppgtt)) { + DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", + PTR_ERR(ppgtt)); + context_close(ctx); + return ERR_CAST(ppgtt); + } + + __assign_ppgtt(ctx, ppgtt); + i915_ppgtt_put(ppgtt); + } + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + struct i915_timeline *timeline; + + timeline = i915_timeline_create(dev_priv, NULL); + if (IS_ERR(timeline)) { + context_close(ctx); + return ERR_CAST(timeline); + } + + ctx->timeline = timeline; + } + + trace_i915_context_create(ctx); + + return ctx; +} + +/** + * i915_gem_context_create_gvt - create a GVT GEM context + * @dev: drm device * + * + * This function is used to create a GVT specific GEM context. 
+ * + * Returns: + * pointer to i915_gem_context on success, error pointer if failed + * + */ +struct i915_gem_context * +i915_gem_context_create_gvt(struct drm_device *dev) +{ + struct i915_gem_context *ctx; + int ret; + + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) + return ERR_PTR(-ENODEV); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ERR_PTR(ret); + + ctx = i915_gem_create_context(to_i915(dev), 0); + if (IS_ERR(ctx)) + goto out; + + ret = i915_gem_context_pin_hw_id(ctx); + if (ret) { + context_close(ctx); + ctx = ERR_PTR(ret); + goto out; + } + + ctx->file_priv = ERR_PTR(-EBADF); + i915_gem_context_set_closed(ctx); /* not user accessible */ + i915_gem_context_clear_bannable(ctx); + i915_gem_context_set_force_single_submission(ctx); + if (!USES_GUC_SUBMISSION(to_i915(dev))) + ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + + GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); +out: + mutex_unlock(&dev->struct_mutex); + return ctx; +} + +static void +destroy_kernel_context(struct i915_gem_context **ctxp) +{ + struct i915_gem_context *ctx; + + /* Keep the context ref so that we can free it immediately ourselves */ + ctx = i915_gem_context_get(fetch_and_zero(ctxp)); + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + context_close(ctx); + i915_gem_context_free(ctx); +} + +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) +{ + struct i915_gem_context *ctx; + int err; + + ctx = i915_gem_create_context(i915, 0); + if (IS_ERR(ctx)) + return ctx; + + err = i915_gem_context_pin_hw_id(ctx); + if (err) { + destroy_kernel_context(&ctx); + return ERR_PTR(err); + } + + i915_gem_context_clear_bannable(ctx); + ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->ring_size = PAGE_SIZE; + + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + return ctx; +} + +static void init_contexts(struct drm_i915_private *i915) +{ + mutex_init(&i915->contexts.mutex); + INIT_LIST_HEAD(&i915->contexts.list); + + /* Using the simple ida interface, the max is limited by sizeof(int) */ + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); + ida_init(&i915->contexts.hw_ida); + INIT_LIST_HEAD(&i915->contexts.hw_id_list); + + INIT_WORK(&i915->contexts.free_work, contexts_free_worker); + init_llist_head(&i915->contexts.free_list); +} + +static bool needs_preempt_context(struct drm_i915_private *i915) +{ + return HAS_EXECLISTS(i915); +} + +int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +{ + struct i915_gem_context *ctx; + + /* Reassure ourselves we are only called once */ + GEM_BUG_ON(dev_priv->kernel_context); + GEM_BUG_ON(dev_priv->preempt_context); + + intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); + init_contexts(dev_priv); + + /* lowest priority; idle task */ + ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); + if (IS_ERR(ctx)) { + DRM_ERROR("Failed to create default global context\n"); + return PTR_ERR(ctx); + } + /* + * For easy recognisablity, we want the kernel context to be 0 and then + * all user contexts will have non-zero hw_id. Kernel contexts are + * permanently pinned, so that we never suffer a stall and can + * use them from any allocation context (e.g. for evicting other + * contexts and from inside the shrinker). 
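/*
 * [Illustration of the invariants asserted just below, names invented:
 * the kernel context is created first, so it owns hw_id 0 and stays
 * permanently pinned; anything the id-stealer may repossess therefore
 * has a nonzero id and a zero pin count.]
 */
struct ctx_ids { unsigned int hw_id; unsigned int pin_count; };

static int may_steal_id_from(const struct ctx_ids *c)
{
        return c->hw_id != 0 && c->pin_count == 0;
}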
+ */ + GEM_BUG_ON(ctx->hw_id); + GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); + dev_priv->kernel_context = ctx; + + /* highest priority; preempting task */ + if (needs_preempt_context(dev_priv)) { + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); + if (!IS_ERR(ctx)) + dev_priv->preempt_context = ctx; + else + DRM_ERROR("Failed to create preempt context; disabling preemption\n"); + } + + DRM_DEBUG_DRIVER("%s context support initialized\n", + DRIVER_CAPS(dev_priv)->has_logical_contexts ? + "logical" : "fake"); + return 0; +} + +void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) + intel_engine_lost_context(engine); +} + +void i915_gem_contexts_fini(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + if (i915->preempt_context) + destroy_kernel_context(&i915->preempt_context); + destroy_kernel_context(&i915->kernel_context); + + /* Must free all deferred contexts (via flush_workqueue) first */ + GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); + ida_destroy(&i915->contexts.hw_ida); +} + +static int context_idr_cleanup(int id, void *p, void *data) +{ + context_close(p); + return 0; +} + +static int vm_idr_cleanup(int id, void *p, void *data) +{ + i915_ppgtt_put(p); + return 0; +} + +static int gem_context_register(struct i915_gem_context *ctx, + struct drm_i915_file_private *fpriv) +{ + int ret; + + ctx->file_priv = fpriv; + if (ctx->ppgtt) + ctx->ppgtt->vm.file = fpriv; + + ctx->pid = get_task_pid(current, PIDTYPE_PID); + ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", + current->comm, pid_nr(ctx->pid)); + if (!ctx->name) { + ret = -ENOMEM; + goto err_pid; + } + + /* And finally expose ourselves to userspace via the idr */ + mutex_lock(&fpriv->context_idr_lock); + ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&fpriv->context_idr_lock); + if (ret >= 0) + goto out; + + kfree(fetch_and_zero(&ctx->name)); +err_pid: + put_pid(fetch_and_zero(&ctx->pid)); +out: + return ret; +} + +int i915_gem_context_open(struct drm_i915_private *i915, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_context *ctx; + int err; + + mutex_init(&file_priv->context_idr_lock); + mutex_init(&file_priv->vm_idr_lock); + + idr_init(&file_priv->context_idr); + idr_init_base(&file_priv->vm_idr, 1); + + mutex_lock(&i915->drm.struct_mutex); + ctx = i915_gem_create_context(i915, 0); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err; + } + + err = gem_context_register(ctx, file_priv); + if (err < 0) + goto err_ctx; + + GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); + GEM_BUG_ON(err > 0); + + return 0; + +err_ctx: + mutex_lock(&i915->drm.struct_mutex); + context_close(ctx); + mutex_unlock(&i915->drm.struct_mutex); +err: + idr_destroy(&file_priv->vm_idr); + idr_destroy(&file_priv->context_idr); + mutex_destroy(&file_priv->vm_idr_lock); + mutex_destroy(&file_priv->context_idr_lock); + return err; +} + +void i915_gem_context_close(struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + + lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex); + + idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); + idr_destroy(&file_priv->context_idr); + mutex_destroy(&file_priv->context_idr_lock); + + idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, 
NULL); + idr_destroy(&file_priv->vm_idr); + mutex_destroy(&file_priv->vm_idr_lock); +} + +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_vm_control *args = data; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_hw_ppgtt *ppgtt; + int err; + + if (!HAS_FULL_PPGTT(i915)) + return -ENODEV; + + if (args->flags) + return -EINVAL; + + ppgtt = i915_ppgtt_create(i915); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + ppgtt->vm.file = file_priv; + + if (args->extensions) { + err = i915_user_extensions(u64_to_user_ptr(args->extensions), + NULL, 0, + ppgtt); + if (err) + goto err_put; + } + + err = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (err) + goto err_put; + + err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); + if (err < 0) + goto err_unlock; + + GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ + + mutex_unlock(&file_priv->vm_idr_lock); + + args->vm_id = err; + return 0; + +err_unlock: + mutex_unlock(&file_priv->vm_idr_lock); +err_put: + i915_ppgtt_put(ppgtt); + return err; +} + +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_vm_control *args = data; + struct i915_hw_ppgtt *ppgtt; + int err; + u32 id; + + if (args->flags) + return -EINVAL; + + if (args->extensions) + return -EINVAL; + + id = args->vm_id; + if (!id) + return -ENOENT; + + err = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (err) + return err; + + ppgtt = idr_remove(&file_priv->vm_idr, id); + + mutex_unlock(&file_priv->vm_idr_lock); + if (!ppgtt) + return -ENOENT; + + i915_ppgtt_put(ppgtt); + return 0; +} + +struct context_barrier_task { + struct i915_active base; + void (*task)(void *data); + void *data; +}; + +static void cb_retire(struct i915_active *base) +{ + struct context_barrier_task *cb = container_of(base, typeof(*cb), base); + + if (cb->task) + cb->task(cb->data); + + i915_active_fini(&cb->base); + kfree(cb); +} + +I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); +static int context_barrier_task(struct i915_gem_context *ctx, + intel_engine_mask_t engines, + int (*emit)(struct i915_request *rq, void *data), + void (*task)(void *data), + void *data) +{ + struct drm_i915_private *i915 = ctx->i915; + struct context_barrier_task *cb; + struct i915_gem_engines_iter it; + struct intel_context *ce; + int err = 0; + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!task); + + cb = kmalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + + i915_active_init(i915, &cb->base, cb_retire); + i915_active_acquire(&cb->base); + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + struct i915_request *rq; + + if (I915_SELFTEST_ONLY(context_barrier_inject_fault & + ce->engine->mask)) { + err = -ENXIO; + break; + } + + if (!(ce->engine->mask & engines) || !ce->state) + continue; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = 0; + if (emit) + err = emit(rq, data); + if (err == 0) + err = i915_active_ref(&cb->base, rq->fence.context, rq); + + i915_request_add(rq); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); + + cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ + cb->data = data; + + i915_active_release(&cb->base); + + return err; +} + +static int get_ppgtt(struct drm_i915_file_private *file_priv, + struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_hw_ppgtt *ppgtt; + int ret; + + if (!ctx->ppgtt) + return -ENODEV; + + /* XXX rcu acquire? */ + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (ret) + return ret; + + ppgtt = i915_ppgtt_get(ctx->ppgtt); + mutex_unlock(&ctx->i915->drm.struct_mutex); + + ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (ret) + goto err_put; + + ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); + GEM_BUG_ON(!ret); + if (ret < 0) + goto err_unlock; + + i915_ppgtt_get(ppgtt); + + args->size = 0; + args->value = ret; + + ret = 0; +err_unlock: + mutex_unlock(&file_priv->vm_idr_lock); +err_put: + i915_ppgtt_put(ppgtt); + return ret; +} + +static void set_ppgtt_barrier(void *data) +{ + struct i915_hw_ppgtt *old = data; + + if (INTEL_GEN(old->vm.i915) < 8) + gen6_ppgtt_unpin_all(old); + + i915_ppgtt_put(old); +} + +static int emit_ppgtt_update(struct i915_request *rq, void *data) +{ + struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt; + struct intel_engine_cs *engine = rq->engine; + u32 base = engine->mmio_base; + u32 *cs; + int i; + + if (i915_vm_is_4lvl(&ppgtt->vm)) { + const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2); + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); + *cs++ = lower_32_bits(pd_daddr); + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + } else { + /* ppGTT is not part of the legacy context image */ + gen6_ppgtt_pin(ppgtt); + } + + return 0; +} + +static int set_ppgtt(struct drm_i915_file_private *file_priv, + struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_hw_ppgtt *ppgtt, *old; + int err; + + if (args->size) + return -EINVAL; + + if (!ctx->ppgtt) + return -ENODEV; + + if (upper_32_bits(args->value)) + return -ENOENT; + + err = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (err) + return err; + + ppgtt = idr_find(&file_priv->vm_idr, args->value); + if (ppgtt) + i915_ppgtt_get(ppgtt); + mutex_unlock(&file_priv->vm_idr_lock); + if (!ppgtt) + return -ENOENT; + + err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (err) + goto out; + + if (ppgtt == ctx->ppgtt) + goto unlock; + + /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ + lut_close(ctx); + + old = __set_ppgtt(ctx, ppgtt); + + /* + * We need to flush any requests using the current ppgtt before + * we release it as the requests do not hold a reference themselves, + * only indirectly through the context. 
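/*
 * [A model of what context_barrier_task() arranges here, using C11
 * atomics and invented names: one token per engine still referencing the
 * old address space, with the teardown callback firing only when the
 * last token retires.]
 */
#include <stdatomic.h>

struct barrier {
        atomic_int outstanding;         /* one per in-flight request */
        void (*task)(void *data);       /* e.g. the set_ppgtt_barrier step */
        void *data;
};

static void barrier_token_retired(struct barrier *b)
{
        if (atomic_fetch_sub(&b->outstanding, 1) == 1)
                b->task(b->data);       /* last user gone: release old ppgtt */
}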
+ */ + err = context_barrier_task(ctx, ALL_ENGINES, + emit_ppgtt_update, + set_ppgtt_barrier, + old); + if (err) { + ctx->ppgtt = old; + ctx->desc_template = default_desc_template(ctx->i915, old); + i915_ppgtt_put(ppgtt); + } + +unlock: + mutex_unlock(&ctx->i915->drm.struct_mutex); + +out: + i915_ppgtt_put(ppgtt); + return err; +} + +static int gen8_emit_rpcs_config(struct i915_request *rq, + struct intel_context *ce, + struct intel_sseu sseu) +{ + u64 offset; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + offset = i915_ggtt_offset(ce->state) + + LRC_STATE_PN * PAGE_SIZE + + (CTX_R_PWR_CLK_STATE + 1) * 4; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) +{ + struct i915_request *rq; + int ret; + + lockdep_assert_held(&ce->pin_mutex); + + /* + * If the context is not idle, we have to submit an ordered request to + * modify its context image via the kernel context (writing to our own + * image, or into the registers directory, does not stick). Pristine + * and idle contexts will be configured on pinning. + */ + if (!intel_context_is_pinned(ce)) + return 0; + + rq = i915_request_create(ce->engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + /* Queue this switch after all other activity by this context. */ + ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); + if (ret) + goto out_add; + + ret = gen8_emit_rpcs_config(rq, ce, sseu); + if (ret) + goto out_add; + + /* + * Guarantee context image and the timeline remains pinned until the + * modifying request is retired by setting the ce activity tracker. + * + * But we only need to take one pin on the account of it. Or in other + * words transfer the pinned ce object to tracked active request. + */ + if (!i915_active_request_isset(&ce->active_tracker)) + __intel_context_pin(ce); + __i915_active_request_set(&ce->active_tracker, rq); + +out_add: + i915_request_add(rq); + return ret; +} + +static int +__intel_context_reconfigure_sseu(struct intel_context *ce, + struct intel_sseu sseu) +{ + int ret; + + GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); + + ret = intel_context_lock_pinned(ce); + if (ret) + return ret; + + /* Nothing to do if unmodified. */ + if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) + goto unlock; + + ret = gen8_modify_rpcs(ce, sseu); + if (!ret) + ce->sseu = sseu; + +unlock: + intel_context_unlock_pinned(ce); + return ret; +} + +static int +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) +{ + struct drm_i915_private *i915 = ce->gem_context->i915; + int ret; + + ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (ret) + return ret; + + ret = __intel_context_reconfigure_sseu(ce, sseu); + + mutex_unlock(&i915->drm.struct_mutex); + + return ret; +} + +static int +user_to_context_sseu(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param_sseu *user, + struct intel_sseu *context) +{ + const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu; + + /* No zeros in any field. */ + if (!user->slice_mask || !user->subslice_mask || + !user->min_eus_per_subslice || !user->max_eus_per_subslice) + return -EINVAL; + + /* Max > min. 
*/ + if (user->max_eus_per_subslice < user->min_eus_per_subslice) + return -EINVAL; + + /* + * Some future proofing on the types since the uAPI is wider than the + * current internal implementation. + */ + if (overflows_type(user->slice_mask, context->slice_mask) || + overflows_type(user->subslice_mask, context->subslice_mask) || + overflows_type(user->min_eus_per_subslice, + context->min_eus_per_subslice) || + overflows_type(user->max_eus_per_subslice, + context->max_eus_per_subslice)) + return -EINVAL; + + /* Check validity against hardware. */ + if (user->slice_mask & ~device->slice_mask) + return -EINVAL; + + if (user->subslice_mask & ~device->subslice_mask[0]) + return -EINVAL; + + if (user->max_eus_per_subslice > device->max_eus_per_subslice) + return -EINVAL; + + context->slice_mask = user->slice_mask; + context->subslice_mask = user->subslice_mask; + context->min_eus_per_subslice = user->min_eus_per_subslice; + context->max_eus_per_subslice = user->max_eus_per_subslice; + + /* Part specific restrictions. */ + if (IS_GEN(i915, 11)) { + unsigned int hw_s = hweight8(device->slice_mask); + unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); + unsigned int req_s = hweight8(context->slice_mask); + unsigned int req_ss = hweight8(context->subslice_mask); + + /* + * Only full subslice enablement is possible if more than one + * slice is turned on. + */ + if (req_s > 1 && req_ss != hw_ss_per_s) + return -EINVAL; + + /* + * If more than four (SScount bitfield limit) subslices are + * requested then the number has to be even. + */ + if (req_ss > 4 && (req_ss & 1)) + return -EINVAL; + + /* + * If only one slice is enabled and subslice count is below the + * device full enablement, it must be at most half of the all + * available subslices. + */ + if (req_s == 1 && req_ss < hw_ss_per_s && + req_ss > (hw_ss_per_s / 2)) + return -EINVAL; + + /* ABI restriction - VME use case only. */ + + /* All slices or one slice only. */ + if (req_s != 1 && req_s != hw_s) + return -EINVAL; + + /* + * Half subslices or full enablement only when one slice is + * enabled. + */ + if (req_s == 1 && + (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2))) + return -EINVAL; + + /* No EU configuration changes. */ + if ((user->min_eus_per_subslice != + device->max_eus_per_subslice) || + (user->max_eus_per_subslice != + device->max_eus_per_subslice)) + return -EINVAL; + } + + return 0; +} + +static int set_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_context *ce; + struct intel_sseu sseu; + unsigned long lookup; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (!IS_GEN(i915, 11)) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + lookup = 0; + if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + /* Only render engine supports RPCS configuration. 
*/ + if (ce->engine->class != RENDER_CLASS) { + ret = -ENODEV; + goto out_ce; + } + + ret = user_to_context_sseu(i915, &user_sseu, &sseu); + if (ret) + goto out_ce; + + ret = intel_context_reconfigure_sseu(ce, sseu); + if (ret) + goto out_ce; + + args->size = sizeof(user_sseu); + +out_ce: + intel_context_put(ce); + return ret; +} + +struct set_engines { + struct i915_gem_context *ctx; + struct i915_gem_engines *engines; +}; + +static int +set_engines__load_balance(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_load_balance __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_engines *set = data; + struct intel_engine_cs *stack[16]; + struct intel_engine_cs **siblings; + struct intel_context *ce; + u16 num_siblings, idx; + unsigned int n; + int err; + + if (!HAS_EXECLISTS(set->ctx->i915)) + return -ENODEV; + + if (USES_GUC_SUBMISSION(set->ctx->i915)) + return -ENODEV; /* not implement yet */ + + if (get_user(idx, &ext->engine_index)) + return -EFAULT; + + if (idx >= set->engines->num_engines) { + DRM_DEBUG("Invalid placement value, %d >= %d\n", + idx, set->engines->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->engines->num_engines); + if (set->engines->engines[idx]) { + DRM_DEBUG("Invalid placement[%d], already occupied\n", idx); + return -EEXIST; + } + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + err = check_user_mbz(&ext->mbz64); + if (err) + return err; + + siblings = stack; + if (num_siblings > ARRAY_SIZE(stack)) { + siblings = kmalloc_array(num_siblings, + sizeof(*siblings), + GFP_KERNEL); + if (!siblings) + return -ENOMEM; + } + + for (n = 0; n < num_siblings; n++) { + struct i915_engine_class_instance ci; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = -EFAULT; + goto out_siblings; + } + + siblings[n] = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto out_siblings; + } + } + + ce = intel_execlists_create_virtual(set->ctx, siblings, n); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_siblings; + } + + if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { + intel_context_put(ce); + err = -EEXIST; + goto out_siblings; + } + +out_siblings: + if (siblings != stack) + kfree(siblings); + + return err; +} + +static int +set_engines__bond(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_bond __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_engines *set = data; + struct i915_engine_class_instance ci; + struct intel_engine_cs *virtual; + struct intel_engine_cs *master; + u16 idx, num_bonds; + int err, n; + + if (get_user(idx, &ext->virtual_index)) + return -EFAULT; + + if (idx >= set->engines->num_engines) { + DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n", + idx, set->engines->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->engines->num_engines); + if (!set->engines->engines[idx]) { + DRM_DEBUG("Invalid engine at %d\n", idx); + return -EINVAL; + } + virtual = set->engines->engines[idx]->engine; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if (copy_from_user(&ci, 
&ext->master, sizeof(ci))) + return -EFAULT; + + master = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, ci.engine_instance); + if (!master) { + DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + if (get_user(num_bonds, &ext->num_bonds)) + return -EFAULT; + + for (n = 0; n < num_bonds; n++) { + struct intel_engine_cs *bond; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) + return -EFAULT; + + bond = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!bond) { + DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + /* + * A non-virtual engine has no siblings to choose between; and + * a submit fence will always be directed to the one engine. + */ + if (intel_engine_is_virtual(virtual)) { + err = intel_virtual_engine_attach_bond(virtual, + master, + bond); + if (err) + return err; + } + } + + return 0; +} + +static const i915_user_extension_fn set_engines__extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, +}; + +static int +set_engines(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + struct i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + struct set_engines set = { .ctx = ctx }; + unsigned int num_engines, n; + u64 extensions; + int err; + + if (!args->size) { /* switch back to legacy user_ring_map */ + if (!i915_gem_context_user_engines(ctx)) + return 0; + + set.engines = default_engines(ctx); + if (IS_ERR(set.engines)) + return PTR_ERR(set.engines); + + goto replace; + } + + BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size, sizeof(*user->engines))) { + DRM_DEBUG("Invalid size for engine array: %d\n", + args->size); + return -EINVAL; + } + + /* + * Note that I915_EXEC_RING_MASK limits execbuf to only using the + * first 64 engines defined here. 
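As a concrete illustration, a two-way balanced video engine can be requested from userspace along these lines (a sketch assuming the I915_DEFINE_* helpers from the uAPI header; fd and ctx_id are assumed, error handling elided; slot 0 is left as the INVALID placeholder that the load-balance extension then fills in):

	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2) = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
		.engine_index = 0,	/* virtual engine goes into slot 0 */
		.num_siblings = 2,
		.engines = {
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
		.extensions = (uintptr_t)&balance,
		.engines = {
			{ I915_ENGINE_CLASS_INVALID,
			  I915_ENGINE_CLASS_INVALID_NONE },
		},
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (uintptr_t)&engines,
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);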
+ */ + num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + + set.engines = kmalloc(struct_size(set.engines, engines, num_engines), + GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + set.engines->i915 = ctx->i915; + for (n = 0; n < num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + __free_engines(set.engines, n); + return -EFAULT; + } + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { + set.engines->engines[n] = NULL; + continue; + } + + engine = intel_engine_lookup_user(ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + __free_engines(set.engines, n); + return -ENOENT; + } + + set.engines->engines[n] = intel_context_create(ctx, engine); + if (!set.engines->engines[n]) { + __free_engines(set.engines, n); + return -ENOMEM; + } + } + set.engines->num_engines = num_engines; + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = i915_user_extensions(u64_to_user_ptr(extensions), + set_engines__extensions, + ARRAY_SIZE(set_engines__extensions), + &set); + if (err) { + free_engines(set.engines); + return err; + } + +replace: + mutex_lock(&ctx->engines_mutex); + if (args->size) + i915_gem_context_set_user_engines(ctx); + else + i915_gem_context_clear_user_engines(ctx); + rcu_swap_protected(ctx->engines, set.engines, 1); + mutex_unlock(&ctx->engines_mutex); + + INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); + queue_rcu_work(system_wq, &set.engines->rcu); + + return 0; +} + +static struct i915_gem_engines * +__copy_engines(struct i915_gem_engines *e) +{ + struct i915_gem_engines *copy; + unsigned int n; + + copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + if (!copy) + return ERR_PTR(-ENOMEM); + + copy->i915 = e->i915; + for (n = 0; n < e->num_engines; n++) { + if (e->engines[n]) + copy->engines[n] = intel_context_get(e->engines[n]); + else + copy->engines[n] = NULL; + } + copy->num_engines = n; + + return copy; +} + +static int +get_engines(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_context_param_engines __user *user; + struct i915_gem_engines *e; + size_t n, count, size; + int err = 0; + + err = mutex_lock_interruptible(&ctx->engines_mutex); + if (err) + return err; + + e = NULL; + if (i915_gem_context_user_engines(ctx)) + e = __copy_engines(i915_gem_context_engines(ctx)); + mutex_unlock(&ctx->engines_mutex); + if (IS_ERR_OR_NULL(e)) { + args->size = 0; + return PTR_ERR_OR_ZERO(e); + } + + count = e->num_engines; + + /* Be paranoid in case we have an impedance mismatch */ + if (!check_struct_size(user, engines, count, &size)) { + err = -EINVAL; + goto err_free; + } + if (overflows_type(size, args->size)) { + err = -EINVAL; + goto err_free; + } + + if (!args->size) { + args->size = size; + goto err_free; + } + + if (args->size < size) { + err = -EINVAL; + goto err_free; + } + + user = u64_to_user_ptr(args->value); + if (!access_ok(user, size)) { + err = -EFAULT; + goto err_free; + } + + if (put_user(0, &user->extensions)) { + err = -EFAULT; + goto err_free; + } + + for (n = 0; n < count; n++) { + struct i915_engine_class_instance ci = { + .engine_class = I915_ENGINE_CLASS_INVALID, + .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, + }; + + if (e->engines[n]) { + ci.engine_class = 
e->engines[n]->engine->uabi_class; + ci.engine_instance = e->engines[n]->engine->instance; + } + + if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { + err = -EFAULT; + goto err_free; + } + } + + args->size = size; + +err_free: + INIT_RCU_WORK(&e->rcu, free_engines_rcu); + queue_rcu_work(system_wq, &e->rcu); + return err; +} + +static int ctx_setparam(struct drm_i915_file_private *fpriv, + struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + int ret = 0; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ZEROMAP: + if (args->size) + ret = -EINVAL; + else if (args->value) + set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); + else + clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); + break; + + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + if (args->size) + ret = -EINVAL; + else if (args->value) + i915_gem_context_set_no_error_capture(ctx); + else + i915_gem_context_clear_no_error_capture(ctx); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + if (args->size) + ret = -EINVAL; + else if (!capable(CAP_SYS_ADMIN) && !args->value) + ret = -EPERM; + else if (args->value) + i915_gem_context_set_bannable(ctx); + else + i915_gem_context_clear_bannable(ctx); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + if (args->size) + ret = -EINVAL; + else if (args->value) + i915_gem_context_set_recoverable(ctx); + else + i915_gem_context_clear_recoverable(ctx); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + { + s64 priority = args->value; + + if (args->size) + ret = -EINVAL; + else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + ret = -ENODEV; + else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + ret = -EINVAL; + else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + ret = -EPERM; + else + ctx->sched.priority = + I915_USER_PRIORITY(priority); + } + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = set_sseu(ctx, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = set_ppgtt(fpriv, ctx, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = set_engines(ctx, args); + break; + + case I915_CONTEXT_PARAM_BAN_PERIOD: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +struct create_ext { + struct i915_gem_context *ctx; + struct drm_i915_file_private *fpriv; +}; + +static int create_setparam(struct i915_user_extension __user *ext, void *data) +{ + struct drm_i915_gem_context_create_ext_setparam local; + const struct create_ext *arg = data; + + if (copy_from_user(&local, ext, sizeof(local))) + return -EFAULT; + + if (local.param.ctx_id) + return -EINVAL; + + return ctx_setparam(arg->fpriv, arg->ctx, &local.param); +} + +static int clone_engines(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_gem_engines *e = i915_gem_context_lock_engines(src); + struct i915_gem_engines *clone; + bool user_engines; + unsigned long n; + + clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + if (!clone) + goto err_unlock; + + clone->i915 = dst->i915; + for (n = 0; n < e->num_engines; n++) { + struct intel_engine_cs *engine; + + if (!e->engines[n]) { + clone->engines[n] = NULL; + continue; + } + engine = e->engines[n]->engine; + + /* + * Virtual engines are singletons; they can only exist + * inside a single context, because they embed their + * HW context... As each virtual context implies a single + * timeline (each engine can only dequeue a single request + * at any time), it would be surprising for two contexts + * to use the same engine. 
So let's create a copy of + * the virtual engine instead. + */ + if (intel_engine_is_virtual(engine)) + clone->engines[n] = + intel_execlists_clone_virtual(dst, engine); + else + clone->engines[n] = intel_context_create(dst, engine); + if (IS_ERR_OR_NULL(clone->engines[n])) { + __free_engines(clone, n); + goto err_unlock; + } + } + clone->num_engines = n; + + user_engines = i915_gem_context_user_engines(src); + i915_gem_context_unlock_engines(src); + + free_engines(dst->engines); + RCU_INIT_POINTER(dst->engines, clone); + if (user_engines) + i915_gem_context_set_user_engines(dst); + else + i915_gem_context_clear_user_engines(dst); + return 0; + +err_unlock: + i915_gem_context_unlock_engines(src); + return -ENOMEM; +} + +static int clone_flags(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + dst->user_flags = src->user_flags; + return 0; +} + +static int clone_schedattr(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + dst->sched = src->sched; + return 0; +} + +static int clone_sseu(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_gem_engines *e = i915_gem_context_lock_engines(src); + struct i915_gem_engines *clone; + unsigned long n; + int err; + + clone = dst->engines; /* no locking required; sole access */ + if (e->num_engines != clone->num_engines) { + err = -EINVAL; + goto unlock; + } + + for (n = 0; n < e->num_engines; n++) { + struct intel_context *ce = e->engines[n]; + + if (clone->engines[n]->engine->class != ce->engine->class) { + /* Must have compatible engine maps! */ + err = -EINVAL; + goto unlock; + } + + /* serialises with set_sseu */ + err = intel_context_lock_pinned(ce); + if (err) + goto unlock; + + clone->engines[n]->sseu = ce->sseu; + intel_context_unlock_pinned(ce); + } + + err = 0; +unlock: + i915_gem_context_unlock_engines(src); + return err; +} + +static int clone_timeline(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + if (src->timeline) { + GEM_BUG_ON(src->timeline == dst->timeline); + + if (dst->timeline) + i915_timeline_put(dst->timeline); + dst->timeline = i915_timeline_get(src->timeline); + } + + return 0; +} + +static int clone_vm(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_hw_ppgtt *ppgtt; + + rcu_read_lock(); + do { + ppgtt = READ_ONCE(src->ppgtt); + if (!ppgtt) + break; + + if (!kref_get_unless_zero(&ppgtt->ref)) + continue; + + /* + * This ppgtt may have been reallocated between + * the read and the kref, and reassigned to a third + * context. In order to avoid inadvertent sharing + * of this ppgtt with that third context (and not + * src), we have to confirm that we have the same + * ppgtt after passing through the strong memory + * barrier implied by a successful + * kref_get_unless_zero(). + * + * Once we have acquired the current ppgtt of src, + * we no longer care if it is released from src, as + * it cannot be reallocated elsewhere. 
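In isolation, the same RCU + kref_get_unless_zero() lookup idiom looks like this (a generic sketch; slot, obj and object_put() are hypothetical names):

	rcu_read_lock();
	do {
		obj = READ_ONCE(*slot);	/* unstable pointer under RCU */
		if (!obj)
			break;

		/* the object may be dying; only take a ref if it is still live */
		if (!kref_get_unless_zero(&obj->ref))
			continue;	/* it died under us; reload */

		/* revalidate: still the current occupant of the slot? */
		if (obj == READ_ONCE(*slot))
			break;

		object_put(obj);	/* raced with a replacement; retry */
	} while (1);
	rcu_read_unlock();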
+ */ + + if (ppgtt == READ_ONCE(src->ppgtt)) + break; + + i915_ppgtt_put(ppgtt); + } while (1); + rcu_read_unlock(); + + if (ppgtt) { + __assign_ppgtt(dst, ppgtt); + i915_ppgtt_put(ppgtt); + } + + return 0; +} + +static int create_clone(struct i915_user_extension __user *ext, void *data) +{ + static int (* const fn[])(struct i915_gem_context *dst, + struct i915_gem_context *src) = { +#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y + MAP(ENGINES, clone_engines), + MAP(FLAGS, clone_flags), + MAP(SCHEDATTR, clone_schedattr), + MAP(SSEU, clone_sseu), + MAP(TIMELINE, clone_timeline), + MAP(VM, clone_vm), +#undef MAP + }; + struct drm_i915_gem_context_create_ext_clone local; + const struct create_ext *arg = data; + struct i915_gem_context *dst = arg->ctx; + struct i915_gem_context *src; + int err, bit; + + if (copy_from_user(&local, ext, sizeof(local))) + return -EFAULT; + + BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != + I915_CONTEXT_CLONE_UNKNOWN); + + if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) + return -EINVAL; + + if (local.rsvd) + return -EINVAL; + + rcu_read_lock(); + src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); + rcu_read_unlock(); + if (!src) + return -ENOENT; + + GEM_BUG_ON(src == dst); + + for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { + if (!(local.flags & BIT(bit))) + continue; + + err = fn[bit](dst, src); + if (err) + return err; + } + + return 0; +} + +static const i915_user_extension_fn create_extensions[] = { + [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, + [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, +}; + +static bool client_is_banned(struct drm_i915_file_private *file_priv) +{ + return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; +} + +int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_context_create_ext *args = data; + struct create_ext ext_data; + int ret; + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return -ENODEV; + + if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) + return -EINVAL; + + ret = i915_terminally_wedged(i915); + if (ret) + return ret; + + ext_data.fpriv = file->driver_priv; + if (client_is_banned(ext_data.fpriv)) { + DRM_DEBUG("client %s[%d] banned from creating ctx\n", + current->comm, + pid_nr(get_task_pid(current, PIDTYPE_PID))); + return -EIO; + } + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + ext_data.ctx = i915_gem_create_context(i915, args->flags); + mutex_unlock(&dev->struct_mutex); + if (IS_ERR(ext_data.ctx)) + return PTR_ERR(ext_data.ctx); + + if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { + ret = i915_user_extensions(u64_to_user_ptr(args->extensions), + create_extensions, + ARRAY_SIZE(create_extensions), + &ext_data); + if (ret) + goto err_ctx; + } + + ret = gem_context_register(ext_data.ctx, ext_data.fpriv); + if (ret < 0) + goto err_ctx; + + args->ctx_id = ret; + DRM_DEBUG("HW context %d created\n", args->ctx_id); + + return 0; + +err_ctx: + mutex_lock(&dev->struct_mutex); + context_close(ext_data.ctx); + mutex_unlock(&dev->struct_mutex); + return ret; +} + +int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_context_destroy *args = data; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_context *ctx; + + if (args->pad != 0) + return -EINVAL; + + if (!args->ctx_id) + return -ENOENT; + + if 
(mutex_lock_interruptible(&file_priv->context_idr_lock)) + return -EINTR; + + ctx = idr_remove(&file_priv->context_idr, args->ctx_id); + mutex_unlock(&file_priv->context_idr_lock); + if (!ctx) + return -ENOENT; + + mutex_lock(&dev->struct_mutex); + context_close(ctx); + mutex_unlock(&dev->struct_mutex); + + return 0; +} + +static int get_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_context *ce; + unsigned long lookup; + int err; + + if (args->size == 0) + goto out; + else if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + lookup = 0; + if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_lock_pinned(ce); /* serialises with set_sseu */ + if (err) { + intel_context_put(ce); + return err; + } + + user_sseu.slice_mask = ce->sseu.slice_mask; + user_sseu.subslice_mask = ce->sseu.subslice_mask; + user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; + user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice; + + intel_context_unlock_pinned(ce); + intel_context_put(ce); + + if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu, + sizeof(user_sseu))) + return -EFAULT; + +out: + args->size = sizeof(user_sseu); + + return 0; +} + +int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_context_param *args = data; + struct i915_gem_context *ctx; + int ret = 0; + + ctx = i915_gem_context_lookup(file_priv, args->ctx_id); + if (!ctx) + return -ENOENT; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ZEROMAP: + args->size = 0; + args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); + break; + + case I915_CONTEXT_PARAM_GTT_SIZE: + args->size = 0; + if (ctx->ppgtt) + args->value = ctx->ppgtt->vm.total; + else if (to_i915(dev)->mm.aliasing_ppgtt) + args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; + else + args->value = to_i915(dev)->ggtt.vm.total; + break; + + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + args->size = 0; + args->value = i915_gem_context_no_error_capture(ctx); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + args->size = 0; + args->value = i915_gem_context_is_bannable(ctx); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + args->size = 0; + args->value = i915_gem_context_is_recoverable(ctx); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + args->size = 0; + args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = get_sseu(ctx, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = get_ppgtt(file_priv, ctx, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = get_engines(ctx, args); + break; + + case I915_CONTEXT_PARAM_BAN_PERIOD: + default: + ret = -EINVAL; + break; + } + + i915_gem_context_put(ctx); + return ret; +} + +int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_context_param *args = data; + struct i915_gem_context *ctx; + 
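	/*
	 * Aside: get_sseu() and get_engines() above implement the usual
	 * two-pass size-probe protocol. From userspace that looks roughly
	 * like (an illustrative sketch, error handling elided):
	 *
	 *	struct drm_i915_gem_context_param p = {
	 *		.ctx_id = id,
	 *		.param = I915_CONTEXT_PARAM_ENGINES,
	 *	};
	 *
	 *	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
	 *	// first pass with size == 0: p.size now holds the bytes needed
	 *	p.value = (uintptr_t)malloc(p.size);
	 *	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
	 *	// second pass fills the buffer with the engine map
	 */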
int ret; + + ctx = i915_gem_context_lookup(file_priv, args->ctx_id); + if (!ctx) + return -ENOENT; + + ret = ctx_setparam(file_priv, ctx, args); + + i915_gem_context_put(ctx); + return ret; +} + +int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_reset_stats *args = data; + struct i915_gem_context *ctx; + int ret; + + if (args->flags || args->pad) + return -EINVAL; + + ret = -ENOENT; + rcu_read_lock(); + ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); + if (!ctx) + goto out; + + /* + * We opt for unserialised reads here. This may result in tearing + * in the extremely unlikely event of a GPU hang on this context + * as we are querying them. If we need that extra layer of protection, + * we should wrap the hangstats with a seqlock. + */ + + if (capable(CAP_SYS_ADMIN)) + args->reset_count = i915_reset_count(&dev_priv->gpu_error); + else + args->reset_count = 0; + + args->batch_active = atomic_read(&ctx->guilty_count); + args->batch_pending = atomic_read(&ctx->active_count); + + ret = 0; +out: + rcu_read_unlock(); + return ret; +} + +int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = ctx->i915; + int err = 0; + + mutex_lock(&i915->contexts.mutex); + + GEM_BUG_ON(i915_gem_context_is_closed(ctx)); + + if (list_empty(&ctx->hw_id_link)) { + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); + + err = assign_hw_id(i915, &ctx->hw_id); + if (err) + goto out_unlock; + + list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); + } + + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); + atomic_inc(&ctx->hw_id_pin_count); + +out_unlock: + mutex_unlock(&i915->contexts.mutex); + return err; +} + +/* GEM context-engines iterator: for_each_gem_engine() */ +struct intel_context * +i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) +{ + const struct i915_gem_engines *e = it->engines; + struct intel_context *ctx; + + do { + if (it->idx >= e->num_engines) + return NULL; + + ctx = e->engines[it->idx++]; + } while (!ctx); + + return ctx; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_context.c" +#include "selftests/i915_gem_context.c" +#endif + +static void i915_global_gem_context_shrink(void) +{ + kmem_cache_shrink(global.slab_luts); +} + +static void i915_global_gem_context_exit(void) +{ + kmem_cache_destroy(global.slab_luts); +} + +static struct i915_global_gem_context global = { { + .shrink = i915_global_gem_context_shrink, + .exit = i915_global_gem_context_exit, +} }; + +int __init i915_global_gem_context_init(void) +{ + global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); + if (!global.slab_luts) + return -ENOMEM; + + i915_global_register(&global.base); + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h new file mode 100644 index 000000000000..630392c77e48 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -0,0 +1,240 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_GEM_CONTEXT_H__ +#define __I915_GEM_CONTEXT_H__ + +#include "i915_gem_context_types.h" + +#include "gt/intel_context.h" + +#include "i915_gem.h" +#include "i915_scheduler.h" +#include "intel_device_info.h" + +struct drm_device; +struct drm_file; + +static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_CLOSED, &ctx->flags); 
+} + +static inline void i915_gem_context_set_closed(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(i915_gem_context_is_closed(ctx)); + set_bit(CONTEXT_CLOSED, &ctx->flags); +} + +static inline bool i915_gem_context_no_error_capture(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_no_error_capture(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_no_error_capture(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); +} + +static inline bool i915_gem_context_is_bannable(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_BANNABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_bannable(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_BANNABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags); +} + +static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); +} + +static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_BANNED, &ctx->flags); +} + +static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_BANNED, &ctx->flags); +} + +static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); +} + +static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) +{ + __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); +} + +static inline bool +i915_gem_context_user_engines(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_set_user_engines(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) +{ + clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); +static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) +{ + if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) + return 0; + + return __i915_gem_context_pin_hw_id(ctx); +} + +static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); + atomic_dec(&ctx->hw_id_pin_count); +} + +static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) +{ + return !ctx->file_priv; +} + +/* i915_gem_context.c */ +int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); +void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); +void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); + +int i915_gem_context_open(struct drm_i915_private *i915, + struct drm_file *file); +void i915_gem_context_close(struct drm_file *file); + +void i915_gem_context_release(struct kref *ctx_ref); +struct i915_gem_context * 
+i915_gem_context_create_gvt(struct drm_device *dev); + +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); + +static inline struct i915_gem_context * +i915_gem_context_get(struct i915_gem_context *ctx) +{ + kref_get(&ctx->ref); + return ctx; +} + +static inline void i915_gem_context_put(struct i915_gem_context *ctx) +{ + kref_put(&ctx->ref, i915_gem_context_release); +} + +static inline struct i915_gem_engines * +i915_gem_context_engines(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->engines, + lockdep_is_held(&ctx->engines_mutex)); +} + +static inline struct i915_gem_engines * +i915_gem_context_lock_engines(struct i915_gem_context *ctx) + __acquires(&ctx->engines_mutex) +{ + mutex_lock(&ctx->engines_mutex); + return i915_gem_context_engines(ctx); +} + +static inline void +i915_gem_context_unlock_engines(struct i915_gem_context *ctx) + __releases(&ctx->engines_mutex) +{ + mutex_unlock(&ctx->engines_mutex); +} + +static inline struct intel_context * +i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) +{ + return i915_gem_context_engines(ctx)->engines[idx]; +} + +static inline struct intel_context * +i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) +{ + struct intel_context *ce = ERR_PTR(-EINVAL); + + rcu_read_lock(); { + struct i915_gem_engines *e = rcu_dereference(ctx->engines); + if (likely(idx < e->num_engines && e->engines[idx])) + ce = intel_context_get(e->engines[idx]); + } rcu_read_unlock(); + + return ce; +} + +static inline void +i915_gem_engines_iter_init(struct i915_gem_engines_iter *it, + struct i915_gem_engines *engines) +{ + GEM_BUG_ON(!engines); + it->engines = engines; + it->idx = 0; +} + +struct intel_context * +i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); + +#define for_each_gem_engine(ce, engines, it) \ + for (i915_gem_engines_iter_init(&(it), (engines)); \ + ((ce) = i915_gem_engines_iter_next(&(it)));) + +struct i915_lut_handle *i915_lut_handle_alloc(void); +void i915_lut_handle_free(struct i915_lut_handle *lut); + +#endif /* !__I915_GEM_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h new file mode 100644 index 000000000000..fb965ded2508 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -0,0 +1,208 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_CONTEXT_TYPES_H__ +#define __I915_GEM_CONTEXT_TYPES_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gt/intel_context_types.h" + +#include "i915_scheduler.h" + +struct pid; + +struct drm_i915_private; +struct drm_i915_file_private; +struct i915_hw_ppgtt; +struct i915_timeline; 
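The for_each_gem_engine() helper defined above deliberately leaves the engines lock to the caller, so the typical pattern looks like this (a minimal sketch; do_stuff() is a hypothetical per-engine operation):

	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	int err = 0;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		/* NULL slots are skipped; ce visits each populated engine in index order */
		err = do_stuff(ce);
		if (err)
			break;
	}
	i915_gem_context_unlock_engines(ctx);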
+struct intel_ring; + +struct i915_gem_engines { + struct rcu_work rcu; + struct drm_i915_private *i915; + unsigned int num_engines; + struct intel_context *engines[]; +}; + +struct i915_gem_engines_iter { + unsigned int idx; + const struct i915_gem_engines *engines; +}; + +/** + * struct i915_gem_context - client state + * + * The struct i915_gem_context represents the combined view of the driver and + * logical hardware state for a particular client. + */ +struct i915_gem_context { + /** i915: i915 device backpointer */ + struct drm_i915_private *i915; + + /** file_priv: owning file descriptor */ + struct drm_i915_file_private *file_priv; + + /** + * @engines: User defined engines for this context + * + * Various uAPIs offer the ability to look up an + * index from this array to select an engine to operate on. + * + * Multiple logically distinct instances of the same engine + * may be defined in the array, as well as composite virtual + * engines. + * + * Execbuf uses the I915_EXEC_RING_MASK as an index into this + * array to select which HW context + engine to execute on. For + * the default array, the user_ring_map[] is used to translate + * the legacy uABI onto the appropriate index (e.g. both + * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same + * context, and I915_EXEC_BSD is weird). For a user-defined + * array, execbuf uses I915_EXEC_RING_MASK as a plain index. + * + * User defined by I915_CONTEXT_PARAM_ENGINES (when the + * CONTEXT_USER_ENGINES flag is set). + */ + struct i915_gem_engines __rcu *engines; + struct mutex engines_mutex; /* guards writes to engines */ + + struct i915_timeline *timeline; + + /** + * @ppgtt: unique address space (GTT) + * + * In full-ppgtt mode, each context has its own address space ensuring + * complete separation of one client from all others. + * + * In other modes, this is a NULL pointer with the expectation that + * the caller uses the shared global GTT. + */ + struct i915_hw_ppgtt *ppgtt; + + /** + * @pid: process id of creator + * + * Note that who created the context may not be the principal user, + * as the context may be shared across a local socket. However, + * that should only affect the default context; all contexts created + * explicitly by the client are expected to be isolated. + */ + struct pid *pid; + + /** + * @name: arbitrary name + * + * A name is constructed for the context from the creator's process + * name, pid and user handle in order to uniquely identify the + * context in messages. + */ + const char *name; + + /** link: place within &drm_i915_private.context_list */ + struct list_head link; + struct llist_node free_link; + + /** + * @ref: reference count + * + * A reference to a context is held by both the client who created it + * and on each request submitted to the hardware using the request + * (to ensure the hardware has access to the state until it has + * finished all pending writes). See i915_gem_context_get() and + * i915_gem_context_put() for access. + */ + struct kref ref; + + /** + * @rcu: rcu_head for deferred freeing. 
+ */ + struct rcu_head rcu; + + /** + * @user_flags: small set of booleans controlled by the user + */ + unsigned long user_flags; +#define UCONTEXT_NO_ZEROMAP 0 +#define UCONTEXT_NO_ERROR_CAPTURE 1 +#define UCONTEXT_BANNABLE 2 +#define UCONTEXT_RECOVERABLE 3 + + /** + * @flags: small set of booleans + */ + unsigned long flags; +#define CONTEXT_BANNED 0 +#define CONTEXT_CLOSED 1 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 +#define CONTEXT_USER_ENGINES 3 + + /** + * @hw_id: - unique identifier for the context + * + * The hardware needs to uniquely identify the context for a few + * functions like fault reporting, PASID, scheduling. The + * &drm_i915_private.context_hw_ida is used to assign a unique + * id for the lifetime of the context. + * + * @hw_id_pin_count: - number of times this context has been pinned + * for use (should be, at most, once per engine). + * + * @hw_id_link: - all contexts with an assigned id are tracked + * for possible repossession. + */ + unsigned int hw_id; + atomic_t hw_id_pin_count; + struct list_head hw_id_link; + + struct mutex mutex; + + struct i915_sched_attr sched; + + /** ring_size: size for allocating the per-engine ring buffer */ + u32 ring_size; + /** desc_template: invariant fields for the HW context descriptor */ + u32 desc_template; + + /** guilty_count: How many times this context has caused a GPU hang. */ + atomic_t guilty_count; + /** + * @active_count: How many times this context was active during a GPU + * hang, but did not cause it. + */ + atomic_t active_count; + + /** + * @hang_timestamp: The last time(s) this context caused a GPU hang + */ + unsigned long hang_timestamp[2]; +#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ + + /** remap_slice: Bitmask of cache lines that need remapping */ + u8 remap_slice; + + /** handles_vma: radix tree to look up our context-specific obj/vma for + * the user handle. (user handles are per fd, but the binding is + * per vm, which may be one per context or shared with the global GTT) + */ + struct radix_tree_root handles_vma; + + /** handles_list: reverse list of all the radix tree entries in use for + * this context, which allows us to free all the allocations on + * context close. 
+ */ + struct list_head handles_list; +}; + +#endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c new file mode 100644 index 000000000000..600fc926f81e --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -0,0 +1,318 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright 2012 Red Hat Inc + */ + +#include +#include +#include + +#include "i915_drv.h" +#include "i915_gem_object.h" + +static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) +{ + return to_intel_bo(buf->priv); +} + +static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *src, *dst; + int ret, i; + + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err; + + /* Copy sg so that we make an independent mapping */ + st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (st == NULL) { + ret = -ENOMEM; + goto err_unpin_pages; + } + + ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); + if (ret) + goto err_free; + + src = obj->mm.pages->sgl; + dst = st->sgl; + for (i = 0; i < obj->mm.pages->nents; i++) { + sg_set_page(dst, sg_page(src), src->length, 0); + dst = sg_next(dst); + src = sg_next(src); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + ret = -ENOMEM; + goto err_free_sg; + } + + return st; + +err_free_sg: + sg_free_table(st); +err_free: + kfree(st); +err_unpin_pages: + i915_gem_object_unpin_pages(obj); +err: + return ERR_PTR(ret); +} + +static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *sg, + enum dma_data_direction dir) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); + + dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir); + sg_free_table(sg); + kfree(sg); + + i915_gem_object_unpin_pages(obj); +} + +static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + + return i915_gem_object_pin_map(obj, I915_MAP_WB); +} + +static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); +} + +static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct page *page; + + if (page_num >= obj->base.size >> PAGE_SHIFT) + return NULL; + + if (!i915_gem_object_has_struct_page(obj)) + return NULL; + + if (i915_gem_object_pin_pages(obj)) + return NULL; + + /* Synchronisation is left to the caller (via .begin_cpu_access()) */ + page = i915_gem_object_get_page(obj, page_num); + if (IS_ERR(page)) + goto err_unpin; + + return kmap(page); + +err_unpin: + i915_gem_object_unpin_pages(obj); + return NULL; +} + +static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + + kunmap(virt_to_page(addr)); + i915_gem_object_unpin_pages(obj); +} + +static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + int ret; + + if (obj->base.size < vma->vm_end - vma->vm_start) + return -EINVAL; + + if (!obj->base.filp) + return -ENODEV; + + ret = call_mmap(obj->base.filp, vma); + 
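	/*
	 * call_mmap() above ran the backing shmem file's mmap handler on
	 * this vma; on success the code below repoints vma->vm_file from
	 * the dma-buf file to that shmem file (dropping the dma-buf file
	 * reference), so later faults are serviced by the object's own
	 * backing store.
	 */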
if (ret) + return ret; + + fput(vma->vm_file); + vma->vm_file = get_file(obj->base.filp); + + return 0; +} + +static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct drm_device *dev = obj->base.dev; + bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); + int err; + + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out; + + err = i915_gem_object_set_to_cpu_domain(obj, write); + mutex_unlock(&dev->struct_mutex); + +out: + i915_gem_object_unpin_pages(obj); + return err; +} + +static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct drm_device *dev = obj->base.dev; + int err; + + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + mutex_unlock(&dev->struct_mutex); + +out: + i915_gem_object_unpin_pages(obj); + return err; +} + +static const struct dma_buf_ops i915_dmabuf_ops = { + .map_dma_buf = i915_gem_map_dma_buf, + .unmap_dma_buf = i915_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .map = i915_gem_dmabuf_kmap, + .unmap = i915_gem_dmabuf_kunmap, + .mmap = i915_gem_dmabuf_mmap, + .vmap = i915_gem_dmabuf_vmap, + .vunmap = i915_gem_dmabuf_vunmap, + .begin_cpu_access = i915_gem_begin_cpu_access, + .end_cpu_access = i915_gem_end_cpu_access, +}; + +struct dma_buf *i915_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gem_obj, int flags) +{ + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &i915_dmabuf_ops; + exp_info.size = gem_obj->size; + exp_info.flags = flags; + exp_info.priv = gem_obj; + exp_info.resv = obj->resv; + + if (obj->ops->dmabuf_export) { + int ret = obj->ops->dmabuf_export(obj); + if (ret) + return ERR_PTR(ret); + } + + return drm_gem_dmabuf_export(dev, &exp_info); +} + +static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) +{ + struct sg_table *pages; + unsigned int sg_page_sizes; + + pages = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + sg_page_sizes = i915_sg_page_sizes(pages->sgl); + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; +} + +static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + dma_buf_unmap_attachment(obj->base.import_attach, pages, + DMA_BIDIRECTIONAL); +} + +static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { + .get_pages = i915_gem_object_get_pages_dmabuf, + .put_pages = i915_gem_object_put_pages_dmabuf, +}; + +struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct dma_buf_attachment *attach; + struct drm_i915_gem_object *obj; + int ret; + + /* is this one of our own objects? */ + if (dma_buf->ops == &i915_dmabuf_ops) { + obj = dma_buf_to_obj(dma_buf); + /* is it from our device? */ + if (obj->base.dev == dev) { + /* + * Importing dmabuf exported from our own gem increases + * refcount on gem itself instead of f_count of dmabuf. 
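The begin/end_cpu_access hooks above are what userspace reaches through the generic dma-buf bracketing ioctl. A minimal sketch (assuming a dma-buf fd; the structures come from the standard <linux/dma-buf.h> uAPI):

	struct dma_buf_sync sync;

	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* -> begin_cpu_access */

	/* ... CPU reads/writes through an mmap of the dma-buf ... */

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* -> end_cpu_access */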
+ */ + return &i915_gem_object_get(obj)->base; + } + } + + /* need to attach */ + attach = dma_buf_attach(dma_buf, dev->dev); + if (IS_ERR(attach)) + return ERR_CAST(attach); + + get_dma_buf(dma_buf); + + obj = i915_gem_object_alloc(); + if (obj == NULL) { + ret = -ENOMEM; + goto fail_detach; + } + + drm_gem_private_object_init(dev, &obj->base, dma_buf->size); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); + obj->base.import_attach = attach; + obj->resv = dma_buf->resv; + + /* We use GTT as shorthand for a coherent domain, one that is + * neither in the GPU cache nor in the CPU cache, where all + * writes are immediately visible in memory. (That's not strictly + * true, but it's close! There are internal buffers such as the + * write-combined buffer or a delay through the chipset for GTT + * writes that do require us to treat GTT as a separate cache domain.) + */ + obj->read_domains = I915_GEM_DOMAIN_GTT; + obj->write_domain = 0; + + return &obj->base; + +fail_detach: + dma_buf_detach(dma_buf, attach); + dma_buf_put(dma_buf); + + return ERR_PTR(ret); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_dmabuf.c" +#include "selftests/i915_gem_dmabuf.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c new file mode 100644 index 000000000000..09e64bf33842 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -0,0 +1,2768 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008,2010 Intel Corporation + */ + +#include +#include +#include +#include + +#include +#include + +#include "gem/i915_gem_ioctls.h" +#include "gt/intel_context.h" +#include "gt/intel_gt_pm.h" + +#include "i915_gem_ioctls.h" +#include "i915_gem_clflush.h" +#include "i915_gem_context.h" +#include "i915_trace.h" +#include "intel_drv.h" +#include "intel_frontbuffer.h" + +enum { + FORCE_CPU_RELOC = 1, + FORCE_GTT_RELOC, + FORCE_GPU_RELOC, +#define DBG_FORCE_RELOC 0 /* choose one of the above! */ +}; + +#define __EXEC_OBJECT_HAS_REF BIT(31) +#define __EXEC_OBJECT_HAS_PIN BIT(30) +#define __EXEC_OBJECT_HAS_FENCE BIT(29) +#define __EXEC_OBJECT_NEEDS_MAP BIT(28) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(27) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */ +#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) + +#define __EXEC_HAS_RELOC BIT(31) +#define __EXEC_VALIDATED BIT(30) +#define __EXEC_INTERNAL_FLAGS (~0u << 30) +#define UPDATE PIN_OFFSET_FIXED + +#define BATCH_OFFSET_BIAS (256*1024) + +#define __I915_EXEC_ILLEGAL_FLAGS \ + (__I915_EXEC_UNKNOWN_FLAGS | \ + I915_EXEC_CONSTANTS_MASK | \ + I915_EXEC_RESOURCE_STREAMER) + +/* Catch emission of unexpected errors for CI! */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +#undef EINVAL +#define EINVAL ({ \ + DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \ + 22; \ +}) +#endif + +/** + * DOC: User command execution + * + * Userspace submits commands to be executed on the GPU as an instruction + * stream within a GEM object we call a batchbuffer. These instructions may + * refer to other GEM objects containing auxiliary state such as kernels, + * samplers, render targets and even secondary batchbuffers. Userspace does + * not know where in the GPU memory these objects reside and so before the + * batchbuffer is passed to the GPU for execution, those addresses in the + * batchbuffer and auxiliary objects are updated. This is known as relocation, + * or patching. 
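Each such patch point is described to the kernel with a drm_i915_gem_relocation_entry; a sketch of one entry (the field values here are illustrative only):

	struct drm_i915_gem_relocation_entry reloc = {
		.target_handle = target,	/* GEM handle (or, with HANDLE_LUT, an execobj index) */
		.offset = 0x40,			/* location of the pointer within the batch */
		.delta = 0,			/* byte offset added to the target's address */
		.presumed_offset = guess,	/* where userspace last saw the target */
		.read_domains = I915_GEM_DOMAIN_RENDER,
		.write_domain = 0,
	};

If presumed_offset still matches the target's actual placement, the kernel can skip rewriting this pointer entirely.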
To try and avoid having to relocate each object on the next + * execution, userspace is told the location of those objects in this pass, + * but this remains just a hint as the kernel may choose a new location for + * any object in the future. + * + * At the level of talking to the hardware, submitting a batchbuffer for the + * GPU to execute amounts to adding content to a buffer from which the HW + * command streamer is reading. + * + * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e. + * Execlists, this command is not placed on the same buffer as the + * remaining items. + * + * 2. Add a command to invalidate caches to the buffer. + * + * 3. Add a batchbuffer start command to the buffer; the start command is + * essentially a token together with the GPU address of the batchbuffer + * to be executed. + * + * 4. Add a pipeline flush to the buffer. + * + * 5. Add a memory write command to the buffer to record when the GPU + * is done executing the batchbuffer. The memory write writes the + * global sequence number of the request, ``i915_request::global_seqno``; + * the i915 driver uses the current value in the register to determine + * if the GPU has completed the batchbuffer. + * + * 6. Add a user interrupt command to the buffer. This command instructs + * the GPU to issue an interrupt when the command, pipeline flush and + * memory write are completed. + * + * 7. Inform the hardware of the additional commands added to the buffer + * (by updating the tail pointer). + * + * Processing an execbuf ioctl is conceptually split up into a few phases. + * + * 1. Validation - Ensure all the pointers, handles and flags are valid. + * 2. Reservation - Assign GPU address space for every object + * 3. Relocation - Update any addresses to point to the final locations + * 4. Serialisation - Order the request with respect to its dependencies + * 5. Construction - Construct a request to execute the batchbuffer + * 6. Submission (at some point in the future execution) + * + * Reserving resources for the execbuf is the most complicated phase. We + * neither want to have to migrate the object in the address space, nor do + * we want to have to update any relocations pointing to this object. Ideally, + * we want to leave the object where it is and for all the existing relocations + * to match. If the object is given a new address, or if userspace thinks the + * object is elsewhere, we have to parse all the relocation entries and update + * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that + * all the target addresses in all of its objects match the value in the + * relocation entries and that they all match the presumed offsets given by the + * list of execbuffer objects. Using this knowledge, we know that if we haven't + * moved any buffers, all the relocation entries are valid and we can skip + * the update. (If userspace is wrong, the likely outcome is an impromptu GPU + * hang.) The requirements for using I915_EXEC_NO_RELOC are: + * + * The addresses written in the objects must match the corresponding + * reloc.presumed_offset which in turn must match the corresponding + * execobject.offset. + * + * Any render targets written to in the batch must be flagged with + * EXEC_OBJECT_WRITE. + * + * To avoid stalling, execobject.offset should match the current + * address of that object within the active context. + * + * The reservation is done in multiple phases. 
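Before walking through those phases, here is how the NO_RELOC contract above looks from userspace (a hedged sketch; the handles, addresses and sizes are assumed, error handling elided):

	struct drm_i915_gem_exec_object2 obj[2] = {
		{
			.handle = target_handle,
			.offset = target_addr,	/* must equal reloc.presumed_offset */
		},
		{
			.handle = batch_handle,
			.offset = batch_addr,
			.relocation_count = nreloc,
			.relocs_ptr = (uintptr_t)relocs,
		},
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)obj,
		.buffer_count = 2,	/* the batch is last by default */
		.batch_len = batch_bytes,
		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
		.rsvd1 = ctx_id,	/* context to execute within */
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);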
First we try and keep any + * object already bound in its current location - so long as it meets the + * constraints imposed by the new execbuffer. Any object left unbound after the + * first pass is then fitted into any available idle space. If an object does + * not fit, all objects are removed from the reservation and the process rerun + * after sorting the objects into a priority order (more difficult to fit + * objects are tried first). Failing that, the entire VM is cleared and we try + * to fit the execbuf one last time before concluding that it simply will not + * fit. + * + * A small complication to all of this is that we allow userspace not only to + * specify an alignment and a size for the object in the address space, but + * we also allow userspace to specify the exact offset. These objects are + * simpler to place (the location is known a priori); all we have to do is make + * sure the space is available. + * + * Once all the objects are in place, patching up the buried pointers to point + * to the final locations is a fairly simple job of walking over the relocation + * entry arrays, looking up the right address and rewriting the value into + * the object. Simple! ... The relocation entries are stored in user memory + * and so to access them we have to copy them into a local buffer. That copy + * has to avoid taking any pagefaults as they may lead back to a GEM object + * requiring the struct_mutex (i.e. recursive deadlock). So once again we split + * the relocation into multiple passes. First we try to do everything within an + * atomic context (avoid the pagefaults) which requires that we never wait. If + * we detect that we may wait, or if we need to fault, then we have to fall back + * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm + * bells yet?) Dropping the mutex means that we lose all the state we have + * built up so far for the execbuf and we must reset any global data. However, + * we do leave the objects pinned in their final locations - which is a + * potential issue for concurrent execbufs. Once we have left the mutex, we can + * allocate and copy all the relocation entries into a large array at our + * leisure, reacquire the mutex, reclaim all the objects and other state and + * then proceed to update any incorrect addresses with the objects. + * + * As we process the relocation entries, we maintain a record of whether the + * object is being written to. Using NO_RELOC, we expect userspace to provide + * this information instead. We also check whether we can skip the relocation + * by comparing the expected value inside the relocation entry with the target's + * final address. If they differ, we have to map the current object and rewrite + * the 4 or 8 byte pointer within. + * + * Serialising an execbuf is quite simple according to the rules of the GEM + * ABI. Execution within each context is ordered by the order of submission. + * Writes to any GEM object are in order of submission and are exclusive. Reads + * from a GEM object are unordered with respect to other reads, but ordered by + * writes. A write submitted after a read cannot occur before the read, and + * similarly any read submitted after a write cannot occur before the write. + * Writes are ordered between engines such that only one write occurs at any + * time (completing any reads beforehand) - using semaphores where available + * and CPU serialisation otherwise. 
Other GEM accesses obey the same rules: any + * write (either via mmaps using set-domain, or via pwrite) must flush all GPU + * reads before starting, and any read (either using set-domain or pread) must + * flush all GPU writes before starting. (Note we only employ a barrier before; + * we currently rely on userspace not concurrently starting a new execution + * whilst reading or writing to an object. This may be an advantage or not + * depending on how much you trust userspace not to shoot themselves in the + * foot.) Serialisation may just result in the request being inserted into + * a DAG awaiting its turn, but the simplest is to wait on the CPU until + * all dependencies are resolved. + * + * After all of that, it is just a matter of closing the request and handing it to + * the hardware (well, leaving it in a queue to be executed). However, we also + * offer the ability for batchbuffers to be run with elevated privileges so + * that they access otherwise hidden registers. (Used to adjust L3 cache etc.) + * Before any batch is given extra privileges we first must check that it + * contains no nefarious instructions; we check that each instruction is from + * our whitelist and all registers are also from an allowed list. We first + * copy the user's batchbuffer to a shadow (so that the user doesn't have + * access to it, either by the CPU or GPU as we scan it) and then parse each + * instruction. If everything is ok, we set a flag telling the hardware to run + * the batchbuffer in trusted mode, otherwise the ioctl is rejected. + */ + +struct i915_execbuffer { + struct drm_i915_private *i915; /** i915 backpointer */ + struct drm_file *file; /** per-file lookup tables and limits */ + struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */ + struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ + struct i915_vma **vma; + unsigned int *flags; + + struct intel_engine_cs *engine; /** engine to queue the request to */ + struct intel_context *context; /* logical state for the request */ + struct i915_gem_context *gem_context; /** caller's context */ + struct i915_address_space *vm; /** GTT and vma for the request */ + + struct i915_request *request; /** our request to build */ + struct i915_vma *batch; /** identity of the batch obj/vma */ + + /** actual size of execobj[] as we may extend it for the cmdparser */ + unsigned int buffer_count; + + /** list of vma not yet bound during reservation phase */ + struct list_head unbound; + + /** list of vma that have execobj.relocation_count */ + struct list_head relocs; + + /** + * Track the most recently used object for relocations, as we + * frequently have to perform multiple relocations within the same + * obj/page + */ + struct reloc_cache { + struct drm_mm_node node; /** temporary GTT binding */ + unsigned long vaddr; /** Current kmap address */ + unsigned long page; /** Currently mapped page index */ + unsigned int gen; /** Cached value of INTEL_GEN */ + bool use_64bit_reloc : 1; + bool has_llc : 1; + bool has_fence : 1; + bool needs_unfenced : 1; + + struct i915_request *rq; + u32 *rq_cmd; + unsigned int rq_size; + } reloc_cache; + + u64 invalid_flags; /** Set of execobj.flags that are invalid */ + u32 context_flags; /** Set of execobj.flags to insert from the ctx */ + + u32 batch_start_offset; /** Location within object of batch */ + u32 batch_len; /** Length of batch within object */ + u32 batch_flags; /** Flags composed for emit_bb_start() */ + + /** + * Indicate either the size of the hashtable used to resolve + * relocation 
handles, or if negative that we are using a direct + * index into the execobj[]. + */ + int lut_size; + struct hlist_head *buckets; /** ht for relocation handles */ +}; + +#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) + +/* + * Used to convert any address to canonical form. + * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, + * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the + * addresses to be in a canonical form: + * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct + * canonical form [63:48] == [47]." + */ +#define GEN8_HIGH_ADDRESS_BIT 47 +static inline u64 gen8_canonical_addr(u64 address) +{ + return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); +} + +static inline u64 gen8_noncanonical_addr(u64 address) +{ + return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); +} + +static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) +{ + return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; +} + +static int eb_create(struct i915_execbuffer *eb) +{ + if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { + unsigned int size = 1 + ilog2(eb->buffer_count); + + /* + * Without a 1:1 association between relocation handles and + * the execobject[] index, we instead create a hashtable. + * We size it dynamically based on available memory, starting + * first with a 1:1 associative hash and scaling back until + * the allocation succeeds. + * + * Later on we use a positive lut_size to indicate we are + * using this hashtable, and a negative value to indicate a + * direct lookup. + */ + do { + gfp_t flags; + + /* While we can still reduce the allocation size, don't + * raise a warning and allow the allocation to fail. + * On the last pass though, we want to try as hard + * as possible to perform the allocation and warn + * if it fails. 
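For reference, the direct-lookup path (negative lut_size) is selected by userspace via the I915_EXEC_HANDLE_LUT flag, with which relocations name their targets by execobj index instead of GEM handle (a minimal sketch):

	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	reloc.target_handle = 0;	/* index into the execobj[] array, not a handle */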
+ */ + flags = GFP_KERNEL; + if (size > 1) + flags |= __GFP_NORETRY | __GFP_NOWARN; + + eb->buckets = kzalloc(sizeof(struct hlist_head) << size, + flags); + if (eb->buckets) + break; + } while (--size); + + if (unlikely(!size)) + return -ENOMEM; + + eb->lut_size = size; + } else { + eb->lut_size = -eb->buffer_count; + } + + return 0; +} + +static bool +eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, + const struct i915_vma *vma, + unsigned int flags) +{ + if (vma->node.size < entry->pad_to_size) + return true; + + if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) + return true; + + if (flags & EXEC_OBJECT_PINNED && + vma->node.start != entry->offset) + return true; + + if (flags & __EXEC_OBJECT_NEEDS_BIAS && + vma->node.start < BATCH_OFFSET_BIAS) + return true; + + if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && + (vma->node.start + vma->node.size - 1) >> 32) + return true; + + if (flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_map_and_fenceable(vma)) + return true; + + return false; +} + +static inline bool +eb_pin_vma(struct i915_execbuffer *eb, + const struct drm_i915_gem_exec_object2 *entry, + struct i915_vma *vma) +{ + unsigned int exec_flags = *vma->exec_flags; + u64 pin_flags; + + if (vma->node.size) + pin_flags = vma->node.start; + else + pin_flags = entry->offset & PIN_OFFSET_MASK; + + pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; + if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT)) + pin_flags |= PIN_GLOBAL; + + if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) + return false; + + if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { + if (unlikely(i915_vma_pin_fence(vma))) { + i915_vma_unpin(vma); + return false; + } + + if (vma->fence) + exec_flags |= __EXEC_OBJECT_HAS_FENCE; + } + + *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; + return !eb_vma_misplaced(entry, vma, exec_flags); +} + +static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) +{ + GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); + + if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) + __i915_vma_unpin_fence(vma); + + __i915_vma_unpin(vma); +} + +static inline void +eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags) +{ + if (!(*flags & __EXEC_OBJECT_HAS_PIN)) + return; + + __eb_unreserve_vma(vma, *flags); + *flags &= ~__EXEC_OBJECT_RESERVED; +} + +static int +eb_validate_vma(struct i915_execbuffer *eb, + struct drm_i915_gem_exec_object2 *entry, + struct i915_vma *vma) +{ + if (unlikely(entry->flags & eb->invalid_flags)) + return -EINVAL; + + if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) + return -EINVAL; + + /* + * Offset can be used as input (EXEC_OBJECT_PINNED), reject + * any non-page-aligned or non-canonical addresses. + */ + if (unlikely(entry->flags & EXEC_OBJECT_PINNED && + entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) + return -EINVAL; + + /* pad_to_size was once a reserved field, so sanitize it */ + if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (unlikely(offset_in_page(entry->pad_to_size))) + return -EINVAL; + } else { + entry->pad_to_size = 0; + } + + if (unlikely(vma->exec_flags)) { + DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", + entry->handle, (int)(entry - eb->exec)); + return -EINVAL; + } + + /* + * From drm_mm perspective address space is continuous, + * so from this point we're always using non-canonical + * form internally. 
+ */ + entry->offset = gen8_noncanonical_addr(entry->offset); + + if (!eb->reloc_cache.has_fence) { + entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; + } else { + if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || + eb->reloc_cache.needs_unfenced) && + i915_gem_object_is_tiled(vma->obj)) + entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; + } + + if (!(entry->flags & EXEC_OBJECT_PINNED)) + entry->flags |= eb->context_flags; + + return 0; +} + +static int +eb_add_vma(struct i915_execbuffer *eb, + unsigned int i, unsigned batch_idx, + struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + int err; + + GEM_BUG_ON(i915_vma_is_closed(vma)); + + if (!(eb->args->flags & __EXEC_VALIDATED)) { + err = eb_validate_vma(eb, entry, vma); + if (unlikely(err)) + return err; + } + + if (eb->lut_size > 0) { + vma->exec_handle = entry->handle; + hlist_add_head(&vma->exec_node, + &eb->buckets[hash_32(entry->handle, + eb->lut_size)]); + } + + if (entry->relocation_count) + list_add_tail(&vma->reloc_link, &eb->relocs); + + /* + * Stash a pointer from the vma to execobj, so we can query its flags, + * size, alignment etc as provided by the user. Also we stash a pointer + * to the vma inside the execobj so that we can use a direct lookup + * to find the right target VMA when doing relocations. + */ + eb->vma[i] = vma; + eb->flags[i] = entry->flags; + vma->exec_flags = &eb->flags[i]; + + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. + */ + if (i == batch_idx) { + if (entry->relocation_count && + !(eb->flags[i] & EXEC_OBJECT_PINNED)) + eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; + if (eb->reloc_cache.has_fence) + eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; + + eb->batch = vma; + } + + err = 0; + if (eb_pin_vma(eb, entry, vma)) { + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + } else { + eb_unreserve_vma(vma, vma->exec_flags); + + list_add_tail(&vma->exec_link, &eb->unbound); + if (drm_mm_node_allocated(&vma->node)) + err = i915_vma_unbind(vma); + if (unlikely(err)) + vma->exec_flags = NULL; + } + return err; +} + +static inline int use_cpu_reloc(const struct reloc_cache *cache, + const struct drm_i915_gem_object *obj) +{ + if (!i915_gem_object_has_struct_page(obj)) + return false; + + if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) + return true; + + if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) + return false; + + return (cache->has_llc || + obj->cache_dirty || + obj->cache_level != I915_CACHE_NONE); +} + +static int eb_reserve_vma(const struct i915_execbuffer *eb, + struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); + unsigned int exec_flags = *vma->exec_flags; + u64 pin_flags; + int err; + + pin_flags = PIN_USER | PIN_NONBLOCK; + if (exec_flags & EXEC_OBJECT_NEEDS_GTT) + pin_flags |= PIN_GLOBAL; + + /* + * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to the first 4GBs for unflagged objects. 
+	 */
+	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+		pin_flags |= PIN_ZONE_4G;
+
+	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+		pin_flags |= PIN_MAPPABLE;
+
+	if (exec_flags & EXEC_OBJECT_PINNED) {
+		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	}
+
+	err = i915_vma_pin(vma,
+			   entry->pad_to_size, entry->alignment,
+			   pin_flags);
+	if (err)
+		return err;
+
+	if (entry->offset != vma->node.start) {
+		entry->offset = vma->node.start | UPDATE;
+		eb->args->flags |= __EXEC_HAS_RELOC;
+	}
+
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+		err = i915_vma_pin_fence(vma);
+		if (unlikely(err)) {
+			i915_vma_unpin(vma);
+			return err;
+		}
+
+		if (vma->fence)
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+	}
+
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
+
+	return 0;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	struct list_head last;
+	struct i915_vma *vma;
+	unsigned int i, pass;
+	int err;
+
+	/*
+	 * Attempt to pin all of the buffers into the GTT.
+	 * This is done in 3 phases:
+	 *
+	 * 1a. Unbind all objects that do not match the GTT constraints for
+	 * the execbuffer (fenceable, mappable, alignment etc).
+	 * 1b. Increment pin count for already bound objects.
+	 * 2. Bind new objects.
+	 * 3. Decrement pin count.
+	 *
+	 * This avoids unnecessary unbinding of later objects in order to make
+	 * room for the earlier objects *unless* we need to defragment.
+	 */
+
+	pass = 0;
+	err = 0;
+	do {
+		list_for_each_entry(vma, &eb->unbound, exec_link) {
+			err = eb_reserve_vma(eb, vma);
+			if (err)
+				break;
+		}
+		if (err != -ENOSPC)
+			return err;
+
+		/* Resort *all* the objects into priority order */
+		INIT_LIST_HEAD(&eb->unbound);
+		INIT_LIST_HEAD(&last);
+		for (i = 0; i < count; i++) {
+			unsigned int flags = eb->flags[i];
+			struct i915_vma *vma = eb->vma[i];
+
+			if (flags & EXEC_OBJECT_PINNED &&
+			    flags & __EXEC_OBJECT_HAS_PIN)
+				continue;
+
+			eb_unreserve_vma(vma, &eb->flags[i]);
+
+			if (flags & EXEC_OBJECT_PINNED)
+				/* Pinned objects must have their slot */
+				list_add(&vma->exec_link, &eb->unbound);
+			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+				/* Mappable objects require the lowest 256MiB (aperture) */
+				list_add_tail(&vma->exec_link, &eb->unbound);
+			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+				/* Prioritise the 4GiB region for restricted bo */
+				list_add(&vma->exec_link, &last);
+			else
+				list_add_tail(&vma->exec_link, &last);
+		}
+		list_splice_tail(&last, &eb->unbound);
+
+		switch (pass++) {
+		case 0:
+			break;
+
+		case 1:
+			/* Too fragmented, unbind everything and retry */
+			err = i915_gem_evict_vm(eb->vm);
+			if (err)
+				return err;
+			break;
+
+		default:
+			return -ENOSPC;
+		}
+	} while (1);
+}
+
+static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
+{
+	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
+		return 0;
+	else
+		return eb->buffer_count - 1;
+}
+
+static int eb_select_context(struct i915_execbuffer *eb)
+{
+	struct i915_gem_context *ctx;
+
+	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
+	if (unlikely(!ctx))
+		return -ENOENT;
+
+	eb->gem_context = ctx;
+	if (ctx->ppgtt) {
+		eb->vm = &ctx->ppgtt->vm;
+		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
+	} else {
+		eb->vm = &eb->i915->ggtt.vm;
+	}
+
+	eb->context_flags = 0;
+	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
+
eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return 0; +} + +static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) +{ + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &ring->request_list, ring_link) { + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->ring_link == &ring->request_list) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int eb_wait_for_ring(const struct i915_execbuffer *eb) +{ + struct i915_request *rq; + int ret = 0; + + /* + * Apply a light amount of backpressure to prevent excessive hogs + * from blocking waiting for space whilst holding struct_mutex and + * keeping all of their resources pinned. + */ + + rq = __eb_wait_for_ring(eb->context->ring); + if (rq) { + mutex_unlock(&eb->i915->drm.struct_mutex); + + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + ret = -EINTR; + + i915_request_put(rq); + + mutex_lock(&eb->i915->drm.struct_mutex); + } + + return ret; +} + +static int eb_lookup_vmas(struct i915_execbuffer *eb) +{ + struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; + struct drm_i915_gem_object *obj; + unsigned int i, batch; + int err; + + if (unlikely(i915_gem_context_is_closed(eb->gem_context))) + return -ENOENT; + + if (unlikely(i915_gem_context_is_banned(eb->gem_context))) + return -EIO; + + INIT_LIST_HEAD(&eb->relocs); + INIT_LIST_HEAD(&eb->unbound); + + batch = eb_batch_index(eb); + + for (i = 0; i < eb->buffer_count; i++) { + u32 handle = eb->exec[i].handle; + struct i915_lut_handle *lut; + struct i915_vma *vma; + + vma = radix_tree_lookup(handles_vma, handle); + if (likely(vma)) + goto add_vma; + + obj = i915_gem_object_lookup(eb->file, handle); + if (unlikely(!obj)) { + err = -ENOENT; + goto err_vma; + } + + vma = i915_vma_instance(obj, eb->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + lut = i915_lut_handle_alloc(); + if (unlikely(!lut)) { + err = -ENOMEM; + goto err_obj; + } + + err = radix_tree_insert(handles_vma, handle, vma); + if (unlikely(err)) { + i915_lut_handle_free(lut); + goto err_obj; + } + + /* transfer ref to ctx */ + if (!vma->open_count++) + i915_vma_reopen(vma); + list_add(&lut->obj_link, &obj->lut_list); + list_add(&lut->ctx_link, &eb->gem_context->handles_list); + lut->ctx = eb->gem_context; + lut->handle = handle; + +add_vma: + err = eb_add_vma(eb, i, batch, vma); + if (unlikely(err)) + goto err_vma; + + GEM_BUG_ON(vma != eb->vma[i]); + GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i])); + } + + eb->args->flags |= __EXEC_VALIDATED; + return eb_reserve(eb); + +err_obj: + i915_gem_object_put(obj); +err_vma: + eb->vma[i] = NULL; + return err; +} + +static struct i915_vma * +eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) +{ + if (eb->lut_size < 0) { + if (handle 
>= -eb->lut_size) + return NULL; + return eb->vma[handle]; + } else { + struct hlist_head *head; + struct i915_vma *vma; + + head = &eb->buckets[hash_32(handle, eb->lut_size)]; + hlist_for_each_entry(vma, head, exec_node) { + if (vma->exec_handle == handle) + return vma; + } + return NULL; + } +} + +static void eb_release_vmas(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + struct i915_vma *vma = eb->vma[i]; + unsigned int flags = eb->flags[i]; + + if (!vma) + break; + + GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); + vma->exec_flags = NULL; + eb->vma[i] = NULL; + + if (flags & __EXEC_OBJECT_HAS_PIN) + __eb_unreserve_vma(vma, flags); + + if (flags & __EXEC_OBJECT_HAS_REF) + i915_vma_put(vma); + } +} + +static void eb_reset_vmas(const struct i915_execbuffer *eb) +{ + eb_release_vmas(eb); + if (eb->lut_size > 0) + memset(eb->buckets, 0, + sizeof(struct hlist_head) << eb->lut_size); +} + +static void eb_destroy(const struct i915_execbuffer *eb) +{ + GEM_BUG_ON(eb->reloc_cache.rq); + + if (eb->lut_size > 0) + kfree(eb->buckets); +} + +static inline u64 +relocation_target(const struct drm_i915_gem_relocation_entry *reloc, + const struct i915_vma *target) +{ + return gen8_canonical_addr((int)reloc->delta + target->node.start); +} + +static void reloc_cache_init(struct reloc_cache *cache, + struct drm_i915_private *i915) +{ + cache->page = -1; + cache->vaddr = 0; + /* Must be a variable in the struct to allow GCC to unroll. */ + cache->gen = INTEL_GEN(i915); + cache->has_llc = HAS_LLC(i915); + cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); + cache->has_fence = cache->gen < 4; + cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; + cache->node.allocated = false; + cache->rq = NULL; + cache->rq_size = 0; +} + +static inline void *unmask_page(unsigned long p) +{ + return (void *)(uintptr_t)(p & PAGE_MASK); +} + +static inline unsigned int unmask_flags(unsigned long p) +{ + return p & ~PAGE_MASK; +} + +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ + +static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) +{ + struct drm_i915_private *i915 = + container_of(cache, struct i915_execbuffer, reloc_cache)->i915; + return &i915->ggtt; +} + +static void reloc_gpu_flush(struct reloc_cache *cache) +{ + GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; + + __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); + i915_gem_object_unpin_map(cache->rq->batch->obj); + + i915_gem_chipset_flush(cache->rq->i915); + + i915_request_add(cache->rq); + cache->rq = NULL; +} + +static void reloc_cache_reset(struct reloc_cache *cache) +{ + void *vaddr; + + if (cache->rq) + reloc_gpu_flush(cache); + + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) { + if (cache->vaddr & CLFLUSH_AFTER) + mb(); + + kunmap_atomic(vaddr); + i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); + } else { + wmb(); + io_mapping_unmap_atomic((void __iomem *)vaddr); + if (cache->node.allocated) { + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + + ggtt->vm.clear_range(&ggtt->vm, + cache->node.start, + cache->node.size); + drm_mm_remove_node(&cache->node); + } else { + i915_vma_unpin((struct i915_vma *)cache->node.mm); + } + } + + cache->vaddr = 0; + cache->page = -1; +} + +static void *reloc_kmap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + 
unsigned long page) +{ + void *vaddr; + + if (cache->vaddr) { + kunmap_atomic(unmask_page(cache->vaddr)); + } else { + unsigned int flushes; + int err; + + err = i915_gem_object_prepare_write(obj, &flushes); + if (err) + return ERR_PTR(err); + + BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); + BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); + + cache->vaddr = flushes | KMAP; + cache->node.mm = (void *)obj; + if (flushes) + mb(); + } + + vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; + cache->page = page; + + return vaddr; +} + +static void *reloc_iomap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long page) +{ + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + unsigned long offset; + void *vaddr; + + if (cache->vaddr) { + io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); + } else { + struct i915_vma *vma; + int err; + + if (use_cpu_reloc(cache, obj)) + return NULL; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return ERR_PTR(err); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); + if (IS_ERR(vma)) { + memset(&cache->node, 0, sizeof(cache->node)); + err = drm_mm_insert_node_in_range + (&ggtt->vm.mm, &cache->node, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (err) /* no inactive aperture space, use cpu reloc */ + return NULL; + } else { + err = i915_vma_put_fence(vma); + if (err) { + i915_vma_unpin(vma); + return ERR_PTR(err); + } + + cache->node.start = vma->node.start; + cache->node.mm = (void *)vma; + } + } + + offset = cache->node.start; + if (cache->node.allocated) { + wmb(); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, page), + offset, I915_CACHE_NONE, 0); + } else { + offset += page << PAGE_SHIFT; + } + + vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, + offset); + cache->page = page; + cache->vaddr = (unsigned long)vaddr; + + return vaddr; +} + +static void *reloc_vaddr(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long page) +{ + void *vaddr; + + if (cache->page == page) { + vaddr = unmask_page(cache->vaddr); + } else { + vaddr = NULL; + if ((cache->vaddr & KMAP) == 0) + vaddr = reloc_iomap(obj, cache, page); + if (!vaddr) + vaddr = reloc_kmap(obj, cache, page); + } + + return vaddr; +} + +static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) +{ + if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { + if (flushes & CLFLUSH_BEFORE) { + clflushopt(addr); + mb(); + } + + *addr = value; + + /* + * Writes to the same cacheline are serialised by the CPU + * (including clflush). On the write path, we only require + * that it hits memory in an orderly fashion and place + * mb barriers at the start and end of the relocation phase + * to ensure ordering of clflush wrt to the system. 
+ */ + if (flushes & CLFLUSH_AFTER) + clflushopt(addr); + } else + *addr = value; +} + +static int __reloc_gpu_alloc(struct i915_execbuffer *eb, + struct i915_vma *vma, + unsigned int len) +{ + struct reloc_cache *cache = &eb->reloc_cache; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + struct i915_vma *batch; + u32 *cmd; + int err; + + if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) { + obj = vma->obj; + if (obj->cache_dirty & ~obj->cache_coherent) + i915_gem_clflush_object(obj, 0); + obj->write_domain = 0; + } + + GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU); + + obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + cmd = i915_gem_object_pin_map(obj, + cache->has_llc ? + I915_MAP_FORCE_WB : + I915_MAP_FORCE_WC); + i915_gem_object_unpin_pages(obj); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_unmap; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + if (err) + goto err_unmap; + + rq = i915_request_create(eb->context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = i915_request_await_object(rq, vma->obj, true); + if (err) + goto err_request; + + err = eb->engine->emit_bb_start(rq, + batch->node.start, PAGE_SIZE, + cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); + if (err) + goto err_request; + + GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + rq->batch = batch; + i915_vma_unpin(batch); + + cache->rq = rq; + cache->rq_cmd = cmd; + cache->rq_size = 0; + + /* Return with batch mapping (cmd) still pinned */ + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_unpin: + i915_vma_unpin(batch); +err_unmap: + i915_gem_object_unpin_map(obj); + return err; +} + +static u32 *reloc_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + unsigned int len) +{ + struct reloc_cache *cache = &eb->reloc_cache; + u32 *cmd; + + if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) + reloc_gpu_flush(cache); + + if (unlikely(!cache->rq)) { + int err; + + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + + if (!intel_engine_can_store_dword(eb->engine)) + return ERR_PTR(-ENODEV); + + err = __reloc_gpu_alloc(eb, vma, len); + if (unlikely(err)) + return ERR_PTR(err); + } + + cmd = cache->rq_cmd + cache->rq_size; + cache->rq_size += len; + + return cmd; +} + +static u64 +relocate_entry(struct i915_vma *vma, + const struct drm_i915_gem_relocation_entry *reloc, + struct i915_execbuffer *eb, + const struct i915_vma *target) +{ + u64 offset = reloc->offset; + u64 target_offset = relocation_target(reloc, target); + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; + + if (!eb->reloc_cache.vaddr && + (DBG_FORCE_RELOC == FORCE_GPU_RELOC || + !reservation_object_test_signaled_rcu(vma->resv, true))) { + const unsigned int gen = eb->reloc_cache.gen; + unsigned int len; + u32 *batch; + u64 addr; + + if (wide) + len = offset & 7 ? 
8 : 5; + else if (gen >= 4) + len = 4; + else + len = 3; + + batch = reloc_gpu(eb, vma, len); + if (IS_ERR(batch)) + goto repeat; + + addr = gen8_canonical_addr(vma->node.start + offset); + if (wide) { + if (offset & 7) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_offset); + + addr = gen8_canonical_addr(addr + 4); + + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = upper_32_bits(target_offset); + } else { + *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_offset); + *batch++ = upper_32_bits(target_offset); + } + } else if (gen >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_offset; + } else if (gen >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_offset; + } else { + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *batch++ = addr; + *batch++ = target_offset; + } + + goto out; + } + +repeat: + vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_offset), + eb->reloc_cache.vaddr); + + if (wide) { + offset += sizeof(u32); + target_offset >>= 32; + wide = false; + goto repeat; + } + +out: + return target->node.start | UPDATE; +} + +static u64 +eb_relocate_entry(struct i915_execbuffer *eb, + struct i915_vma *vma, + const struct drm_i915_gem_relocation_entry *reloc) +{ + struct i915_vma *target; + int err; + + /* we've already hold a reference to all valid objects */ + target = eb_get_vma(eb, reloc->target_handle); + if (unlikely(!target)) + return -ENOENT; + + /* Validate that the target is in a valid r/w GPU domain */ + if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { + DRM_DEBUG("reloc with multiple write domains: " + "target %d offset %d " + "read %08x write %08x", + reloc->target_handle, + (int) reloc->offset, + reloc->read_domains, + reloc->write_domain); + return -EINVAL; + } + if (unlikely((reloc->write_domain | reloc->read_domains) + & ~I915_GEM_GPU_DOMAINS)) { + DRM_DEBUG("reloc with read/write non-GPU domains: " + "target %d offset %d " + "read %08x write %08x", + reloc->target_handle, + (int) reloc->offset, + reloc->read_domains, + reloc->write_domain); + return -EINVAL; + } + + if (reloc->write_domain) { + *target->exec_flags |= EXEC_OBJECT_WRITE; + + /* + * Sandybridge PPGTT errata: We need a global gtt mapping + * for MI and pipe_control writes because the gpu doesn't + * properly redirect them through the ppgtt for non_secure + * batchbuffers. + */ + if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && + IS_GEN(eb->i915, 6)) { + err = i915_vma_bind(target, target->obj->cache_level, + PIN_GLOBAL); + if (WARN_ONCE(err, + "Unexpected failure to bind target VMA!")) + return err; + } + } + + /* + * If the relocation already has the right value in it, no + * more work needs to be done. + */ + if (!DBG_FORCE_RELOC && + gen8_canonical_addr(target->node.start) == reloc->presumed_offset) + return 0; + + /* Check that the relocation address is valid... */ + if (unlikely(reloc->offset > + vma->size - (eb->reloc_cache.use_64bit_reloc ? 
8 : 4))) { + DRM_DEBUG("Relocation beyond object bounds: " + "target %d offset %d size %d.\n", + reloc->target_handle, + (int)reloc->offset, + (int)vma->size); + return -EINVAL; + } + if (unlikely(reloc->offset & 3)) { + DRM_DEBUG("Relocation not 4-byte aligned: " + "target %d offset %d.\n", + reloc->target_handle, + (int)reloc->offset); + return -EINVAL; + } + + /* + * If we write into the object, we need to force the synchronisation + * barrier, either with an asynchronous clflush or if we executed the + * patching using the GPU (though that should be serialised by the + * timeline). To be completely sure, and since we are required to + * do relocations we are already stalling, disable the user's opt + * out of our synchronisation. + */ + *vma->exec_flags &= ~EXEC_OBJECT_ASYNC; + + /* and update the user's relocation entry */ + return relocate_entry(vma, reloc, eb, target); +} + +static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) +{ +#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) + struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; + struct drm_i915_gem_relocation_entry __user *urelocs; + const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); + unsigned int remain; + + urelocs = u64_to_user_ptr(entry->relocs_ptr); + remain = entry->relocation_count; + if (unlikely(remain > N_RELOC(ULONG_MAX))) + return -EINVAL; + + /* + * We must check that the entire relocation array is safe + * to read. However, if the array is not writable the user loses + * the updated relocation values. + */ + if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs)))) + return -EFAULT; + + do { + struct drm_i915_gem_relocation_entry *r = stack; + unsigned int count = + min_t(unsigned int, remain, ARRAY_SIZE(stack)); + unsigned int copied; + + /* + * This is the fast path and we cannot handle a pagefault + * whilst holding the struct mutex lest the user pass in the + * relocations contained within a mmaped bo. For in such a case + * we, the page fault handler would call i915_gem_fault() and + * we would try to acquire the struct mutex again. Obviously + * this is bad and so lockdep complains vehemently. + */ + pagefault_disable(); + copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); + pagefault_enable(); + if (unlikely(copied)) { + remain = -EFAULT; + goto out; + } + + remain -= count; + do { + u64 offset = eb_relocate_entry(eb, vma, r); + + if (likely(offset == 0)) { + } else if ((s64)offset < 0) { + remain = (int)offset; + goto out; + } else { + /* + * Note that reporting an error now + * leaves everything in an inconsistent + * state as we have *already* changed + * the relocation value inside the + * object. As we have not changed the + * reloc.presumed_offset or will not + * change the execobject.offset, on the + * call we may not rewrite the value + * inside the object, leaving it + * dangling and causing a GPU hang. Unless + * userspace dynamically rebuilds the + * relocations on each execbuf rather than + * presume a static tree. + * + * We did previously check if the relocations + * were writable (access_ok), an error now + * would be a strange race with mprotect, + * having already demonstrated that we + * can read from this userspace address. 
+ */ + offset = gen8_canonical_addr(offset & ~UPDATE); + if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) { + remain = -EFAULT; + goto out; + } + } + } while (r++, --count); + urelocs += ARRAY_SIZE(stack); + } while (remain); +out: + reloc_cache_reset(&eb->reloc_cache); + return remain; +} + +static int +eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) +{ + const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); + struct drm_i915_gem_relocation_entry *relocs = + u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + unsigned int i; + int err; + + for (i = 0; i < entry->relocation_count; i++) { + u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); + + if ((s64)offset < 0) { + err = (int)offset; + goto err; + } + } + err = 0; +err: + reloc_cache_reset(&eb->reloc_cache); + return err; +} + +static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) +{ + const char __user *addr, *end; + unsigned long size; + char __maybe_unused c; + + size = entry->relocation_count; + if (size == 0) + return 0; + + if (size > N_RELOC(ULONG_MAX)) + return -EINVAL; + + addr = u64_to_user_ptr(entry->relocs_ptr); + size *= sizeof(struct drm_i915_gem_relocation_entry); + if (!access_ok(addr, size)) + return -EFAULT; + + end = addr + size; + for (; addr < end; addr += PAGE_SIZE) { + int err = __get_user(c, addr); + if (err) + return err; + } + return __get_user(c, end - 1); +} + +static int eb_copy_relocations(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + int err; + + for (i = 0; i < count; i++) { + const unsigned int nreloc = eb->exec[i].relocation_count; + struct drm_i915_gem_relocation_entry __user *urelocs; + struct drm_i915_gem_relocation_entry *relocs; + unsigned long size; + unsigned long copied; + + if (nreloc == 0) + continue; + + err = check_relocations(&eb->exec[i]); + if (err) + goto err; + + urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); + size = nreloc * sizeof(*relocs); + + relocs = kvmalloc_array(size, 1, GFP_KERNEL); + if (!relocs) { + err = -ENOMEM; + goto err; + } + + /* copy_from_user is limited to < 4GiB */ + copied = 0; + do { + unsigned int len = + min_t(u64, BIT_ULL(31), size - copied); + + if (__copy_from_user((char *)relocs + copied, + (char __user *)urelocs + copied, + len)) { +end_user: + user_access_end(); +end: + kvfree(relocs); + err = -EFAULT; + goto err; + } + + copied += len; + } while (copied < size); + + /* + * As we do not update the known relocation offsets after + * relocating (due to the complexities in lock handling), + * we need to mark them as invalid now so that we force the + * relocation processing next time. Just in case the target + * object is evicted and then rebound into its old + * presumed_offset before the next execbuffer - if that + * happened we would make the mistake of assuming that the + * relocations were valid. 
+	 */
+	if (!user_access_begin(urelocs, size))
+		goto end;
+
+	for (copied = 0; copied < nreloc; copied++)
+		unsafe_put_user(-1,
+				&urelocs[copied].presumed_offset,
+				end_user);
+	user_access_end();
+
+		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+	}
+
+	return 0;
+
+err:
+	while (i--) {
+		struct drm_i915_gem_relocation_entry *relocs =
+			u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+		if (eb->exec[i].relocation_count)
+			kvfree(relocs);
+	}
+	return err;
+}
+
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+
+	if (unlikely(i915_modparams.prefault_disable))
+		return 0;
+
+	for (i = 0; i < count; i++) {
+		int err;
+
+		err = check_relocations(&eb->exec[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
+{
+	struct drm_device *dev = &eb->i915->drm;
+	bool have_copy = false;
+	struct i915_vma *vma;
+	int err = 0;
+
+repeat:
+	if (signal_pending(current)) {
+		err = -ERESTARTSYS;
+		goto out;
+	}
+
+	/* We may process another execbuffer during the unlock... */
+	eb_reset_vmas(eb);
+	mutex_unlock(&dev->struct_mutex);
+
+	/*
+	 * We take 3 passes through the slowpath.
+	 *
+	 * 1 - we try to just prefault all the user relocation entries and
+	 * then attempt to reuse the atomic pagefault disabled fast path again.
+	 *
+	 * 2 - we copy the user entries to a local buffer here outside of the
+	 * lock and allow ourselves to wait upon any rendering before
+	 * relocations
+	 *
+	 * 3 - we already have a local copy of the relocation entries, but
+	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+	 */
+	if (!err) {
+		err = eb_prefault_relocations(eb);
+	} else if (!have_copy) {
+		err = eb_copy_relocations(eb);
+		have_copy = err == 0;
+	} else {
+		cond_resched();
+		err = 0;
+	}
+	if (err) {
+		mutex_lock(&dev->struct_mutex);
+		goto out;
+	}
+
+	/* A frequent cause for EAGAIN are currently unavailable client pages */
+	flush_workqueue(eb->i915->mm.userptr_wq);
+
+	err = i915_mutex_lock_interruptible(dev);
+	if (err) {
+		mutex_lock(&dev->struct_mutex);
+		goto out;
+	}
+
+	/* reacquire the objects */
+	err = eb_lookup_vmas(eb);
+	if (err)
+		goto err;
+
+	GEM_BUG_ON(!eb->batch);
+
+	list_for_each_entry(vma, &eb->relocs, reloc_link) {
+		if (!have_copy) {
+			pagefault_disable();
+			err = eb_relocate_vma(eb, vma);
+			pagefault_enable();
+			if (err)
+				goto repeat;
+		} else {
+			err = eb_relocate_vma_slow(eb, vma);
+			if (err)
+				goto err;
+		}
+	}
+
+	/*
+	 * Leave the user relocations as are, this is the painfully slow path,
+	 * and we want to avoid the complication of dropping the lock whilst
+	 * having buffers reserved in the aperture and so causing spurious
+	 * ENOSPC for random operations.
+	 */
+
+err:
+	if (err == -EAGAIN)
+		goto repeat;
+
+out:
+	if (have_copy) {
+		const unsigned int count = eb->buffer_count;
+		unsigned int i;
+
+		for (i = 0; i < count; i++) {
+			const struct drm_i915_gem_exec_object2 *entry =
+				&eb->exec[i];
+			struct drm_i915_gem_relocation_entry *relocs;
+
+			if (!entry->relocation_count)
+				continue;
+
+			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+			kvfree(relocs);
+		}
+	}
+
+	return err;
+}
+
+static int eb_relocate(struct i915_execbuffer *eb)
+{
+	if (eb_lookup_vmas(eb))
+		goto slow;
+
+	/* The objects are in their final locations, apply the relocations. */
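+	/*
+	 * __EXEC_HAS_RELOC is set whenever userspace did not promise
+	 * NO_RELOC, and is set again if any object had to be moved during
+	 * reservation, so in the steady state (NO_RELOC with stable
+	 * offsets) we skip the walk below entirely. eb_relocate_vma()
+	 * copies the user entries with pagefaults disabled; if that would
+	 * fault we fall back to eb_relocate_slow(), which drops
+	 * struct_mutex and copies the relocations before retrying.
+	 */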
+	if (eb->args->flags & __EXEC_HAS_RELOC) {
+		struct i915_vma *vma;
+
+		list_for_each_entry(vma, &eb->relocs, reloc_link) {
+			if (eb_relocate_vma(eb, vma))
+				goto slow;
+		}
+	}
+
+	return 0;
+
+slow:
+	return eb_relocate_slow(eb);
+}
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < count; i++) {
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		if (flags & EXEC_OBJECT_CAPTURE) {
+			struct i915_capture_list *capture;
+
+			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+			if (unlikely(!capture))
+				return -ENOMEM;
+
+			capture->next = eb->request->capture_list;
+			capture->vma = eb->vma[i];
+			eb->request->capture_list = capture;
+		}
+
+		/*
+		 * If the GPU is not _reading_ through the CPU cache, we need
+		 * to make sure that any writes (both previous GPU writes from
+		 * before a change in snooping levels and normal CPU writes)
+		 * caught in that cache are flushed to main memory.
+		 *
+		 * We want to say
+		 *   obj->cache_dirty &&
+		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+		 * but gcc's optimiser doesn't handle that as well and emits
+		 * two jumps instead of one. Maybe one day...
+		 */
+		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
+			if (i915_gem_clflush_object(obj, 0))
+				flags &= ~EXEC_OBJECT_ASYNC;
+		}
+
+		if (flags & EXEC_OBJECT_ASYNC)
+			continue;
+
+		err = i915_request_await_object
+			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < count; i++) {
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
+
+		err = i915_vma_move_to_active(vma, eb->request, flags);
+		if (unlikely(err)) {
+			i915_request_skip(eb->request, err);
+			return err;
+		}
+
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
+			i915_vma_put(vma);
+	}
+	eb->exec = NULL;
+
+	/* Unconditionally flush any chipset caches (for streaming writes).
*/ + i915_gem_chipset_flush(eb->i915); + + return 0; +} + +static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +{ + if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) + return false; + + /* Kernel clipping was a DRI1 misfeature */ + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (exec->num_cliprects || exec->cliprects_ptr) + return false; + } + + if (exec->DR4 == 0xffffffff) { + DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + exec->DR4 = 0; + } + if (exec->DR1 || exec->DR4) + return false; + + if ((exec->batch_start_offset | exec->batch_len) & 0x7) + return false; + + return true; +} + +static int i915_reset_gen7_sol_offsets(struct i915_request *rq) +{ + u32 *cs; + int i; + + if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { + DRM_DEBUG("sol reset is gen7/rcs only\n"); + return -EINVAL; + } + + cs = intel_ring_begin(rq, 4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(4); + for (i = 0; i < 4; i++) { + *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); + *cs++ = 0; + } + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +{ + struct drm_i915_gem_object *shadow_batch_obj; + struct i915_vma *vma; + int err; + + shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, + PAGE_ALIGN(eb->batch_len)); + if (IS_ERR(shadow_batch_obj)) + return ERR_CAST(shadow_batch_obj); + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->obj, + shadow_batch_obj, + eb->batch_start_offset, + eb->batch_len, + is_master); + if (err) { + if (err == -EACCES) /* unhandled chained batch */ + vma = NULL; + else + vma = ERR_PTR(err); + goto out; + } + + vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + goto out; + + eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->flags[eb->buffer_count] = + __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; + vma->exec_flags = &eb->flags[eb->buffer_count]; + eb->buffer_count++; + +out: + i915_gem_object_unpin_pages(shadow_batch_obj); + return vma; +} + +static void +add_to_client(struct i915_request *rq, struct drm_file *file) +{ + rq->file_priv = file->driver_priv; + list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); +} + +static int eb_submit(struct i915_execbuffer *eb) +{ + int err; + + err = eb_move_to_gpu(eb); + if (err) + return err; + + if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { + err = i915_reset_gen7_sol_offsets(eb->request); + if (err) + return err; + } + + /* + * After we completed waiting for other engines (using HW semaphores) + * then we can signal that this request/batch is ready to run. This + * allows us to determine if the batch is still waiting on the GPU + * or actually running by checking the breadcrumb. + */ + if (eb->engine->emit_init_breadcrumb) { + err = eb->engine->emit_init_breadcrumb(eb->request); + if (err) + return err; + } + + err = eb->engine->emit_bb_start(eb->request, + eb->batch->node.start + + eb->batch_start_offset, + eb->batch_len, + eb->batch_flags); + if (err) + return err; + + return 0; +} + +/* + * Find one BSD ring to dispatch the corresponding BSD command. + * The engine index is returned. + */ +static unsigned int +gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + + /* Check whether the file_priv has already selected one ring. 
*/ + if ((int)file_priv->bsd_engine < 0) + file_priv->bsd_engine = atomic_fetch_xor(1, + &dev_priv->mm.bsd_engine_dispatch_index); + + return file_priv->bsd_engine; +} + +static const enum intel_engine_id user_ring_map[] = { + [I915_EXEC_DEFAULT] = RCS0, + [I915_EXEC_RENDER] = RCS0, + [I915_EXEC_BLT] = BCS0, + [I915_EXEC_BSD] = VCS0, + [I915_EXEC_VEBOX] = VECS0 +}; + +static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +{ + int err; + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = i915_terminally_wedged(eb->i915); + if (err) + return err; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + err = intel_context_pin(ce); + if (err) + return err; + + eb->engine = ce->engine; + eb->context = ce; + return 0; +} + +static void eb_unpin_context(struct i915_execbuffer *eb) +{ + intel_context_unpin(eb->context); +} + +static unsigned int +eb_select_legacy_ring(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct drm_i915_private *i915 = eb->i915; + unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + + if (user_ring_id != I915_EXEC_BSD && + (args->flags & I915_EXEC_BSD_MASK)) { + DRM_DEBUG("execbuf with non bsd ring but with invalid " + "bsd dispatch flags: %d\n", (int)(args->flags)); + return -1; + } + + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { + unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; + + if (bsd_idx == I915_EXEC_BSD_DEFAULT) { + bsd_idx = gen8_dispatch_bsd_engine(i915, file); + } else if (bsd_idx >= I915_EXEC_BSD_RING1 && + bsd_idx <= I915_EXEC_BSD_RING2) { + bsd_idx >>= I915_EXEC_BSD_SHIFT; + bsd_idx--; + } else { + DRM_DEBUG("execbuf with unknown bsd ring: %u\n", + bsd_idx); + return -1; + } + + return _VCS(bsd_idx); + } + + if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { + DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); + return -1; + } + + return user_ring_map[user_ring_id]; +} + +static int +eb_select_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct intel_context *ce; + unsigned int idx; + int err; + + if (i915_gem_context_user_engines(eb->gem_context)) + idx = args->flags & I915_EXEC_RING_MASK; + else + idx = eb_select_legacy_ring(eb, file, args); + + ce = i915_gem_context_get_engine(eb->gem_context, idx); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = eb_pin_context(eb, ce); + intel_context_put(ce); + + return err; +} + +static void +__free_fence_array(struct drm_syncobj **fences, unsigned int n) +{ + while (n--) + drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + kvfree(fences); +} + +static struct drm_syncobj ** +get_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct drm_file *file) +{ + const unsigned long nfences = args->num_cliprects; + struct drm_i915_gem_exec_fence __user *user; + struct drm_syncobj **fences; + unsigned long n; + int err; + + if (!(args->flags & I915_EXEC_FENCE_ARRAY)) + return NULL; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (nfences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*fences))) + return ERR_PTR(-EINVAL); + + user = u64_to_user_ptr(args->cliprects_ptr); + if (!access_ok(user, nfences * sizeof(*user))) + return ERR_PTR(-EFAULT); + + fences 
= kvmalloc_array(nfences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + for (n = 0; n < nfences; n++) { + struct drm_i915_gem_exec_fence fence; + struct drm_syncobj *syncobj; + + if (__copy_from_user(&fence, user++, sizeof(fence))) { + err = -EFAULT; + goto err; + } + + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + + syncobj = drm_syncobj_find(file, fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -ENOENT; + goto err; + } + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + } + + return fences; + +err: + __free_fence_array(fences, n); + return ERR_PTR(err); +} + +static void +put_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct drm_syncobj **fences) +{ + if (fences) + __free_fence_array(fences, args->num_cliprects); +} + +static int +await_fence_array(struct i915_execbuffer *eb, + struct drm_syncobj **fences) +{ + const unsigned int nfences = eb->args->num_cliprects; + unsigned int n; + int err; + + for (n = 0; n < nfences; n++) { + struct drm_syncobj *syncobj; + struct dma_fence *fence; + unsigned int flags; + + syncobj = ptr_unpack_bits(fences[n], &flags, 2); + if (!(flags & I915_EXEC_FENCE_WAIT)) + continue; + + fence = drm_syncobj_fence_get(syncobj); + if (!fence) + return -EINVAL; + + err = i915_request_await_dma_fence(eb->request, fence); + dma_fence_put(fence); + if (err < 0) + return err; + } + + return 0; +} + +static void +signal_fence_array(struct i915_execbuffer *eb, + struct drm_syncobj **fences) +{ + const unsigned int nfences = eb->args->num_cliprects; + struct dma_fence * const fence = &eb->request->fence; + unsigned int n; + + for (n = 0; n < nfences; n++) { + struct drm_syncobj *syncobj; + unsigned int flags; + + syncobj = ptr_unpack_bits(fences[n], &flags, 2); + if (!(flags & I915_EXEC_FENCE_SIGNAL)) + continue; + + drm_syncobj_replace_fence(syncobj, fence); + } +} + +static int +i915_gem_do_execbuffer(struct drm_device *dev, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args, + struct drm_i915_gem_exec_object2 *exec, + struct drm_syncobj **fences) +{ + struct i915_execbuffer eb; + struct dma_fence *in_fence = NULL; + struct dma_fence *exec_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; + int err; + + BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & + ~__EXEC_OBJECT_UNKNOWN_FLAGS); + + eb.i915 = to_i915(dev); + eb.file = file; + eb.args = args; + if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) + args->flags |= __EXEC_HAS_RELOC; + + eb.exec = exec; + eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1); + eb.vma[0] = NULL; + eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1); + + eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; + reloc_cache_init(&eb.reloc_cache, eb.i915); + + eb.buffer_count = args->buffer_count; + eb.batch_start_offset = args->batch_start_offset; + eb.batch_len = args->batch_len; + + eb.batch_flags = 0; + if (args->flags & I915_EXEC_SECURE) { + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) + return -EPERM; + + eb.batch_flags |= I915_DISPATCH_SECURE; + } + if (args->flags & I915_EXEC_IS_PINNED) + eb.batch_flags |= I915_DISPATCH_PINNED; + + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) + return -EINVAL; + } 
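+
+	/*
+	 * Note the asymmetry with FENCE_SUBMIT below: an in-fence gates
+	 * execution of this request on *completion* of the given fence,
+	 * whereas a submit-fence only waits for the other request to be
+	 * *submitted* to hardware, which the virtual-engine series uses
+	 * (via bond_execute) to co-schedule bonded requests.
+	 */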
+ + if (args->flags & I915_EXEC_FENCE_SUBMIT) { + if (in_fence) { + err = -EINVAL; + goto err_in_fence; + } + + exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!exec_fence) { + err = -EINVAL; + goto err_in_fence; + } + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + err = out_fence_fd; + goto err_exec_fence; + } + } + + err = eb_create(&eb); + if (err) + goto err_out_fence; + + GEM_BUG_ON(!eb.lut_size); + + err = eb_select_context(&eb); + if (unlikely(err)) + goto err_destroy; + + /* + * Take a local wakeref for preparing to dispatch the execbuf as + * we expect to access the hardware fairly frequently in the + * process. Upon first dispatch, we acquire another prolonged + * wakeref that we hold until the GPU has been idle for at least + * 100ms. + */ + intel_gt_pm_get(eb.i915); + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto err_rpm; + + err = eb_select_engine(&eb, file, args); + if (unlikely(err)) + goto err_unlock; + + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + if (unlikely(err)) + goto err_engine; + + err = eb_relocate(&eb); + if (err) { + /* + * If the user expects the execobject.offset and + * reloc.presumed_offset to be an exact match, + * as for using NO_RELOC, then we cannot update + * the execobject.offset until we have completed + * relocation. + */ + args->flags &= ~__EXEC_HAS_RELOC; + goto err_vma; + } + + if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) { + DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); + err = -EINVAL; + goto err_vma; + } + if (eb.batch_start_offset > eb.batch->size || + eb.batch_len > eb.batch->size - eb.batch_start_offset) { + DRM_DEBUG("Attempting to use out-of-bounds batch\n"); + err = -EINVAL; + goto err_vma; + } + + if (eb_use_cmdparser(&eb)) { + struct i915_vma *vma; + + vma = eb_parse(&eb, drm_is_current_master(file)); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_vma; + } + + if (vma) { + /* + * Batch parsed and accepted: + * + * Set the DISPATCH_SECURE bit to remove the NON_SECURE + * bit from MI_BATCH_BUFFER_START commands issued in + * the dispatch_execbuffer implementations. We + * specifically don't want that set on batches the + * command parser has accepted. + */ + eb.batch_flags |= I915_DISPATCH_SECURE; + eb.batch_start_offset = 0; + eb.batch = vma; + } + } + + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; + + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. + * hsw should have this fixed, but bdw mucks it up again. */ + if (eb.batch_flags & I915_DISPATCH_SECURE) { + struct i915_vma *vma; + + /* + * So on first glance it looks freaky that we pin the batch here + * outside of the reservation loop. But: + * - The batch is already pinned into the relevant ppgtt, so we + * already have the backing storage fully allocated. + * - No other BO uses the global gtt (well contexts, but meh), + * so we don't really have issues with multiple objects not + * fitting due to fragmentation. + * So this is actually safe. + */ + vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_vma; + } + + eb.batch = vma; + } + + /* All GPU relocation batches must be submitted prior to the user rq */ + GEM_BUG_ON(eb.reloc_cache.rq); + + /* Allocate a request for this batch buffer nice and early. 
*/
+	eb.request = i915_request_create(eb.context);
+	if (IS_ERR(eb.request)) {
+		err = PTR_ERR(eb.request);
+		goto err_batch_unpin;
+	}
+
+	if (in_fence) {
+		err = i915_request_await_dma_fence(eb.request, in_fence);
+		if (err < 0)
+			goto err_request;
+	}
+
+	if (exec_fence) {
+		err = i915_request_await_execution(eb.request, exec_fence,
+						   eb.engine->bond_execute);
+		if (err < 0)
+			goto err_request;
+	}
+
+	if (fences) {
+		err = await_fence_array(&eb, fences);
+		if (err)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(&eb.request->fence);
+		if (!out_fence) {
+			err = -ENOMEM;
+			goto err_request;
+		}
+	}
+
+	/*
+	 * Whilst this request exists, batch_obj will be on the
+	 * active_list, and so will hold the active reference. Only when this
+	 * request is retired will the batch_obj be moved onto the
+	 * inactive_list and lose its active reference. Hence we do not need
+	 * to explicitly hold another reference here.
+	 */
+	eb.request->batch = eb.batch;
+
+	trace_i915_request_queue(eb.request, eb.batch_flags);
+	err = eb_submit(&eb);
+err_request:
+	add_to_client(eb.request, file);
+	i915_request_add(eb.request);
+
+	if (fences)
+		signal_fence_array(&eb, fences);
+
+	if (out_fence) {
+		if (err == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
+
+err_batch_unpin:
+	if (eb.batch_flags & I915_DISPATCH_SECURE)
+		i915_vma_unpin(eb.batch);
+err_vma:
+	if (eb.exec)
+		eb_release_vmas(&eb);
+err_engine:
+	eb_unpin_context(&eb);
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+err_rpm:
+	intel_gt_pm_put(eb.i915);
+	i915_gem_context_put(eb.gem_context);
+err_destroy:
+	eb_destroy(&eb);
+err_out_fence:
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+err_exec_fence:
+	dma_fence_put(exec_fence);
+err_in_fence:
+	dma_fence_put(in_fence);
+	return err;
+}
+
+static size_t eb_element_size(void)
+{
+	return (sizeof(struct drm_i915_gem_exec_object2) +
+		sizeof(struct i915_vma *) +
+		sizeof(unsigned int));
+}
+
+static bool check_buffer_count(size_t count)
+{
+	const size_t sz = eb_element_size();
+
+	/*
+	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
+	 * array size (see eb_create()). Otherwise, we can accept an array as
+	 * large as can be addressed (though use large arrays at your peril)!
+	 */
+
+	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
+}
+
+/*
+ * Legacy execbuffer just creates an exec2 list from the original exec object
+ * list array and passes it to the real function.
+ */ +int +i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_execbuffer *args = data; + struct drm_i915_gem_execbuffer2 exec2; + struct drm_i915_gem_exec_object *exec_list = NULL; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + const size_t count = args->buffer_count; + unsigned int i; + int err; + + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); + return -EINVAL; + } + + exec2.buffers_ptr = args->buffers_ptr; + exec2.buffer_count = args->buffer_count; + exec2.batch_start_offset = args->batch_start_offset; + exec2.batch_len = args->batch_len; + exec2.DR1 = args->DR1; + exec2.DR4 = args->DR4; + exec2.num_cliprects = args->num_cliprects; + exec2.cliprects_ptr = args->cliprects_ptr; + exec2.flags = I915_EXEC_RENDER; + i915_execbuffer2_set_context_id(exec2, 0); + + if (!i915_gem_check_execbuffer(&exec2)) + return -EINVAL; + + /* Copy in the exec list from userland */ + exec_list = kvmalloc_array(count, sizeof(*exec_list), + __GFP_NOWARN | GFP_KERNEL); + exec2_list = kvmalloc_array(count + 1, eb_element_size(), + __GFP_NOWARN | GFP_KERNEL); + if (exec_list == NULL || exec2_list == NULL) { + DRM_DEBUG("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + kvfree(exec_list); + kvfree(exec2_list); + return -ENOMEM; + } + err = copy_from_user(exec_list, + u64_to_user_ptr(args->buffers_ptr), + sizeof(*exec_list) * count); + if (err) { + DRM_DEBUG("copy %d exec entries failed %d\n", + args->buffer_count, err); + kvfree(exec_list); + kvfree(exec2_list); + return -EFAULT; + } + + for (i = 0; i < args->buffer_count; i++) { + exec2_list[i].handle = exec_list[i].handle; + exec2_list[i].relocation_count = exec_list[i].relocation_count; + exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; + exec2_list[i].alignment = exec_list[i].alignment; + exec2_list[i].offset = exec_list[i].offset; + if (INTEL_GEN(to_i915(dev)) < 4) + exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; + else + exec2_list[i].flags = 0; + } + + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); + if (exec2.flags & __EXEC_HAS_RELOC) { + struct drm_i915_gem_exec_object __user *user_exec_list = + u64_to_user_ptr(args->buffers_ptr); + + /* Copy the new buffer offsets back to the user's exec list. 
*/ + for (i = 0; i < args->buffer_count; i++) { + if (!(exec2_list[i].offset & UPDATE)) + continue; + + exec2_list[i].offset = + gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); + exec2_list[i].offset &= PIN_OFFSET_MASK; + if (__copy_to_user(&user_exec_list[i].offset, + &exec2_list[i].offset, + sizeof(user_exec_list[i].offset))) + break; + } + } + + kvfree(exec_list); + kvfree(exec2_list); + return err; +} + +int +i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec2_list; + struct drm_syncobj **fences = NULL; + const size_t count = args->buffer_count; + int err; + + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); + return -EINVAL; + } + + if (!i915_gem_check_execbuffer(args)) + return -EINVAL; + + /* Allocate an extra slot for use by the command parser */ + exec2_list = kvmalloc_array(count + 1, eb_element_size(), + __GFP_NOWARN | GFP_KERNEL); + if (exec2_list == NULL) { + DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", + count); + return -ENOMEM; + } + if (copy_from_user(exec2_list, + u64_to_user_ptr(args->buffers_ptr), + sizeof(*exec2_list) * count)) { + DRM_DEBUG("copy %zd exec entries failed\n", count); + kvfree(exec2_list); + return -EFAULT; + } + + if (args->flags & I915_EXEC_FENCE_ARRAY) { + fences = get_fence_array(args, file); + if (IS_ERR(fences)) { + kvfree(exec2_list); + return PTR_ERR(fences); + } + } + + err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); + + /* + * Now that we have begun execution of the batchbuffer, we ignore + * any new error after this point. Also given that we have already + * updated the associated relocations, we try to write out the current + * object locations irrespective of any error. + */ + if (args->flags & __EXEC_HAS_RELOC) { + struct drm_i915_gem_exec_object2 __user *user_exec_list = + u64_to_user_ptr(args->buffers_ptr); + unsigned int i; + + /* Copy the new buffer offsets back to the user's exec list. */ + /* + * Note: count * sizeof(*user_exec_list) does not overflow, + * because we checked 'count' in check_buffer_count(). + * + * And this range already got effectively checked earlier + * when we did the "copy_from_user()" above. 
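+	 *
+	 * (Editor's sketch of the access pattern used below, assuming the
+	 * usual user_access_begin()/unsafe_put_user() contract:
+	 *
+	 *	if (user_access_begin(ptr, len)) {
+	 *		for (...)
+	 *			unsafe_put_user(val, &ptr[i], efault);
+	 *		user_access_end();
+	 *	}
+	 *
+	 * i.e. a single STAC/CLAC pair brackets the whole copy-back loop
+	 * rather than one pair per __copy_to_user() call, which is the
+	 * point of switching away from the legacy loop above.)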
+ */ + if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list))) + goto end; + + for (i = 0; i < args->buffer_count; i++) { + if (!(exec2_list[i].offset & UPDATE)) + continue; + + exec2_list[i].offset = + gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); + unsafe_put_user(exec2_list[i].offset, + &user_exec_list[i].offset, + end_user); + } +end_user: + user_access_end(); +end:; + } + + args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; + put_fence_array(args, fences); + kvfree(exec2_list); + return err; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c new file mode 100644 index 000000000000..85a05a2435e9 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -0,0 +1,197 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2016 Intel Corporation + */ + +#include +#include +#include + +#include + +#include "i915_drv.h" +#include "i915_gem.h" +#include "i915_gem_object.h" +#include "i915_utils.h" + +#define QUIET (__GFP_NORETRY | __GFP_NOWARN) +#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) + +static void internal_free_pages(struct sg_table *st) +{ + struct scatterlist *sg; + + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } + + sg_free_table(st); + kfree(st); +} + +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + unsigned int npages; + int max_order; + gfp_t gfp; + + max_order = MAX_ORDER; +#ifdef CONFIG_SWIOTLB + if (swiotlb_nr_tbl()) { + unsigned int max_segment; + + max_segment = swiotlb_max_segment(); + if (max_segment) { + max_segment = max_t(unsigned int, max_segment, + PAGE_SIZE) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); + } + } +#endif + + gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; + if (IS_I965GM(i915) || IS_I965G(i915)) { + /* 965gm cannot relocate objects above 4GiB. */ + gfp &= ~__GFP_HIGHMEM; + gfp |= __GFP_DMA32; + } + +create_st: + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + npages = obj->base.size / PAGE_SIZE; + if (sg_alloc_table(st, npages, GFP_KERNEL)) { + kfree(st); + return -ENOMEM; + } + + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + + do { + int order = min(fls(npages) - 1, max_order); + struct page *page; + + do { + page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), + order); + if (page) + break; + if (!order--) + goto err; + + /* Limit subsequent allocations as well */ + max_order = order; + } while (1); + + sg_set_page(sg, page, PAGE_SIZE << order, 0); + sg_page_sizes |= PAGE_SIZE << order; + st->nents++; + + npages -= 1 << order; + if (!npages) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while (1); + + if (i915_gem_gtt_prepare_pages(obj, st)) { + /* Failed to dma-map try again with single page sg segments */ + if (get_order(st->sgl->length)) { + internal_free_pages(st); + max_order = 0; + goto create_st; + } + goto err; + } + + /* Mark the pages as dontneed whilst they are still pinned. As soon + * as they are unpinned they are allowed to be reaped by the shrinker, + * and the caller is expected to repopulate - the contents of this + * object are only valid whilst active and pinned. 
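+	 *
+	 * (Editor's note: the madv lifecycle is thus DONTNEED while the
+	 * pages are pinned and WILLNEED again once they are released, e.g.
+	 *
+	 *	obj->mm.madv = I915_MADV_DONTNEED;	// get_pages, below
+	 *	...
+	 *	obj->mm.madv = I915_MADV_WILLNEED;	// put_pages
+	 *
+	 * so the shrinker may truncate the object whenever it is unpinned.)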
+ */ + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + internal_free_pages(st); + + return -ENOMEM; +} + +static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + internal_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = i915_gem_object_get_pages_internal, + .put_pages = i915_gem_object_put_pages_internal, +}; + +/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) +{ + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_object_internal_ops); + + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + + cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4ed28ac9ab3a..457e694a5c3f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -23,8 +23,9 @@ */ #include "i915_drv.h" -#include "i915_gem_object.h" #include "i915_gem_clflush.h" +#include "i915_gem_context.h" +#include "i915_gem_object.h" #include "i915_globals.h" #include "intel_frontbuffer.h" @@ -442,3 +443,10 @@ int __init i915_global_objects_init(void) i915_global_register(&global.base); return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/huge_gem_object.c" +#include "selftests/huge_pages.c" +#include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c new file mode 100644 index 000000000000..ad662e558dfb --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -0,0 +1,251 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_pm.h" +#include "gt/intel_gt_pm.h" + +#include "i915_drv.h" +#include "i915_globals.h" + +static void i915_gem_park(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + i915_gem_batch_pool_fini(&engine->batch_pool); + + i915_timelines_park(i915); + i915_vma_parked(i915); + + i915_globals_park(); +} + +static void idle_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.idle_work); + bool restart = true; + + cancel_delayed_work(&i915->gem.retire_work); + mutex_lock(&i915->drm.struct_mutex); + + intel_wakeref_lock(&i915->gt.wakeref); + if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) { + i915_gem_park(i915); + restart = false; + } + intel_wakeref_unlock(&i915->gt.wakeref); + + mutex_unlock(&i915->drm.struct_mutex); + if (restart) + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +static void retire_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.retire_work.work); + + /* Come back later if the device is busy... 
*/
+	if (mutex_trylock(&i915->drm.struct_mutex)) {
+		i915_retire_requests(i915);
+		mutex_unlock(&i915->drm.struct_mutex);
+	}
+
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
+static int pm_notifier(struct notifier_block *nb,
+		       unsigned long action,
+		       void *data)
+{
+	struct drm_i915_private *i915 =
+		container_of(nb, typeof(*i915), gem.pm_notifier);
+
+	switch (action) {
+	case INTEL_GT_UNPARK:
+		i915_globals_unpark();
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+		break;
+
+	case INTEL_GT_PARK:
+		queue_work(i915->wq, &i915->gem.idle_work);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	bool result = true;
+
+	do {
+		if (i915_gem_wait_for_idle(i915,
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_FOR_IDLE_BOOST,
+					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+			/* XXX hide warning from gem_eio */
+			if (i915_modparams.reset) {
+				dev_err(i915->drm.dev,
+					"Failed to idle engines, declaring wedged!\n");
+				GEM_TRACE_DUMP();
+			}
+
+			/*
+			 * Forcibly cancel outstanding work and leave
+			 * the gpu quiet.
+			 */
+			i915_gem_set_wedged(i915);
+			result = false;
+		}
+	} while (i915_retire_requests(i915) && result);
+
+	GEM_BUG_ON(i915->gt.awake);
+	return result;
+}
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915)
+{
+	return switch_to_kernel_context_sync(i915);
+}
+
+void i915_gem_suspend(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	intel_wakeref_auto(&i915->mm.userfault_wakeref, 0);
+	flush_workqueue(i915->wq);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/*
+	 * We have to flush all the executing contexts to main memory so
+	 * that they can be saved in the hibernation image. To ensure the
+	 * last context image is coherent, we have to switch away from it.
+	 * That leaves the i915->kernel_context still active when
+	 * we actually suspend, and its image in memory may not match the GPU
+	 * state. Fortunately, the kernel_context is disposable and we do
+	 * not rely on its state.
+	 */
+	switch_to_kernel_context_sync(i915);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	/*
+	 * Assert that we successfully flushed all the work and
+	 * reset the GPU back to its idle, low power state.
+	 */
+	GEM_BUG_ON(i915->gt.awake);
+	flush_work(&i915->gem.idle_work);
+
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+	i915_gem_drain_freed_objects(i915);
+
+	intel_uc_suspend(i915);
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
+	struct drm_i915_gem_object *obj;
+	struct list_head *phases[] = {
+		&i915->mm.unbound_list,
+		&i915->mm.bound_list,
+		NULL
+	}, **phase;
+
+	/*
+	 * Neither the BIOS, ourselves nor any other kernel
+	 * expects the system to be in execlists mode on startup,
+	 * so we need to reset the GPU back to legacy mode. And the only
+	 * known way to disable logical contexts is through a GPU reset.
+	 *
+	 * So in order to leave the system in a known default configuration,
+	 * always reset the GPU upon unload and suspend. Afterwards we then
+	 * clean up the GEM state tracking, flushing off the requests and
+	 * leaving the system in a known idle state.
+	 *
+	 * Note that it is of the utmost importance that the GPU is idle and
+	 * all stray writes are flushed *before* we dismantle the backing
+	 * storage for the pinned objects.
+	 *
+	 * However, since we are uncertain that resetting the GPU on older
+	 * machines is a good idea, we don't - just in case it leaves the
+	 * machine in an unusable condition.
+ */ + + mutex_lock(&i915->drm.struct_mutex); + for (phase = phases; *phase; phase++) { + list_for_each_entry(obj, *phase, mm.link) + WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + } + mutex_unlock(&i915->drm.struct_mutex); + + intel_uc_sanitize(i915); + i915_gem_sanitize(i915); +} + +void i915_gem_resume(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + WARN_ON(i915->gt.awake); + + mutex_lock(&i915->drm.struct_mutex); + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); + + /* + * As we didn't flush the kernel context before suspend, we cannot + * guarantee that the context image is complete. So let's just reset + * it and start again. + */ + intel_gt_resume(i915); + + if (i915_gem_init_hw(i915)) + goto err_wedged; + + intel_uc_resume(i915); + + /* Always reload a context for powersaving. */ + if (!i915_gem_load_power_context(i915)) + goto err_wedged; + +out_unlock: + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); + return; + +err_wedged: + if (!i915_reset_failed(i915)) { + dev_err(i915->drm.dev, + "Failed to re-initialize GPU, declaring it wedged!\n"); + i915_gem_set_wedged(i915); + } + goto out_unlock; +} + +void i915_gem_init__pm(struct drm_i915_private *i915) +{ + INIT_WORK(&i915->gem.idle_work, idle_work_handler); + INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); + + i915->gem.pm_notifier.notifier_call = pm_notifier; + blocking_notifier_chain_register(&i915->gt.pm_notifications, + &i915->gem.pm_notifier); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h new file mode 100644 index 000000000000..6f7d5d11ac3b --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_PM_H__ +#define __I915_GEM_PM_H__ + +#include + +struct drm_i915_private; +struct work_struct; + +void i915_gem_init__pm(struct drm_i915_private *i915); + +bool i915_gem_load_power_context(struct drm_i915_private *i915); +void i915_gem_resume(struct drm_i915_private *i915); + +void i915_gem_idle_work_handler(struct work_struct *work); + +void i915_gem_suspend(struct drm_i915_private *i915); +void i915_gem_suspend_late(struct drm_i915_private *i915); + +#endif /* __I915_GEM_PM_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c new file mode 100644 index 000000000000..cd42299f019a --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -0,0 +1,555 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2015 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i915_trace.h" + +static bool shrinker_lock(struct drm_i915_private *i915, + unsigned int flags, + bool *unlock) +{ + struct mutex *m = &i915->drm.struct_mutex; + + switch (mutex_trylock_recursive(m)) { + case MUTEX_TRYLOCK_RECURSIVE: + *unlock = false; + return true; + + case MUTEX_TRYLOCK_FAILED: + *unlock = false; + if (flags & I915_SHRINK_ACTIVE && + mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) + *unlock = true; + return *unlock; + + case MUTEX_TRYLOCK_SUCCESS: + *unlock = true; + return true; + } + + BUG(); +} + +static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) +{ + if (!unlock) + return; + + mutex_unlock(&i915->drm.struct_mutex); +} + +static bool 
+swap_available(void)
+{
+	return get_nr_swap_pages() > 0;
+}
+
+static bool can_release_pages(struct drm_i915_gem_object *obj)
+{
+	/* Consider only shrinkable objects. */
+	if (!i915_gem_object_is_shrinkable(obj))
+		return false;
+
+	/* Only report true if by unbinding the object and putting its pages
+	 * we can actually make forward progress towards freeing physical
+	 * pages.
+	 *
+	 * If the pages are pinned for any other reason than being bound
+	 * to the GPU, simply unbinding from the GPU is not going to succeed
+	 * in releasing our pin count on the pages themselves.
+	 */
+	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
+		return false;
+
+	/* If any vma are "permanently" pinned, it will prevent us from
+	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
+	 * a permanent pin, along with a few others like the context objects.
+	 * To simplify the scan, and to avoid walking the list of vma under the
+	 * object, we just check the count of its permanent pins.
+	 */
+	if (READ_ONCE(obj->pin_global))
+		return false;
+
+	/* We can only return physical pages to the system if we can either
+	 * discard the contents (because the user has marked them as being
+	 * purgeable) or if we can move their contents out to swap.
+	 */
+	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
+}
+
+static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
+{
+	if (i915_gem_object_unbind(obj) == 0)
+		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	return !i915_gem_object_has_pages(obj);
+}
+
+static void try_to_writeback(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		i915_gem_object_truncate(obj);
+		/* fall through */
+	case __I915_MADV_PURGED:
+		return;
+	}
+
+	if (flags & I915_SHRINK_WRITEBACK)
+		i915_gem_object_writeback(obj);
+}
+
+/**
+ * i915_gem_shrink - Shrink buffer object caches
+ * @i915: i915 device
+ * @target: amount of memory to make available, in pages
+ * @nr_scanned: optional output for number of pages scanned (incremental)
+ * @flags: control flags for selecting cache types
+ *
+ * This function is the main interface to the shrinker. It will try to release
+ * up to @target pages of main memory backing storage from buffer objects.
+ * Selection of the specific caches can be done with @flags. This is e.g. useful
+ * when purgeable objects should be removed from caches preferentially.
+ *
+ * Note that it's not guaranteed that released amount is actually available as
+ * free system memory - the pages might still be in use due to other reasons
+ * (like cpu mmaps) or the mm core has reused them before we could grab them.
+ * Therefore code that needs to explicitly shrink buffer object caches (e.g. to
+ * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
+ *
+ * Also note that any kind of pinning (both per-vma address space pins and
+ * backing storage pins at the buffer object level) result in the shrinker code
+ * having to skip the object.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
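+ *
+ * A minimal usage sketch (editor's illustration, not from the original
+ * patch):
+ *
+ *	unsigned long scanned = 0;
+ *	unsigned long freed;
+ *
+ *	freed = i915_gem_shrink(i915, 128, &scanned,
+ *				I915_SHRINK_BOUND | I915_SHRINK_UNBOUND);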
+ */
+unsigned long
+i915_gem_shrink(struct drm_i915_private *i915,
+		unsigned long target,
+		unsigned long *nr_scanned,
+		unsigned flags)
+{
+	const struct {
+		struct list_head *list;
+		unsigned int bit;
+	} phases[] = {
+		{ &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
+		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
+		{ NULL, 0 },
+	}, *phase;
+	intel_wakeref_t wakeref = 0;
+	unsigned long count = 0;
+	unsigned long scanned = 0;
+	bool unlock;
+
+	if (!shrinker_lock(i915, flags, &unlock))
+		return 0;
+
+	/*
+	 * When shrinking the active list, also consider active contexts.
+	 * Active contexts are pinned until they are retired, and so can
+	 * not be simply unbound to retire and unpin their pages. To shrink
+	 * the contexts, we must wait until the gpu is idle.
+	 *
+	 * We don't care about errors here; if we cannot wait upon the GPU,
+	 * we will free as much as we can and hope to get a second chance.
+	 */
+	if (flags & I915_SHRINK_ACTIVE)
+		i915_gem_wait_for_idle(i915,
+				       I915_WAIT_LOCKED,
+				       MAX_SCHEDULE_TIMEOUT);
+
+	trace_i915_gem_shrink(i915, target, flags);
+	i915_retire_requests(i915);
+
+	/*
+	 * Unbinding of objects will require HW access; let us not wake the
+	 * device just to recover a little memory. If absolutely necessary,
+	 * we will force the wake during oom-notifier.
+	 */
+	if (flags & I915_SHRINK_BOUND) {
+		wakeref = intel_runtime_pm_get_if_in_use(i915);
+		if (!wakeref)
+			flags &= ~I915_SHRINK_BOUND;
+	}
+
+	/*
+	 * As we may completely rewrite the (un)bound list whilst unbinding
+	 * (due to retiring requests) we have to strictly process only
+	 * one element of the list at a time, and recheck the list
+	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equal to 0.
+	 */
+	for (phase = phases; phase->list; phase++) {
+		struct list_head still_in_list;
+		struct drm_i915_gem_object *obj;
+
+		if ((flags & phase->bit) == 0)
+			continue;
+
+		INIT_LIST_HEAD(&still_in_list);
+
+		/*
+		 * We serialize our access to unreferenced objects through
+		 * the use of the struct_mutex. While the objects are not
+		 * yet freed (due to RCU then a workqueue) we still want
+		 * to be able to shrink their pages, so they remain on
+		 * the unbound/bound list until actually freed.
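+		 *
+		 * (Editor's sketch of the walk below, assuming obj_lock
+		 * protects both lists as stated above:
+		 *
+		 *	spin_lock(&i915->mm.obj_lock);
+		 *	while ((obj = first_entry_or_null(phase->list))) {
+		 *		list_move_tail(&obj->mm.link, &still_in_list);
+		 *		spin_unlock(&i915->mm.obj_lock);
+		 *		... unbind and drop the pages ...
+		 *		spin_lock(&i915->mm.obj_lock);
+		 *	}
+		 *	list_splice_tail(&still_in_list, phase->list);
+		 *
+		 * Parking each object on a private list first guarantees
+		 * forward progress even though the lock is dropped on
+		 * every iteration.)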
+ */
+		spin_lock(&i915->mm.obj_lock);
+		while (count < target &&
+		       (obj = list_first_entry_or_null(phase->list,
+						       typeof(*obj),
+						       mm.link))) {
+			list_move_tail(&obj->mm.link, &still_in_list);
+
+			if (flags & I915_SHRINK_PURGEABLE &&
+			    obj->mm.madv != I915_MADV_DONTNEED)
+				continue;
+
+			if (flags & I915_SHRINK_VMAPS &&
+			    !is_vmalloc_addr(obj->mm.mapping))
+				continue;
+
+			if (!(flags & I915_SHRINK_ACTIVE) &&
+			    (i915_gem_object_is_active(obj) ||
+			     i915_gem_object_is_framebuffer(obj)))
+				continue;
+
+			if (!can_release_pages(obj))
+				continue;
+
+			spin_unlock(&i915->mm.obj_lock);
+
+			if (unsafe_drop_pages(obj)) {
+				/* May arrive from get_pages on another bo */
+				mutex_lock_nested(&obj->mm.lock,
+						  I915_MM_SHRINKER);
+				if (!i915_gem_object_has_pages(obj)) {
+					try_to_writeback(obj, flags);
+					count += obj->base.size >> PAGE_SHIFT;
+				}
+				mutex_unlock(&obj->mm.lock);
+			}
+			scanned += obj->base.size >> PAGE_SHIFT;
+
+			spin_lock(&i915->mm.obj_lock);
+		}
+		list_splice_tail(&still_in_list, phase->list);
+		spin_unlock(&i915->mm.obj_lock);
+	}
+
+	if (flags & I915_SHRINK_BOUND)
+		intel_runtime_pm_put(i915, wakeref);
+
+	i915_retire_requests(i915);
+
+	shrinker_unlock(i915, unlock);
+
+	if (nr_scanned)
+		*nr_scanned += scanned;
+	return count;
+}
+
+/**
+ * i915_gem_shrink_all - Shrink buffer object caches completely
+ * @i915: i915 device
+ *
+ * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
+ * caches completely. It also first waits for and retires all outstanding
+ * requests to also be able to release backing storage for active objects.
+ *
+ * This should only be used in code to intentionally quiesce the gpu or as a
+ * last-ditch effort when memory seems to have run out.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
+ */
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref;
+	unsigned long freed = 0;
+
+	with_intel_runtime_pm(i915, wakeref) {
+		freed = i915_gem_shrink(i915, -1UL, NULL,
+					I915_SHRINK_BOUND |
+					I915_SHRINK_UNBOUND |
+					I915_SHRINK_ACTIVE);
+	}
+
+	return freed;
+}
+
+static unsigned long
+i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct drm_i915_private *i915 =
+		container_of(shrinker, struct drm_i915_private, mm.shrinker);
+	struct drm_i915_gem_object *obj;
+	unsigned long num_objects = 0;
+	unsigned long count = 0;
+
+	spin_lock(&i915->mm.obj_lock);
+	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
+		if (can_release_pages(obj)) {
+			count += obj->base.size >> PAGE_SHIFT;
+			num_objects++;
+		}
+
+	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
+		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
+			count += obj->base.size >> PAGE_SHIFT;
+			num_objects++;
+		}
+	spin_unlock(&i915->mm.obj_lock);
+
+	/* Update our preferred vmscan batch size for the next pass.
+	 * Our rough guess for an effective batch size is roughly 2
+	 * available GEM objects worth of pages. That is, we don't want
+	 * the shrinker to fire until it is worth the cost of freeing an
+	 * entire GEM object.
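+	 *
+	 * (Editor's worked example: with 1000 releasable objects covering
+	 * 2,000,000 pages in total, avg = 2 * 2000000 / 1000 = 4000 pages,
+	 * and the batch decays toward that via (batch + avg) >> 1, never
+	 * dropping below the default SHRINK_BATCH of 128.)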
+ */ + if (num_objects) { + unsigned long avg = 2 * count / num_objects; + + i915->mm.shrinker.batch = + max((i915->mm.shrinker.batch + avg) >> 1, + 128ul /* default SHRINK_BATCH */); + } + + return count; +} + +static unsigned long +i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct drm_i915_private *i915 = + container_of(shrinker, struct drm_i915_private, mm.shrinker); + unsigned long freed; + bool unlock; + + sc->nr_scanned = 0; + + if (!shrinker_lock(i915, 0, &unlock)) + return SHRINK_STOP; + + freed = i915_gem_shrink(i915, + sc->nr_to_scan, + &sc->nr_scanned, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_PURGEABLE | + I915_SHRINK_WRITEBACK); + if (sc->nr_scanned < sc->nr_to_scan) + freed += i915_gem_shrink(i915, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); + if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { + intel_wakeref_t wakeref; + + with_intel_runtime_pm(i915, wakeref) { + freed += i915_gem_shrink(i915, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, + I915_SHRINK_ACTIVE | + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); + } + } + + shrinker_unlock(i915, unlock); + + return sc->nr_scanned ? freed : SHRINK_STOP; +} + +static int +i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct drm_i915_private *i915 = + container_of(nb, struct drm_i915_private, mm.oom_notifier); + struct drm_i915_gem_object *obj; + unsigned long unevictable, bound, unbound, freed_pages; + intel_wakeref_t wakeref; + + freed_pages = 0; + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); + + /* Because we may be allocating inside our own driver, we cannot + * assert that there are no objects with pinned pages that are not + * being pointed to by hardware. 
+ */ + unbound = bound = unevictable = 0; + spin_lock(&i915->mm.obj_lock); + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { + if (!can_release_pages(obj)) + unevictable += obj->base.size >> PAGE_SHIFT; + else + unbound += obj->base.size >> PAGE_SHIFT; + } + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { + if (!can_release_pages(obj)) + unevictable += obj->base.size >> PAGE_SHIFT; + else + bound += obj->base.size >> PAGE_SHIFT; + } + spin_unlock(&i915->mm.obj_lock); + + if (freed_pages || unbound || bound) + pr_info("Purging GPU memory, %lu pages freed, " + "%lu pages still pinned.\n", + freed_pages, unevictable); + + *(unsigned long *)ptr += freed_pages; + return NOTIFY_DONE; +} + +static int +i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct drm_i915_private *i915 = + container_of(nb, struct drm_i915_private, mm.vmap_notifier); + struct i915_vma *vma, *next; + unsigned long freed_pages = 0; + intel_wakeref_t wakeref; + bool unlock; + + if (!shrinker_lock(i915, 0, &unlock)) + return NOTIFY_DONE; + + /* Force everything onto the inactive lists */ + if (i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT)) + goto out; + + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_VMAPS); + + /* We also want to clear any cached iomaps as they wrap vmap */ + mutex_lock(&i915->ggtt.vm.mutex); + list_for_each_entry_safe(vma, next, + &i915->ggtt.vm.bound_list, vm_link) { + unsigned long count = vma->node.size >> PAGE_SHIFT; + + if (!vma->iomap || i915_vma_is_active(vma)) + continue; + + mutex_unlock(&i915->ggtt.vm.mutex); + if (i915_vma_unbind(vma) == 0) + freed_pages += count; + mutex_lock(&i915->ggtt.vm.mutex); + } + mutex_unlock(&i915->ggtt.vm.mutex); + +out: + shrinker_unlock(i915, unlock); + + *(unsigned long *)ptr += freed_pages; + return NOTIFY_DONE; +} + +/** + * i915_gem_shrinker_register - Register the i915 shrinker + * @i915: i915 device + * + * This function registers and sets up the i915 shrinker and OOM handler. + */ +void i915_gem_shrinker_register(struct drm_i915_private *i915) +{ + i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; + i915->mm.shrinker.count_objects = i915_gem_shrinker_count; + i915->mm.shrinker.seeks = DEFAULT_SEEKS; + i915->mm.shrinker.batch = 4096; + WARN_ON(register_shrinker(&i915->mm.shrinker)); + + i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; + WARN_ON(register_oom_notifier(&i915->mm.oom_notifier)); + + i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; + WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); +} + +/** + * i915_gem_shrinker_unregister - Unregisters the i915 shrinker + * @i915: i915 device + * + * This function unregisters the i915 shrinker and OOM handler. 
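+ *
+ * A usage sketch (editor's illustration): this pairs with
+ * i915_gem_shrinker_register(), tearing the notifiers down in reverse
+ * order of setup:
+ *
+ *	i915_gem_shrinker_register(i915);	// driver load
+ *	...
+ *	i915_gem_shrinker_unregister(i915);	// driver unload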
+ */
+void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
+{
+	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
+	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
+	unregister_shrinker(&i915->mm.shrinker);
+}
+
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+				    struct mutex *mutex)
+{
+	bool unlock = false;
+
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
+		mutex_acquire(&i915->drm.struct_mutex.dep_map,
+			      I915_MM_NORMAL, 0, _RET_IP_);
+		unlock = true;
+	}
+
+	fs_reclaim_acquire(GFP_KERNEL);
+
+	/*
+	 * As we invariably rely on the struct_mutex within the shrinker,
+	 * but have a complicated recursion dance, taint all the mutexes used
+	 * within the shrinker with the struct_mutex. For completeness, we
+	 * taint with all subclasses of struct_mutex, even though we should
+	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
+	 * deadlocks from using struct_mutex inside @mutex.
+	 */
+	mutex_acquire(&i915->drm.struct_mutex.dep_map,
+		      I915_MM_SHRINKER, 0, _RET_IP_);
+
+	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
+	mutex_release(&mutex->dep_map, 0, _RET_IP_);
+
+	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+
+	fs_reclaim_release(GFP_KERNEL);
+
+	if (unlock)
+		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
new file mode 100644
index 000000000000..9080a736663a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -0,0 +1,704 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2012 Intel Corporation
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include "i915_drv.h"
+
+/*
+ * The BIOS typically reserves some of the system's memory for the exclusive
+ * use of the integrated graphics. This memory is no longer available for
+ * use by the OS and so the user finds that their system has less memory
+ * available than they put in. We refer to this memory as stolen.
+ *
+ * The BIOS will allocate its framebuffer from the stolen memory. Our
+ * goal is to try to reuse that object for our own fbcon which must always
+ * be available for panics. Anything else we can reuse the stolen memory
+ * for is a boon.
+ */
+
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+					 struct drm_mm_node *node, u64 size,
+					 unsigned alignment, u64 start, u64 end)
+{
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return -ENODEV;
+
+	/* WaSkipStolenMemoryFirstPage:bdw+ */
+	if (INTEL_GEN(dev_priv) >= 8 && start < 4096)
+		start = 4096;
+
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
+					  size, alignment, 0,
+					  start, end, DRM_MM_INSERT_BEST);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+
+	return ret;
+}
+
+int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+				struct drm_mm_node *node, u64 size,
+				unsigned alignment)
+{
+	return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
+						    alignment, 0, U64_MAX);
+}
+
+void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+				 struct drm_mm_node *node)
+{
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	drm_mm_remove_node(node);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+}
+
+static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
+			      struct resource *dsm)
+{
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct resource *r;
+
+	if (dsm->start == 0 || dsm->end <= dsm->start)
+		return -EINVAL;
+
+	/*
+	 * TODO: We have yet to encounter the case where the GTT wasn't at the
+	 * end of stolen. With that assumption we could simplify this.
+	 */
+
+	/* Make sure we don't clobber the GTT if it's within stolen memory */
+	if (INTEL_GEN(dev_priv) <= 4 &&
+	    !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
+		struct resource stolen[2] = {*dsm, *dsm};
+		struct resource ggtt_res;
+		resource_size_t ggtt_start;
+
+		ggtt_start = I915_READ(PGTBL_CTL);
+		if (IS_GEN(dev_priv, 4))
+			ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) |
+				     (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28;
+		else
+			ggtt_start &= PGTBL_ADDRESS_LO_MASK;
+
+		ggtt_res =
+			(struct resource) DEFINE_RES_MEM(ggtt_start,
+							 ggtt_total_entries(ggtt) * 4);
+
+		if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end)
+			stolen[0].end = ggtt_res.start;
+		if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end)
+			stolen[1].start = ggtt_res.end;
+
+		/* Pick the larger of the two chunks */
+		if (resource_size(&stolen[0]) > resource_size(&stolen[1]))
+			*dsm = stolen[0];
+		else
+			*dsm = stolen[1];
+
+		if (stolen[0].start != stolen[1].start ||
+		    stolen[0].end != stolen[1].end) {
+			DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res);
+			DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm);
+		}
+	}
+
+	/*
+	 * Verify that nothing else uses this physical address. Stolen
+	 * memory should be reserved by the BIOS and hidden from the
+	 * kernel. So if the region is already marked as busy, something
+	 * is seriously wrong.
+	 */
+	r = devm_request_mem_region(dev_priv->drm.dev, dsm->start,
+				    resource_size(dsm),
+				    "Graphics Stolen Memory");
+	if (r == NULL) {
+		/*
+		 * One more attempt but this time requesting region from
+		 * start + 1, as we have seen that this resolves the region
+		 * conflict with the PCI Bus.
+		 * This is a BIOS w/a: Some BIOS wrap stolen in the root
+		 * PCI bus, but have an off-by-one error. Hence retry the
+		 * reservation starting from 1 instead of 0.
+		 * There's also BIOS with off-by-one on the other end.
+		 */
+		r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1,
+					    resource_size(dsm) - 2,
+					    "Graphics Stolen Memory");
+		/*
+		 * GEN3 firmware likes to smash pci bridges into the stolen
+		 * range. Apparently this works.
+ */ + if (r == NULL && !IS_GEN(dev_priv, 3)) { + DRM_ERROR("conflict detected with stolen region: %pR\n", + dsm); + + return -EBUSY; + } + } + + return 0; +} + +void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) +{ + if (!drm_mm_initialized(&dev_priv->mm.stolen)) + return; + + drm_mm_takedown(&dev_priv->mm.stolen); +} + +static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(IS_GM45(dev_priv) ? + CTG_STOLEN_RESERVED : + ELK_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", + IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); + + if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) + return; + + /* + * Whether ILK really reuses the ELK register for this is unclear. + * Let's see if we catch anyone with this supposedly enabled on ILK. + */ + WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n", + reg_val); + + if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) + return; + + *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; + WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); + + *size = stolen_top - *base; +} + +static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) { + case GEN6_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN6_STOLEN_RESERVED_512K: + *size = 512 * 1024; + break; + case GEN6_STOLEN_RESERVED_256K: + *size = 256 * 1024; + break; + case GEN6_STOLEN_RESERVED_128K: + *size = 128 * 1024; + break; + default: + *size = 1024 * 1024; + MISSING_CASE(reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK); + } +} + +static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { + default: + MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + /* fall through */ + case GEN7_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + } + + /* + * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the + * reserved location as (top - size). 
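+	 *
+	 * (Editor's illustration with made-up numbers: if stolen tops out
+	 * at 0x7c000000 and the decoded size is 1 MiB, the deduced base
+	 * below is 0x7c000000 - 0x100000 = 0x7bf00000.)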
+ */ + *base = stolen_top - *size; +} + +static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { + case GEN7_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN7_STOLEN_RESERVED_256K: + *size = 256 * 1024; + break; + default: + *size = 1024 * 1024; + MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + } +} + +static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { + case GEN8_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_2M: + *size = 2 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_4M: + *size = 4 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_8M: + *size = 8 * 1024 * 1024; + break; + default: + *size = 8 * 1024 * 1024; + MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); + } +} + +static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK)) + return; + + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; + *size = stolen_top - *base; +} + +static void icl_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u64 reg_val = I915_READ64(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); + + *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { + case GEN8_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_2M: + *size = 2 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_4M: + *size = 4 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_8M: + *size = 8 * 1024 * 1024; + break; + default: + *size = 8 * 1024 * 1024; + MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); + } +} + +int i915_gem_init_stolen(struct drm_i915_private *dev_priv) +{ + resource_size_t reserved_base, stolen_top; + resource_size_t reserved_total, reserved_size; + + mutex_init(&dev_priv->mm.stolen_lock); + + if (intel_vgpu_active(dev_priv)) { + DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + return 0; + } + + if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { + DRM_INFO("DMAR active, disabling use of stolen memory\n"); + return 0; + } + + if (resource_size(&intel_graphics_stolen_res) == 0) + return 0; + + dev_priv->dsm = intel_graphics_stolen_res; + + if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) + return 0; + + GEM_BUG_ON(dev_priv->dsm.start == 0); + GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + + stolen_top = dev_priv->dsm.end + 1; + reserved_base = stolen_top; + reserved_size = 0; + + switch (INTEL_GEN(dev_priv)) { + 
case 2: + case 3: + break; + case 4: + if (!IS_G4X(dev_priv)) + break; + /* fall through */ + case 5: + g4x_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 6: + gen6_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 7: + if (IS_VALLEYVIEW(dev_priv)) + vlv_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + else + gen7_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 8: + case 9: + case 10: + if (IS_LP(dev_priv)) + chv_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + else + bdw_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 11: + default: + icl_get_stolen_reserved(dev_priv, &reserved_base, + &reserved_size); + break; + } + + /* + * Our expectation is that the reserved space is at the top of the + * stolen region and *never* at the bottom. If we see !reserved_base, + * it likely means we failed to read the registers correctly. + */ + if (!reserved_base) { + DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n", + &reserved_base, &reserved_size); + reserved_base = stolen_top; + reserved_size = 0; + } + + dev_priv->dsm_reserved = + (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + + if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", + &dev_priv->dsm_reserved, &dev_priv->dsm); + return 0; + } + + /* It is possible for the reserved area to end before the end of stolen + * memory, so just consider the start. */ + reserved_total = stolen_top - reserved_base; + + DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&dev_priv->dsm) >> 10, + ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); + + dev_priv->stolen_usable_size = + resource_size(&dev_priv->dsm) - reserved_total; + + /* Basic memrange allocator for stolen space. */ + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); + + return 0; +} + +static struct sg_table * +i915_pages_create_for_stolen(struct drm_device *dev, + resource_size_t offset, resource_size_t size) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct sg_table *st; + struct scatterlist *sg; + + GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); + + /* We hide that we have no struct page backing our stolen object + * by wrapping the contiguous physical allocation with a fake + * dma mapping in a single scatterlist. 
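+	 *
+	 * (Editor's note: because stolen memory is one contiguous physical
+	 * range, a single scatterlist entry suffices and the "mapping" is
+	 * just arithmetic, e.g.
+	 *
+	 *	sg_dma_address(sg) = dsm.start + offset;
+	 *
+	 * with no IOMMU or struct page involvement.)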
+ */ + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (st == NULL) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(st, 1, GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + sg->offset = 0; + sg->length = size; + + sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; + sg_dma_len(sg) = size; + + return st; +} + +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) +{ + struct sg_table *pages = + i915_pages_create_for_stolen(obj->base.dev, + obj->stolen->start, + obj->stolen->size); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); + + return 0; +} + +static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + /* Should only be called from i915_gem_object_release_stolen() */ + sg_free_table(pages); + kfree(pages); +} + +static void +i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); + + GEM_BUG_ON(!stolen); + + __i915_gem_object_unpin_pages(obj); + + i915_gem_stolen_remove_node(dev_priv, stolen); + kfree(stolen); +} + +static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { + .get_pages = i915_gem_object_get_pages_stolen, + .put_pages = i915_gem_object_put_pages_stolen, + .release = i915_gem_object_release_stolen, +}; + +static struct drm_i915_gem_object * +_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + struct drm_mm_node *stolen) +{ + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + obj = i915_gem_object_alloc(); + if (obj == NULL) + return NULL; + + drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops); + + obj->stolen = stolen; + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + cache_level = HAS_LLC(dev_priv) ? 
+		I915_CACHE_LLC : I915_CACHE_NONE;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	if (i915_gem_object_pin_pages(obj))
+		goto cleanup;
+
+	return obj;
+
+cleanup:
+	i915_gem_object_free(obj);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+			      resource_size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return NULL;
+
+	if (size == 0)
+		return NULL;
+
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
+		return NULL;
+
+	ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
+	if (ret) {
+		kfree(stolen);
+		return NULL;
+	}
+
+	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+	if (obj)
+		return obj;
+
+	i915_gem_stolen_remove_node(dev_priv, stolen);
+	kfree(stolen);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
+					       resource_size_t stolen_offset,
+					       resource_size_t gtt_offset,
+					       resource_size_t size)
+{
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+	struct i915_vma *vma;
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return NULL;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
+			 &stolen_offset, &gtt_offset, &size);
+
+	/* KISS and expect everything to be page-aligned */
+	if (WARN_ON(size == 0) ||
+	    WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) ||
+	    WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT)))
+		return NULL;
+
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
+		return NULL;
+
+	stolen->start = stolen_offset;
+	stolen->size = size;
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+	if (ret) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen space\n");
+		kfree(stolen);
+		return NULL;
+	}
+
+	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+	if (obj == NULL) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
+		i915_gem_stolen_remove_node(dev_priv, stolen);
+		kfree(stolen);
+		return NULL;
+	}
+
+	/* Some objects just need physical mem from stolen space */
+	if (gtt_offset == I915_GTT_OFFSET_NONE)
+		return obj;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_pages;
+	}
+
+	/* To simplify the initialisation sequence between KMS and GTT,
+	 * we allow construction of the stolen object prior to
+	 * setting up the GTT space. The actual reservation will occur
+	 * later.
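+	 *
+	 * (Editor's note: unlike the search-for-a-hole path used by normal
+	 * binds, i915_gem_gtt_reserve() below claims the exact range
+	 * [gtt_offset, gtt_offset + size), which is what lets KMS inherit
+	 * a BIOS-programmed framebuffer address unchanged.)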
+ */
+	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
+				   size, gtt_offset, obj->cache_level,
+				   0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+		goto err_pages;
+	}
+
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+	vma->pages = obj->mm.pages;
+	vma->flags |= I915_VMA_GLOBAL_BIND;
+	__i915_vma_set_map_and_fenceable(vma);
+
+	mutex_lock(&ggtt->vm.mutex);
+	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	mutex_unlock(&ggtt->vm.mutex);
+
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
+	obj->bind_count++;
+	spin_unlock(&dev_priv->mm.obj_lock);
+
+	return obj;
+
+err_pages:
+	i915_gem_object_unpin_pages(obj);
+err:
+	i915_gem_object_put(obj);
+	return NULL;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
new file mode 100644
index 000000000000..ca0c2f451742
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -0,0 +1,440 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008 Intel Corporation
+ */
+
+#include
+#include
+#include
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+/**
+ * DOC: buffer object tiling
+ *
+ * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() are the userspace
+ * interface to declare fence register requirements.
+ *
+ * In principle GEM doesn't care at all about the internal data layout of an
+ * object, and hence it also doesn't care about tiling or swizzling. There are
+ * two exceptions:
+ *
+ * - For X and Y tiling the hardware provides detilers for CPU access, so called
+ *   fences. Since there's only a limited amount of them the kernel must manage
+ *   these, and therefore userspace must tell the kernel the object tiling if it
+ *   wants to use fences for detiling.
+ * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which
+ *   depends upon the physical page frame number. When swapping such objects the
+ *   page frame number might change and the kernel must be able to fix this up
+ *   and hence must know the tiling. Note that on a subset of platforms with
+ *   asymmetric memory channel population the swizzling pattern changes in an
+ *   unknown way, and for those the kernel simply forbids swapping completely.
+ *
+ * Since neither of these applies to new tiling layouts on modern platforms like
+ * W, Ys and Yf tiling GEM only allows object tiling to be set to X or Y tiled.
+ * Anything else can be handled in userspace entirely without the kernel's
+ * involvement.
+ */
+
+/**
+ * i915_gem_fence_size - required global GTT size for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT size for a fence (view of a tiled object),
+ * taking into account potential fence register mapping.
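+ *
+ * A worked example (editor's illustration, assuming the gen4+ tile height
+ * of 8 rows for X tiling): with stride = 512, the rounding granule is
+ * 512 * 8 = 4096, so a 10000-byte X-tiled object needs 12288 bytes; on
+ * gen3 the same object instead needs a power-of-two region of at least
+ * 1 MiB.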
+ */
+u32 i915_gem_fence_size(struct drm_i915_private *i915,
+			u32 size, unsigned int tiling, unsigned int stride)
+{
+	u32 ggtt_size;
+
+	GEM_BUG_ON(!size);
+
+	if (tiling == I915_TILING_NONE)
+		return size;
+
+	GEM_BUG_ON(!stride);
+
+	if (INTEL_GEN(i915) >= 4) {
+		stride *= i915_gem_tile_height(tiling);
+		GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE));
+		return roundup(size, stride);
+	}
+
+	/* Previous chips need a power-of-two fence region when tiling */
+	if (IS_GEN(i915, 3))
+		ggtt_size = 1024*1024;
+	else
+		ggtt_size = 512*1024;
+
+	while (ggtt_size < size)
+		ggtt_size <<= 1;
+
+	return ggtt_size;
+}
+
+/**
+ * i915_gem_fence_alignment - required global GTT alignment for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT alignment for a fence (a view of a tiled
+ * object), taking into account potential fence register mapping.
+ */
+u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
+			     unsigned int tiling, unsigned int stride)
+{
+	GEM_BUG_ON(!size);
+
+	/*
+	 * Minimum alignment is 4k (GTT page size), but might be greater
+	 * if a fence register is needed for the object.
+	 */
+	if (tiling == I915_TILING_NONE)
+		return I915_GTT_MIN_ALIGNMENT;
+
+	if (INTEL_GEN(i915) >= 4)
+		return I965_FENCE_PAGE;
+
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
+	return i915_gem_fence_size(i915, size, tiling, stride);
+}
+
+/* Check pitch constraints for all chips & tiling formats */
+static bool
+i915_tiling_ok(struct drm_i915_gem_object *obj,
+	       unsigned int tiling, unsigned int stride)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned int tile_width;
+
+	/* Linear is always fine */
+	if (tiling == I915_TILING_NONE)
+		return true;
+
+	if (tiling > I915_TILING_LAST)
+		return false;
+
+	/* check maximum stride & object size */
+	/* i965+ stores the end address of the gtt mapping in the fence
+	 * reg, so don't bother to check the size */
+	if (INTEL_GEN(i915) >= 7) {
+		if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
+			return false;
+	} else if (INTEL_GEN(i915) >= 4) {
+		if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
+			return false;
+	} else {
+		if (stride > 8192)
+			return false;
+
+		if (!is_power_of_2(stride))
+			return false;
+	}
+
+	if (IS_GEN(i915, 2) ||
+	    (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
+		tile_width = 128;
+	else
+		tile_width = 512;
+
+	if (!stride || !IS_ALIGNED(stride, tile_width))
+		return false;
+
+	return true;
+}
+
+static bool i915_vma_fence_prepare(struct i915_vma *vma,
+				   int tiling_mode, unsigned int stride)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	u32 size, alignment;
+
+	if (!i915_vma_is_map_and_fenceable(vma))
+		return true;
+
+	size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride);
+	if (vma->node.size < size)
+		return false;
+
+	alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride);
+	if (!IS_ALIGNED(vma->node.start, alignment))
+		return false;
+
+	return true;
+}
+
+/* Make the current GTT allocation valid for the change in tiling.
*/ +static int +i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, + int tiling_mode, unsigned int stride) +{ + struct i915_vma *vma; + int ret; + + if (tiling_mode == I915_TILING_NONE) + return 0; + + for_each_ggtt_vma(vma, obj) { + if (i915_vma_fence_prepare(vma, tiling_mode, stride)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } + + return 0; +} + +int +i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, + unsigned int tiling, unsigned int stride) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_vma *vma; + int err; + + /* Make sure we don't cross-contaminate obj->tiling_and_stride */ + BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); + + GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); + GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); + lockdep_assert_held(&i915->drm.struct_mutex); + + if ((tiling | stride) == obj->tiling_and_stride) + return 0; + + if (i915_gem_object_is_framebuffer(obj)) + return -EBUSY; + + /* We need to rebind the object if its current allocation + * no longer meets the alignment restrictions for its new + * tiling mode. Otherwise we can just leave it alone, but + * need to ensure that any fence register is updated before + * the next fenced (either through the GTT or by the BLT unit + * on older GPUs) access. + * + * After updating the tiling parameters, we then flag whether + * we need to update an associated fence register. Note this + * has to also include the unfenced register the GPU uses + * whilst executing a fenced command for an untiled object. + */ + + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) + return err; + + i915_gem_object_lock(obj); + if (i915_gem_object_is_framebuffer(obj)) { + i915_gem_object_unlock(obj); + return -EBUSY; + } + + /* If the memory has unknown (i.e. varying) swizzling, we pin the + * pages to prevent them being swapped out and causing corruption + * due to the change in swizzling. + */ + mutex_lock(&obj->mm.lock); + if (i915_gem_object_has_pages(obj) && + obj->mm.madv == I915_MADV_WILLNEED && + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + if (tiling == I915_TILING_NONE) { + GEM_BUG_ON(!obj->mm.quirked); + __i915_gem_object_unpin_pages(obj); + obj->mm.quirked = false; + } + if (!i915_gem_object_is_tiled(obj)) { + GEM_BUG_ON(obj->mm.quirked); + __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } + } + mutex_unlock(&obj->mm.lock); + + for_each_ggtt_vma(vma, obj) { + vma->fence_size = + i915_gem_fence_size(i915, vma->size, tiling, stride); + vma->fence_alignment = + i915_gem_fence_alignment(i915, + vma->size, tiling, stride); + + if (vma->fence) + vma->fence->dirty = true; + } + + obj->tiling_and_stride = tiling | stride; + i915_gem_object_unlock(obj); + + /* Force the fence to be reacquired for GTT access */ + i915_gem_object_release_mmap(obj); + + /* Try to preallocate memory required to save swizzling on put-pages */ + if (i915_gem_object_needs_bit17_swizzle(obj)) { + if (!obj->bit_17) { + obj->bit_17 = bitmap_zalloc(obj->base.size >> PAGE_SHIFT, + GFP_KERNEL); + } + } else { + bitmap_free(obj->bit_17); + obj->bit_17 = NULL; + } + + return 0; +} + +/** + * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * + * Sets the tiling mode of an object, returning the required swizzling of + * bit 6 of addresses in the object. + * + * Called by the user via ioctl. 
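+ *
+ * A minimal userspace sketch (fd and handle assumed valid; drmIoctl()
+ * is libdrm's EINTR-restarting ioctl wrapper):
+ *
+ *	struct drm_i915_gem_set_tiling arg = {
+ *		.handle = handle,
+ *		.tiling_mode = I915_TILING_X,
+ *		.stride = 512,	// must be a multiple of the tile width
+ *	};
+ *
+ *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &arg) == 0)
+ *		swizzle = arg.swizzle_mode;	// reported bit-6 swizzling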
+ * + * Returns: + * Zero on success, negative errno on failure. + */ +int +i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_set_tiling *args = data; + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + /* + * The tiling mode of proxy objects is handled by its generator, and + * not allowed to be changed by userspace. + */ + if (i915_gem_object_is_proxy(obj)) { + err = -ENXIO; + goto err; + } + + if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) { + err = -EINVAL; + goto err; + } + + if (args->tiling_mode == I915_TILING_NONE) { + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + args->stride = 0; + } else { + if (args->tiling_mode == I915_TILING_X) + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; + else + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; + + /* Hide bit 17 swizzling from the user. This prevents old Mesa + * from aborting the application on sw fallbacks to bit 17, + * and we use the pread/pwrite bit17 paths to swizzle for it. + * If there was a user that was relying on the swizzle + * information for drm_intel_bo_map()ed reads/writes this would + * break it, but we don't have any of those. + */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + + /* If we can't handle the swizzling, make it untiled. */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { + args->tiling_mode = I915_TILING_NONE; + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + args->stride = 0; + } + } + + err = mutex_lock_interruptible(&dev->struct_mutex); + if (err) + goto err; + + err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); + mutex_unlock(&dev->struct_mutex); + + /* We have to maintain this existing ABI... */ + args->stride = i915_gem_object_get_stride(obj); + args->tiling_mode = i915_gem_object_get_tiling(obj); + +err: + i915_gem_object_put(obj); + return err; +} + +/** + * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * + * Returns the current tiling mode and required bit 6 swizzling for the object. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. 
+ */
+int
+i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_get_tiling *args = data;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_gem_object *obj;
+ int err = -ENOENT;
+
+ rcu_read_lock();
+ obj = i915_gem_object_lookup_rcu(file, args->handle);
+ if (obj) {
+ args->tiling_mode =
+ READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
+ err = 0;
+ }
+ rcu_read_unlock();
+ if (unlikely(err))
+ return err;
+
+ switch (args->tiling_mode) {
+ case I915_TILING_X:
+ args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
+ break;
+ case I915_TILING_Y:
+ args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
+ break;
+ default:
+ case I915_TILING_NONE:
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+ break;
+ }
+
+ /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
+ if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+ args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
+ else
+ args->phys_swizzle_mode = args->swizzle_mode;
+ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
+ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
new file mode 100644
index 000000000000..ccac73b72597
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -0,0 +1,832 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2012-2014 Intel Corporation
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+
+struct i915_mm_struct {
+ struct mm_struct *mm;
+ struct drm_i915_private *i915;
+ struct i915_mmu_notifier *mn;
+ struct hlist_node node;
+ struct kref kref;
+ struct work_struct work;
+};
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include
+
+struct i915_mmu_notifier {
+ spinlock_t lock;
+ struct hlist_node node;
+ struct mmu_notifier mn;
+ struct rb_root_cached objects;
+ struct i915_mm_struct *mm;
+};
+
+struct i915_mmu_object {
+ struct i915_mmu_notifier *mn;
+ struct drm_i915_gem_object *obj;
+ struct interval_tree_node it;
+};
+
+static void add_object(struct i915_mmu_object *mo)
+{
+ GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
+ interval_tree_insert(&mo->it, &mo->mn->objects);
+}
+
+static void del_object(struct i915_mmu_object *mo)
+{
+ if (RB_EMPTY_NODE(&mo->it.rb))
+ return;
+
+ interval_tree_remove(&mo->it, &mo->mn->objects);
+ RB_CLEAR_NODE(&mo->it.rb);
+}
+
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+ struct i915_mmu_object *mo = obj->userptr.mmu_object;
+
+ /*
+ * During mm_invalidate_range we need to cancel any userptr that
+ * overlaps the range being invalidated. Doing so requires the
+ * struct_mutex, and that risks recursion. In order to cause
+ * recursion, the user must alias the userptr address space with
+ * a GTT mmapping (possible with a MAP_FIXED) - then when we have
+ * to invalidate that mmapping, mm_invalidate_range is called with
+ * the userptr address *and* the struct_mutex held. To prevent that
+ * we set a flag under the i915_mmu_notifier spinlock to indicate
+ * whether this object is valid.
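+ * The invalidate callback below only ever walks this interval tree,
+ * so removing the node here is what hides a cancelled object from it.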
+ */
+ if (!mo)
+ return;
+
+ spin_lock(&mo->mn->lock);
+ if (value)
+ add_object(mo);
+ else
+ del_object(mo);
+ spin_unlock(&mo->mn->lock);
+}
+
+static int
+userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+ const struct mmu_notifier_range *range)
+{
+ struct i915_mmu_notifier *mn =
+ container_of(_mn, struct i915_mmu_notifier, mn);
+ struct interval_tree_node *it;
+ struct mutex *unlock = NULL;
+ unsigned long end;
+ int ret = 0;
+
+ if (RB_EMPTY_ROOT(&mn->objects.rb_root))
+ return 0;
+
+ /* interval ranges are inclusive, but invalidate range is exclusive */
+ end = range->end - 1;
+
+ spin_lock(&mn->lock);
+ it = interval_tree_iter_first(&mn->objects, range->start, end);
+ while (it) {
+ struct drm_i915_gem_object *obj;
+
+ if (!mmu_notifier_range_blockable(range)) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ /*
+ * The mmu_object is released late when destroying the
+ * GEM object so it is entirely possible to gain a
+ * reference on an object in the process of being freed
+ * since our serialisation is via the spinlock and not
+ * the struct_mutex - and consequently use it after it
+ * is freed and then double free it. To prevent that
+ * use-after-free we only acquire a reference on the
+ * object if it is not in the process of being destroyed.
+ */
+ obj = container_of(it, struct i915_mmu_object, it)->obj;
+ if (!kref_get_unless_zero(&obj->base.refcount)) {
+ it = interval_tree_iter_next(it, range->start, end);
+ continue;
+ }
+ spin_unlock(&mn->lock);
+
+ if (!unlock) {
+ unlock = &mn->mm->i915->drm.struct_mutex;
+
+ switch (mutex_trylock_recursive(unlock)) {
+ default:
+ case MUTEX_TRYLOCK_FAILED:
+ if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
+ i915_gem_object_put(obj);
+ return -EINTR;
+ }
+ /* fall through */
+ case MUTEX_TRYLOCK_SUCCESS:
+ break;
+
+ case MUTEX_TRYLOCK_RECURSIVE:
+ unlock = ERR_PTR(-EEXIST);
+ break;
+ }
+ }
+
+ ret = i915_gem_object_unbind(obj);
+ if (ret == 0)
+ ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+ i915_gem_object_put(obj);
+ if (ret)
+ goto unlock;
+
+ spin_lock(&mn->lock);
+
+ /*
+ * As we do not (yet) protect the mmu from concurrent insertion
+ * over this range, there is no guarantee that this search will
+ * terminate given a pathological workload.
+ */ + it = interval_tree_iter_first(&mn->objects, range->start, end); + } + spin_unlock(&mn->lock); + +unlock: + if (!IS_ERR_OR_NULL(unlock)) + mutex_unlock(unlock); + + return ret; + +} + +static const struct mmu_notifier_ops i915_gem_userptr_notifier = { + .invalidate_range_start = userptr_mn_invalidate_range_start, +}; + +static struct i915_mmu_notifier * +i915_mmu_notifier_create(struct i915_mm_struct *mm) +{ + struct i915_mmu_notifier *mn; + + mn = kmalloc(sizeof(*mn), GFP_KERNEL); + if (mn == NULL) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&mn->lock); + mn->mn.ops = &i915_gem_userptr_notifier; + mn->objects = RB_ROOT_CACHED; + mn->mm = mm; + + return mn; +} + +static void +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) +{ + struct i915_mmu_object *mo; + + mo = fetch_and_zero(&obj->userptr.mmu_object); + if (!mo) + return; + + spin_lock(&mo->mn->lock); + del_object(mo); + spin_unlock(&mo->mn->lock); + kfree(mo); +} + +static struct i915_mmu_notifier * +i915_mmu_notifier_find(struct i915_mm_struct *mm) +{ + struct i915_mmu_notifier *mn; + int err = 0; + + mn = mm->mn; + if (mn) + return mn; + + mn = i915_mmu_notifier_create(mm); + if (IS_ERR(mn)) + err = PTR_ERR(mn); + + down_write(&mm->mm->mmap_sem); + mutex_lock(&mm->i915->mm_lock); + if (mm->mn == NULL && !err) { + /* Protected by mmap_sem (write-lock) */ + err = __mmu_notifier_register(&mn->mn, mm->mm); + if (!err) { + /* Protected by mm_lock */ + mm->mn = fetch_and_zero(&mn); + } + } else if (mm->mn) { + /* + * Someone else raced and successfully installed the mmu + * notifier, we can cancel our own errors. + */ + err = 0; + } + mutex_unlock(&mm->i915->mm_lock); + up_write(&mm->mm->mmap_sem); + + if (mn && !IS_ERR(mn)) + kfree(mn); + + return err ? ERR_PTR(err) : mm->mn; +} + +static int +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, + unsigned flags) +{ + struct i915_mmu_notifier *mn; + struct i915_mmu_object *mo; + + if (flags & I915_USERPTR_UNSYNCHRONIZED) + return capable(CAP_SYS_ADMIN) ? 
0 : -EPERM; + + if (WARN_ON(obj->userptr.mm == NULL)) + return -EINVAL; + + mn = i915_mmu_notifier_find(obj->userptr.mm); + if (IS_ERR(mn)) + return PTR_ERR(mn); + + mo = kzalloc(sizeof(*mo), GFP_KERNEL); + if (!mo) + return -ENOMEM; + + mo->mn = mn; + mo->obj = obj; + mo->it.start = obj->userptr.ptr; + mo->it.last = obj->userptr.ptr + obj->base.size - 1; + RB_CLEAR_NODE(&mo->it.rb); + + obj->userptr.mmu_object = mo; + return 0; +} + +static void +i915_mmu_notifier_free(struct i915_mmu_notifier *mn, + struct mm_struct *mm) +{ + if (mn == NULL) + return; + + mmu_notifier_unregister(&mn->mn, mm); + kfree(mn); +} + +#else + +static void +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) +{ +} + +static void +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) +{ +} + +static int +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, + unsigned flags) +{ + if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0) + return -ENODEV; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + +static void +i915_mmu_notifier_free(struct i915_mmu_notifier *mn, + struct mm_struct *mm) +{ +} + +#endif + +static struct i915_mm_struct * +__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real) +{ + struct i915_mm_struct *mm; + + /* Protected by dev_priv->mm_lock */ + hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real) + if (mm->mm == real) + return mm; + + return NULL; +} + +static int +i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_mm_struct *mm; + int ret = 0; + + /* During release of the GEM object we hold the struct_mutex. This + * precludes us from calling mmput() at that time as that may be + * the last reference and so call exit_mmap(). exit_mmap() will + * attempt to reap the vma, and if we were holding a GTT mmap + * would then call drm_gem_vm_close() and attempt to reacquire + * the struct mutex. So in order to avoid that recursion, we have + * to defer releasing the mm reference until after we drop the + * struct_mutex, i.e. we need to schedule a worker to do the clean + * up. 
+ */ + mutex_lock(&dev_priv->mm_lock); + mm = __i915_mm_struct_find(dev_priv, current->mm); + if (mm == NULL) { + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (mm == NULL) { + ret = -ENOMEM; + goto out; + } + + kref_init(&mm->kref); + mm->i915 = to_i915(obj->base.dev); + + mm->mm = current->mm; + mmgrab(current->mm); + + mm->mn = NULL; + + /* Protected by dev_priv->mm_lock */ + hash_add(dev_priv->mm_structs, + &mm->node, (unsigned long)mm->mm); + } else + kref_get(&mm->kref); + + obj->userptr.mm = mm; +out: + mutex_unlock(&dev_priv->mm_lock); + return ret; +} + +static void +__i915_mm_struct_free__worker(struct work_struct *work) +{ + struct i915_mm_struct *mm = container_of(work, typeof(*mm), work); + i915_mmu_notifier_free(mm->mn, mm->mm); + mmdrop(mm->mm); + kfree(mm); +} + +static void +__i915_mm_struct_free(struct kref *kref) +{ + struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref); + + /* Protected by dev_priv->mm_lock */ + hash_del(&mm->node); + mutex_unlock(&mm->i915->mm_lock); + + INIT_WORK(&mm->work, __i915_mm_struct_free__worker); + queue_work(mm->i915->mm.userptr_wq, &mm->work); +} + +static void +i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj) +{ + if (obj->userptr.mm == NULL) + return; + + kref_put_mutex(&obj->userptr.mm->kref, + __i915_mm_struct_free, + &to_i915(obj->base.dev)->mm_lock); + obj->userptr.mm = NULL; +} + +struct get_pages_work { + struct work_struct work; + struct drm_i915_gem_object *obj; + struct task_struct *task; +}; + +static struct sg_table * +__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, + struct page **pvec, int num_pages) +{ + unsigned int max_segment = i915_sg_segment_size(); + struct sg_table *st; + unsigned int sg_page_sizes; + int ret; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + +alloc_table: + ret = __sg_alloc_table_from_pages(st, pvec, num_pages, + 0, num_pages << PAGE_SHIFT, + max_segment, + GFP_KERNEL); + if (ret) { + kfree(st); + return ERR_PTR(ret); + } + + ret = i915_gem_gtt_prepare_pages(obj, st); + if (ret) { + sg_free_table(st); + + if (max_segment > PAGE_SIZE) { + max_segment = PAGE_SIZE; + goto alloc_table; + } + + kfree(st); + return ERR_PTR(ret); + } + + sg_page_sizes = i915_sg_page_sizes(st->sgl); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return st; +} + +static void +__i915_gem_userptr_get_pages_worker(struct work_struct *_work) +{ + struct get_pages_work *work = container_of(_work, typeof(*work), work); + struct drm_i915_gem_object *obj = work->obj; + const int npages = obj->base.size >> PAGE_SHIFT; + struct page **pvec; + int pinned, ret; + + ret = -ENOMEM; + pinned = 0; + + pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + if (pvec != NULL) { + struct mm_struct *mm = obj->userptr.mm->mm; + unsigned int flags = 0; + + if (!i915_gem_object_is_readonly(obj)) + flags |= FOLL_WRITE; + + ret = -EFAULT; + if (mmget_not_zero(mm)) { + down_read(&mm->mmap_sem); + while (pinned < npages) { + ret = get_user_pages_remote + (work->task, mm, + obj->userptr.ptr + pinned * PAGE_SIZE, + npages - pinned, + flags, + pvec + pinned, NULL, NULL); + if (ret < 0) + break; + + pinned += ret; + } + up_read(&mm->mmap_sem); + mmput(mm); + } + } + + mutex_lock(&obj->mm.lock); + if (obj->userptr.work == &work->work) { + struct sg_table *pages = ERR_PTR(ret); + + if (pinned == npages) { + pages = __i915_gem_userptr_alloc_pages(obj, pvec, + npages); + if (!IS_ERR(pages)) { + pinned = 0; + pages = NULL; + } + } + + obj->userptr.work = 
ERR_CAST(pages); + if (IS_ERR(pages)) + __i915_gem_userptr_set_active(obj, false); + } + mutex_unlock(&obj->mm.lock); + + release_pages(pvec, pinned); + kvfree(pvec); + + i915_gem_object_put(obj); + put_task_struct(work->task); + kfree(work); +} + +static struct sg_table * +__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) +{ + struct get_pages_work *work; + + /* Spawn a worker so that we can acquire the + * user pages without holding our mutex. Access + * to the user pages requires mmap_sem, and we have + * a strict lock ordering of mmap_sem, struct_mutex - + * we already hold struct_mutex here and so cannot + * call gup without encountering a lock inversion. + * + * Userspace will keep on repeating the operation + * (thanks to EAGAIN) until either we hit the fast + * path or the worker completes. If the worker is + * cancelled or superseded, the task is still run + * but the results ignored. (This leads to + * complications that we may have a stray object + * refcount that we need to be wary of when + * checking for existing objects during creation.) + * If the worker encounters an error, it reports + * that error back to this function through + * obj->userptr.work = ERR_PTR. + */ + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (work == NULL) + return ERR_PTR(-ENOMEM); + + obj->userptr.work = &work->work; + + work->obj = i915_gem_object_get(obj); + + work->task = current; + get_task_struct(work->task); + + INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); + queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work); + + return ERR_PTR(-EAGAIN); +} + +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) +{ + const int num_pages = obj->base.size >> PAGE_SHIFT; + struct mm_struct *mm = obj->userptr.mm->mm; + struct page **pvec; + struct sg_table *pages; + bool active; + int pinned; + + /* If userspace should engineer that these pages are replaced in + * the vma between us binding this page into the GTT and completion + * of rendering... Their loss. If they change the mapping of their + * pages they need to create a new bo to point to the new vma. + * + * However, that still leaves open the possibility of the vma + * being copied upon fork. Which falls under the same userspace + * synchronisation issue as a regular bo, except that this time + * the process may not be expecting that a particular piece of + * memory is tied to the GPU. + * + * Fortunately, we can hook into the mmu_notifier in order to + * discard the page references prior to anything nasty happening + * to the vma (discard or cloning) which should prevent the more + * egregious cases from causing harm. 
+ */ + + if (obj->userptr.work) { + /* active flag should still be held for the pending work */ + if (IS_ERR(obj->userptr.work)) + return PTR_ERR(obj->userptr.work); + else + return -EAGAIN; + } + + pvec = NULL; + pinned = 0; + + if (mm == current->mm) { + pvec = kvmalloc_array(num_pages, sizeof(struct page *), + GFP_KERNEL | + __GFP_NORETRY | + __GFP_NOWARN); + if (pvec) /* defer to worker if malloc fails */ + pinned = __get_user_pages_fast(obj->userptr.ptr, + num_pages, + !i915_gem_object_is_readonly(obj), + pvec); + } + + active = false; + if (pinned < 0) { + pages = ERR_PTR(pinned); + pinned = 0; + } else if (pinned < num_pages) { + pages = __i915_gem_userptr_get_pages_schedule(obj); + active = pages == ERR_PTR(-EAGAIN); + } else { + pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages); + active = !IS_ERR(pages); + } + if (active) + __i915_gem_userptr_set_active(obj, true); + + if (IS_ERR(pages)) + release_pages(pvec, pinned); + kvfree(pvec); + + return PTR_ERR_OR_ZERO(pages); +} + +static void +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + struct sgt_iter sgt_iter; + struct page *page; + + /* Cancel any inflight work and force them to restart their gup */ + obj->userptr.work = NULL; + __i915_gem_userptr_set_active(obj, false); + if (!pages) + return; + + __i915_gem_object_release_shmem(obj, pages, true); + i915_gem_gtt_finish_pages(obj, pages); + + for_each_sgt_page(page, sgt_iter, pages) { + if (obj->mm.dirty) + set_page_dirty(page); + + mark_page_accessed(page); + put_page(page); + } + obj->mm.dirty = false; + + sg_free_table(pages); + kfree(pages); +} + +static void +i915_gem_userptr_release(struct drm_i915_gem_object *obj) +{ + i915_gem_userptr_release__mmu_notifier(obj); + i915_gem_userptr_release__mm_struct(obj); +} + +static int +i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) +{ + if (obj->userptr.mmu_object) + return 0; + + return i915_gem_userptr_init__mmu_notifier(obj, 0); +} + +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_ASYNC_CANCEL, + .get_pages = i915_gem_userptr_get_pages, + .put_pages = i915_gem_userptr_put_pages, + .dmabuf_export = i915_gem_userptr_dmabuf_export, + .release = i915_gem_userptr_release, +}; + +/* + * Creates a new mm object that wraps some normal memory from the process + * context - user memory. + * + * We impose several restrictions upon the memory being mapped + * into the GPU. + * 1. It must be page aligned (both start/end addresses, i.e ptr and size). + * 2. It must be normal system memory, not a pointer into another map of IO + * space (e.g. it must not be a GTT mmapping of another object). + * 3. We only allow a bo as large as we could in theory map into the GTT, + * that is we limit the size to the total size of the GTT. + * 4. The bo is marked as being snoopable. The backing pages are left + * accessible directly by the CPU, but reads and writes by the GPU may + * incur the cost of a snoop (unless you have an LLC architecture). + * + * Synchronisation between multiple users and the GPU is left to userspace + * through the normal set-domain-ioctl. The kernel will enforce that the + * GPU relinquishes the VMA before it is returned back to the system + * i.e. upon free(), munmap() or process termination. 
However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls. Another
+ * restriction is that we do not allow userptr surfaces to be pinned to the
+ * hardware and so we reject any attempt to create a framebuffer out of a
+ * userptr.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev,
+ void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_gem_userptr *args = data;
+ struct drm_i915_gem_object *obj;
+ int ret;
+ u32 handle;
+
+ if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
+ /* We cannot support coherent userptr objects on hw without
+ * LLC and with broken snooping.
+ */
+ return -ENODEV;
+ }
+
+ if (args->flags & ~(I915_USERPTR_READ_ONLY |
+ I915_USERPTR_UNSYNCHRONIZED))
+ return -EINVAL;
+
+ if (!args->user_size)
+ return -EINVAL;
+
+ if (offset_in_page(args->user_ptr | args->user_size))
+ return -EINVAL;
+
+ if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
+ return -EFAULT;
+
+ if (args->flags & I915_USERPTR_READ_ONLY) {
+ struct i915_hw_ppgtt *ppgtt;
+
+ /*
+ * On almost all of the older hw, we cannot tell the GPU that
+ * a page is readonly.
+ */
+ ppgtt = dev_priv->kernel_context->ppgtt;
+ if (!ppgtt || !ppgtt->vm.has_read_only)
+ return -ENODEV;
+ }
+
+ obj = i915_gem_object_alloc();
+ if (obj == NULL)
+ return -ENOMEM;
+
+ drm_gem_private_object_init(dev, &obj->base, args->user_size);
+ i915_gem_object_init(obj, &i915_gem_userptr_ops);
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+ obj->userptr.ptr = args->user_ptr;
+ if (args->flags & I915_USERPTR_READ_ONLY)
+ i915_gem_object_set_readonly(obj);
+
+ /* And keep a pointer to the current->mm for resolving the user pages
+ * at binding. This means that we need to hook into the mmu_notifier
+ * in order to detect if the mmu is destroyed.
+ */ + ret = i915_gem_userptr_init__mm_struct(obj); + if (ret == 0) + ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags); + if (ret == 0) + ret = drm_gem_handle_create(file, &obj->base, &handle); + + /* drop reference from allocate - handle holds it now */ + i915_gem_object_put(obj); + if (ret) + return ret; + + args->handle = handle; + return 0; +} + +int i915_gem_init_userptr(struct drm_i915_private *dev_priv) +{ + mutex_init(&dev_priv->mm_lock); + hash_init(dev_priv->mm_structs); + + dev_priv->mm.userptr_wq = + alloc_workqueue("i915-userptr-acquire", + WQ_HIGHPRI | WQ_UNBOUND, + 0); + if (!dev_priv->mm.userptr_wq) + return -ENOMEM; + + return 0; +} + +void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv) +{ + destroy_workqueue(dev_priv->mm.userptr_wq); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c new file mode 100644 index 000000000000..099f3397aada --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017 Intel Corporation + */ + +#include +#include +#include + +#include "i915_drv.h" +#include "i915_gemfs.h" + +int i915_gemfs_init(struct drm_i915_private *i915) +{ + struct file_system_type *type; + struct vfsmount *gemfs; + + type = get_fs_type("tmpfs"); + if (!type) + return -ENODEV; + + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); + + /* + * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most + * likely 2M. Note that within_size may overallocate huge-pages, if say + * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under + * memory pressure shmem should split any huge-pages which can be + * shrunk. + */ + + if (has_transparent_hugepage()) { + struct super_block *sb = gemfs->mnt_sb; + /* FIXME: Disabled until we get W/A for read BW issue. 
*/ + char options[] = "huge=never"; + int flags = 0; + int err; + + err = sb->s_op->remount_fs(sb, &flags, options); + if (err) { + kern_unmount(gemfs); + return err; + } + } + + i915->mm.gemfs = gemfs; + + return 0; +} + +void i915_gemfs_fini(struct drm_i915_private *i915) +{ + kern_unmount(i915->mm.gemfs); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h new file mode 100644 index 000000000000..2a1e59af3e4a --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017 Intel Corporation + */ + +#ifndef __I915_GEMFS_H__ +#define __I915_GEMFS_H__ + +struct drm_i915_private; + +int i915_gemfs_init(struct drm_i915_private *i915); + +void i915_gemfs_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c new file mode 100644 index 000000000000..824f3761314c --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "huge_gem_object.h" + +static void huge_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + unsigned long nreal = obj->scratch / PAGE_SIZE; + struct scatterlist *sg; + + for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) + __free_page(sg_page(sg)); + + sg_free_table(pages); + kfree(pages); +} + +static int huge_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + const unsigned long nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct scatterlist *sg, *src, *end; + struct sg_table *pages; + unsigned long n; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return -ENOMEM; + + if (sg_alloc_table(pages, npages, GFP)) { + kfree(pages); + return -ENOMEM; + } + + sg = pages->sgl; + for (n = 0; n < nreal; n++) { + struct page *page; + + page = alloc_page(GFP | __GFP_HIGHMEM); + if (!page) { + sg_mark_end(sg); + goto err; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + sg = __sg_next(sg); + } + if (nreal < npages) { + for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { + sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); + src = __sg_next(src); + if (src == end) + src = pages->sgl; + } + } + + if (i915_gem_gtt_prepare_pages(obj, pages)) + goto err; + + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); + + return 0; + +err: + huge_free_pages(obj, pages); + + return -ENOMEM; +#undef GFP +} + +static void huge_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_free_pages(obj, pages); + + obj->mm.dirty = false; +} + +static const struct drm_i915_gem_object_ops huge_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = huge_get_pages, + .put_pages = huge_put_pages, +}; + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size) +{ + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!phys_size || phys_size > dma_size); + GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(dma_size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + 
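/*
+ * Note: only the first phys_size bytes get real backing pages;
+ * huge_get_pages() reuses those pages to fill out the remainder of
+ * the dma_size-long sg table (phys_size is stashed in obj->scratch
+ * below).
+ */
+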
drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); + i915_gem_object_init(obj, &huge_ops); + + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->scratch = phys_size; + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h new file mode 100644 index 000000000000..549c1394bcdc --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __HUGE_GEM_OBJECT_H +#define __HUGE_GEM_OBJECT_H + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size); + +static inline phys_addr_t +huge_gem_object_phys_size(struct drm_i915_gem_object *obj) +{ + return obj->scratch; +} + +static inline dma_addr_t +huge_gem_object_dma_size(struct drm_i915_gem_object *obj) +{ + return obj->base.size; +} + +#endif /* !__HUGE_GEM_OBJECT_H */ diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c new file mode 100644 index 000000000000..7b437f06a9be --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -0,0 +1,1780 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017 Intel Corporation + */ + +#include + +#include "i915_selftest.h" + +#include "gem/i915_gem_pm.h" + +#include "igt_gem_utils.h" +#include "mock_context.h" + +#include "selftests/mock_drm.h" +#include "selftests/mock_gem_device.h" +#include "selftests/i915_random.h" + +static const unsigned int page_sizes[] = { + I915_GTT_PAGE_SIZE_2M, + I915_GTT_PAGE_SIZE_64K, + I915_GTT_PAGE_SIZE_4K, +}; + +static unsigned int get_largest_page_size(struct drm_i915_private *i915, + u64 rem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) + return page_size; + } + + return 0; +} + +static void huge_pages_free_pages(struct sg_table *st) +{ + struct scatterlist *sg; + + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } + + sg_free_table(st); + kfree(st); +} + +static int get_huge_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + unsigned int page_mask = obj->mm.page_mask; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + + /* + * Our goal here is simple, we want to greedily fill the object from + * largest to smallest page-size, while ensuring that we use *every* + * page-size as per the given page-mask. 
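+ *
+ * E.g. for a 2M + 64K + 4K object with a 2M|64K|4K page_mask we should
+ * allocate exactly one 2M chunk, one 64K chunk and one 4K chunk (the
+ * GEM_BUG_ON on sg_page_sizes below asserts that every bit was used).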
+ */ + do { + unsigned int bit = ilog2(page_mask); + unsigned int page_size = BIT(bit); + int order = get_order(page_size); + + do { + struct page *page; + + GEM_BUG_ON(order >= MAX_ORDER); + page = alloc_pages(GFP | __GFP_ZERO, order); + if (!page) + goto err; + + sg_set_page(sg, page, page_size, 0); + sg_page_sizes |= page_size; + st->nents++; + + rem -= page_size; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while ((rem - ((page_size-1) & page_mask)) >= page_size); + + page_mask &= (page_size-1); + } while (page_mask); + + if (i915_gem_gtt_prepare_pages(obj, st)) + goto err; + + obj->mm.madv = I915_MADV_DONTNEED; + + GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + huge_pages_free_pages(st); + + return -ENOMEM; +} + +static void put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_pages_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops huge_page_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = get_huge_pages, + .put_pages = put_huge_pages, +}; + +static struct drm_i915_gem_object * +huge_pages_object(struct drm_i915_private *i915, + u64 size, + unsigned int page_mask) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &huge_page_ops); + + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + obj->mm.page_mask = page_mask; + + return obj; +} + +static int fake_get_huge_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const u64 max_len = rounddown_pow_of_two(UINT_MAX); + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + /* Use optimal page sized chunks to fill in the sg table */ + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + do { + unsigned int page_size = get_largest_page_size(i915, rem); + unsigned int len = min(page_size * div_u64(rem, page_size), + max_len); + + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = len; + sg_dma_len(sg) = len; + sg_dma_address(sg) = page_size; + + sg_page_sizes |= len; + + st->nents++; + + rem -= len; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = sg_next(sg); + } while (1); + + i915_sg_trim(st); + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; +} + +static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int page_size; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, 1, GFP)) { + kfree(st); + return -ENOMEM; + } + + sg = 
st->sgl; + st->nents = 1; + + page_size = get_largest_page_size(i915, obj->base.size); + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = obj->base.size; + sg_dma_len(sg) = obj->base.size; + sg_dma_address(sg) = page_size; + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; +#undef GFP +} + +static void fake_free_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void fake_put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_huge_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages, + .put_pages = fake_put_huge_pages, +}; + +static const struct drm_i915_gem_object_ops fake_ops_single = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages_single, + .put_pages = fake_put_huge_pages, +}; + +static struct drm_i915_gem_object * +fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (size >> PAGE_SHIFT > UINT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + + if (single) + i915_gem_object_init(obj, &fake_ops_single); + else + i915_gem_object_init(obj, &fake_ops); + + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + return obj; +} + +static int igt_check_page_sizes(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj = vma->obj; + int err = 0; + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { + pr_err("unsupported page_sizes.sg=%u, supported=%u\n", + vma->page_sizes.sg & ~supported, supported); + err = -EINVAL; + } + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { + pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", + vma->page_sizes.gtt & ~supported, supported); + err = -EINVAL; + } + + if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { + pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", + vma->page_sizes.phys, obj->mm.page_sizes.phys); + err = -EINVAL; + } + + if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { + pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", + vma->page_sizes.sg, obj->mm.page_sizes.sg); + err = -EINVAL; + } + + if (obj->mm.page_sizes.gtt) { + pr_err("obj->page_sizes.gtt(%u) should never be set\n", + obj->mm.page_sizes.gtt); + err = -EINVAL; + } + + return err; +} + +static int igt_mock_exhaust_device_supported_pages(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int i, j, single; + int err; + + /* + * Sanity check creating objects with every valid page support + * combination for our mock device. 
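+ * (i.e. every non-empty subset of {4K, 64K, 2M}, each tried with both
+ * the default and the single-sg fake backing store.)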
+ */ + + for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { + unsigned int combination = 0; + + for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { + if (i & BIT(j)) + combination |= page_sizes[j]; + } + + mkwrite_device_info(i915)->page_sizes = combination; + + for (single = 0; single <= 1; ++single) { + obj = fake_huge_pages_object(i915, combination, !!single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + if (obj->base.size != combination) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, combination); + err = -EINVAL; + goto out_put; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.sg != combination) { + pr_err("page_sizes.sg=%u, expected=%u\n", + vma->page_sizes.sg, combination); + err = -EINVAL; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + + if (err) + goto out_device; + } + } + + goto out_device; + +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = saved_mask; + + return err; +} + +static int igt_mock_ppgtt_misaligned_dma(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + int bit; + int err; + + /* + * Sanity check dma misalignment for huge pages -- the dma addresses we + * insert into the paging structures need to always respect the page + * size alignment. + */ + + bit = ilog2(I915_GTT_PAGE_SIZE_64K); + + for_each_set_bit_from(bit, &supported, + ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + IGT_TIMEOUT(end_time); + unsigned int page_size = BIT(bit); + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int offset; + unsigned int size = + round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; + struct i915_vma *vma; + + obj = fake_huge_pages_object(i915, size, true); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != size) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, size); + err = -EINVAL; + goto out_put; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + /* Force the page size for this object */ + obj->mm.page_sizes.sg = page_size; + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != page_size) { + pr_err("page_sizes.gtt=%u, expected %u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + /* + * Try all the other valid offsets until the next + * boundary -- should always fall back to using 4K + * pages. 
+ */ + for (offset = 4096; offset < page_size; offset += 4096) { + err = i915_vma_unbind(vma); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags | offset); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { + pr_err("page_sizes.gtt=%u, expected %llu\n", + vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + if (igt_timeout(end_time, + "%s timed out at offset %x with page-size %x\n", + __func__, offset, page_size)) + break; + } + + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static void close_object_list(struct list_head *objects, + struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } +} + +static int igt_mock_ppgtt_huge_fill(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT; + unsigned long page_num; + bool single = false; + LIST_HEAD(objects); + IGT_TIMEOUT(end_time); + int err = -ENODEV; + + for_each_prime_number_from(page_num, 1, max_pages) { + struct drm_i915_gem_object *obj; + u64 size = page_num << PAGE_SHIFT; + struct i915_vma *vma; + unsigned int expected_gtt = 0; + int i; + + obj = fake_huge_pages_object(i915, size, single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + if (obj->base.size != size) { + pr_err("obj->base.size=%zd, expected=%llu\n", + obj->base.size, size); + i915_gem_object_put(obj); + err = -EINVAL; + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + break; + + err = igt_check_page_sizes(vma); + if (err) { + i915_vma_unpin(vma); + break; + } + + /* + * Figure out the expected gtt page size knowing that we go from + * largest to smallest page size sg chunks, and that we align to + * the largest page size. 
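+ *
+ * E.g. a 2M + 64K object should use one 2M chunk plus one 64K chunk of
+ * GTT, whereas any trailing 4K remainder forces the 64K bit to be
+ * dropped in favour of 4K pages (see the fixup below the loop).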
+ */ + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && + size >= page_size) { + expected_gtt |= page_size; + size &= page_size-1; + } + } + + GEM_BUG_ON(!expected_gtt); + GEM_BUG_ON(size); + + if (expected_gtt & I915_GTT_PAGE_SIZE_4K) + expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; + + i915_vma_unpin(vma); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + break; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + break; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", + vma->page_sizes.gtt, expected_gtt, + obj->base.size, yesno(!!single)); + err = -EINVAL; + break; + } + + if (igt_timeout(end_time, + "%s timed out at size %zd\n", + __func__, obj->base.size)) + break; + + single = !single; + } + + close_object_list(&objects, ppgtt); + + if (err == -ENOMEM || err == -ENOSPC) + err = 0; + + return err; +} + +static int igt_mock_ppgtt_64K(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + struct drm_i915_gem_object *obj; + const struct object_info { + unsigned int size; + unsigned int gtt; + unsigned int offset; + } objects[] = { + /* Cases with forced padding/alignment */ + { + .size = SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_64K + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_64K - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + /* Try without any forced padding/alignment */ + { + .size = SZ_64K, + .offset = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + { + .size = SZ_128K, + .offset = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + }; + struct i915_vma *vma; + int i, single; + int err; + + /* + * Sanity check some of the trickiness with 64K pages -- either we can + * safely mark the whole page-table(2M block) as 64K, or we have to + * always fallback to 4K. + */ + + if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) + return 0; + + for (i = 0; i < ARRAY_SIZE(objects); ++i) { + unsigned int size = objects[i].size; + unsigned int expected_gtt = objects[i].gtt; + unsigned int offset = objects[i].offset; + unsigned int flags = PIN_USER; + + for (single = 0; single <= 1; single++) { + obj = fake_huge_pages_object(i915, size, !!single); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_object_put; + + /* + * Disable 2M pages -- We only want to use 64K/4K pages + * for this test. 
+ */ + obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object_unpin; + } + + if (offset) + flags |= PIN_OFFSET_FIXED | offset; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_vma_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + goto out_vma_unpin; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + goto out_vma_unpin; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", + vma->page_sizes.gtt, expected_gtt, i, + yesno(!!single)); + err = -EINVAL; + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + } + + return 0; + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); +out_object_unpin: + i915_gem_object_unpin_pages(obj); +out_object_put: + i915_gem_object_put(obj); + + return err; +} + +static struct i915_vma * +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) +{ + struct drm_i915_private *i915 = vma->vm->i915; + const int gen = INTEL_GEN(i915); + unsigned int count = vma->size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj; + struct i915_vma *batch; + unsigned int size; + u32 *cmd; + int n; + int err; + + size = (1 + 4 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = val; + } + + offset += PAGE_SIZE; + } + + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(obj); + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (err) + goto err; + + return batch; + +err: + i915_gem_object_put(obj); + + return ERR_PTR(err); +} + +static int gpu_write(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 dword, + u32 value) +{ + struct i915_request *rq; + struct i915_vma *batch; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + + batch = gpu_write_dw(vma, dword * sizeof(u32), value); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto err_request; + + i915_gem_object_set_active_reference(batch->obj); + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto err_request; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); +err_request: + if (err) + i915_request_skip(rq, err); + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); + i915_vma_close(batch); + + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned int needs_flush; + unsigned long n; + int err; + + err = i915_gem_object_prepare_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(ptr, PAGE_SIZE); + + if (ptr[dword] != val) { + pr_err("n=%lu ptr[%u]=%u, val=%u\n", + n, dword, ptr[dword], val); + kunmap_atomic(ptr); + err = -EINVAL; + break; + } + + kunmap_atomic(ptr); + } + + i915_gem_object_finish_access(obj); + + return err; +} + +static int __igt_write_huge(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + u64 size, u64 offset, + u32 dword, u32 val) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_unbind(vma); + if (err) + goto out_vma_close; + + err = i915_vma_pin(vma, size, 0, flags | offset); + if (err) { + /* + * The ggtt may have some pages reserved so + * refrain from erroring out. 
+ */
+ if (err == -ENOSPC && i915_is_ggtt(vm))
+ err = 0;
+
+ goto out_vma_close;
+ }
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_vma_unpin;
+
+ err = gpu_write(vma, ctx, engine, dword, val);
+ if (err) {
+ pr_err("gpu-write failed at offset=%llx\n", offset);
+ goto out_vma_unpin;
+ }
+
+ err = cpu_check(obj, dword, val);
+ if (err) {
+ pr_err("cpu-check failed at offset=%llx\n", offset);
+ goto out_vma_unpin;
+ }
+
+out_vma_unpin:
+ i915_vma_unpin(vma);
+out_vma_close:
+ i915_vma_destroy(vma);
+
+ return err;
+}
+
+static int igt_write_huge(struct i915_gem_context *ctx,
+ struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_address_space *vm =
+ ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+ static struct intel_engine_cs *engines[I915_NUM_ENGINES];
+ struct intel_engine_cs *engine;
+ I915_RND_STATE(prng);
+ IGT_TIMEOUT(end_time);
+ unsigned int max_page_size;
+ unsigned int id;
+ u64 max;
+ u64 num;
+ u64 size;
+ int *order;
+ int i, n;
+ int err = 0;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+ size = obj->base.size;
+ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+ size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+
+ max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
+ max = div_u64((vm->total - size), max_page_size);
+
+ n = 0;
+ for_each_engine(engine, i915, id) {
+ if (!intel_engine_can_store_dword(engine)) {
+ pr_info("store-dword-imm not supported on engine=%u\n",
+ id);
+ continue;
+ }
+ engines[n++] = engine;
+ }
+
+ if (!n)
+ return 0;
+
+ /*
+ * To keep things interesting when alternating between engines in our
+ * randomized order, let's also make feeding to the same engine a few
+ * times in succession a possibility by enlarging the permutation array.
+ */
+ order = i915_random_order(n * I915_NUM_ENGINES, &prng);
+ if (!order)
+ return -ENOMEM;
+
+ /*
+ * Try various offsets in an ascending/descending fashion until we
+ * time out -- we want to avoid issues hidden by effectively always using
+ * offset = 0.
+ */
+ i = 0;
+ for_each_prime_number_from(num, 0, max) {
+ u64 offset_low = num * max_page_size;
+ u64 offset_high = (max - num) * max_page_size;
+ u32 dword = offset_in_page(num) / 4;
+
+ engine = engines[order[i] % n];
+ i = (i + 1) % (n * I915_NUM_ENGINES);
+
+ /*
+ * In order to utilize 64K pages we need to both pad the vma
+ * size and ensure the vma offset is at the start of the pt
+ * boundary; however, to improve coverage we opt for testing both
+ * aligned and unaligned offsets.
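+ * (64K PTEs can only be used while every entry of the enclosing
+ * 2M page table also maps 64K pages, which is why offset_low is
+ * rounded down to a 2M boundary below.)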
+ */
+ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+ offset_low = round_down(offset_low,
+ I915_GTT_PAGE_SIZE_2M);
+
+ err = __igt_write_huge(ctx, engine, obj, size, offset_low,
+ dword, num + 1);
+ if (err)
+ break;
+
+ err = __igt_write_huge(ctx, engine, obj, size, offset_high,
+ dword, num + 1);
+ if (err)
+ break;
+
+ if (igt_timeout(end_time,
+ "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
+ __func__, engine->id, offset_low, offset_high,
+ max_page_size))
+ break;
+ }
+
+ kfree(order);
+
+ return err;
+}
+
+static int igt_ppgtt_exhaust_huge(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ unsigned long supported = INTEL_INFO(i915)->page_sizes;
+ static unsigned int pages[ARRAY_SIZE(page_sizes)];
+ struct drm_i915_gem_object *obj;
+ unsigned int size_mask;
+ unsigned int page_mask;
+ int n, i;
+ int err = -ENODEV;
+
+ if (supported == I915_GTT_PAGE_SIZE_4K)
+ return 0;
+
+ /*
+ * Sanity check creating objects with a varying mix of page sizes --
+ * ensuring that our writes land in the right place.
+ */
+
+ n = 0;
+ for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1)
+ pages[n++] = BIT(i);
+
+ for (size_mask = 2; size_mask < BIT(n); size_mask++) {
+ unsigned int size = 0;
+
+ for (i = 0; i < n; i++) {
+ if (size_mask & BIT(i))
+ size |= pages[i];
+ }
+
+ /*
+ * For our page mask we want to enumerate all the page-size
+ * combinations which will fit into our chosen object size.
+ */
+ for (page_mask = 2; page_mask <= size_mask; page_mask++) {
+ unsigned int page_sizes = 0;
+
+ for (i = 0; i < n; i++) {
+ if (page_mask & BIT(i))
+ page_sizes |= pages[i];
+ }
+
+ /*
+ * Ensure that we can actually fill the given object
+ * with our chosen page mask.
+ */
+ if (!IS_ALIGNED(size, BIT(__ffs(page_sizes))))
+ continue;
+
+ obj = huge_pages_object(i915, size, page_sizes);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_device;
+ }
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ i915_gem_object_put(obj);
+
+ if (err == -ENOMEM) {
+ pr_info("unable to get pages, size=%u, pages=%u\n",
+ size, page_sizes);
+ err = 0;
+ break;
+ }
+
+ pr_err("pin_pages failed, size=%u, pages=%u\n",
+ size_mask, page_mask);
+
+ goto out_device;
+ }
+
+ /* Force the page-size for the gtt insertion */
+ obj->mm.page_sizes.sg = page_sizes;
+
+ err = igt_write_huge(ctx, obj);
+ if (err) {
+ pr_err("exhaust write-huge failed with size=%u\n",
+ size);
+ goto out_unpin;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ i915_gem_object_put(obj);
+ }
+ }
+
+ goto out_device;
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+out_device:
+ mkwrite_device_info(i915)->page_sizes = supported;
+
+ return err;
+}
+
+static int igt_ppgtt_internal_huge(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_gem_object *obj;
+ static const unsigned int sizes[] = {
+ SZ_64K,
+ SZ_128K,
+ SZ_256K,
+ SZ_512K,
+ SZ_1M,
+ SZ_2M,
+ };
+ int i;
+ int err;
+
+ /*
+ * Sanity check that the HW uses huge pages correctly through internal
+ * -- ensure that our writes land in the right place.
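+ * ("internal" refers to i915_gem_object_create_internal(), which
+ * is backed by raw page allocations and so can hand back
+ * high-order (huge) pages without any shmem involvement.)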
+ */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("internal unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("internal write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static int igt_ppgtt_gemfs_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_2M, + SZ_4M, + SZ_8M, + SZ_16M, + SZ_32M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through gemfs -- + * ensure that our writes land in the right place. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_shmem(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("gemfs write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_ppgtt_pin_update(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *dev_priv = ctx->i915; + unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + int first, last; + int err; + + /* + * Make sure there's no funny business when doing a PIN_UPDATE -- in the + * past we had a subtle issue with being able to incorrectly do multiple + * alloc va ranges on the same object when doing a PIN_UPDATE, which + * resulted in some pretty nasty bugs, though only when using + * huge-gtt-pages. 
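+ * (PIN_UPDATE rewrites the PTEs of an already bound vma in place;
+ * the old bug was re-running the va range allocation on such an
+ * update, corrupting the huge-page tracking.)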
+ */
+
+ if (!ppgtt || !i915_vm_is_4lvl(&ppgtt->vm)) {
+ pr_info("48b PPGTT not supported, skipping\n");
+ return 0;
+ }
+
+ first = ilog2(I915_GTT_PAGE_SIZE_64K);
+ last = ilog2(I915_GTT_PAGE_SIZE_2M);
+
+ for_each_set_bit_from(first, &supported, last + 1) {
+ unsigned int page_size = BIT(first);
+
+ obj = i915_gem_object_create_internal(dev_priv, page_size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, SZ_2M, 0, flags);
+ if (err)
+ goto out_close;
+
+ if (vma->page_sizes.sg < page_size) {
+ pr_info("Unable to allocate page-size %x, finishing test early\n",
+ page_size);
+ goto out_unpin;
+ }
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_unpin;
+
+ if (vma->page_sizes.gtt != page_size) {
+ dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0);
+
+ /*
+ * The only valid reason for this to ever fail would be
+ * if the dma-mapper screwed us over when we did the
+ * dma_map_sg(), since it has the final say over the dma
+ * address.
+ */
+ if (IS_ALIGNED(addr, page_size)) {
+ pr_err("page_sizes.gtt=%u, expected=%u\n",
+ vma->page_sizes.gtt, page_size);
+ err = -EINVAL;
+ } else {
+ pr_info("dma address misaligned, finishing test early\n");
+ }
+
+ goto out_unpin;
+ }
+
+ err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+ if (err)
+ goto out_unpin;
+
+ i915_vma_unpin(vma);
+ i915_vma_close(vma);
+
+ i915_gem_object_put(obj);
+ }
+
+ obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_close;
+
+ /*
+ * Make sure we don't end up with something like where the pde is still
+ * pointing to the 2M page, and the pt we just filled-in is dangling --
+ * we can check this by writing to the first page where it would then
+ * land in the now stale 2M page.
+ */
+
+ err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf);
+ if (err)
+ goto out_unpin;
+
+ err = cpu_check(obj, 0, 0xdeadbeaf);
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static int igt_tmpfs_fallback(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ struct vfsmount *gemfs = i915->mm.gemfs;
+ struct i915_address_space *vm =
+ ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *vaddr;
+ int err = 0;
+
+ /*
+ * Make sure that we don't burst into a ball of flames upon falling back
+ * to tmpfs, which we rely on if, on the off-chance, we encounter a failure
+ * when setting up gemfs.
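+ * (Clearing i915->mm.gemfs below makes the shmem object creation
+ * take the plain tmpfs path, exactly as it would if mounting our
+ * private huge-page gemfs had failed.)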
+ */ + + i915->mm.gemfs = NULL; + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_restore; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + *vaddr = 0xdeadbeaf; + + __i915_gem_object_flush_map(obj, 0, 64); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_restore: + i915->mm.gemfs = gemfs; + + return err; +} + +static int igt_shrink_thp(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER; + int err; + + /* + * Sanity check shrinking huge-paged object -- make sure nothing blows + * up. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + obj = i915_gem_object_create_shmem(i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("failed to allocate THP, finishing test early\n"); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + + /* + * Now that the pages are *unpinned* shrink-all should invoke + * shmem to truncate our pages. 
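+ * (Truncation drops the shmem backing store entirely, so on
+ * success both page_sizes.sg and page_sizes.phys are expected to
+ * read back as zero below.)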
+ */
+ i915_gem_shrink_all(i915);
+ if (i915_gem_object_has_pages(obj)) {
+ pr_err("shrink-all didn't truncate the pages\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
+ pr_err("residual page-size bits left\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_close;
+
+ err = cpu_check(obj, 0, 0xdeadbeaf);
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+int i915_gem_huge_page_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_mock_exhaust_device_supported_pages),
+ SUBTEST(igt_mock_ppgtt_misaligned_dma),
+ SUBTEST(igt_mock_ppgtt_huge_fill),
+ SUBTEST(igt_mock_ppgtt_64K),
+ };
+ struct drm_i915_private *dev_priv;
+ struct i915_hw_ppgtt *ppgtt;
+ int err;
+
+ dev_priv = mock_gem_device();
+ if (!dev_priv)
+ return -ENOMEM;
+
+ /* Pretend to be a device which supports the 48b PPGTT */
+ mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
+ mkwrite_device_info(dev_priv)->ppgtt_size = 48;
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ ppgtt = i915_ppgtt_create(dev_priv);
+ if (IS_ERR(ppgtt)) {
+ err = PTR_ERR(ppgtt);
+ goto out_unlock;
+ }
+
+ if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+ pr_err("failed to create 48b PPGTT\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ /* If we ever hit this then it's time to mock the 64K scratch */
+ if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
+ pr_err("PPGTT missing 64K scratch page\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ err = i915_subtests(tests, ppgtt);
+
+out_close:
+ i915_ppgtt_put(ppgtt);
+
+out_unlock:
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+ drm_dev_put(&dev_priv->drm);
+
+ return err;
+}
+
+int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_shrink_thp),
+ SUBTEST(igt_ppgtt_pin_update),
+ SUBTEST(igt_tmpfs_fallback),
+ SUBTEST(igt_ppgtt_exhaust_huge),
+ SUBTEST(igt_ppgtt_gemfs_huge),
+ SUBTEST(igt_ppgtt_internal_huge),
+ };
+ struct drm_file *file;
+ struct i915_gem_context *ctx;
+ intel_wakeref_t wakeref;
+ int err;
+
+ if (!HAS_PPGTT(dev_priv)) {
+ pr_info("PPGTT not supported, skipping live-selftests\n");
+ return 0;
+ }
+
+ if (i915_terminally_wedged(dev_priv))
+ return 0;
+
+ file = mock_file(dev_priv);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(dev_priv);
+
+ ctx = live_context(dev_priv, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_unlock;
+ }
+
+ if (ctx->ppgtt)
+ ctx->ppgtt->vm.scrub_64K = true;
+
+ err = i915_subtests(tests, ctx);
+
+out_unlock:
+ intel_runtime_pm_put(dev_priv, wakeref);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ mock_file_free(dev_priv, file);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
new file mode 100644
index 000000000000..5495875b48b3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -0,0 +1,379 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include 
+
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+
+static int cpu_set(struct drm_i915_gem_object *obj,
+ unsigned long offset,
+ u32 v)
+{
+ unsigned int needs_clflush;
+ struct page *page;
+ void *map;
+ u32 *cpu;
+ int err;
+
+ err =
i915_gem_object_prepare_write(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + cpu = map + offset_in_page(offset); + + if (needs_clflush & CLFLUSH_BEFORE) + drm_clflush_virt_range(cpu, sizeof(*cpu)); + + *cpu = v; + + if (needs_clflush & CLFLUSH_AFTER) + drm_clflush_virt_range(cpu, sizeof(*cpu)); + + kunmap_atomic(map); + i915_gem_object_finish_access(obj); + + return 0; +} + +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + unsigned int needs_clflush; + struct page *page; + void *map; + u32 *cpu; + int err; + + err = i915_gem_object_prepare_read(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + cpu = map + offset_in_page(offset); + + if (needs_clflush & CLFLUSH_BEFORE) + drm_clflush_virt_range(cpu, sizeof(*cpu)); + + *v = *cpu; + + kunmap_atomic(map); + i915_gem_object_finish_access(obj); + + return 0; +} + +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct i915_vma *vma; + u32 __iomem *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + iowrite32(v, &map[offset / sizeof(*map)]); + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + struct i915_vma *vma; + u32 __iomem *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = ioread32(&map[offset / sizeof(*map)]); + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + u32 *map; + int err; + + err = i915_gem_object_set_to_wc_domain(obj, true); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + u32 *map; + int err; + + err = i915_gem_object_set_to_wc_domain(obj, false); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_request_create(i915->engine[RCS0]->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + i915_vma_unpin(vma); + return PTR_ERR(cs); + } + + if (INTEL_GEN(i915) >= 8) { + 
*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = v; + } else if (INTEL_GEN(i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + *cs++ = MI_NOOP; + } + intel_ring_advance(rq, cs); + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(vma); + + i915_request_add(rq); + + return err; +} + +static bool always_valid(struct drm_i915_private *i915) +{ + return true; +} + +static bool needs_fence_registers(struct drm_i915_private *i915) +{ + return !i915_terminally_wedged(i915); +} + +static bool needs_mi_store_dword(struct drm_i915_private *i915) +{ + if (i915_terminally_wedged(i915)) + return false; + + return intel_engine_can_store_dword(i915->engine[RCS0]); +} + +static const struct igt_coherency_mode { + const char *name; + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); +} igt_coherency_mode[] = { + { "cpu", cpu_set, cpu_get, always_valid }, + { "gtt", gtt_set, gtt_get, needs_fence_registers }, + { "wc", wc_set, wc_get, always_valid }, + { "gpu", gpu_set, NULL, needs_mi_store_dword }, + { }, +}; + +static int igt_gem_coherency(void *arg) +{ + const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + const struct igt_coherency_mode *read, *write, *over; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + unsigned long count, n; + u32 *offsets, *values; + int err = 0; + + /* We repeatedly write, overwrite and read from a sequence of + * cachelines in order to try and detect incoherency (unflushed writes + * from either the CPU or GPU). Each setter/getter uses our cache + * domain API which should prevent incoherency. 
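+ * Each (over, write, read) triple below picks one path per step:
+ * stamp a stale value (~values[n]) through one interface,
+ * overwrite it with the real value through another, then read it
+ * back through a third and compare.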
+ */ + + offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); + if (!offsets) + return -ENOMEM; + for (count = 0; count < ncachelines; count++) + offsets[count] = count * 64 + 4 * (count % 16); + + values = offsets + ncachelines; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + for (over = igt_coherency_mode; over->name; over++) { + if (!over->set) + continue; + + if (!over->valid(i915)) + continue; + + for (write = igt_coherency_mode; write->name; write++) { + if (!write->set) + continue; + + if (!write->valid(i915)) + continue; + + for (read = igt_coherency_mode; read->name; read++) { + if (!read->get) + continue; + + if (!read->valid(i915)) + continue; + + for_each_prime_number_from(count, 1, ncachelines) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto unlock; + } + + i915_random_reorder(offsets, ncachelines, &prng); + for (n = 0; n < count; n++) + values[n] = prandom_u32_state(&prng); + + for (n = 0; n < count; n++) { + err = over->set(obj, offsets[n], ~values[n]); + if (err) { + pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", + n, count, over->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + err = write->set(obj, offsets[n], values[n]); + if (err) { + pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", + n, count, write->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + u32 found; + + err = read->get(obj, offsets[n], &found); + if (err) { + pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", + n, count, read->name, err); + goto put_object; + } + + if (found != values[n]) { + pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", + n, count, over->name, + write->name, values[n], + read->name, found, + ~values[n], offsets[n]); + err = -EINVAL; + goto put_object; + } + } + + __i915_gem_object_release_unless_active(obj); + } + } + } + } +unlock: + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + kfree(offsets); + return err; + +put_object: + __i915_gem_object_release_unless_active(obj); + goto unlock; +} + +int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_coherency), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c new file mode 100644 index 000000000000..653ae08a277f --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -0,0 +1,1736 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017 Intel Corporation + */ + +#include + +#include "gem/i915_gem_pm.h" +#include "gt/intel_reset.h" +#include "i915_selftest.h" + +#include "gem/selftests/igt_gem_utils.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_live_test.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" +#include "selftests/mock_drm.h" +#include "selftests/mock_gem_device.h" + +#include "huge_gem_object.h" +#include "igt_gem_utils.h" + +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +static int live_nop_switch(void *arg) +{ + const unsigned int nctx = 1024; + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context **ctx; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct 
igt_live_test t;
+ struct drm_file *file;
+ unsigned long n;
+ int err = -ENODEV;
+
+ /*
+ * Create as many contexts as we can feasibly get away with
+ * and check we can switch between them rapidly.
+ *
+ * Serves as a very simple stress test for submission and HW switching
+ * between contexts.
+ */
+
+ if (!DRIVER_CAPS(i915)->has_logical_contexts)
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ for (n = 0; n < nctx; n++) {
+ ctx[n] = live_context(i915, file);
+ if (IS_ERR(ctx[n])) {
+ err = PTR_ERR(ctx[n]);
+ goto out_unlock;
+ }
+ }
+
+ for_each_engine(engine, i915, id) {
+ struct i915_request *rq;
+ unsigned long end_time, prime;
+ ktime_t times[2] = {};
+
+ times[0] = ktime_get_raw();
+ for (n = 0; n < nctx; n++) {
+ rq = igt_request_alloc(ctx[n], engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unlock;
+ }
+ i915_request_add(rq);
+ }
+ if (i915_request_wait(rq,
+ I915_WAIT_LOCKED,
+ HZ / 5) < 0) {
+ pr_err("Failed to populate %d contexts\n", nctx);
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto out_unlock;
+ }
+
+ times[1] = ktime_get_raw();
+
+ pr_info("Populated %d contexts on %s in %lluns\n",
+ nctx, engine->name, ktime_to_ns(times[1] - times[0]));
+
+ err = igt_live_test_begin(&t, i915, __func__, engine->name);
+ if (err)
+ goto out_unlock;
+
+ end_time = jiffies + i915_selftest.timeout_jiffies;
+ for_each_prime_number_from(prime, 2, 8192) {
+ times[1] = ktime_get_raw();
+
+ for (n = 0; n < prime; n++) {
+ rq = igt_request_alloc(ctx[n % nctx], engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unlock;
+ }
+
+ /*
+ * This space is left intentionally blank.
+ *
+ * We do not actually want to perform any
+ * action with this request, we just want
+ * to measure the latency in allocation
+ * and submission of our breadcrumbs -
+ * ensuring that the bare request is sufficient
+ * for the system to work (i.e. proper HEAD
+ * tracking of the rings, interrupt handling,
+ * etc). It also gives us the lowest bounds
+ * for latency.
+ */
+
+ i915_request_add(rq);
+ }
+ if (i915_request_wait(rq,
+ I915_WAIT_LOCKED,
+ HZ / 5) < 0) {
+ pr_err("Switching between %ld contexts timed out\n",
+ prime);
+ i915_gem_set_wedged(i915);
+ break;
+ }
+
+ times[1] = ktime_sub(ktime_get_raw(), times[1]);
+ if (prime == 2)
+ times[0] = times[1];
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+ }
+
+ err = igt_live_test_end(&t);
+ if (err)
+ goto out_unlock;
+
+ pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
+ engine->name,
+ ktime_to_ns(times[0]),
+ prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
+ }
+
+out_unlock:
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ mock_file_free(i915, file);
+ return err;
+}
+
+static struct i915_vma *
+gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
+{
+ struct drm_i915_gem_object *obj;
+ const int gen = INTEL_GEN(vma->vm->i915);
+ unsigned long n, size;
+ u32 *cmd;
+ int err;
+
+ size = (4 * count + 1) * sizeof(u32);
+ size = round_up(size, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(vma->vm->i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto err;
+ }
+
+ GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
+ offset += vma->node.start;
+
+ for (n = 0; n < count; n++) {
+ if (gen >= 8) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4;
+ *cmd++ = lower_32_bits(offset);
+ *cmd++ = upper_32_bits(offset);
+ *cmd++ = value;
+ } else if (gen >= 4) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
+ (gen < 6 ? MI_USE_GGTT : 0);
+ *cmd++ = 0;
+ *cmd++ = offset;
+ *cmd++ = value;
+ } else {
+ *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *cmd++ = offset;
+ *cmd++ = value;
+ }
+ offset += PAGE_SIZE;
+ }
+ *cmd = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (err)
+ goto err;
+
+ vma = i915_vma_instance(obj, vma->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err;
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static unsigned long real_page_count(struct drm_i915_gem_object *obj)
+{
+ return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
+}
+
+static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
+{
+ return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
+}
+
+static int gpu_fill(struct drm_i915_gem_object *obj,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ unsigned int dw)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_address_space *vm =
+ ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ struct i915_vma *batch;
+ unsigned int flags;
+ int err;
+
+ GEM_BUG_ON(obj->base.size > vm->total);
+ GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (err)
+ return err;
+
+ err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
+ if (err)
+ return err;
+
+ /* Within the GTT the huge object maps every page onto
+ * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
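+ * (huge_gem_object() fakes a large object by reusing the same
+ * 1024 physical pages throughout its sg list, hence the
+ * real/fake page count helpers above.)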
+ * We set the nth dword within the page using the nth + * mapping via the GTT - this should exercise the GTT mapping + * whilst checking that each context provides a unique view + * into the object. + */ + batch = gpu_fill_dw(vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_unpin(vma); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, m, need_flush; + int err; + + err = i915_gem_object_prepare_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + for (m = 0; m < DW_PER_PAGE; m++) + map[m] = value; + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_object_finish_access(obj); + obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->write_domain = 0; + return 0; +} + +static noinline int cpu_check(struct drm_i915_gem_object *obj, + unsigned int idx, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = i915_gem_object_prepare_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + if (map[m] != m) { + pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", + __builtin_return_address(0), idx, + n, real_page_count(obj), m, max, + map[m], m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + if (map[m] != STACK_MAGIC) { + pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", + __builtin_return_address(0), idx, n, m, + map[m], STACK_MAGIC); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_object_finish_access(obj); + return err; +} + +static int file_add_object(struct drm_file *file, + struct drm_i915_gem_object *obj) +{ + int err; + + GEM_BUG_ON(obj->base.handle_count); + + /* tie the object to the drm_file for easy reaping */ + err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + if (err < 0) + return err; + + i915_gem_object_get(obj); + obj->base.handle_count++; + return 0; +} + +static struct drm_i915_gem_object * +create_test_object(struct i915_gem_context *ctx, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct 
i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm; + u64 size; + int err; + + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); + size = round_down(size, DW_PER_PAGE * PAGE_SIZE); + + obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + if (IS_ERR(obj)) + return obj; + + err = file_add_object(file, obj); + i915_gem_object_put(obj); + if (err) + return ERR_PTR(err); + + err = cpu_fill(obj, STACK_MAGIC); + if (err) { + pr_err("Failed to fill object with cpu, err=%d\n", + err); + return ERR_PTR(err); + } + + list_add_tail(&obj->st_link, objects); + return obj; +} + +static unsigned long max_dwords(struct drm_i915_gem_object *obj) +{ + unsigned long npages = fake_page_count(obj); + + GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); + return npages / DW_PER_PAGE; +} + +static int igt_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENODEV; + + /* + * Create a few different contexts (with different mm) and write + * through each ctx/mm using the GPU making sure those writes end + * up in the expected pages of our obj. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_object *obj = NULL; + unsigned long ncontexts, ndwords, dw; + struct igt_live_test t; + struct drm_file *file; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (!engine->context_size) + continue; /* No logical context support in HW */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + err = igt_live_test_begin(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct i915_gem_context *ctx; + intel_wakeref_t wakeref; + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + if (!obj) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) { + obj = NULL; + dw = 0; + } + + ndwords++; + ncontexts++; + } + + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", + ncontexts, engine->name, ndwords); + + ncontexts = dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, ncontexts++, rem); + if (err) + break; + + dw += rem; + } + +out_unlock: + if (igt_live_test_end(&t)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + if (err) + return err; + } + + return 0; +} + +static int igt_shared_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *parent; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_live_test t; + struct drm_file *file; + int err = 0; + + /* + * Create a few different contexts with the same mm and write + * through each ctx using the GPU making sure those writes end + * up in the expected pages of our obj. 
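+ * (The sharing is set up below by copying parent->ppgtt into each
+ * freshly created kernel context via __assign_ppgtt().)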
+ */ + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + parent = live_context(i915, file); + if (IS_ERR(parent)) { + err = PTR_ERR(parent); + goto out_unlock; + } + + if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */ + err = 0; + goto out_unlock; + } + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + unsigned long ncontexts, ndwords, dw; + struct drm_i915_gem_object *obj = NULL; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + + if (!intel_engine_can_store_dword(engine)) + continue; + + dw = 0; + ndwords = 0; + ncontexts = 0; + while (!time_after(jiffies, end_time)) { + struct i915_gem_context *ctx; + intel_wakeref_t wakeref; + + ctx = kernel_context(i915); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_test; + } + + __assign_ppgtt(ctx, parent->ppgtt); + + if (!obj) { + obj = create_test_object(parent, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + kernel_context_close(ctx); + goto out_test; + } + } + + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + kernel_context_close(ctx); + goto out_test; + } + + if (++dw == max_dwords(obj)) { + obj = NULL; + dw = 0; + } + + ndwords++; + ncontexts++; + + kernel_context_close(ctx); + } + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", + ncontexts, engine->name, ndwords); + + ncontexts = dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, ncontexts++, rem); + if (err) + goto out_test; + + dw += rem; + } + } +out_test: + if (igt_live_test_end(&t)) + err = -EIO; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj; + u32 *cmd; + int err; + + if (INTEL_GEN(vma->vm->i915) < 8) + return ERR_PTR(-EINVAL); + + obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd++ = MI_STORE_REGISTER_MEM_GEN8; + *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + *cmd = MI_BATCH_BUFFER_END; + + __i915_gem_object_flush_map(obj, 0, 64); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +emit_rpcs_query(struct drm_i915_gem_object *obj, + struct intel_context *ce, + struct i915_request **rq_out) +{ + struct i915_request *rq; + struct i915_vma *batch; + struct i915_vma *vma; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); + + vma = i915_vma_instance(obj, &ce->gem_context->ppgtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err 
= i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + batch = rpcs_query_batch(vma); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_unpin(vma); + + *rq_out = i915_request_get(rq); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + + return err; +} + +#define TEST_IDLE BIT(0) +#define TEST_BUSY BIT(1) +#define TEST_RESET BIT(2) + +static int +__sseu_prepare(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct intel_context *ce, + struct igt_spinner **spin) +{ + struct i915_request *rq; + int ret; + + *spin = NULL; + if (!(flags & (TEST_BUSY | TEST_RESET))) + return 0; + + *spin = kzalloc(sizeof(**spin), GFP_KERNEL); + if (!*spin) + return -ENOMEM; + + ret = igt_spinner_init(*spin, i915); + if (ret) + goto err_free; + + rq = igt_spinner_create_request(*spin, + ce->gem_context, + ce->engine, + MI_NOOP); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto err_fini; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(*spin, rq)) { + pr_err("%s: Spinner failed to start!\n", name); + ret = -ETIMEDOUT; + goto err_end; + } + + return 0; + +err_end: + igt_spinner_end(*spin); +err_fini: + igt_spinner_fini(*spin); +err_free: + kfree(fetch_and_zero(spin)); + return ret; +} + +static int +__read_slice_count(struct drm_i915_private *i915, + struct intel_context *ce, + struct drm_i915_gem_object *obj, + struct igt_spinner *spin, + u32 *rpcs) +{ + struct i915_request *rq = NULL; + u32 s_mask, s_shift; + unsigned int cnt; + u32 *buf, val; + long ret; + + ret = emit_rpcs_query(obj, ce, &rq); + if (ret) + return ret; + + if (spin) + igt_spinner_end(spin); + + ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + if (ret < 0) + return ret; + + buf = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + return ret; + } + + if (INTEL_GEN(i915) >= 11) { + s_mask = GEN11_RPCS_S_CNT_MASK; + s_shift = GEN11_RPCS_S_CNT_SHIFT; + } else { + s_mask = GEN8_RPCS_S_CNT_MASK; + s_shift = GEN8_RPCS_S_CNT_SHIFT; + } + + val = *buf; + cnt = (val & s_mask) >> s_shift; + *rpcs = val; + + i915_gem_object_unpin_map(obj); + + return cnt; +} + +static int +__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, + const char *prefix, const char *suffix) +{ + if (slices == expected) + return 0; + + if (slices < 0) { + pr_err("%s: %s read slice count failed with %d%s\n", + name, prefix, slices, suffix); + return slices; + } + + pr_err("%s: %s slice count %d is not %u%s\n", + name, prefix, slices, expected, suffix); + + pr_info("RPCS=0x%x; %u%sx%u%s\n", + rpcs, slices, + (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", + (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, + (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? 
"*" : ""); + + return -EINVAL; +} + +static int +__sseu_finish(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct intel_context *ce, + struct drm_i915_gem_object *obj, + unsigned int expected, + struct igt_spinner *spin) +{ + unsigned int slices = hweight32(ce->engine->sseu.slice_mask); + u32 rpcs = 0; + int ret = 0; + + if (flags & TEST_RESET) { + ret = i915_reset_engine(ce->engine, "sseu"); + if (ret) + goto out; + } + + ret = __read_slice_count(i915, ce, obj, + flags & TEST_RESET ? NULL : spin, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); + if (ret) + goto out; + + ret = __read_slice_count(i915, ce->engine->kernel_context, obj, + NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); + +out: + if (spin) + igt_spinner_end(spin); + + if ((flags & TEST_IDLE) && ret == 0) { + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; + + ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, + "Context", " after idle!"); + } + + return ret; +} + +static int +__sseu_test(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct intel_context *ce, + struct drm_i915_gem_object *obj, + struct intel_sseu sseu) +{ + struct igt_spinner *spin = NULL; + int ret; + + ret = __sseu_prepare(i915, name, flags, ce, &spin); + if (ret) + return ret; + + ret = __intel_context_reconfigure_sseu(ce, sseu); + if (ret) + goto out_spin; + + ret = __sseu_finish(i915, name, flags, ce, obj, + hweight32(sseu.slice_mask), spin); + +out_spin: + if (spin) { + igt_spinner_end(spin); + igt_spinner_fini(spin); + kfree(spin); + } + return ret; +} + +static int +__igt_ctx_sseu(struct drm_i915_private *i915, + const char *name, + unsigned int flags) +{ + struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_sseu default_sseu = engine->sseu; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct intel_sseu pg_sseu; + intel_wakeref_t wakeref; + struct drm_file *file; + int ret; + + if (INTEL_GEN(i915) < 9) + return 0; + + if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) + return 0; + + if (hweight32(default_sseu.slice_mask) < 2) + return 0; + + /* + * Gen11 VME friendly power-gated configuration with half enabled + * sub-slices. + */ + pg_sseu = default_sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); + + pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + name, flags, hweight32(default_sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (flags & TEST_RESET) + igt_global_reset_lock(i915); + + mutex_lock(&i915->drm.struct_mutex); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto out_unlock; + } + i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto out_unlock; + } + + wakeref = intel_runtime_pm_get(i915); + + ce = i915_gem_context_get_engine(ctx, RCS0); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_rpm; + } + + ret = intel_context_pin(ce); + if (ret) + goto out_context; + + /* First set the default mask. */ + ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + if (ret) + goto out_fail; + + /* Then set a power-gated configuration. 
*/ + ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + if (ret) + goto out_fail; + + /* Back to defaults. */ + ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + if (ret) + goto out_fail; + + /* One last power-gated configuration for the road. */ + ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + if (ret) + goto out_fail; + +out_fail: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + ret = -EIO; + + intel_context_unpin(ce); +out_context: + intel_context_put(ce); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + i915_gem_object_put(obj); + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (flags & TEST_RESET) + igt_global_reset_unlock(i915); + + mock_file_free(i915, file); + + if (ret) + pr_err("%s: Failed with %d!\n", name, ret); + + return ret; +} + +static int igt_ctx_sseu(void *arg) +{ + struct { + const char *name; + unsigned int flags; + } *phase, phases[] = { + { .name = "basic", .flags = 0 }, + { .name = "idle", .flags = TEST_IDLE }, + { .name = "busy", .flags = TEST_BUSY }, + { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, + { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, + { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, + }; + unsigned int i; + int ret = 0; + + for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); + i++, phase++) + ret = __igt_ctx_sseu(arg, phase->name, phase->flags); + + return ret; +} + +static int igt_ctx_readonly(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj = NULL; + struct i915_gem_context *ctx; + struct i915_hw_ppgtt *ppgtt; + unsigned long idx, ndwords, dw; + struct igt_live_test t; + struct drm_file *file; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + int err = -ENODEV; + + /* + * Create a few read-only objects (with the occasional writable object) + * and try to write into these object checking that the GPU discards + * any write to a read-only object. + */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt; + if (!ppgtt || !ppgtt->vm.has_read_only) { + err = 0; + goto out_unlock; + } + + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct intel_engine_cs *engine; + unsigned int id; + + for_each_engine(engine, i915, id) { + intel_wakeref_t wakeref; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (!obj) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + + if (prandom_u32_state(&prng) & 1) + i915_gem_object_set_readonly(obj); + } + + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? 
%s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) { + obj = NULL; + dw = 0; + } + ndwords++; + } + } + pr_info("Submitted %lu dwords (across %u engines)\n", + ndwords, RUNTIME_INFO(i915)->num_engines); + + dw = 0; + idx = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + unsigned int num_writes; + + num_writes = rem; + if (i915_gem_object_is_readonly(obj)) + num_writes = 0; + + err = cpu_check(obj, idx++, num_writes); + if (err) + break; + + dw += rem; + } + +out_unlock: + if (igt_live_test_end(&t)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static int check_scratch(struct i915_gem_context *ctx, u64 offset) +{ + struct drm_mm_node *node = + __drm_mm_interval_first(&ctx->ppgtt->vm.mm, + offset, offset + sizeof(u32) - 1); + if (!node || node->start > offset) + return 0; + + GEM_BUG_ON(offset >= node->start + node->size); + + pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", + upper_32_bits(offset), lower_32_bits(offset)); + return -EINVAL; +} + +static int write_to_scratch(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, u32 value) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cmd; + int err; + + GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + if (INTEL_GEN(i915) >= 8) { + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + } else { + *cmd++ = 0; + *cmd++ = offset; + } + *cmd++ = value; + *cmd = MI_BATCH_BUFFER_END; + __i915_gem_object_flush_map(obj, 0, 64); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); + if (err) + goto err; + + err = check_scratch(ctx, offset); + if (err) + goto err_unpin; + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(vma, rq, 0); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(obj); + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_unpin: + i915_vma_unpin(vma); +err: + i915_gem_object_put(obj); + return err; +} + +static int read_from_scratch(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, u32 *value) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! 
*/ + const u32 result = 0x100; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cmd; + int err; + + GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + memset(cmd, POISON_INUSE, PAGE_SIZE); + if (INTEL_GEN(i915) >= 8) { + *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; + *cmd++ = RCS_GPR0; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = MI_STORE_REGISTER_MEM_GEN8; + *cmd++ = RCS_GPR0; + *cmd++ = result; + *cmd++ = 0; + } else { + *cmd++ = MI_LOAD_REGISTER_MEM; + *cmd++ = RCS_GPR0; + *cmd++ = offset; + *cmd++ = MI_STORE_REGISTER_MEM; + *cmd++ = RCS_GPR0; + *cmd++ = result; + } + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); + if (err) + goto err; + + err = check_scratch(ctx, offset); + if (err) + goto err_unpin; + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_request_add(rq); + + err = i915_gem_object_set_to_cpu_domain(obj, false); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *value = cmd[result / sizeof(*cmd)]; + i915_gem_object_unpin_map(obj); + i915_gem_object_put(obj); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_unpin: + i915_vma_unpin(vma); +err: + i915_gem_object_put(obj); + return err; +} + +static int igt_vm_isolation(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_a, *ctx_b; + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + struct igt_live_test t; + struct drm_file *file; + I915_RND_STATE(prng); + unsigned long count; + unsigned int id; + u64 vm_total; + int err; + + if (INTEL_GEN(i915) < 7) + return 0; + + /* + * The simple goal here is that a write into one context is not + * observed in a second (separate page tables and scratch). 
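+ * (ctx_a scribbles 0xdeadbeef into its own ppgtt; reading the same
+ * offset back via ctx_b should then hit ctx_b's scratch page and
+ * return zero, which is what the loop below asserts.)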
+	 */
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	err = igt_live_test_begin(&t, i915, __func__, "");
+	if (err)
+		goto out_unlock;
+
+	ctx_a = live_context(i915, file);
+	if (IS_ERR(ctx_a)) {
+		err = PTR_ERR(ctx_a);
+		goto out_unlock;
+	}
+
+	ctx_b = live_context(i915, file);
+	if (IS_ERR(ctx_b)) {
+		err = PTR_ERR(ctx_b);
+		goto out_unlock;
+	}
+
+	/* We can only test vm isolation if the vms are distinct */
+	if (ctx_a->ppgtt == ctx_b->ppgtt)
+		goto out_unlock;
+
+	vm_total = ctx_a->ppgtt->vm.total;
+	GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total);
+	vm_total -= I915_GTT_PAGE_SIZE;
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	count = 0;
+	for_each_engine(engine, i915, id) {
+		IGT_TIMEOUT(end_time);
+		unsigned long this = 0;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		while (!__igt_timeout(end_time, NULL)) {
+			u32 value = 0xc5c5c5c5;
+			u64 offset;
+
+			div64_u64_rem(i915_prandom_u64_state(&prng),
+				      vm_total, &offset);
+			offset &= -sizeof(u32);
+			offset += I915_GTT_PAGE_SIZE;
+
+			err = write_to_scratch(ctx_a, engine,
+					       offset, 0xdeadbeef);
+			if (err == 0)
+				err = read_from_scratch(ctx_b, engine,
+							offset, &value);
+			if (err)
+				goto out_rpm;
+
+			if (value) {
+				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
+				       engine->name, value,
+				       upper_32_bits(offset),
+				       lower_32_bits(offset),
+				       this);
+				err = -EINVAL;
+				goto out_rpm;
+			}
+
+			this++;
+		}
+		count += this;
+	}
+	pr_info("Checked %lu scratch offsets across %u engines\n",
+		count, RUNTIME_INFO(i915)->num_engines);
+
+out_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+out_unlock:
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	mock_file_free(i915, file);
+	return err;
+}
+
+static __maybe_unused const char *
+__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+
+	if (engines == ALL_ENGINES)
+		return "all";
+
+	for_each_engine_masked(engine, i915, engines, tmp)
+		return engine->name;
+
+	return "none";
+}
+
+static void mock_barrier_task(void *data)
+{
+	unsigned int *counter = data;
+
+	++*counter;
+}
+
+static int mock_context_barrier(void *arg)
+{
+#undef pr_fmt
+#define pr_fmt(x) "context_barrier_task():" # x
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq;
+	unsigned int counter;
+	int err;
+
+	/*
+	 * The context barrier provides us with a callback after it emits
+	 * a request; useful for retiring old state after loading new.
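+	 *
+	 * The calls exercised below all have the shape:
+	 *
+	 *	counter = 0;
+	 *	err = context_barrier_task(ctx, engines,
+	 *				   NULL, mock_barrier_task, &counter);
+	 *
+	 * where mock_barrier_task() simply bumps the counter as the
+	 * barrier for each selected engine retires.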
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = mock_context(i915, "mock");
+	if (!ctx) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, 0,
+				   NULL, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately with 0 engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately for all unused engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
+	if (IS_ERR(rq)) {
+		pr_err("Request allocation failed!\n");
+		err = PTR_ERR(rq);
+		goto out;
+	}
+	i915_request_add(rq);
+
+	counter = 0;
+	context_barrier_inject_fault = BIT(RCS0);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
+	context_barrier_inject_fault = 0;
+	if (err == -ENXIO)
+		err = 0;
+	else
+		pr_err("Did not hit fault injection!\n");
+	if (counter != 0) {
+		pr_err("Invoked callback on error!\n");
+		err = -EIO;
+	}
+	if (err)
+		goto out;
+
+	counter = 0;
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	mock_device_flush(i915);
+	if (counter == 0) {
+		pr_err("Did not retire on each active engine\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	mock_context_close(ctx);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+#undef pr_fmt
+#define pr_fmt(x) x
+}
+
+int i915_gem_context_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_context_barrier),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
+
+int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_nop_switch),
+		SUBTEST(igt_ctx_exec),
+		SUBTEST(igt_ctx_readonly),
+		SUBTEST(igt_ctx_sseu),
+		SUBTEST(igt_shared_ctx_exec),
+		SUBTEST(igt_vm_isolation),
+	};
+
+	if (i915_terminally_wedged(dev_priv))
+		return 0;
+
+	return i915_subtests(tests, dev_priv);
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
new file mode 100644
index 000000000000..b7431712de66
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -0,0 +1,386 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "mock_dmabuf.h"
+#include "selftests/mock_gem_device.h"
+
+static int igt_dmabuf_export(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+	i915_gem_object_put(obj);
+	if (IS_ERR(dmabuf)) {
+		pr_err("i915_gem_prime_export failed with err=%d\n",
+		       (int)PTR_ERR(dmabuf));
+		return PTR_ERR(dmabuf);
+	}
+
+	dma_buf_put(dmabuf);
+	return 0;
+}
+
+static int igt_dmabuf_import_self(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct drm_gem_object *import;
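+	/*
+	 * Re-importing a dmabuf we exported ourselves should return the
+	 * original GEM object, not wrap it in a new one; the comparison
+	 * against &obj->base below depends on this.
+	 */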
+	struct dma_buf *dmabuf;
+	int err;
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+	if (IS_ERR(dmabuf)) {
+		pr_err("i915_gem_prime_export failed with err=%d\n",
+		       (int)PTR_ERR(dmabuf));
+		err = PTR_ERR(dmabuf);
+		goto out;
+	}
+
+	import = i915_gem_prime_import(&i915->drm, dmabuf);
+	if (IS_ERR(import)) {
+		pr_err("i915_gem_prime_import failed with err=%d\n",
+		       (int)PTR_ERR(import));
+		err = PTR_ERR(import);
+		goto out_dmabuf;
+	}
+
+	if (import != &obj->base) {
+		pr_err("i915_gem_prime_import created a new object!\n");
+		err = -EINVAL;
+		goto out_import;
+	}
+
+	err = 0;
+out_import:
+	i915_gem_object_put(to_intel_bo(import));
+out_dmabuf:
+	dma_buf_put(dmabuf);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int igt_dmabuf_import(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	void *obj_map, *dma_map;
+	u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff };
+	int err, i;
+
+	dmabuf = mock_dmabuf(1);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf));
+	if (IS_ERR(obj)) {
+		pr_err("i915_gem_prime_import failed with err=%d\n",
+		       (int)PTR_ERR(obj));
+		err = PTR_ERR(obj);
+		goto out_dmabuf;
+	}
+
+	if (obj->base.dev != &i915->drm) {
+		pr_err("i915_gem_prime_import created a non-i915 object!\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	if (obj->base.size != PAGE_SIZE) {
+		pr_err("i915_gem_prime_import returned the wrong size: found %lld, expected %ld\n",
+		       (long long)obj->base.size, PAGE_SIZE);
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	dma_map = dma_buf_vmap(dmabuf);
+	if (!dma_map) {
+		pr_err("dma_buf_vmap failed\n");
+		err = -ENOMEM;
+		goto out_obj;
+	}
+
+	if (0) { /* Cannot yet map dmabuf */
+		obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(obj_map)) {
+			err = PTR_ERR(obj_map);
+			pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
+			goto out_dma_map;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+			memset(dma_map, pattern[i], PAGE_SIZE);
+			if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) {
+				err = -EINVAL;
+				pr_err("imported vmap not all set to %x!\n", pattern[i]);
+				i915_gem_object_unpin_map(obj);
+				goto out_dma_map;
+			}
+		}
+
+		for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+			memset(obj_map, pattern[i], PAGE_SIZE);
+			if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) {
+				err = -EINVAL;
+				pr_err("exported vmap not all set to %x!\n", pattern[i]);
+				i915_gem_object_unpin_map(obj);
+				goto out_dma_map;
+			}
+		}
+
+		i915_gem_object_unpin_map(obj);
+	}
+
+	err = 0;
+out_dma_map:
+	dma_buf_vunmap(dmabuf, dma_map);
+out_obj:
+	i915_gem_object_put(obj);
+out_dmabuf:
+	dma_buf_put(dmabuf);
+	return err;
+}
+
+static int igt_dmabuf_import_ownership(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	void *ptr;
+	int err;
+
+	dmabuf = mock_dmabuf(1);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	ptr = dma_buf_vmap(dmabuf);
+	if (!ptr) {
+		pr_err("dma_buf_vmap failed\n");
+		err = -ENOMEM;
+		goto err_dmabuf;
+	}
+
+	memset(ptr, 0xc5, PAGE_SIZE);
+	dma_buf_vunmap(dmabuf, ptr);
+
+	obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf));
+	if (IS_ERR(obj)) {
+		pr_err("i915_gem_prime_import failed with err=%d\n",
+		       (int)PTR_ERR(obj));
+		err = PTR_ERR(obj);
+		goto err_dmabuf;
+	}
+
+	dma_buf_put(dmabuf);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); + goto out_obj; + } + + err = 0; + i915_gem_object_unpin_pages(obj); +out_obj: + i915_gem_object_put(obj); + return err; + +err_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_export_vmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto err_obj; + } + i915_gem_object_put(obj); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out; + } + + if (memchr_inv(ptr, 0, dmabuf->size)) { + pr_err("Exported object not initialiased to zero!\n"); + err = -EINVAL; + goto out; + } + + memset(ptr, 0xc5, dmabuf->size); + + err = 0; + dma_buf_vunmap(dmabuf, ptr); +out: + dma_buf_put(dmabuf); + return err; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_export_kmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + err = PTR_ERR(dmabuf); + pr_err("i915_gem_prime_export failed with err=%d\n", err); + return err; + } + + ptr = dma_buf_kmap(dmabuf, 0); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + + if (memchr_inv(ptr, 0, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 0, ptr); + pr_err("Exported page[0] not initialiased to zero!\n"); + err = -EINVAL; + goto err; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_kunmap(dmabuf, 0, ptr); + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto err; + } + memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + ptr = dma_buf_kmap(dmabuf, 1); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + + if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 1, ptr); + pr_err("Exported page[1] not set to 0xaa!\n"); + err = -EINVAL; + goto err; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_kunmap(dmabuf, 1, ptr); + + ptr = dma_buf_kmap(dmabuf, 0); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 0, ptr); + pr_err("Exported page[0] did not retain 0xc5!\n"); + err = -EINVAL; + goto err; + } + dma_buf_kunmap(dmabuf, 0, ptr); + + ptr = dma_buf_kmap(dmabuf, 2); + if (ptr) { + pr_err("Erroneously kmapped beyond the end of the object!\n"); + dma_buf_kunmap(dmabuf, 2, ptr); + err = -EINVAL; + goto err; + } + + ptr = dma_buf_kmap(dmabuf, -1); + if (ptr) { + pr_err("Erroneously kmapped before the start of the object!\n"); + dma_buf_kunmap(dmabuf, -1, ptr); + err = -EINVAL; + goto err; + } + + err = 0; +err: + dma_buf_put(dmabuf); + return err; +} + +int i915_gem_dmabuf_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + 
SUBTEST(igt_dmabuf_import_self), + SUBTEST(igt_dmabuf_import), + SUBTEST(igt_dmabuf_import_ownership), + SUBTEST(igt_dmabuf_export_vmap), + SUBTEST(igt_dmabuf_export_kmap), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_put(&i915->drm); + return err; +} + +int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 87da01230179..12c90d8fe0fb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -7,8 +7,8 @@ #include #include "gt/intel_gt_pm.h" +#include "huge_gem_object.h" #include "i915_selftest.h" -#include "selftests/huge_gem_object.h" #include "selftests/igt_flush_test.h" struct tile { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c new file mode 100644 index 000000000000..2b6db6f799de --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "huge_gem_object.h" +#include "selftests/igt_flush_test.h" +#include "selftests/mock_gem_device.h" + +static int igt_gem_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = -ENOMEM; + + /* Basic test to ensure we can create an object */ + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + err = 0; + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_gem_huge(void *arg) +{ + const unsigned int nreal = 509; /* just to be awkward */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int n; + int err; + + /* Basic sanitycheck of our huge fake object allocation */ + + obj = huge_gem_object(i915, + nreal * PAGE_SIZE, + i915->ggtt.vm.total + PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { + if (i915_gem_object_get_page(obj, n) != + i915_gem_object_get_page(obj, n % nreal)) { + pr_err("Page lookup mismatch at index %u [%u]\n", + n, n % nreal); + err = -EINVAL; + goto out_unpin; + } + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +int i915_gem_object_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_object), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_put(&i915->drm); + return err; +} + +int i915_gem_object_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_huge), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c new file mode 100644 index 
000000000000..b232e6d2cd92 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "igt_gem_utils.h" + +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" + +#include "i915_request.h" + +struct i915_request * +igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ce = i915_gem_context_get_engine(ctx, engine->id); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = intel_context_create_request(ce); + intel_context_put(ce); + + return rq; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h new file mode 100644 index 000000000000..0f17251cf75d --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef __IGT_GEM_UTILS_H__ +#define __IGT_GEM_UTILS_H__ + +struct i915_request; +struct i915_gem_context; +struct intel_engine_cs; + +struct i915_request * +igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); + +#endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c new file mode 100644 index 000000000000..68d50da035e6 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "mock_context.h" +#include "selftests/mock_gtt.h" + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name) +{ + struct i915_gem_context *ctx; + struct i915_gem_engines *e; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + kref_init(&ctx->ref); + INIT_LIST_HEAD(&ctx->link); + ctx->i915 = i915; + + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) + goto err_free; + RCU_INIT_POINTER(ctx->engines, e); + + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + INIT_LIST_HEAD(&ctx->handles_list); + INIT_LIST_HEAD(&ctx->hw_id_link); + mutex_init(&ctx->mutex); + + ret = i915_gem_context_pin_hw_id(ctx); + if (ret < 0) + goto err_engines; + + if (name) { + struct i915_hw_ppgtt *ppgtt; + + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) + goto err_put; + + ppgtt = mock_ppgtt(i915, name); + if (!ppgtt) + goto err_put; + + __set_ppgtt(ctx, ppgtt); + } + + return ctx; + +err_engines: + free_engines(rcu_access_pointer(ctx->engines)); +err_free: + kfree(ctx); + return NULL; + +err_put: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return NULL; +} + +void mock_context_close(struct i915_gem_context *ctx) +{ + context_close(ctx); +} + +void mock_init_contexts(struct drm_i915_private *i915) +{ + init_contexts(i915); +} + +struct i915_gem_context * +live_context(struct drm_i915_private *i915, struct drm_file *file) +{ + struct i915_gem_context *ctx; + int err; + + lockdep_assert_held(&i915->drm.struct_mutex); + + ctx = i915_gem_create_context(i915, 0); + if (IS_ERR(ctx)) + return ctx; + + err = gem_context_register(ctx, file->driver_priv); + if (err < 0) + goto err_ctx; + + return ctx; 
+ +err_ctx: + context_close(ctx); + return ERR_PTR(err); +} + +struct i915_gem_context * +kernel_context(struct drm_i915_private *i915) +{ + return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL); +} + +void kernel_context_close(struct i915_gem_context *ctx) +{ + context_close(ctx); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h new file mode 100644 index 000000000000..0b926653914f --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __MOCK_CONTEXT_H +#define __MOCK_CONTEXT_H + +void mock_init_contexts(struct drm_i915_private *i915); + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name); + +void mock_context_close(struct i915_gem_context *ctx); + +struct i915_gem_context * +live_context(struct drm_i915_private *i915, struct drm_file *file); + +struct i915_gem_context *kernel_context(struct drm_i915_private *i915); +void kernel_context_close(struct i915_gem_context *ctx); + +#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c new file mode 100644 index 000000000000..b9e059d4328a --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -0,0 +1,144 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "mock_dmabuf.h" + +static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct mock_dmabuf *mock = to_mock(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *sg; + int i, err; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(st, mock->npages, GFP_KERNEL); + if (err) + goto err_free; + + sg = st->sgl; + for (i = 0; i < mock->npages; i++) { + sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); + sg = sg_next(sg); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + err = -ENOMEM; + goto err_st; + } + + return st; + +err_st: + sg_free_table(st); +err_free: + kfree(st); + return ERR_PTR(err); +} + +static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *st, + enum dma_data_direction dir) +{ + dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + sg_free_table(st); + kfree(st); +} + +static void mock_dmabuf_release(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + int i; + + for (i = 0; i < mock->npages; i++) + put_page(mock->pages[i]); + + kfree(mock); +} + +static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); +} + +static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + vm_unmap_ram(vaddr, mock->npages); +} + +static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kunmap(mock->pages[page_num]); +} + +static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + return -ENODEV; +} + +static const struct dma_buf_ops 
mock_dmabuf_ops = { + .map_dma_buf = mock_map_dma_buf, + .unmap_dma_buf = mock_unmap_dma_buf, + .release = mock_dmabuf_release, + .map = mock_dmabuf_kmap, + .unmap = mock_dmabuf_kunmap, + .mmap = mock_dmabuf_mmap, + .vmap = mock_dmabuf_vmap, + .vunmap = mock_dmabuf_vunmap, +}; + +static struct dma_buf *mock_dmabuf(int npages) +{ + struct mock_dmabuf *mock; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dmabuf; + int i; + + mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!mock) + return ERR_PTR(-ENOMEM); + + mock->npages = npages; + for (i = 0; i < npages; i++) { + mock->pages[i] = alloc_page(GFP_KERNEL); + if (!mock->pages[i]) + goto err; + } + + exp_info.ops = &mock_dmabuf_ops; + exp_info.size = npages * PAGE_SIZE; + exp_info.flags = O_CLOEXEC; + exp_info.priv = mock; + + dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(dmabuf)) + goto err; + + return dmabuf; + +err: + while (i--) + put_page(mock->pages[i]); + kfree(mock); + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h new file mode 100644 index 000000000000..f0f8bbd82dfc --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __MOCK_DMABUF_H__ +#define __MOCK_DMABUF_H__ + +#include + +struct mock_dmabuf { + int npages; + struct page *pages[]; +}; + +static struct mock_dmabuf *to_mock(struct dma_buf *buf) +{ + return buf->priv; +} + +#endif /* !__MOCK_DMABUF_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h new file mode 100644 index 000000000000..370360b4a148 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __MOCK_GEM_OBJECT_H__ +#define __MOCK_GEM_OBJECT_H__ + +struct mock_object { + struct drm_i915_gem_object base; +}; + +#endif /* !__MOCK_GEM_OBJECT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5b31e1e05ddd..c78ec0b58e77 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -4,8 +4,10 @@ * Copyright © 2019 Intel Corporation */ +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" + #include "i915_drv.h" -#include "i915_gem_context.h" #include "i915_globals.h" #include "intel_context.h" diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1c83ea9adac0..672dde71a46c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -24,10 +24,13 @@ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_context.h" #include "intel_lrc.h" #include "intel_reset.h" diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 38a8e55a7c85..448f3c0d8704 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -133,6 +133,8 @@ */ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gem_render_state.h" #include "i915_vgpu.h" diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index e029aee87adf..c2bba82bcc16 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -24,7 +24,15 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ -#include "intel_engine.h" +#include + +struct drm_printer; + +struct drm_i915_private; +struct i915_gem_context; +struct i915_request; +struct intel_context; +struct intel_engine_cs; /* Execlists regs */ #define RING_ELSP(base) _MMIO((base) + 0x230) @@ -96,10 +104,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine); */ #define LRC_HEADER_PAGES LRC_PPHWSP_PN -struct drm_printer; - -struct drm_i915_private; - void intel_execlists_set_default_submission(struct intel_engine_cs *engine); void intel_lr_context_reset(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 8c60f7550f9c..377bc546a68f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -7,6 +7,8 @@ #include #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gpu_error.h" #include "i915_irq.h" diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index ac93080bd863..66d5a52d505c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -31,9 +31,12 @@ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gem_render_state.h" #include "i915_trace.h" +#include "intel_context.h" #include "intel_reset.h" #include "intel_workarounds.h" diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ce4bcca3f83c..133d069244f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_context.h" #include "intel_workarounds.h" /** diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2941916b37bf..6d7562769eb2 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -22,8 +22,9 @@ * */ +#include "gem/i915_gem_context.h" + #include "i915_drv.h" -#include "i915_gem_context.h" #include "intel_context.h" #include "intel_engine_pm.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 48a51739b926..690d77f5ecf6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -24,19 +24,21 @@ #include +#include "gem/i915_gem_context.h" #include "intel_engine_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" -#include "selftests/igt_gem_utils.h" #include "selftests/igt_reset.h" #include "selftests/igt_wedge_me.h" #include "selftests/igt_atomic.h" -#include "selftests/mock_context.h" #include "selftests/mock_drm.h" +#include "gem/selftests/mock_context.h" +#include "gem/selftests/igt_gem_utils.h" + #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ struct hang { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index a8c50900e2d4..dfacc46ae7d3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -6,15 +6,18 @@ #include +#include "gem/i915_gem_pm.h" #include "gt/intel_reset.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" -#include 
"selftests/igt_gem_utils.h" #include "selftests/igt_live_test.h" #include "selftests/igt_spinner.h" #include "selftests/lib_sw_fence.h" -#include "selftests/mock_context.h" + +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" static int live_sanitycheck(void *arg) { diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index f9c9e7291187..9040cae38fc5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -4,17 +4,19 @@ * Copyright © 2018 Intel Corporation */ +#include "gem/i915_gem_pm.h" #include "i915_selftest.h" #include "intel_reset.h" #include "selftests/igt_flush_test.h" -#include "selftests/igt_gem_utils.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" #include "selftests/igt_wedge_me.h" -#include "selftests/mock_context.h" #include "selftests/mock_drm.h" +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + static const struct wo_register { enum intel_platform platform; u32 reg; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 96e1edf21b3f..2998999e8568 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -34,6 +34,7 @@ */ #include "i915_drv.h" +#include "gt/intel_context.h" #include "gvt.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3a691447f76c..d66bf77f55fd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -35,8 +35,11 @@ #include +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" + #include "i915_drv.h" -#include "i915_gem_pm.h" #include "gvt.h" #define RING_CTX_OFF(x) \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 344beab229a0..7ab8340af991 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,10 +32,10 @@ #include #include +#include "gem/i915_gem_context.h" #include "gt/intel_reset.h" #include "i915_debugfs.h" -#include "i915_gem_context.h" #include "i915_irq.h" #include "intel_csr.h" #include "intel_dp.h" diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a1f43dc5a8b5..5ca1594f3075 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -47,6 +47,7 @@ #include #include +#include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gt/intel_gt_pm.h" #include "gt/intel_reset.h" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 596af542afea..38da46e773a3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,7 +80,7 @@ #include "intel_wopcm.h" #include "i915_gem.h" -#include "i915_gem_context.h" +#include "gem/i915_gem_context_types.h" #include "i915_gem_fence_reg.h" #include "i915_gem_gtt.h" #include "i915_gpu_error.h" diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0570907cc9d2..096e31e3df92 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,7 +38,11 @@ #include #include +#include "gem/i915_gem_clflush.h" +#include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" +#include "gem/i915_gem_pm.h" +#include "gem/i915_gemfs.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt_pm.h" #include 
"gt/intel_mocs.h" @@ -46,9 +50,6 @@ #include "gt/intel_workarounds.h" #include "i915_drv.h" -#include "i915_gem_clflush.h" -#include "i915_gemfs.h" -#include "i915_gem_pm.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -2371,9 +2372,5 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" -#include "selftests/huge_gem_object.c" -#include "selftests/huge_pages.c" -#include "selftests/i915_gem_object.c" -#include "selftests/i915_gem_coherency.c" #include "selftests/i915_gem.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c deleted file mode 100644 index 8e74c23cbd91..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include "i915_drv.h" -#include "intel_frontbuffer.h" -#include "i915_gem_clflush.h" - -static DEFINE_SPINLOCK(clflush_lock); - -struct clflush { - struct dma_fence dma; /* Must be first for dma_fence_free() */ - struct i915_sw_fence wait; - struct work_struct work; - struct drm_i915_gem_object *obj; -}; - -static const char *i915_clflush_get_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) -{ - return "clflush"; -} - -static void i915_clflush_release(struct dma_fence *fence) -{ - struct clflush *clflush = container_of(fence, typeof(*clflush), dma); - - i915_sw_fence_fini(&clflush->wait); - - BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); - dma_fence_free(&clflush->dma); -} - -static const struct dma_fence_ops i915_clflush_ops = { - .get_driver_name = i915_clflush_get_driver_name, - .get_timeline_name = i915_clflush_get_timeline_name, - .release = i915_clflush_release, -}; - -static void __i915_do_clflush(struct drm_i915_gem_object *obj) -{ - GEM_BUG_ON(!i915_gem_object_has_pages(obj)); - drm_clflush_sg(obj->mm.pages); - intel_fb_obj_flush(obj, ORIGIN_CPU); -} - -static void i915_clflush_work(struct work_struct *work) -{ - struct clflush *clflush = container_of(work, typeof(*clflush), work); - struct drm_i915_gem_object *obj = clflush->obj; - - if (i915_gem_object_pin_pages(obj)) { - DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); - goto out; - } - - __i915_do_clflush(obj); - - i915_gem_object_unpin_pages(obj); - -out: - i915_gem_object_put(obj); - - dma_fence_signal(&clflush->dma); - dma_fence_put(&clflush->dma); -} - -static int __i915_sw_fence_call -i915_clflush_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) -{ - struct clflush *clflush = container_of(fence, typeof(*clflush), wait); - - switch (state) { - case FENCE_COMPLETE: - schedule_work(&clflush->work); - break; - - case FENCE_FREE: - dma_fence_put(&clflush->dma); - break; - } - - return NOTIFY_DONE; -} - -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, - unsigned int flags) -{ - struct clflush *clflush; - - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - * Similarly, we only access struct pages through the CPU cache, so - * anything not backed by physical memory we consider to be always - * coherent and not need clflushing. - */ - if (!i915_gem_object_has_struct_page(obj)) { - obj->cache_dirty = false; - return false; - } - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. 
- */ - if (!(flags & I915_CLFLUSH_FORCE) && - obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) - return false; - - trace_i915_gem_object_clflush(obj); - - clflush = NULL; - if (!(flags & I915_CLFLUSH_SYNC)) - clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); - if (clflush) { - GEM_BUG_ON(!obj->cache_dirty); - - dma_fence_init(&clflush->dma, - &i915_clflush_ops, - &clflush_lock, - to_i915(obj->base.dev)->mm.unordered_timeline, - 0); - i915_sw_fence_init(&clflush->wait, i915_clflush_notify); - - clflush->obj = i915_gem_object_get(obj); - INIT_WORK(&clflush->work, i915_clflush_work); - - dma_fence_get(&clflush->dma); - - i915_sw_fence_await_reservation(&clflush->wait, - obj->resv, NULL, - true, I915_FENCE_TIMEOUT, - I915_FENCE_GFP); - - reservation_object_lock(obj->resv, NULL); - reservation_object_add_excl_fence(obj->resv, &clflush->dma); - reservation_object_unlock(obj->resv); - - i915_sw_fence_commit(&clflush->wait); - } else if (obj->mm.pages) { - __i915_do_clflush(obj); - } else { - GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); - } - - obj->cache_dirty = false; - return true; -} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h deleted file mode 100644 index f390247561b3..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_clflush.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#ifndef __I915_GEM_CLFLUSH_H__ -#define __I915_GEM_CLFLUSH_H__ - -struct drm_i915_private; -struct drm_i915_gem_object; - -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, - unsigned int flags); -#define I915_CLFLUSH_FORCE BIT(0) -#define I915_CLFLUSH_SYNC BIT(1) - -#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c deleted file mode 100644 index 5d2f8ba92b59..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ /dev/null @@ -1,2474 +0,0 @@ -/* - * Copyright © 2011-2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Ben Widawsky - * - */ - -/* - * This file implements HW context support. On gen5+ a HW context consists of an - * opaque GPU object which is referenced at times of context saves and restores. - * With RC6 enabled, the context is also referenced as the GPU enters and exists - * from RC6 (GPU has it's own internal power context, except on gen5). Though - * something like a context does exist for the media ring, the code only - * supports contexts for the render ring. - * - * In software, there is a distinction between contexts created by the user, - * and the default HW context. The default HW context is used by GPU clients - * that do not request setup of their own hardware context. The default - * context's state is never restored to help prevent programming errors. This - * would happen if a client ran and piggy-backed off another clients GPU state. - * The default context only exists to give the GPU some offset to load as the - * current to invoke a save of the context we actually care about. In fact, the - * code could likely be constructed, albeit in a more complicated fashion, to - * never use the default context, though that limits the driver's ability to - * swap out, and/or destroy other contexts. - * - * All other contexts are created as a request by the GPU client. These contexts - * store GPU state, and thus allow GPU clients to not re-emit state (and - * potentially query certain state) at any time. The kernel driver makes - * certain that the appropriate commands are inserted. - * - * The context life cycle is semi-complicated in that context BOs may live - * longer than the context itself because of the way the hardware, and object - * tracking works. Below is a very crude representation of the state machine - * describing the context life. 
- * refcount pincount active - * S0: initial state 0 0 0 - * S1: context created 1 0 0 - * S2: context is currently running 2 1 X - * S3: GPU referenced, but not current 2 0 1 - * S4: context is current, but destroyed 1 1 0 - * S5: like S3, but destroyed 1 0 1 - * - * The most common (but not all) transitions: - * S0->S1: client creates a context - * S1->S2: client submits execbuf with context - * S2->S3: other clients submits execbuf with context - * S3->S1: context object was retired - * S3->S2: clients submits another execbuf - * S2->S4: context destroy called with current context - * S3->S5->S0: destroy path - * S4->S5->S0: destroy path on current context - * - * There are two confusing terms used above: - * The "current context" means the context which is currently running on the - * GPU. The GPU has loaded its state already and has stored away the gtt - * offset of the BO. The GPU is not actively referencing the data at this - * offset, but it will on the next context switch. The only way to avoid this - * is to do a GPU reset. - * - * An "active context' is one which was previously the "current context" and is - * on the active list waiting for the next context switch to occur. Until this - * happens, the object must remain at the same gtt offset. It is therefore - * possible to destroy a context, but it is still active. - * - */ - -#include -#include - -#include - -#include "gt/intel_lrc_reg.h" - -#include "i915_drv.h" -#include "i915_globals.h" -#include "i915_trace.h" -#include "i915_user_extensions.h" - -#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 - -static struct i915_global_gem_context { - struct i915_global base; - struct kmem_cache *slab_luts; -} global; - -struct i915_lut_handle *i915_lut_handle_alloc(void) -{ - return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); -} - -void i915_lut_handle_free(struct i915_lut_handle *lut) -{ - return kmem_cache_free(global.slab_luts, lut); -} - -static void lut_close(struct i915_gem_context *ctx) -{ - struct i915_lut_handle *lut, *ln; - struct radix_tree_iter iter; - void __rcu **slot; - - list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) { - list_del(&lut->obj_link); - i915_lut_handle_free(lut); - } - INIT_LIST_HEAD(&ctx->handles_list); - - rcu_read_lock(); - radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { - struct i915_vma *vma = rcu_dereference_raw(*slot); - - radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - - vma->open_count--; - __i915_gem_object_release_unless_active(vma->obj); - } - rcu_read_unlock(); -} - -static struct intel_context * -lookup_user_engine(struct i915_gem_context *ctx, - unsigned long flags, - const struct i915_engine_class_instance *ci) -#define LOOKUP_USER_INDEX BIT(0) -{ - int idx; - - if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx)) - return ERR_PTR(-EINVAL); - - if (!i915_gem_context_user_engines(ctx)) { - struct intel_engine_cs *engine; - - engine = intel_engine_lookup_user(ctx->i915, - ci->engine_class, - ci->engine_instance); - if (!engine) - return ERR_PTR(-EINVAL); - - idx = engine->id; - } else { - idx = ci->engine_instance; - } - - return i915_gem_context_get_engine(ctx, idx); -} - -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) -{ - unsigned int max; - - lockdep_assert_held(&i915->contexts.mutex); - - if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) - /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the context id to indicate 
proxy submission. - */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); - continue; - } - - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } - - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; -} - -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) -{ - int ret; - - lockdep_assert_held(&i915->contexts.mutex); - - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. - */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } - - *out = ret; - return 0; -} - -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - - if (list_empty(&ctx->hw_id_link)) - return; - - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); -} - -static void __free_engines(struct i915_gem_engines *e, unsigned int count) -{ - while (count--) { - if (!e->engines[count]) - continue; - - intel_context_put(e->engines[count]); - } - kfree(e); -} - -static void free_engines(struct i915_gem_engines *e) -{ - __free_engines(e, e->num_engines); -} - -static void free_engines_rcu(struct work_struct *wrk) -{ - struct i915_gem_engines *e = - container_of(wrk, struct i915_gem_engines, rcu.work); - struct drm_i915_private *i915 = e->i915; - - mutex_lock(&i915->drm.struct_mutex); - free_engines(e); - mutex_unlock(&i915->drm.struct_mutex); -} - -static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) -{ - struct intel_engine_cs *engine; - struct i915_gem_engines *e; - enum intel_engine_id id; - - e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - - e->i915 = ctx->i915; - for_each_engine(engine, ctx->i915, id) { - struct intel_context *ce; - - ce = intel_context_create(ctx, engine); - if (IS_ERR(ce)) { - __free_engines(e, id); - return ERR_CAST(ce); - } - - e->engines[id] = ce; - } - e->num_engines = id; - - return e; -} - -static void i915_gem_context_free(struct i915_gem_context *ctx) -{ - lockdep_assert_held(&ctx->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - - release_hw_id(ctx); - i915_ppgtt_put(ctx->ppgtt); - - free_engines(rcu_access_pointer(ctx->engines)); - mutex_destroy(&ctx->engines_mutex); - - if (ctx->timeline) - i915_timeline_put(ctx->timeline); - - kfree(ctx->name); - put_pid(ctx->pid); - - list_del(&ctx->link); - mutex_destroy(&ctx->mutex); 
- - kfree_rcu(ctx, rcu); -} - -static void contexts_free(struct drm_i915_private *i915) -{ - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); - struct i915_gem_context *ctx, *cn; - - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) - i915_gem_context_free(ctx); -} - -static void contexts_free_first(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); -} - -static void contexts_free_worker(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); - - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); -} - -void i915_gem_context_release(struct kref *ref) -{ - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; - - trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); -} - -static void context_close(struct i915_gem_context *ctx) -{ - i915_gem_context_set_closed(ctx); - - /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. - */ - release_hw_id(ctx); - - /* - * The LUT uses the VMA as a backpointer to unref the object, - * so we need to clear the LUT before we close all the VMA (inside - * the ppgtt). - */ - lut_close(ctx); - - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_put(ctx); -} - -static u32 default_desc_template(const struct drm_i915_private *i915, - const struct i915_hw_ppgtt *ppgtt) -{ - u32 address_mode; - u32 desc; - - desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - - address_mode = INTEL_LEGACY_32B_CONTEXT; - if (ppgtt && i915_vm_is_4lvl(&ppgtt->vm)) - address_mode = INTEL_LEGACY_64B_CONTEXT; - desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - - if (IS_GEN(i915, 8)) - desc |= GEN8_CTX_L3LLC_COHERENT; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers - * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; - */ - - return desc; -} - -static struct i915_gem_context * -__create_context(struct drm_i915_private *dev_priv) -{ - struct i915_gem_context *ctx; - struct i915_gem_engines *e; - int err; - int i; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); - - kref_init(&ctx->ref); - list_add_tail(&ctx->link, &dev_priv->contexts.list); - ctx->i915 = dev_priv; - ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); - mutex_init(&ctx->mutex); - - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) { - err = PTR_ERR(e); - goto err_free; - } - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->handles_list); - INIT_LIST_HEAD(&ctx->hw_id_link); - - /* NB: Mark all slices as needing a remap so that when the context first - * loads it will restore whatever remap state already exists. If there - * is no remap info, it will be a NOP. 
*/ - ctx->remap_slice = ALL_L3_SLICES(dev_priv); - - i915_gem_context_set_bannable(ctx); - i915_gem_context_set_recoverable(ctx); - - ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = - default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); - - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) - ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - - return ctx; - -err_free: - kfree(ctx); - return ERR_PTR(err); -} - -static struct i915_hw_ppgtt * -__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt) -{ - struct i915_hw_ppgtt *old = ctx->ppgtt; - - ctx->ppgtt = i915_ppgtt_get(ppgtt); - ctx->desc_template = default_desc_template(ctx->i915, ppgtt); - - return old; -} - -static void __assign_ppgtt(struct i915_gem_context *ctx, - struct i915_hw_ppgtt *ppgtt) -{ - if (ppgtt == ctx->ppgtt) - return; - - ppgtt = __set_ppgtt(ctx, ppgtt); - if (ppgtt) - i915_ppgtt_put(ppgtt); -} - -static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) -{ - struct i915_gem_context *ctx; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) - return ERR_PTR(-EINVAL); - - /* Reap the most stale context */ - contexts_free_first(dev_priv); - - ctx = __create_context(dev_priv); - if (IS_ERR(ctx)) - return ctx; - - if (HAS_FULL_PPGTT(dev_priv)) { - struct i915_hw_ppgtt *ppgtt; - - ppgtt = i915_ppgtt_create(dev_priv); - if (IS_ERR(ppgtt)) { - DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", - PTR_ERR(ppgtt)); - context_close(ctx); - return ERR_CAST(ppgtt); - } - - __assign_ppgtt(ctx, ppgtt); - i915_ppgtt_put(ppgtt); - } - - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct i915_timeline *timeline; - - timeline = i915_timeline_create(dev_priv, NULL); - if (IS_ERR(timeline)) { - context_close(ctx); - return ERR_CAST(timeline); - } - - ctx->timeline = timeline; - } - - trace_i915_context_create(ctx); - - return ctx; -} - -/** - * i915_gem_context_create_gvt - create a GVT GEM context - * @dev: drm device * - * - * This function is used to create a GVT specific GEM context. 
- *
- * Returns:
- * pointer to i915_gem_context on success, error pointer if failed
- *
- */
-struct i915_gem_context *
-i915_gem_context_create_gvt(struct drm_device *dev)
-{
-	struct i915_gem_context *ctx;
-	int ret;
-
-	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
-		return ERR_PTR(-ENODEV);
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ERR_PTR(ret);
-
-	ctx = i915_gem_create_context(to_i915(dev), 0);
-	if (IS_ERR(ctx))
-		goto out;
-
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret) {
-		context_close(ctx);
-		ctx = ERR_PTR(ret);
-		goto out;
-	}
-
-	ctx->file_priv = ERR_PTR(-EBADF);
-	i915_gem_context_set_closed(ctx); /* not user accessible */
-	i915_gem_context_clear_bannable(ctx);
-	i915_gem_context_set_force_single_submission(ctx);
-	if (!USES_GUC_SUBMISSION(to_i915(dev)))
-		ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
-
-	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
-out:
-	mutex_unlock(&dev->struct_mutex);
-	return ctx;
-}
-
-static void
-destroy_kernel_context(struct i915_gem_context **ctxp)
-{
-	struct i915_gem_context *ctx;
-
-	/* Keep the context ref so that we can free it immediately ourselves */
-	ctx = i915_gem_context_get(fetch_and_zero(ctxp));
-	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
-
-	context_close(ctx);
-	i915_gem_context_free(ctx);
-}
-
-struct i915_gem_context *
-i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
-{
-	struct i915_gem_context *ctx;
-	int err;
-
-	ctx = i915_gem_create_context(i915, 0);
-	if (IS_ERR(ctx))
-		return ctx;
-
-	err = i915_gem_context_pin_hw_id(ctx);
-	if (err) {
-		destroy_kernel_context(&ctx);
-		return ERR_PTR(err);
-	}
-
-	i915_gem_context_clear_bannable(ctx);
-	ctx->sched.priority = I915_USER_PRIORITY(prio);
-	ctx->ring_size = PAGE_SIZE;
-
-	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
-
-	return ctx;
-}
-
-static void init_contexts(struct drm_i915_private *i915)
-{
-	mutex_init(&i915->contexts.mutex);
-	INIT_LIST_HEAD(&i915->contexts.list);
-
-	/* Using the simple ida interface, the max is limited by sizeof(int) */
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
-	ida_init(&i915->contexts.hw_ida);
-	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
-
-	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
-	init_llist_head(&i915->contexts.free_list);
-}
-
-static bool needs_preempt_context(struct drm_i915_private *i915)
-{
-	return HAS_EXECLISTS(i915);
-}
-
-int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
-{
-	struct i915_gem_context *ctx;
-
-	/* Reassure ourselves we are only called once */
-	GEM_BUG_ON(dev_priv->kernel_context);
-	GEM_BUG_ON(dev_priv->preempt_context);
-
-	intel_engine_init_ctx_wa(dev_priv->engine[RCS0]);
-	init_contexts(dev_priv);
-
-	/* lowest priority; idle task */
-	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
-	if (IS_ERR(ctx)) {
-		DRM_ERROR("Failed to create default global context\n");
-		return PTR_ERR(ctx);
-	}
-	/*
-	 * For easy recognisability, we want the kernel context to be 0 and
-	 * then all user contexts will have non-zero hw_id. Kernel contexts are
-	 * permanently pinned, so that we never suffer a stall and can
-	 * use them from any allocation context (e.g. for evicting other
-	 * contexts and from inside the shrinker).
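The init path above has an asymmetry that is easy to miss: failure to create the kernel context aborts driver load, while failure to create the preempt context merely disables preemption. A compact sketch of that policy, with create_kernel_context() as a hypothetical stand-in stubbed so the snippet compiles:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct kctx { int prio; };
struct dev { struct kctx *kernel_context, *preempt_context; int has_execlists; };

static struct kctx *create_kernel_context(int prio)	/* stub for the sketch */
{
	struct kctx *ctx = calloc(1, sizeof(*ctx));

	if (ctx)
		ctx->prio = prio;
	return ctx;
}

static int contexts_init(struct dev *dev)
{
	dev->kernel_context = create_kernel_context(-1023);	/* lowest prio */
	if (!dev->kernel_context)
		return -ENOMEM;		/* fatal: the idle task must exist */

	if (dev->has_execlists) {	/* cf. needs_preempt_context() */
		dev->preempt_context = create_kernel_context(1023);
		if (!dev->preempt_context)
			fprintf(stderr, "no preempt context; preemption disabled\n");
	}
	return 0;			/* the preempt context stays optional */
}

Keeping the high-priority context optional means a transient allocation failure degrades scheduling quality instead of failing the whole driver.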
- */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); - dev_priv->kernel_context = ctx; - - /* highest priority; preempting task */ - if (needs_preempt_context(dev_priv)) { - ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); - if (!IS_ERR(ctx)) - dev_priv->preempt_context = ctx; - else - DRM_ERROR("Failed to create preempt context; disabling preemption\n"); - } - - DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? - "logical" : "fake"); - return 0; -} - -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - for_each_engine(engine, dev_priv, id) - intel_engine_lost_context(engine); -} - -void i915_gem_contexts_fini(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->drm.struct_mutex); - - if (i915->preempt_context) - destroy_kernel_context(&i915->preempt_context); - destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); -} - -static int context_idr_cleanup(int id, void *p, void *data) -{ - context_close(p); - return 0; -} - -static int vm_idr_cleanup(int id, void *p, void *data) -{ - i915_ppgtt_put(p); - return 0; -} - -static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv) -{ - int ret; - - ctx->file_priv = fpriv; - if (ctx->ppgtt) - ctx->ppgtt->vm.file = fpriv; - - ctx->pid = get_task_pid(current, PIDTYPE_PID); - ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", - current->comm, pid_nr(ctx->pid)); - if (!ctx->name) { - ret = -ENOMEM; - goto err_pid; - } - - /* And finally expose ourselves to userspace via the idr */ - mutex_lock(&fpriv->context_idr_lock); - ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&fpriv->context_idr_lock); - if (ret >= 0) - goto out; - - kfree(fetch_and_zero(&ctx->name)); -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); -out: - return ret; -} - -int i915_gem_context_open(struct drm_i915_private *i915, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_gem_context *ctx; - int err; - - mutex_init(&file_priv->context_idr_lock); - mutex_init(&file_priv->vm_idr_lock); - - idr_init(&file_priv->context_idr); - idr_init_base(&file_priv->vm_idr, 1); - - mutex_lock(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, 0); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err; - } - - err = gem_context_register(ctx, file_priv); - if (err < 0) - goto err_ctx; - - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); - GEM_BUG_ON(err > 0); - - return 0; - -err_ctx: - mutex_lock(&i915->drm.struct_mutex); - context_close(ctx); - mutex_unlock(&i915->drm.struct_mutex); -err: - idr_destroy(&file_priv->vm_idr); - idr_destroy(&file_priv->context_idr); - mutex_destroy(&file_priv->vm_idr_lock); - mutex_destroy(&file_priv->context_idr_lock); - return err; -} - -void i915_gem_context_close(struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex); - - idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); - idr_destroy(&file_priv->context_idr); - mutex_destroy(&file_priv->context_idr_lock); - - idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, 
NULL); - idr_destroy(&file_priv->vm_idr); - mutex_destroy(&file_priv->vm_idr_lock); -} - -int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *i915 = to_i915(dev); - struct drm_i915_gem_vm_control *args = data; - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_hw_ppgtt *ppgtt; - int err; - - if (!HAS_FULL_PPGTT(i915)) - return -ENODEV; - - if (args->flags) - return -EINVAL; - - ppgtt = i915_ppgtt_create(i915); - if (IS_ERR(ppgtt)) - return PTR_ERR(ppgtt); - - ppgtt->vm.file = file_priv; - - if (args->extensions) { - err = i915_user_extensions(u64_to_user_ptr(args->extensions), - NULL, 0, - ppgtt); - if (err) - goto err_put; - } - - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - goto err_put; - - err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); - if (err < 0) - goto err_unlock; - - GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ - - mutex_unlock(&file_priv->vm_idr_lock); - - args->vm_id = err; - return 0; - -err_unlock: - mutex_unlock(&file_priv->vm_idr_lock); -err_put: - i915_ppgtt_put(ppgtt); - return err; -} - -int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_vm_control *args = data; - struct i915_hw_ppgtt *ppgtt; - int err; - u32 id; - - if (args->flags) - return -EINVAL; - - if (args->extensions) - return -EINVAL; - - id = args->vm_id; - if (!id) - return -ENOENT; - - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - - ppgtt = idr_remove(&file_priv->vm_idr, id); - - mutex_unlock(&file_priv->vm_idr_lock); - if (!ppgtt) - return -ENOENT; - - i915_ppgtt_put(ppgtt); - return 0; -} - -struct context_barrier_task { - struct i915_active base; - void (*task)(void *data); - void *data; -}; - -static void cb_retire(struct i915_active *base) -{ - struct context_barrier_task *cb = container_of(base, typeof(*cb), base); - - if (cb->task) - cb->task(cb->data); - - i915_active_fini(&cb->base); - kfree(cb); -} - -I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); -static int context_barrier_task(struct i915_gem_context *ctx, - intel_engine_mask_t engines, - int (*emit)(struct i915_request *rq, void *data), - void (*task)(void *data), - void *data) -{ - struct drm_i915_private *i915 = ctx->i915; - struct context_barrier_task *cb; - struct i915_gem_engines_iter it; - struct intel_context *ce; - int err = 0; - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!task); - - cb = kmalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - i915_active_init(i915, &cb->base, cb_retire); - i915_active_acquire(&cb->base); - - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - struct i915_request *rq; - - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - ce->engine->mask)) { - err = -ENXIO; - break; - } - - if (!(ce->engine->mask & engines) || !ce->state) - continue; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - break; - } - - err = 0; - if (emit) - err = emit(rq, data); - if (err == 0) - err = i915_active_ref(&cb->base, rq->fence.context, rq); - - i915_request_add(rq); - if (err) - break; - } - i915_gem_context_unlock_engines(ctx); - - cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ - cb->data = data; - - i915_active_release(&cb->base); - - return err; -} - -static int get_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_hw_ppgtt *ppgtt; - int ret; - - if (!ctx->ppgtt) - return -ENODEV; - - /* XXX rcu acquire? */ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - - ppgtt = i915_ppgtt_get(ctx->ppgtt); - mutex_unlock(&ctx->i915->drm.struct_mutex); - - ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (ret) - goto err_put; - - ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); - GEM_BUG_ON(!ret); - if (ret < 0) - goto err_unlock; - - i915_ppgtt_get(ppgtt); - - args->size = 0; - args->value = ret; - - ret = 0; -err_unlock: - mutex_unlock(&file_priv->vm_idr_lock); -err_put: - i915_ppgtt_put(ppgtt); - return ret; -} - -static void set_ppgtt_barrier(void *data) -{ - struct i915_hw_ppgtt *old = data; - - if (INTEL_GEN(old->vm.i915) < 8) - gen6_ppgtt_unpin_all(old); - - i915_ppgtt_put(old); -} - -static int emit_ppgtt_update(struct i915_request *rq, void *data) -{ - struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt; - struct intel_engine_cs *engine = rq->engine; - u32 base = engine->mmio_base; - u32 *cs; - int i; - - if (i915_vm_is_4lvl(&ppgtt->vm)) { - const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - *cs++ = lower_32_bits(pd_daddr); - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else { - /* ppGTT is not part of the legacy context image */ - gen6_ppgtt_pin(ppgtt); - } - - return 0; -} - -static int set_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_hw_ppgtt *ppgtt, *old; - int err; - - if (args->size) - return -EINVAL; - - if (!ctx->ppgtt) - return -ENODEV; - - if (upper_32_bits(args->value)) - return -ENOENT; - - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - - ppgtt = idr_find(&file_priv->vm_idr, args->value); - if (ppgtt) - i915_ppgtt_get(ppgtt); - mutex_unlock(&file_priv->vm_idr_lock); - if (!ppgtt) - return -ENOENT; - - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (err) - goto out; - - if (ppgtt == ctx->ppgtt) - goto unlock; - - /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - lut_close(ctx); - - old = __set_ppgtt(ctx, ppgtt); - - /* - * We need to flush any requests using the current ppgtt before - * we release it as the requests do not hold a reference themselves, - * only indirectly through the context. 
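emit_ppgtt_update() above is ordinary command-stream construction: an MI_LOAD_REGISTER_IMM header followed by (register, value) dword pairs, one pair per 32-bit half of each page-directory address. The sketch below rebuilds that stream in plain C; the PDP register offsets are quoted from memory and should be treated as illustrative rather than authoritative:

#include <stdint.h>

#define MI_LOAD_REGISTER_IMM(n)	((0x22u << 23) | (2 * (n) - 1))
#define MI_NOOP			0u

/* Offsets in the spirit of GEN8_RING_PDP_LDW/UDW: base + 0x270 + 8*pdp,
 * plus 4 for the upper half. Assumed layout, verify before reuse. */
static unsigned int emit_pdp_load(uint32_t *cs, uint32_t mmio_base,
				  unsigned int pdp, uint64_t daddr)
{
	unsigned int n = 0;

	cs[n++] = MI_LOAD_REGISTER_IMM(2);
	cs[n++] = mmio_base + 0x270 + 8 * pdp + 4;	/* PDP upper dword */
	cs[n++] = (uint32_t)(daddr >> 32);
	cs[n++] = mmio_base + 0x270 + 8 * pdp;		/* PDP lower dword */
	cs[n++] = (uint32_t)daddr;
	cs[n++] = MI_NOOP;	/* pad to an even number of dwords */
	return n;
}

The 4-level branch writes a single directory pointer (the PML4) this way, while the 3-level legacy branch repeats the pair for all four PDPs, exactly as the two arms above do.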
- */ - err = context_barrier_task(ctx, ALL_ENGINES, - emit_ppgtt_update, - set_ppgtt_barrier, - old); - if (err) { - ctx->ppgtt = old; - ctx->desc_template = default_desc_template(ctx->i915, old); - i915_ppgtt_put(ppgtt); - } - -unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - -out: - i915_ppgtt_put(ppgtt); - return err; -} - -static int gen8_emit_rpcs_config(struct i915_request *rq, - struct intel_context *ce, - struct intel_sseu sseu) -{ - u64 offset; - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - offset = i915_ggtt_offset(ce->state) + - LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); - - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) -{ - struct i915_request *rq; - int ret; - - lockdep_assert_held(&ce->pin_mutex); - - /* - * If the context is not idle, we have to submit an ordered request to - * modify its context image via the kernel context (writing to our own - * image, or into the registers directory, does not stick). Pristine - * and idle contexts will be configured on pinning. - */ - if (!intel_context_is_pinned(ce)) - return 0; - - rq = i915_request_create(ce->engine->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - /* Queue this switch after all other activity by this context. */ - ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); - if (ret) - goto out_add; - - ret = gen8_emit_rpcs_config(rq, ce, sseu); - if (ret) - goto out_add; - - /* - * Guarantee context image and the timeline remains pinned until the - * modifying request is retired by setting the ce activity tracker. - * - * But we only need to take one pin on the account of it. Or in other - * words transfer the pinned ce object to tracked active request. - */ - if (!i915_active_request_isset(&ce->active_tracker)) - __intel_context_pin(ce); - __i915_active_request_set(&ce->active_tracker, rq); - -out_add: - i915_request_add(rq); - return ret; -} - -static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) -{ - int ret; - - GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); - - ret = intel_context_lock_pinned(ce); - if (ret) - return ret; - - /* Nothing to do if unmodified. */ - if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) - goto unlock; - - ret = gen8_modify_rpcs(ce, sseu); - if (!ret) - ce->sseu = sseu; - -unlock: - intel_context_unlock_pinned(ce); - return ret; -} - -static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->gem_context->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - -static int -user_to_context_sseu(struct drm_i915_private *i915, - const struct drm_i915_gem_context_param_sseu *user, - struct intel_sseu *context) -{ - const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu; - - /* No zeros in any field. */ - if (!user->slice_mask || !user->subslice_mask || - !user->min_eus_per_subslice || !user->max_eus_per_subslice) - return -EINVAL; - - /* Max > min. 
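gen8_modify_rpcs() above relies on the fact that an unpinned context's register state lives in ordinary memory: a request run on the kernel context can store the new RPCS value straight into the target's saved state page. A sketch of that store emission (the opcode encodings and the state-page index are from memory, so treat them as assumptions):

#include <stdint.h>

#define MI_STORE_DWORD_IMM_GEN4	((0x20u << 23) | 2)
#define MI_USE_GGTT		(1u << 22)
#define LRC_STATE_PN		1u	/* state page index within the image */
#define PAGE_SIZE_4K		4096u

static unsigned int emit_reg_store(uint32_t *cs, uint64_t lrc_ggtt,
				   unsigned int dw_index, uint32_t value)
{
	/* Target dword inside the other context's state page, e.g. the
	 * value slot following CTX_R_PWR_CLK_STATE in the code above. */
	uint64_t addr = lrc_ggtt + LRC_STATE_PN * PAGE_SIZE_4K + dw_index * 4;
	unsigned int n = 0;

	cs[n++] = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	cs[n++] = (uint32_t)addr;		/* lower 32 bits of address */
	cs[n++] = (uint32_t)(addr >> 32);	/* upper 32 bits */
	cs[n++] = value;			/* the new RPCS dword */
	return n;
}

Ordering the store after all of the target context's activity (the i915_active_request_set() call above) is what makes the patch safe: the image is guaranteed idle when the write lands.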
*/ - if (user->max_eus_per_subslice < user->min_eus_per_subslice) - return -EINVAL; - - /* - * Some future proofing on the types since the uAPI is wider than the - * current internal implementation. - */ - if (overflows_type(user->slice_mask, context->slice_mask) || - overflows_type(user->subslice_mask, context->subslice_mask) || - overflows_type(user->min_eus_per_subslice, - context->min_eus_per_subslice) || - overflows_type(user->max_eus_per_subslice, - context->max_eus_per_subslice)) - return -EINVAL; - - /* Check validity against hardware. */ - if (user->slice_mask & ~device->slice_mask) - return -EINVAL; - - if (user->subslice_mask & ~device->subslice_mask[0]) - return -EINVAL; - - if (user->max_eus_per_subslice > device->max_eus_per_subslice) - return -EINVAL; - - context->slice_mask = user->slice_mask; - context->subslice_mask = user->subslice_mask; - context->min_eus_per_subslice = user->min_eus_per_subslice; - context->max_eus_per_subslice = user->max_eus_per_subslice; - - /* Part specific restrictions. */ - if (IS_GEN(i915, 11)) { - unsigned int hw_s = hweight8(device->slice_mask); - unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); - unsigned int req_s = hweight8(context->slice_mask); - unsigned int req_ss = hweight8(context->subslice_mask); - - /* - * Only full subslice enablement is possible if more than one - * slice is turned on. - */ - if (req_s > 1 && req_ss != hw_ss_per_s) - return -EINVAL; - - /* - * If more than four (SScount bitfield limit) subslices are - * requested then the number has to be even. - */ - if (req_ss > 4 && (req_ss & 1)) - return -EINVAL; - - /* - * If only one slice is enabled and subslice count is below the - * device full enablement, it must be at most half of the all - * available subslices. - */ - if (req_s == 1 && req_ss < hw_ss_per_s && - req_ss > (hw_ss_per_s / 2)) - return -EINVAL; - - /* ABI restriction - VME use case only. */ - - /* All slices or one slice only. */ - if (req_s != 1 && req_s != hw_s) - return -EINVAL; - - /* - * Half subslices or full enablement only when one slice is - * enabled. - */ - if (req_s == 1 && - (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2))) - return -EINVAL; - - /* No EU configuration changes. */ - if ((user->min_eus_per_subslice != - device->max_eus_per_subslice) || - (user->max_eus_per_subslice != - device->max_eus_per_subslice)) - return -EINVAL; - } - - return 0; -} - -static int set_sseu(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_context_param_sseu user_sseu; - struct intel_context *ce; - struct intel_sseu sseu; - unsigned long lookup; - int ret; - - if (args->size < sizeof(user_sseu)) - return -EINVAL; - - if (!IS_GEN(i915, 11)) - return -ENODEV; - - if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), - sizeof(user_sseu))) - return -EFAULT; - - if (user_sseu.rsvd) - return -EINVAL; - - if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) - return -EINVAL; - - lookup = 0; - if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) - lookup |= LOOKUP_USER_INDEX; - - ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - /* Only render engine supports RPCS configuration. 
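The validation above boils down to non-zero checks, bitmask-subset checks against the hardware masks, and population-count rules. Here is the skeleton of those checks as a freestanding helper, keeping the universal rules plus one Gen11 example; the part-specific VME restrictions layer on top of this:

#include <stdbool.h>
#include <stdint.h>

static bool sseu_request_valid(uint8_t req_slices, uint8_t hw_slices,
			       uint8_t req_subslices, uint8_t hw_subslices)
{
	int req_ss = __builtin_popcount(req_subslices);

	/* No zeros in any field. */
	if (!req_slices || !req_subslices)
		return false;

	/* Every requested bit must exist in the hardware mask. */
	if ((req_slices & ~hw_slices) || (req_subslices & ~hw_subslices))
		return false;

	/* Gen11 SScount rule: more than four subslices must come in pairs. */
	if (req_ss > 4 && (req_ss & 1))
		return false;

	return true;
}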
 */
-	if (ce->engine->class != RENDER_CLASS) {
-		ret = -ENODEV;
-		goto out_ce;
-	}
-
-	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
-	if (ret)
-		goto out_ce;
-
-	ret = intel_context_reconfigure_sseu(ce, sseu);
-	if (ret)
-		goto out_ce;
-
-	args->size = sizeof(user_sseu);
-
-out_ce:
-	intel_context_put(ce);
-	return ret;
-}
-
-struct set_engines {
-	struct i915_gem_context *ctx;
-	struct i915_gem_engines *engines;
-};
-
-static int
-set_engines__load_balance(struct i915_user_extension __user *base, void *data)
-{
-	struct i915_context_engines_load_balance __user *ext =
-		container_of_user(base, typeof(*ext), base);
-	const struct set_engines *set = data;
-	struct intel_engine_cs *stack[16];
-	struct intel_engine_cs **siblings;
-	struct intel_context *ce;
-	u16 num_siblings, idx;
-	unsigned int n;
-	int err;
-
-	if (!HAS_EXECLISTS(set->ctx->i915))
-		return -ENODEV;
-
-	if (USES_GUC_SUBMISSION(set->ctx->i915))
-		return -ENODEV; /* not implemented yet */
-
-	if (get_user(idx, &ext->engine_index))
-		return -EFAULT;
-
-	if (idx >= set->engines->num_engines) {
-		DRM_DEBUG("Invalid placement value, %d >= %d\n",
-			  idx, set->engines->num_engines);
-		return -EINVAL;
-	}
-
-	idx = array_index_nospec(idx, set->engines->num_engines);
-	if (set->engines->engines[idx]) {
-		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
-		return -EEXIST;
-	}
-
-	if (get_user(num_siblings, &ext->num_siblings))
-		return -EFAULT;
-
-	err = check_user_mbz(&ext->flags);
-	if (err)
-		return err;
-
-	err = check_user_mbz(&ext->mbz64);
-	if (err)
-		return err;
-
-	siblings = stack;
-	if (num_siblings > ARRAY_SIZE(stack)) {
-		siblings = kmalloc_array(num_siblings,
-					 sizeof(*siblings),
-					 GFP_KERNEL);
-		if (!siblings)
-			return -ENOMEM;
-	}
-
-	for (n = 0; n < num_siblings; n++) {
-		struct i915_engine_class_instance ci;
-
-		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
-			err = -EFAULT;
-			goto out_siblings;
-		}
-
-		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
-						       ci.engine_class,
-						       ci.engine_instance);
-		if (!siblings[n]) {
-			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
-				  n, ci.engine_class, ci.engine_instance);
-			err = -EINVAL;
-			goto out_siblings;
-		}
-	}
-
-	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
-	if (IS_ERR(ce)) {
-		err = PTR_ERR(ce);
-		goto out_siblings;
-	}
-
-	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
-		intel_context_put(ce);
-		err = -EEXIST;
-		goto out_siblings;
-	}
-
-out_siblings:
-	if (siblings != stack)
-		kfree(siblings);
-
-	return err;
-}
-
-static int
-set_engines__bond(struct i915_user_extension __user *base, void *data)
-{
-	struct i915_context_engines_bond __user *ext =
-		container_of_user(base, typeof(*ext), base);
-	const struct set_engines *set = data;
-	struct i915_engine_class_instance ci;
-	struct intel_engine_cs *virtual;
-	struct intel_engine_cs *master;
-	u16 idx, num_bonds;
-	int err, n;
-
-	if (get_user(idx, &ext->virtual_index))
-		return -EFAULT;
-
-	if (idx >= set->engines->num_engines) {
-		DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
-			  idx, set->engines->num_engines);
-		return -EINVAL;
-	}
-
-	idx = array_index_nospec(idx, set->engines->num_engines);
-	if (!set->engines->engines[idx]) {
-		DRM_DEBUG("Invalid engine at %d\n", idx);
-		return -EINVAL;
-	}
-	virtual = set->engines->engines[idx]->engine;
-
-	err = check_user_mbz(&ext->flags);
-	if (err)
-		return err;
-
-	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
-		err = check_user_mbz(&ext->mbz64[n]);
-		if (err)
-			return err;
-	}
-
-	if (copy_from_user(&ci,
&ext->master, sizeof(ci))) - return -EFAULT; - - master = intel_engine_lookup_user(set->ctx->i915, - ci.engine_class, ci.engine_instance); - if (!master) { - DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n", - ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - if (get_user(num_bonds, &ext->num_bonds)) - return -EFAULT; - - for (n = 0; n < num_bonds; n++) { - struct intel_engine_cs *bond; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) - return -EFAULT; - - bond = intel_engine_lookup_user(set->ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!bond) { - DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", - n, ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - /* - * A non-virtual engine has no siblings to choose between; and - * a submit fence will always be directed to the one engine. - */ - if (intel_engine_is_virtual(virtual)) { - err = intel_virtual_engine_attach_bond(virtual, - master, - bond); - if (err) - return err; - } - } - - return 0; -} - -static const i915_user_extension_fn set_engines__extensions[] = { - [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, - [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, -}; - -static int -set_engines(struct i915_gem_context *ctx, - const struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user = - u64_to_user_ptr(args->value); - struct set_engines set = { .ctx = ctx }; - unsigned int num_engines, n; - u64 extensions; - int err; - - if (!args->size) { /* switch back to legacy user_ring_map */ - if (!i915_gem_context_user_engines(ctx)) - return 0; - - set.engines = default_engines(ctx); - if (IS_ERR(set.engines)) - return PTR_ERR(set.engines); - - goto replace; - } - - BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); - if (args->size < sizeof(*user) || - !IS_ALIGNED(args->size, sizeof(*user->engines))) { - DRM_DEBUG("Invalid size for engine array: %d\n", - args->size); - return -EINVAL; - } - - /* - * Note that I915_EXEC_RING_MASK limits execbuf to only using the - * first 64 engines defined here. 
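Before the engine array is parsed, set_engines() derives the element count purely from args->size: a fixed header followed by a whole number of fixed-size records. The same arithmetic in freestanding form, with invented struct names mirroring the layout of i915_context_param_engines:

#include <stddef.h>
#include <stdint.h>

struct engine_spec { uint16_t engine_class, engine_instance; };

struct engines_arg {
	uint64_t extensions;
	struct engine_spec engines[];	/* flexible array member */
};

static int count_engines(size_t size, unsigned int *num)
{
	/* Must cover the header and split evenly into whole records. */
	if (size < sizeof(struct engines_arg) ||
	    (size - sizeof(struct engines_arg)) % sizeof(struct engine_spec))
		return -1;	/* -EINVAL in the real ioctl */

	*num = (size - sizeof(struct engines_arg)) /
	       sizeof(struct engine_spec);
	return 0;
}

Deriving the count from the size (rather than a separate count field) means the kernel never reads past what userspace actually supplied.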
- */ - num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); - - set.engines = kmalloc(struct_size(set.engines, engines, num_engines), - GFP_KERNEL); - if (!set.engines) - return -ENOMEM; - - set.engines->i915 = ctx->i915; - for (n = 0; n < num_engines; n++) { - struct i915_engine_class_instance ci; - struct intel_engine_cs *engine; - - if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { - __free_engines(set.engines, n); - return -EFAULT; - } - - if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && - ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { - set.engines->engines[n] = NULL; - continue; - } - - engine = intel_engine_lookup_user(ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!engine) { - DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", - n, ci.engine_class, ci.engine_instance); - __free_engines(set.engines, n); - return -ENOENT; - } - - set.engines->engines[n] = intel_context_create(ctx, engine); - if (!set.engines->engines[n]) { - __free_engines(set.engines, n); - return -ENOMEM; - } - } - set.engines->num_engines = num_engines; - - err = -EFAULT; - if (!get_user(extensions, &user->extensions)) - err = i915_user_extensions(u64_to_user_ptr(extensions), - set_engines__extensions, - ARRAY_SIZE(set_engines__extensions), - &set); - if (err) { - free_engines(set.engines); - return err; - } - -replace: - mutex_lock(&ctx->engines_mutex); - if (args->size) - i915_gem_context_set_user_engines(ctx); - else - i915_gem_context_clear_user_engines(ctx); - rcu_swap_protected(ctx->engines, set.engines, 1); - mutex_unlock(&ctx->engines_mutex); - - INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); - queue_rcu_work(system_wq, &set.engines->rcu); - - return 0; -} - -static struct i915_gem_engines * -__copy_engines(struct i915_gem_engines *e) -{ - struct i915_gem_engines *copy; - unsigned int n; - - copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); - if (!copy) - return ERR_PTR(-ENOMEM); - - copy->i915 = e->i915; - for (n = 0; n < e->num_engines; n++) { - if (e->engines[n]) - copy->engines[n] = intel_context_get(e->engines[n]); - else - copy->engines[n] = NULL; - } - copy->num_engines = n; - - return copy; -} - -static int -get_engines(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user; - struct i915_gem_engines *e; - size_t n, count, size; - int err = 0; - - err = mutex_lock_interruptible(&ctx->engines_mutex); - if (err) - return err; - - e = NULL; - if (i915_gem_context_user_engines(ctx)) - e = __copy_engines(i915_gem_context_engines(ctx)); - mutex_unlock(&ctx->engines_mutex); - if (IS_ERR_OR_NULL(e)) { - args->size = 0; - return PTR_ERR_OR_ZERO(e); - } - - count = e->num_engines; - - /* Be paranoid in case we have an impedance mismatch */ - if (!check_struct_size(user, engines, count, &size)) { - err = -EINVAL; - goto err_free; - } - if (overflows_type(size, args->size)) { - err = -EINVAL; - goto err_free; - } - - if (!args->size) { - args->size = size; - goto err_free; - } - - if (args->size < size) { - err = -EINVAL; - goto err_free; - } - - user = u64_to_user_ptr(args->value); - if (!access_ok(user, size)) { - err = -EFAULT; - goto err_free; - } - - if (put_user(0, &user->extensions)) { - err = -EFAULT; - goto err_free; - } - - for (n = 0; n < count; n++) { - struct i915_engine_class_instance ci = { - .engine_class = I915_ENGINE_CLASS_INVALID, - .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, - }; - - if (e->engines[n]) { - ci.engine_class = 
e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->instance; - } - - if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { - err = -EFAULT; - goto err_free; - } - } - - args->size = size; - -err_free: - INIT_RCU_WORK(&e->rcu, free_engines_rcu); - queue_rcu_work(system_wq, &e->rcu); - return err; -} - -static int ctx_setparam(struct drm_i915_file_private *fpriv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - int ret = 0; - - switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - if (args->size) - ret = -EINVAL; - else if (args->value) - set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - else - clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: - if (args->size) - ret = -EINVAL; - else if (args->value) - i915_gem_context_set_no_error_capture(ctx); - else - i915_gem_context_clear_no_error_capture(ctx); - break; - - case I915_CONTEXT_PARAM_BANNABLE: - if (args->size) - ret = -EINVAL; - else if (!capable(CAP_SYS_ADMIN) && !args->value) - ret = -EPERM; - else if (args->value) - i915_gem_context_set_bannable(ctx); - else - i915_gem_context_clear_bannable(ctx); - break; - - case I915_CONTEXT_PARAM_RECOVERABLE: - if (args->size) - ret = -EINVAL; - else if (args->value) - i915_gem_context_set_recoverable(ctx); - else - i915_gem_context_clear_recoverable(ctx); - break; - - case I915_CONTEXT_PARAM_PRIORITY: - { - s64 priority = args->value; - - if (args->size) - ret = -EINVAL; - else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - ret = -ENODEV; - else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - ret = -EINVAL; - else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - ret = -EPERM; - else - ctx->sched.priority = - I915_USER_PRIORITY(priority); - } - break; - - case I915_CONTEXT_PARAM_SSEU: - ret = set_sseu(ctx, args); - break; - - case I915_CONTEXT_PARAM_VM: - ret = set_ppgtt(fpriv, ctx, args); - break; - - case I915_CONTEXT_PARAM_ENGINES: - ret = set_engines(ctx, args); - break; - - case I915_CONTEXT_PARAM_BAN_PERIOD: - default: - ret = -EINVAL; - break; - } - - return ret; -} - -struct create_ext { - struct i915_gem_context *ctx; - struct drm_i915_file_private *fpriv; -}; - -static int create_setparam(struct i915_user_extension __user *ext, void *data) -{ - struct drm_i915_gem_context_create_ext_setparam local; - const struct create_ext *arg = data; - - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; - - if (local.param.ctx_id) - return -EINVAL; - - return ctx_setparam(arg->fpriv, arg->ctx, &local.param); -} - -static int clone_engines(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - bool user_engines; - unsigned long n; - - clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); - if (!clone) - goto err_unlock; - - clone->i915 = dst->i915; - for (n = 0; n < e->num_engines; n++) { - struct intel_engine_cs *engine; - - if (!e->engines[n]) { - clone->engines[n] = NULL; - continue; - } - engine = e->engines[n]->engine; - - /* - * Virtual engines are singletons; they can only exist - * inside a single context, because they embed their - * HW context... As each virtual context implies a single - * timeline (each engine can only dequeue a single request - * at any time), it would be surprising for two contexts - * to use the same engine. 
So let's create a copy of
-		 * the virtual engine instead.
-		 */
-		if (intel_engine_is_virtual(engine))
-			clone->engines[n] =
-				intel_execlists_clone_virtual(dst, engine);
-		else
-			clone->engines[n] = intel_context_create(dst, engine);
-		if (IS_ERR_OR_NULL(clone->engines[n])) {
-			__free_engines(clone, n);
-			goto err_unlock;
-		}
-	}
-	clone->num_engines = n;
-
-	user_engines = i915_gem_context_user_engines(src);
-	i915_gem_context_unlock_engines(src);
-
-	free_engines(dst->engines);
-	RCU_INIT_POINTER(dst->engines, clone);
-	if (user_engines)
-		i915_gem_context_set_user_engines(dst);
-	else
-		i915_gem_context_clear_user_engines(dst);
-	return 0;
-
-err_unlock:
-	i915_gem_context_unlock_engines(src);
-	return -ENOMEM;
-}
-
-static int clone_flags(struct i915_gem_context *dst,
-		       struct i915_gem_context *src)
-{
-	dst->user_flags = src->user_flags;
-	return 0;
-}
-
-static int clone_schedattr(struct i915_gem_context *dst,
-			   struct i915_gem_context *src)
-{
-	dst->sched = src->sched;
-	return 0;
-}
-
-static int clone_sseu(struct i915_gem_context *dst,
-		      struct i915_gem_context *src)
-{
-	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
-	struct i915_gem_engines *clone;
-	unsigned long n;
-	int err;
-
-	clone = dst->engines; /* no locking required; sole access */
-	if (e->num_engines != clone->num_engines) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	for (n = 0; n < e->num_engines; n++) {
-		struct intel_context *ce = e->engines[n];
-
-		if (clone->engines[n]->engine->class != ce->engine->class) {
-			/* Must have compatible engine maps! */
-			err = -EINVAL;
-			goto unlock;
-		}
-
-		/* serialises with set_sseu */
-		err = intel_context_lock_pinned(ce);
-		if (err)
-			goto unlock;
-
-		clone->engines[n]->sseu = ce->sseu;
-		intel_context_unlock_pinned(ce);
-	}
-
-	err = 0;
-unlock:
-	i915_gem_context_unlock_engines(src);
-	return err;
-}
-
-static int clone_timeline(struct i915_gem_context *dst,
-			  struct i915_gem_context *src)
-{
-	if (src->timeline) {
-		GEM_BUG_ON(src->timeline == dst->timeline);
-
-		if (dst->timeline)
-			i915_timeline_put(dst->timeline);
-		dst->timeline = i915_timeline_get(src->timeline);
-	}
-
-	return 0;
-}
-
-static int clone_vm(struct i915_gem_context *dst,
-		    struct i915_gem_context *src)
-{
-	struct i915_hw_ppgtt *ppgtt;
-
-	rcu_read_lock();
-	do {
-		ppgtt = READ_ONCE(src->ppgtt);
-		if (!ppgtt)
-			break;
-
-		if (!kref_get_unless_zero(&ppgtt->ref))
-			continue;
-
-		/*
-		 * This ppgtt may have been reallocated between
-		 * the read and the kref, and reassigned to a third
-		 * context. In order to avoid inadvertent sharing
-		 * of this ppgtt with that third context (and not
-		 * src), we have to confirm that we have the same
-		 * ppgtt after passing through the strong memory
-		 * barrier implied by a successful
-		 * kref_get_unless_zero().
-		 *
-		 * Once we have acquired the current ppgtt of src,
-		 * we no longer care if it is released from src, as
-		 * it cannot be reallocated elsewhere.
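clone_vm()'s loop is the classic idiom the comment above spells out: read the shared pointer, try to take a reference that may already have hit zero, then re-check the pointer to reject an object that died and was recycled in between. Stripped to its shape in userspace C11, with RCU itself elided and vm/vm_put as hypothetical stand-ins:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct vm { atomic_int ref; };

static bool vm_get_unless_zero(struct vm *vm)
{
	int r = atomic_load(&vm->ref);

	while (r)	/* never resurrect a count that already hit zero */
		if (atomic_compare_exchange_weak(&vm->ref, &r, r + 1))
			return true;
	return false;
}

static void vm_put(struct vm *vm)
{
	if (atomic_fetch_sub(&vm->ref, 1) == 1)
		free(vm);
}

static struct vm *get_live_vm(struct vm *_Atomic *slot)
{
	for (;;) {
		struct vm *vm = atomic_load(slot);	/* READ_ONCE() */

		if (!vm)
			return NULL;
		if (!vm_get_unless_zero(vm))
			continue;	/* raced with the final put: retry */
		if (vm == atomic_load(slot))
			return vm;	/* same object: reference is good */
		vm_put(vm);		/* recycled into another owner: retry */
	}
}

The second load is the crucial step: the successful refcount bump orders it after the increment, so a match proves the pointer never moved under us.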
- */ - - if (ppgtt == READ_ONCE(src->ppgtt)) - break; - - i915_ppgtt_put(ppgtt); - } while (1); - rcu_read_unlock(); - - if (ppgtt) { - __assign_ppgtt(dst, ppgtt); - i915_ppgtt_put(ppgtt); - } - - return 0; -} - -static int create_clone(struct i915_user_extension __user *ext, void *data) -{ - static int (* const fn[])(struct i915_gem_context *dst, - struct i915_gem_context *src) = { -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y - MAP(ENGINES, clone_engines), - MAP(FLAGS, clone_flags), - MAP(SCHEDATTR, clone_schedattr), - MAP(SSEU, clone_sseu), - MAP(TIMELINE, clone_timeline), - MAP(VM, clone_vm), -#undef MAP - }; - struct drm_i915_gem_context_create_ext_clone local; - const struct create_ext *arg = data; - struct i915_gem_context *dst = arg->ctx; - struct i915_gem_context *src; - int err, bit; - - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; - - BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != - I915_CONTEXT_CLONE_UNKNOWN); - - if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) - return -EINVAL; - - if (local.rsvd) - return -EINVAL; - - rcu_read_lock(); - src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); - rcu_read_unlock(); - if (!src) - return -ENOENT; - - GEM_BUG_ON(src == dst); - - for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { - if (!(local.flags & BIT(bit))) - continue; - - err = fn[bit](dst, src); - if (err) - return err; - } - - return 0; -} - -static const i915_user_extension_fn create_extensions[] = { - [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, - [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, -}; - -static bool client_is_banned(struct drm_i915_file_private *file_priv) -{ - return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; -} - -int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *i915 = to_i915(dev); - struct drm_i915_gem_context_create_ext *args = data; - struct create_ext ext_data; - int ret; - - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return -ENODEV; - - if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) - return -EINVAL; - - ret = i915_terminally_wedged(i915); - if (ret) - return ret; - - ext_data.fpriv = file->driver_priv; - if (client_is_banned(ext_data.fpriv)) { - DRM_DEBUG("client %s[%d] banned from creating ctx\n", - current->comm, - pid_nr(get_task_pid(current, PIDTYPE_PID))); - return -EIO; - } - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); - if (IS_ERR(ext_data.ctx)) - return PTR_ERR(ext_data.ctx); - - if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { - ret = i915_user_extensions(u64_to_user_ptr(args->extensions), - create_extensions, - ARRAY_SIZE(create_extensions), - &ext_data); - if (ret) - goto err_ctx; - } - - ret = gem_context_register(ext_data.ctx, ext_data.fpriv); - if (ret < 0) - goto err_ctx; - - args->ctx_id = ret; - DRM_DEBUG("HW context %d created\n", args->ctx_id); - - return 0; - -err_ctx: - mutex_lock(&dev->struct_mutex); - context_close(ext_data.ctx); - mutex_unlock(&dev->struct_mutex); - return ret; -} - -int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_context_destroy *args = data; - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_gem_context *ctx; - - if (args->pad != 0) - return -EINVAL; - - if (!args->ctx_id) - return -ENOENT; - - if 
(mutex_lock_interruptible(&file_priv->context_idr_lock)) - return -EINTR; - - ctx = idr_remove(&file_priv->context_idr, args->ctx_id); - mutex_unlock(&file_priv->context_idr_lock); - if (!ctx) - return -ENOENT; - - mutex_lock(&dev->struct_mutex); - context_close(ctx); - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -static int get_sseu(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct drm_i915_gem_context_param_sseu user_sseu; - struct intel_context *ce; - unsigned long lookup; - int err; - - if (args->size == 0) - goto out; - else if (args->size < sizeof(user_sseu)) - return -EINVAL; - - if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), - sizeof(user_sseu))) - return -EFAULT; - - if (user_sseu.rsvd) - return -EINVAL; - - if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) - return -EINVAL; - - lookup = 0; - if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) - lookup |= LOOKUP_USER_INDEX; - - ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - err = intel_context_lock_pinned(ce); /* serialises with set_sseu */ - if (err) { - intel_context_put(ce); - return err; - } - - user_sseu.slice_mask = ce->sseu.slice_mask; - user_sseu.subslice_mask = ce->sseu.subslice_mask; - user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; - user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice; - - intel_context_unlock_pinned(ce); - intel_context_put(ce); - - if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu, - sizeof(user_sseu))) - return -EFAULT; - -out: - args->size = sizeof(user_sseu); - - return 0; -} - -int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_context_param *args = data; - struct i915_gem_context *ctx; - int ret = 0; - - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; - - switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - args->size = 0; - args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - - case I915_CONTEXT_PARAM_GTT_SIZE: - args->size = 0; - if (ctx->ppgtt) - args->value = ctx->ppgtt->vm.total; - else if (to_i915(dev)->mm.aliasing_ppgtt) - args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; - else - args->value = to_i915(dev)->ggtt.vm.total; - break; - - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: - args->size = 0; - args->value = i915_gem_context_no_error_capture(ctx); - break; - - case I915_CONTEXT_PARAM_BANNABLE: - args->size = 0; - args->value = i915_gem_context_is_bannable(ctx); - break; - - case I915_CONTEXT_PARAM_RECOVERABLE: - args->size = 0; - args->value = i915_gem_context_is_recoverable(ctx); - break; - - case I915_CONTEXT_PARAM_PRIORITY: - args->size = 0; - args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; - break; - - case I915_CONTEXT_PARAM_SSEU: - ret = get_sseu(ctx, args); - break; - - case I915_CONTEXT_PARAM_VM: - ret = get_ppgtt(file_priv, ctx, args); - break; - - case I915_CONTEXT_PARAM_ENGINES: - ret = get_engines(ctx, args); - break; - - case I915_CONTEXT_PARAM_BAN_PERIOD: - default: - ret = -EINVAL; - break; - } - - i915_gem_context_put(ctx); - return ret; -} - -int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_context_param *args = data; - struct i915_gem_context *ctx; - 
int ret; - - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; - - ret = ctx_setparam(file_priv, ctx, args); - - i915_gem_context_put(ctx); - return ret; -} - -int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, - void *data, struct drm_file *file) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_reset_stats *args = data; - struct i915_gem_context *ctx; - int ret; - - if (args->flags || args->pad) - return -EINVAL; - - ret = -ENOENT; - rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); - if (!ctx) - goto out; - - /* - * We opt for unserialised reads here. This may result in tearing - * in the extremely unlikely event of a GPU hang on this context - * as we are querying them. If we need that extra layer of protection, - * we should wrap the hangstats with a seqlock. - */ - - if (capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); - else - args->reset_count = 0; - - args->batch_active = atomic_read(&ctx->guilty_count); - args->batch_pending = atomic_read(&ctx->active_count); - - ret = 0; -out: - rcu_read_unlock(); - return ret; -} - -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - -/* GEM context-engines iterator: for_each_gem_engine() */ -struct intel_context * -i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) -{ - const struct i915_gem_engines *e = it->engines; - struct intel_context *ctx; - - do { - if (it->idx >= e->num_engines) - return NULL; - - ctx = e->engines[it->idx++]; - } while (!ctx); - - return ctx; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_context.c" -#include "selftests/i915_gem_context.c" -#endif - -static void i915_global_gem_context_shrink(void) -{ - kmem_cache_shrink(global.slab_luts); -} - -static void i915_global_gem_context_exit(void) -{ - kmem_cache_destroy(global.slab_luts); -} - -static struct i915_global_gem_context global = { { - .shrink = i915_global_gem_context_shrink, - .exit = i915_global_gem_context_exit, -} }; - -int __init i915_global_gem_context_init(void) -{ - global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); - if (!global.slab_luts) - return -ENOMEM; - - i915_global_register(&global.base); - return 0; -} diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h deleted file mode 100644 index 9ad4a6362438..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following 
conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __I915_GEM_CONTEXT_H__ -#define __I915_GEM_CONTEXT_H__ - -#include "i915_gem_context_types.h" - -#include "gt/intel_context.h" - -#include "i915_gem.h" -#include "i915_scheduler.h" -#include "intel_device_info.h" - -struct drm_device; -struct drm_file; - -static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_CLOSED, &ctx->flags); -} - -static inline void i915_gem_context_set_closed(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - set_bit(CONTEXT_CLOSED, &ctx->flags); -} - -static inline bool i915_gem_context_no_error_capture(const struct i915_gem_context *ctx) -{ - return test_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); -} - -static inline void i915_gem_context_set_no_error_capture(struct i915_gem_context *ctx) -{ - set_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); -} - -static inline void i915_gem_context_clear_no_error_capture(struct i915_gem_context *ctx) -{ - clear_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); -} - -static inline bool i915_gem_context_is_bannable(const struct i915_gem_context *ctx) -{ - return test_bit(UCONTEXT_BANNABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_set_bannable(struct i915_gem_context *ctx) -{ - set_bit(UCONTEXT_BANNABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx) -{ - clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags); -} - -static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx) -{ - return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx) -{ - set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx) -{ - clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); -} - -static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) -{ - __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline bool -i915_gem_context_user_engines(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_USER_ENGINES, &ctx->flags); -} - -static inline void -i915_gem_context_set_user_engines(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_USER_ENGINES, &ctx->flags); -} - -static inline void -i915_gem_context_clear_user_engines(struct 
i915_gem_context *ctx) -{ - clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); -} - -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) - return 0; - - return __i915_gem_context_pin_hw_id(ctx); -} - -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); -} - -static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) -{ - return !ctx->file_priv; -} - -/* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); - -int i915_gem_context_open(struct drm_i915_private *i915, - struct drm_file *file); -void i915_gem_context_close(struct drm_file *file); - -void i915_gem_context_release(struct kref *ctx_ref); -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev); - -int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); - -int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); - -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); - -static inline struct i915_gem_context * -i915_gem_context_get(struct i915_gem_context *ctx) -{ - kref_get(&ctx->ref); - return ctx; -} - -static inline void i915_gem_context_put(struct i915_gem_context *ctx) -{ - kref_put(&ctx->ref, i915_gem_context_release); -} - -static inline struct i915_gem_engines * -i915_gem_context_engines(struct i915_gem_context *ctx) -{ - return rcu_dereference_protected(ctx->engines, - lockdep_is_held(&ctx->engines_mutex)); -} - -static inline struct i915_gem_engines * -i915_gem_context_lock_engines(struct i915_gem_context *ctx) - __acquires(&ctx->engines_mutex) -{ - mutex_lock(&ctx->engines_mutex); - return i915_gem_context_engines(ctx); -} - -static inline void -i915_gem_context_unlock_engines(struct i915_gem_context *ctx) - __releases(&ctx->engines_mutex) -{ - mutex_unlock(&ctx->engines_mutex); -} - -static inline struct intel_context * -i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) -{ - return i915_gem_context_engines(ctx)->engines[idx]; -} - -static inline struct intel_context * -i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) -{ - struct intel_context *ce = ERR_PTR(-EINVAL); - - rcu_read_lock(); { - struct i915_gem_engines *e = rcu_dereference(ctx->engines); - if (likely(idx < e->num_engines && e->engines[idx])) - ce = intel_context_get(e->engines[idx]); - } rcu_read_unlock(); - - return ce; -} - -static inline void -i915_gem_engines_iter_init(struct i915_gem_engines_iter *it, - struct i915_gem_engines *engines) -{ - GEM_BUG_ON(!engines); - it->engines = engines; - it->idx = 0; -} - -struct 
intel_context *
-i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
-
-#define for_each_gem_engine(ce, engines, it) \
-	for (i915_gem_engines_iter_init(&(it), (engines)); \
-	     ((ce) = i915_gem_engines_iter_next(&(it)));)
-
-struct i915_lut_handle *i915_lut_handle_alloc(void);
-void i915_lut_handle_free(struct i915_lut_handle *lut);
-
-#endif /* !__I915_GEM_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
deleted file mode 100644
index fb965ded2508..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __I915_GEM_CONTEXT_TYPES_H__
-#define __I915_GEM_CONTEXT_TYPES_H__
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "gt/intel_context_types.h"
-
-#include "i915_scheduler.h"
-
-struct pid;
-
-struct drm_i915_private;
-struct drm_i915_file_private;
-struct i915_hw_ppgtt;
-struct i915_timeline;
-struct intel_ring;
-
-struct i915_gem_engines {
-	struct rcu_work rcu;
-	struct drm_i915_private *i915;
-	unsigned int num_engines;
-	struct intel_context *engines[];
-};
-
-struct i915_gem_engines_iter {
-	unsigned int idx;
-	const struct i915_gem_engines *engines;
-};
-
-/**
- * struct i915_gem_context - client state
- *
- * The struct i915_gem_context represents the combined view of the driver and
- * logical hardware state for a particular client.
- */
-struct i915_gem_context {
-	/** i915: i915 device backpointer */
-	struct drm_i915_private *i915;
-
-	/** file_priv: owning file descriptor */
-	struct drm_i915_file_private *file_priv;
-
-	/**
-	 * @engines: User defined engines for this context
-	 *
-	 * Various uAPIs offer the ability to look up an
-	 * index from this array to select an engine to operate on.
-	 *
-	 * Multiple logically distinct instances of the same engine
-	 * may be defined in the array, as well as composite virtual
-	 * engines.
-	 *
-	 * Execbuf uses the I915_EXEC_RING_MASK as an index into this
-	 * array to select which HW context + engine to execute on. For
-	 * the default array, the user_ring_map[] is used to translate
-	 * the legacy uABI onto the appropriate index (e.g. both
-	 * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
-	 * context, and I915_EXEC_BSD is weird). For a user defined
-	 * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
-	 *
-	 * User defined by I915_CONTEXT_PARAM_ENGINE (when the
-	 * CONTEXT_USER_ENGINES flag is set).
-	 */
-	struct i915_gem_engines __rcu *engines;
-	struct mutex engines_mutex; /* guards writes to engines */
-
-	struct i915_timeline *timeline;
-
-	/**
-	 * @ppgtt: unique address space (GTT)
-	 *
-	 * In full-ppgtt mode, each context has its own address space ensuring
-	 * complete separation of one client from all others.
-	 *
-	 * In other modes, this is a NULL pointer with the expectation that
-	 * the caller uses the shared global GTT.
-	 */
-	struct i915_hw_ppgtt *ppgtt;
-
-	/**
-	 * @pid: process id of creator
-	 *
-	 * Note that who created the context may not be the principal user,
-	 * as the context may be shared across a local socket. However,
-	 * that should only affect the default context, all contexts created
-	 * explicitly by the client are expected to be isolated.
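With the rbtree gone, iteration is just an index walk over ctx->engines[]; the for_each_gem_engine() macro above plus the caller-held engines lock is the entire API (v5 of the patch pushed that locking to the caller). A typical call site, sketched after context_barrier_task() earlier in this patch:

	struct i915_gem_engines_iter it;
	struct intel_context *ce;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (!ce->state)	/* engine without a HW context: skip */
			continue;
		/* ... build and submit a request against ce ... */
	}
	i915_gem_context_unlock_engines(ctx);

Because i915_gem_engines_iter_next() skips NULL slots internally, callers never see the holes a user-supplied engine map may contain.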
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
deleted file mode 100644
index fb965ded2508..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __I915_GEM_CONTEXT_TYPES_H__
-#define __I915_GEM_CONTEXT_TYPES_H__
-
-#include <linux/atomic.h>
-#include <linux/list.h>
-#include <linux/llist.h>
-#include <linux/kref.h>
-#include <linux/mutex.h>
-#include <linux/radix-tree.h>
-#include <linux/rbtree.h>
-#include <linux/rcupdate.h>
-#include <linux/types.h>
-
-#include "gt/intel_context_types.h"
-
-#include "i915_scheduler.h"
-
-struct pid;
-
-struct drm_i915_private;
-struct drm_i915_file_private;
-struct i915_hw_ppgtt;
-struct i915_timeline;
-struct intel_ring;
-
-struct i915_gem_engines {
-	struct rcu_work rcu;
-	struct drm_i915_private *i915;
-	unsigned int num_engines;
-	struct intel_context *engines[];
-};
-
-struct i915_gem_engines_iter {
-	unsigned int idx;
-	const struct i915_gem_engines *engines;
-};
-
-/**
- * struct i915_gem_context - client state
- *
- * The struct i915_gem_context represents the combined view of the driver and
- * logical hardware state for a particular client.
- */
-struct i915_gem_context {
-	/** i915: i915 device backpointer */
-	struct drm_i915_private *i915;
-
-	/** file_priv: owning file descriptor */
-	struct drm_i915_file_private *file_priv;
-
-	/**
-	 * @engines: User defined engines for this context
-	 *
-	 * Various uAPI offer the ability to look up an index from this
-	 * array to select an engine to operate on.
-	 *
-	 * Multiple logically distinct instances of the same engine
-	 * may be defined in the array, as well as composite virtual
-	 * engines.
-	 *
-	 * Execbuf uses the I915_EXEC_RING_MASK as an index into this
-	 * array to select which HW context + engine to execute on. For
-	 * the default array, the user_ring_map[] is used to translate
-	 * the legacy uABI onto the appropriate index (e.g. both
-	 * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
-	 * context, and I915_EXEC_BSD is weird). For a user defined
-	 * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
-	 *
-	 * User defined by I915_CONTEXT_PARAM_ENGINE (when the
-	 * CONTEXT_USER_ENGINES flag is set).
-	 */
-	struct i915_gem_engines __rcu *engines;
-	struct mutex engines_mutex; /* guards writes to engines */
-
-	struct i915_timeline *timeline;
-
-	/**
-	 * @ppgtt: unique address space (GTT)
-	 *
-	 * In full-ppgtt mode, each context has its own address space ensuring
-	 * complete separation of one client from all others.
-	 *
-	 * In other modes, this is a NULL pointer with the expectation that
-	 * the caller uses the shared global GTT.
-	 */
-	struct i915_hw_ppgtt *ppgtt;
-
-	/**
-	 * @pid: process id of creator
-	 *
-	 * Note that who created the context may not be the principal user,
-	 * as the context may be shared across a local socket. However,
-	 * that should only affect the default context, all contexts created
-	 * explicitly by the client are expected to be isolated.
-	 */
-	struct pid *pid;
-
-	/**
-	 * @name: arbitrary name
-	 *
-	 * A name is constructed for the context from the creator's process
-	 * name, pid and user handle in order to uniquely identify the
-	 * context in messages.
-	 */
-	const char *name;
-
-	/** link: place within &drm_i915_private.context_list */
-	struct list_head link;
-	struct llist_node free_link;
-
-	/**
-	 * @ref: reference count
-	 *
-	 * A reference to a context is held by both the client who created it
-	 * and on each request submitted to the hardware using the request
-	 * (to ensure the hardware has access to the state until it has
-	 * finished all pending writes). See i915_gem_context_get() and
-	 * i915_gem_context_put() for access.
-	 */
-	struct kref ref;
-
-	/**
-	 * @rcu: rcu_head for deferred freeing.
-	 */
-	struct rcu_head rcu;
-
-	/**
-	 * @user_flags: small set of booleans controlled by the user
-	 */
-	unsigned long user_flags;
-#define UCONTEXT_NO_ZEROMAP		0
-#define UCONTEXT_NO_ERROR_CAPTURE	1
-#define UCONTEXT_BANNABLE		2
-#define UCONTEXT_RECOVERABLE		3
-
-	/**
-	 * @flags: small set of booleans
-	 */
-	unsigned long flags;
-#define CONTEXT_BANNED			0
-#define CONTEXT_CLOSED			1
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
-#define CONTEXT_USER_ENGINES		3
-
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unique
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
-
-	struct mutex mutex;
-
-	struct i915_sched_attr sched;
-
-	/** ring_size: size for allocating the per-engine ring buffer */
-	u32 ring_size;
-	/** desc_template: invariant fields for the HW context descriptor */
-	u32 desc_template;
-
-	/** guilty_count: How many times this context has caused a GPU hang. */
-	atomic_t guilty_count;
-	/**
-	 * @active_count: How many times this context was active during a GPU
-	 * hang, but did not cause it.
-	 */
-	atomic_t active_count;
-
-	/**
-	 * @hang_timestamp: The last time(s) this context caused a GPU hang
-	 */
-	unsigned long hang_timestamp[2];
-#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
-
-	/** remap_slice: Bitmask of cache lines that need remapping */
-	u8 remap_slice;
-
-	/** handles_vma: rbtree to look up our context specific obj/vma for
-	 * the user handle. (user handles are per fd, but the binding is
-	 * per vm, which may be one per context or shared with the global GTT)
-	 */
-	struct radix_tree_root handles_vma;
-
-	/** handles_list: reverse list of all the rbtree entries in use for
-	 * this context, which allows us to free all the allocations on
-	 * context close.
-	 */
-	struct list_head handles_list;
-};
-
-#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
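The UCONTEXT_* bits above are driven from userspace through the context setparam ioctl. A hedged sketch against the published uAPI (assumes an open DRM fd and a context id from GEM_CONTEXT_CREATE; I915_CONTEXT_PARAM_RECOVERABLE maps onto UCONTEXT_RECOVERABLE):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* mark a context as unrecoverable after a hang */
	static int context_set_unrecoverable(int fd, uint32_t ctx_id)
	{
		struct drm_i915_gem_context_param p = {
			.ctx_id = ctx_id,
			.param = I915_CONTEXT_PARAM_RECOVERABLE,
			.value = 0,
		};

		return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
	}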
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
deleted file mode 100644
index 5a101a9462d8..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *	Dave Airlie <airlied@redhat.com>
- */
-
-#include <linux/dma-buf.h>
-#include <linux/reservation.h>
-
-
-#include "i915_drv.h"
-
-static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
-{
-	return to_intel_bo(buf->priv);
-}
-
-static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
-					     enum dma_data_direction dir)
-{
-	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
-	struct sg_table *st;
-	struct scatterlist *src, *dst;
-	int ret, i;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err;
-
-	/* Copy sg so that we make an independent mapping */
-	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
-	if (st == NULL) {
-		ret = -ENOMEM;
-		goto err_unpin_pages;
-	}
-
-	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
-	if (ret)
-		goto err_free;
-
-	src = obj->mm.pages->sgl;
-	dst = st->sgl;
-	for (i = 0; i < obj->mm.pages->nents; i++) {
-		sg_set_page(dst, sg_page(src), src->length, 0);
-		dst = sg_next(dst);
-		src = sg_next(src);
-	}
-
-	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
-		ret = -ENOMEM;
-		goto err_free_sg;
-	}
-
-	return st;
-
-err_free_sg:
-	sg_free_table(st);
-err_free:
-	kfree(st);
-err_unpin_pages:
-	i915_gem_object_unpin_pages(obj);
-err:
-	return ERR_PTR(ret);
-}
-
-static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
-				   struct sg_table *sg,
-				   enum dma_data_direction dir)
-{
-	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
-
-	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
-	sg_free_table(sg);
-	kfree(sg);
-
-	i915_gem_object_unpin_pages(obj);
-}
-
-static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
-{
-	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-
-	return i915_gem_object_pin_map(obj, I915_MAP_WB);
-}
-
-static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
-{
-	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-
-	i915_gem_object_flush_map(obj);
-	i915_gem_object_unpin_map(obj);
-}
-
-static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long
page_num) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct page *page; - - if (page_num >= obj->base.size >> PAGE_SHIFT) - return NULL; - - if (!i915_gem_object_has_struct_page(obj)) - return NULL; - - if (i915_gem_object_pin_pages(obj)) - return NULL; - - /* Synchronisation is left to the caller (via .begin_cpu_access()) */ - page = i915_gem_object_get_page(obj, page_num); - if (IS_ERR(page)) - goto err_unpin; - - return kmap(page); - -err_unpin: - i915_gem_object_unpin_pages(obj); - return NULL; -} - -static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - - kunmap(virt_to_page(addr)); - i915_gem_object_unpin_pages(obj); -} - -static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - int ret; - - if (obj->base.size < vma->vm_end - vma->vm_start) - return -EINVAL; - - if (!obj->base.filp) - return -ENODEV; - - ret = call_mmap(obj->base.filp, vma); - if (ret) - return ret; - - fput(vma->vm_file); - vma->vm_file = get_file(obj->base.filp); - - return 0; -} - -static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; - bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); - int err; - - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_mutex_lock_interruptible(dev); - if (err) - goto out; - - err = i915_gem_object_set_to_cpu_domain(obj, write); - mutex_unlock(&dev->struct_mutex); - -out: - i915_gem_object_unpin_pages(obj); - return err; -} - -static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; - int err; - - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_mutex_lock_interruptible(dev); - if (err) - goto out; - - err = i915_gem_object_set_to_gtt_domain(obj, false); - mutex_unlock(&dev->struct_mutex); - -out: - i915_gem_object_unpin_pages(obj); - return err; -} - -static const struct dma_buf_ops i915_dmabuf_ops = { - .map_dma_buf = i915_gem_map_dma_buf, - .unmap_dma_buf = i915_gem_unmap_dma_buf, - .release = drm_gem_dmabuf_release, - .map = i915_gem_dmabuf_kmap, - .unmap = i915_gem_dmabuf_kunmap, - .mmap = i915_gem_dmabuf_mmap, - .vmap = i915_gem_dmabuf_vmap, - .vunmap = i915_gem_dmabuf_vunmap, - .begin_cpu_access = i915_gem_begin_cpu_access, - .end_cpu_access = i915_gem_end_cpu_access, -}; - -struct dma_buf *i915_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gem_obj, int flags) -{ - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.ops = &i915_dmabuf_ops; - exp_info.size = gem_obj->size; - exp_info.flags = flags; - exp_info.priv = gem_obj; - exp_info.resv = obj->resv; - - if (obj->ops->dmabuf_export) { - int ret = obj->ops->dmabuf_export(obj); - if (ret) - return ERR_PTR(ret); - } - - return drm_gem_dmabuf_export(dev, &exp_info); -} - -static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) -{ - struct sg_table *pages; - unsigned int sg_page_sizes; - - pages = dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - sg_page_sizes = 
i915_sg_page_sizes(pages->sgl);
-
-	__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
-
-	return 0;
-}
-
-static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
-					     struct sg_table *pages)
-{
-	dma_buf_unmap_attachment(obj->base.import_attach, pages,
-				 DMA_BIDIRECTIONAL);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
-	.get_pages = i915_gem_object_get_pages_dmabuf,
-	.put_pages = i915_gem_object_put_pages_dmabuf,
-};
-
-struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
-					     struct dma_buf *dma_buf)
-{
-	struct dma_buf_attachment *attach;
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	/* is this one of our own objects? */
-	if (dma_buf->ops == &i915_dmabuf_ops) {
-		obj = dma_buf_to_obj(dma_buf);
-		/* is it from our device? */
-		if (obj->base.dev == dev) {
-			/*
-			 * Importing a dmabuf exported from our own gem increases
-			 * the refcount on the gem itself instead of the f_count
-			 * of the dmabuf.
-			 */
-			return &i915_gem_object_get(obj)->base;
-		}
-	}
-
-	/* need to attach */
-	attach = dma_buf_attach(dma_buf, dev->dev);
-	if (IS_ERR(attach))
-		return ERR_CAST(attach);
-
-	get_dma_buf(dma_buf);
-
-	obj = i915_gem_object_alloc();
-	if (obj == NULL) {
-		ret = -ENOMEM;
-		goto fail_detach;
-	}
-
-	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
-	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
-	obj->base.import_attach = attach;
-	obj->resv = dma_buf->resv;
-
-	/* We use GTT as shorthand for a coherent domain, one that is
-	 * neither in the GPU cache nor in the CPU cache, where all
-	 * writes are immediately visible in memory. (That's not strictly
-	 * true, but it's close! There are internal buffers such as the
-	 * write-combined buffer or a delay through the chipset for GTT
-	 * writes that do require us to treat GTT as a separate cache domain.)
-	 */
-	obj->read_domains = I915_GEM_DOMAIN_GTT;
-	obj->write_domain = 0;
-
-	return &obj->base;
-
-fail_detach:
-	dma_buf_detach(dma_buf, attach);
-	dma_buf_put(dma_buf);
-
-	return ERR_PTR(ret);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_dmabuf.c"
-#include "selftests/i915_gem_dmabuf.c"
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0bdb3e072ba5..a5783c4cb98b 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -28,6 +28,8 @@
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "i915_trace.h"
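The export/import hooks above service the generic PRIME ioctls. A hedged userspace sketch of exporting a GEM handle as a dma-buf fd (assumes an open DRM fd; not code from this patch):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/drm.h>

	/* export a GEM handle as a dma-buf fd, serviced by i915_gem_prime_export() */
	static int gem_handle_to_dmabuf_fd(int drm_fd, uint32_t handle)
	{
		struct drm_prime_handle args = {
			.handle = handle,
			.flags = DRM_CLOEXEC | DRM_RDWR,
		};

		if (ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args))
			return -1;

		return args.fd;
	}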
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
deleted file mode 100644
index 699f3f180d8a..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ /dev/null
@@ -1,2788 +0,0 @@
-/*
- * Copyright © 2008,2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
- *
- */
-
-#include <linux/intel-iommu.h>
-#include <linux/reservation.h>
-#include <linux/sync_file.h>
-#include <linux/uaccess.h>
-
-#include <drm/drm_syncobj.h>
-#include <drm/i915_drm.h>
-
-#include "gem/i915_gem_ioctls.h"
-#include "gt/intel_gt_pm.h"
-
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-
-enum {
-	FORCE_CPU_RELOC = 1,
-	FORCE_GTT_RELOC,
-	FORCE_GPU_RELOC,
-#define DBG_FORCE_RELOC 0 /* choose one of the above! */
-};
-
-#define __EXEC_OBJECT_HAS_REF		BIT(31)
-#define __EXEC_OBJECT_HAS_PIN		BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
-#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
-#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
-#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
-#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
-
-#define __EXEC_HAS_RELOC	BIT(31)
-#define __EXEC_VALIDATED	BIT(30)
-#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
-#define UPDATE			PIN_OFFSET_FIXED
-
-#define BATCH_OFFSET_BIAS (256*1024)
-
-#define __I915_EXEC_ILLEGAL_FLAGS \
-	(__I915_EXEC_UNKNOWN_FLAGS | \
-	 I915_EXEC_CONSTANTS_MASK | \
-	 I915_EXEC_RESOURCE_STREAMER)
-
-/* Catch emission of unexpected errors for CI! */
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-#undef EINVAL
-#define EINVAL ({ \
-	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
-	22; \
-})
-#endif
-
-/**
- * DOC: User command execution
- *
- * Userspace submits commands to be executed on the GPU as an instruction
- * stream within a GEM object we call a batchbuffer. These instructions may
- * refer to other GEM objects containing auxiliary state such as kernels,
- * samplers, render targets and even secondary batchbuffers. Userspace does
- * not know where in the GPU memory these objects reside and so before the
- * batchbuffer is passed to the GPU for execution, those addresses in the
- * batchbuffer and auxiliary objects are updated. This is known as relocation,
- * or patching. To try and avoid having to relocate each object on the next
- * execution, userspace is told the location of those objects in this pass,
- * but this remains just a hint as the kernel may choose a new location for
- * any object in the future.
- *
- * At the level of talking to the hardware, submitting a batchbuffer for the
- * GPU to execute amounts to adding content to a buffer from which the HW
- * command streamer is reading.
- *
- * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
- *    Execlists, this command is not placed on the same buffer as the
- *    remaining items.
- *
- * 2. Add a command to invalidate caches to the buffer.
- *
- * 3. Add a batchbuffer start command to the buffer; the start command is
- *    essentially a token together with the GPU address of the batchbuffer
- *    to be executed.
- *
- * 4. Add a pipeline flush to the buffer.
- *
- * 5. Add a memory write command to the buffer to record when the GPU
- *    is done executing the batchbuffer. The memory write writes the
- *    global sequence number of the request, ``i915_request::global_seqno``;
- *    the i915 driver uses the current value in the register to determine
- *    if the GPU has completed the batchbuffer.
- *
- * 6. Add a user interrupt command to the buffer. This command instructs
- *    the GPU to issue an interrupt when the command, pipeline flush and
- *    memory write are completed.
- *
- * 7. Inform the hardware of the additional commands added to the buffer
- *    (by updating the tail pointer).
- *
- * Processing an execbuf ioctl is conceptually split up into a few phases.
- *
- * 1. Validation - Ensure all the pointers, handles and flags are valid.
- * 2. Reservation - Assign GPU address space for every object
- * 3. Relocation - Update any addresses to point to the final locations
- * 4. Serialisation - Order the request with respect to its dependencies
- * 5. Construction - Construct a request to execute the batchbuffer
- * 6. Submission (at some point in the future execution)
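For reference, the ioctl these phases service is driven from userspace roughly as below. This is an editorial sketch against the published uAPI in <drm/i915_drm.h>, not code from this patch; it assumes an open DRM fd, a GEM handle containing a valid batch, and a context id, with error handling elided:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* submit a single batch bo on the render engine of context ctx_id */
	static int submit_batch(int fd, uint32_t batch_handle, uint32_t ctx_id,
				uint32_t batch_len)
	{
		struct drm_i915_gem_exec_object2 obj = {
			.handle = batch_handle,
		};
		struct drm_i915_gem_execbuffer2 execbuf = {
			.buffers_ptr = (uintptr_t)&obj,
			.buffer_count = 1,
			.batch_len = batch_len,
			.flags = I915_EXEC_RENDER,
			.rsvd1 = ctx_id, /* the context id lives in rsvd1 */
		};

		return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
	}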
- *
- * Reserving resources for the execbuf is the most complicated phase. We
- * neither want to have to migrate the object in the address space, nor do
- * we want to have to update any relocations pointing to this object. Ideally,
- * we want to leave the object where it is and for all the existing relocations
- * to match. If the object is given a new address, or if userspace thinks the
- * object is elsewhere, we have to parse all the relocation entries and update
- * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
- * all the target addresses in all of its objects match the value in the
- * relocation entries and that they all match the presumed offsets given by the
- * list of execbuffer objects. Using this knowledge, we know that if we haven't
- * moved any buffers, all the relocation entries are valid and we can skip
- * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
- * hang.) The requirements for using I915_EXEC_NO_RELOC are:
- *
- *      The addresses written in the objects must match the corresponding
- *      reloc.presumed_offset which in turn must match the corresponding
- *      execobject.offset.
- *
- *      Any render targets written to in the batch must be flagged with
- *      EXEC_OBJECT_WRITE.
- *
- *      To avoid stalling, execobject.offset should match the current
- *      address of that object within the active context.
- *
- * The reservation is done in multiple phases. First we try and keep any
- * object already bound in its current location - so long as it meets the
- * constraints imposed by the new execbuffer. Any object left unbound after the
- * first pass is then fitted into any available idle space. If an object does
- * not fit, all objects are removed from the reservation and the process rerun
- * after sorting the objects into a priority order (more difficult to fit
- * objects are tried first). Failing that, the entire VM is cleared and we try
- * to fit the execbuf one last time before concluding that it simply will not
- * fit.
- *
- * A small complication to all of this is that we allow userspace not only to
- * specify an alignment and a size for the object in the address space, but
- * we also allow userspace to specify the exact offset. These objects are
- * simpler to place (the location is known a priori); all we have to do is make
- * sure the space is available.
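The userspace half of the NO_RELOC contract is the relocation entry itself. A hedged sketch of filling one (assuming the batch already contains the presumed address at byte `offset`; the helper name is editorial):

	#include <stdint.h>
	#include <drm/i915_drm.h>

	/*
	 * Point the pointer at byte `offset` inside the batch at `target_handle`,
	 * recording the address already written into the batch in presumed_offset
	 * so that the kernel may skip the rewrite (I915_EXEC_NO_RELOC contract).
	 */
	static void fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
			       uint32_t target_handle, uint64_t presumed,
			       uint32_t offset)
	{
		reloc->target_handle = target_handle;
		reloc->delta = 0;
		reloc->offset = offset; /* byte offset of the pointer in the batch */
		reloc->presumed_offset = presumed;
		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
		reloc->write_domain = 0;
	}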
- *
- * Once all the objects are in place, patching up the buried pointers to point
- * to the final locations is a fairly simple job of walking over the relocation
- * entry arrays, looking up the right address and rewriting the value into
- * the object. Simple! ... The relocation entries are stored in user memory
- * and so to access them we have to copy them into a local buffer. That copy
- * has to avoid taking any pagefaults as they may lead back to a GEM object
- * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
- * the relocation into multiple passes. First we try to do everything within an
- * atomic context (avoid the pagefaults) which requires that we never wait. If
- * we detect that we may wait, or if we need to fault, then we have to fallback
- * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
- * bells yet?) Dropping the mutex means that we lose all the state we have
- * built up so far for the execbuf and we must reset any global data. However,
- * we do leave the objects pinned in their final locations - which is a
- * potential issue for concurrent execbufs. Once we have left the mutex, we can
- * allocate and copy all the relocation entries into a large array at our
- * leisure, reacquire the mutex, reclaim all the objects and other state and
- * then proceed to update any incorrect addresses with the objects.
- *
- * As we process the relocation entries, we maintain a record of whether the
- * object is being written to. Using NO_RELOC, we expect userspace to provide
- * this information instead. We also check whether we can skip the relocation
- * by comparing the expected value inside the relocation entry with the target's
- * final address. If they differ, we have to map the current object and rewrite
- * the 4 or 8 byte pointer within.
- *
- * Serialising an execbuf is quite simple according to the rules of the GEM
- * ABI. Execution within each context is ordered by the order of submission.
- * Writes to any GEM object are in order of submission and are exclusive. Reads
- * from a GEM object are unordered with respect to other reads, but ordered by
- * writes. A write submitted after a read cannot occur before the read, and
- * similarly any read submitted after a write cannot occur before the write.
- * Writes are ordered between engines such that only one write occurs at any
- * time (completing any reads beforehand) - using semaphores where available
- * and CPU serialisation otherwise. Other GEM accesses obey the same rules, any
- * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
- * reads before starting, and any read (either using set-domain or pread) must
- * flush all GPU writes before starting. (Note we only employ a barrier before,
- * we currently rely on userspace not concurrently starting a new execution
- * whilst reading or writing to an object. This may be an advantage or not
- * depending on how much you trust userspace not to shoot themselves in the
- * foot.) Serialisation may just result in the request being inserted into
- * a DAG awaiting its turn, but the simplest is to wait on the CPU until
- * all dependencies are resolved.
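The set-domain barrier mentioned above is a single ioctl from userspace; a minimal sketch (assuming an open DRM fd and a GEM handle; editorial, not from this patch):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* flush GPU writes and pull the bo into the CPU domain before reading it */
	static int prepare_cpu_read(int fd, uint32_t handle)
	{
		struct drm_i915_gem_set_domain sd = {
			.handle = handle,
			.read_domains = I915_GEM_DOMAIN_CPU,
			.write_domain = 0,
		};

		return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
	}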
- *
- * After all of that, it is just a matter of closing the request and handing it
- * to the hardware (well, leaving it in a queue to be executed). However, we
- * also offer the ability for batchbuffers to be run with elevated privileges
- * so that they access otherwise hidden registers. (Used to adjust L3 cache
- * etc.) Before any batch is given extra privileges we first must check that it
- * contains no nefarious instructions: we check that each instruction is from
- * our whitelist and all registers are also from an allowed list. We first
- * copy the user's batchbuffer to a shadow (so that the user doesn't have
- * access to it, either by the CPU or GPU as we scan it) and then parse each
- * instruction. If everything is ok, we set a flag telling the hardware to run
- * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
- */
-
-struct i915_execbuffer {
-	struct drm_i915_private *i915; /** i915 backpointer */
-	struct drm_file *file; /** per-file lookup tables and limits */
-	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
-	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
-	struct i915_vma **vma;
-	unsigned int *flags;
-
-	struct intel_engine_cs *engine; /** engine to queue the request to */
-	struct intel_context *context; /* logical state for the request */
-	struct i915_gem_context *gem_context; /** caller's context */
-	struct i915_address_space *vm; /** GTT and vma for the request */
-
-	struct i915_request *request; /** our request to build */
-	struct i915_vma *batch; /** identity of the batch obj/vma */
-
-	/** actual size of execobj[] as we may extend it for the cmdparser */
-	unsigned int buffer_count;
-
-	/** list of vma not yet bound during reservation phase */
-	struct list_head unbound;
-
-	/** list of vma that have execobj.relocation_count */
-	struct list_head relocs;
-
-	/**
-	 * Track the most recently used object for relocations, as we
-	 * frequently have to perform multiple relocations within the same
-	 * obj/page
-	 */
-	struct reloc_cache {
-		struct drm_mm_node node; /** temporary GTT binding */
-		unsigned long vaddr; /** Current kmap address */
-		unsigned long page; /** Currently mapped page index */
-		unsigned int gen; /** Cached value of INTEL_GEN */
-		bool use_64bit_reloc : 1;
-		bool has_llc : 1;
-		bool has_fence : 1;
-		bool needs_unfenced : 1;
-
-		struct i915_request *rq;
-		u32 *rq_cmd;
-		unsigned int rq_size;
-	} reloc_cache;
-
-	u64 invalid_flags; /** Set of execobj.flags that are invalid */
-	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
-
-	u32 batch_start_offset; /** Location within object of batch */
-	u32 batch_len; /** Length of batch within object */
-	u32 batch_flags; /** Flags composed for emit_bb_start() */
-
-	/**
-	 * Indicate either the size of the hashtable used to resolve
-	 * relocation handles, or if negative that we are using a direct
-	 * index into the execobj[].
-	 */
-	int lut_size;
-	struct hlist_head *buckets; /** ht for relocation handles */
-};
-
-#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
-
-/*
- * Used to convert any address to canonical form.
- * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
- * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
- * addresses to be in a canonical form:
- * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
- * canonical form [63:48] == [47]."
- */
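As a standalone illustration of that canonical-form rule (the kernel's gen8_canonical_addr(), defined just below, uses sign_extend64()), this userspace rewrite shows bit 47 being replicated into bits [63:48]:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	#define HIGH_BIT 47

	/* userspace equivalent of sign_extend64(address, 47) */
	static uint64_t canonical_addr(uint64_t address)
	{
		return (uint64_t)((int64_t)(address << (63 - HIGH_BIT)) >>
				  (63 - HIGH_BIT));
	}

	int main(void)
	{
		/* bit 47 set: upper bits must become all ones */
		assert(canonical_addr(0x0000800000000000ull) ==
		       0xffff800000000000ull);
		/* bit 47 clear: address is unchanged */
		assert(canonical_addr(0x00007fffffffffffull) ==
		       0x00007fffffffffffull);
		printf("bit 47 replicated into [63:48]\n");
		return 0;
	}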
-#define GEN8_HIGH_ADDRESS_BIT 47
-static inline u64 gen8_canonical_addr(u64 address)
-{
-	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
-}
-
-static inline u64 gen8_noncanonical_addr(u64 address)
-{
-	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
-}
-
-static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
-{
-	return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
-}
-
-static int eb_create(struct i915_execbuffer *eb)
-{
-	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
-		unsigned int size = 1 + ilog2(eb->buffer_count);
-
-		/*
-		 * Without a 1:1 association between relocation handles and
-		 * the execobject[] index, we instead create a hashtable.
-		 * We size it dynamically based on available memory, starting
-		 * first with 1:1 associative hash and scaling back until
-		 * the allocation succeeds.
-		 *
-		 * Later on we use a positive lut_size to indicate we are
-		 * using this hashtable, and a negative value to indicate a
-		 * direct lookup.
-		 */
-		do {
-			gfp_t flags;
-
-			/* While we can still reduce the allocation size, don't
-			 * raise a warning and allow the allocation to fail.
-			 * On the last pass though, we want to try as hard
-			 * as possible to perform the allocation and warn
-			 * if it fails.
-			 */
-			flags = GFP_KERNEL;
-			if (size > 1)
-				flags |= __GFP_NORETRY | __GFP_NOWARN;
-
-			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
-					      flags);
-			if (eb->buckets)
-				break;
-		} while (--size);
-
-		if (unlikely(!size))
-			return -ENOMEM;
-
-		eb->lut_size = size;
-	} else {
-		eb->lut_size = -eb->buffer_count;
-	}
-
-	return 0;
-}
-
-static bool
-eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
-		 const struct i915_vma *vma,
-		 unsigned int flags)
-{
-	if (vma->node.size < entry->pad_to_size)
-		return true;
-
-	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
-		return true;
-
-	if (flags & EXEC_OBJECT_PINNED &&
-	    vma->node.start != entry->offset)
-		return true;
-
-	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
-	    vma->node.start < BATCH_OFFSET_BIAS)
-		return true;
-
-	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
-	    (vma->node.start + vma->node.size - 1) >> 32)
-		return true;
-
-	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
-	    !i915_vma_is_map_and_fenceable(vma))
-		return true;
-
-	return false;
-}
-
-static inline bool
-eb_pin_vma(struct i915_execbuffer *eb,
-	   const struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
-{
-	unsigned int exec_flags = *vma->exec_flags;
-	u64 pin_flags;
-
-	if (vma->node.size)
-		pin_flags = vma->node.start;
-	else
-		pin_flags = entry->offset & PIN_OFFSET_MASK;
-
-	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
-	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
-		pin_flags |= PIN_GLOBAL;
-
-	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
-		return false;
-
-	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		if (unlikely(i915_vma_pin_fence(vma))) {
-			i915_vma_unpin(vma);
-			return false;
-		}
-
-		if (vma->fence)
-			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
-	}
-
-	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
-	return !eb_vma_misplaced(entry, vma, exec_flags);
-}
-
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
-	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
-	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
-		__i915_vma_unpin_fence(vma);
-
-	__i915_vma_unpin(vma);
-}
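The sizing-and-backoff in eb_create() can be modelled in plain C. A toy sketch (userspace calloc standing in for kzalloc; assumes buffer_count >= 1; the function name is editorial):

	#include <stdio.h>
	#include <stdlib.h>

	/*
	 * Toy model of eb_create()'s backoff: start at roughly
	 * 1 + ilog2(buffer_count) bucket bits and halve the table
	 * until the allocation succeeds.
	 */
	static void *alloc_buckets(unsigned int buffer_count, unsigned int *lut_size)
	{
		unsigned int size = 1;
		void *buckets;

		while ((1u << size) < buffer_count) /* ~ 1 + ilog2(count) */
			size++;

		do {
			buckets = calloc(1u << size, sizeof(void *));
			if (buckets)
				break;
		} while (--size);

		*lut_size = size; /* 0 means total failure, as in -ENOMEM */
		return buckets;
	}

	int main(void)
	{
		unsigned int bits;
		void *table = alloc_buckets(100, &bits);

		printf("%s with %u bucket bits\n",
		       table ? "allocated" : "failed", bits);
		free(table);
		return 0;
	}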
-
-static inline void
-eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
-{
-	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
-		return;
-
-	__eb_unreserve_vma(vma, *flags);
-	*flags &= ~__EXEC_OBJECT_RESERVED;
-}
-
-static int
-eb_validate_vma(struct i915_execbuffer *eb,
-		struct drm_i915_gem_exec_object2 *entry,
-		struct i915_vma *vma)
-{
-	if (unlikely(entry->flags & eb->invalid_flags))
-		return -EINVAL;
-
-	if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
-		return -EINVAL;
-
-	/*
-	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
-	 * any non-page-aligned or non-canonical addresses.
-	 */
-	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
-		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
-		return -EINVAL;
-
-	/* pad_to_size was once a reserved field, so sanitize it */
-	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
-		if (unlikely(offset_in_page(entry->pad_to_size)))
-			return -EINVAL;
-	} else {
-		entry->pad_to_size = 0;
-	}
-
-	if (unlikely(vma->exec_flags)) {
-		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
-			  entry->handle, (int)(entry - eb->exec));
-		return -EINVAL;
-	}
-
-	/*
-	 * From the drm_mm perspective, the address space is continuous,
-	 * so from this point we're always using non-canonical
-	 * form internally.
-	 */
-	entry->offset = gen8_noncanonical_addr(entry->offset);
-
-	if (!eb->reloc_cache.has_fence) {
-		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
-	} else {
-		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
-		     eb->reloc_cache.needs_unfenced) &&
-		    i915_gem_object_is_tiled(vma->obj))
-			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
-	}
-
-	if (!(entry->flags & EXEC_OBJECT_PINNED))
-		entry->flags |= eb->context_flags;
-
-	return 0;
-}
-
-static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   unsigned int i, unsigned batch_idx,
-	   struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-	int err;
-
-	GEM_BUG_ON(i915_vma_is_closed(vma));
-
-	if (!(eb->args->flags & __EXEC_VALIDATED)) {
-		err = eb_validate_vma(eb, entry, vma);
-		if (unlikely(err))
-			return err;
-	}
-
-	if (eb->lut_size > 0) {
-		vma->exec_handle = entry->handle;
-		hlist_add_head(&vma->exec_node,
-			       &eb->buckets[hash_32(entry->handle,
-						    eb->lut_size)]);
-	}
-
-	if (entry->relocation_count)
-		list_add_tail(&vma->reloc_link, &eb->relocs);
-
-	/*
-	 * Stash a pointer from the vma to execobj, so we can query its flags,
-	 * size, alignment etc as provided by the user. Also we stash a pointer
-	 * to the vma inside the execobj so that we can use a direct lookup
-	 * to find the right target VMA when doing relocations.
-	 */
-	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags;
-	vma->exec_flags = &eb->flags[i];
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if (i == batch_idx) {
-		if (entry->relocation_count &&
-		    !(eb->flags[i] & EXEC_OBJECT_PINNED))
-			eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
-		if (eb->reloc_cache.has_fence)
-			eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
-
-		eb->batch = vma;
-	}
-
-	err = 0;
-	if (eb_pin_vma(eb, entry, vma)) {
-		if (entry->offset != vma->node.start) {
-			entry->offset = vma->node.start | UPDATE;
-			eb->args->flags |= __EXEC_HAS_RELOC;
-		}
-	} else {
-		eb_unreserve_vma(vma, vma->exec_flags);
-
-		list_add_tail(&vma->exec_link, &eb->unbound);
-		if (drm_mm_node_allocated(&vma->node))
-			err = i915_vma_unbind(vma);
-		if (unlikely(err))
-			vma->exec_flags = NULL;
-	}
-	return err;
-}
-
-static inline int use_cpu_reloc(const struct reloc_cache *cache,
-				const struct drm_i915_gem_object *obj)
-{
-	if (!i915_gem_object_has_struct_page(obj))
-		return false;
-
-	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
-		return true;
-
-	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
-		return false;
-
-	return (cache->has_llc ||
-		obj->cache_dirty ||
-		obj->cache_level != I915_CACHE_NONE);
-}
-
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
-			  struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
-	unsigned int exec_flags = *vma->exec_flags;
-	u64 pin_flags;
-	int err;
-
-	pin_flags = PIN_USER | PIN_NONBLOCK;
-	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
-		pin_flags |= PIN_GLOBAL;
-
-	/*
-	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
-	 * limit address to the first 4GBs for unflagged objects.
-	 */
-	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-		pin_flags |= PIN_ZONE_4G;
-
-	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
-		pin_flags |= PIN_MAPPABLE;
-
-	if (exec_flags & EXEC_OBJECT_PINNED) {
-		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
-		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
-	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
-		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
-	}
-
-	err = i915_vma_pin(vma,
-			   entry->pad_to_size, entry->alignment,
-			   pin_flags);
-	if (err)
-		return err;
-
-	if (entry->offset != vma->node.start) {
-		entry->offset = vma->node.start | UPDATE;
-		eb->args->flags |= __EXEC_HAS_RELOC;
-	}
-
-	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		err = i915_vma_pin_fence(vma);
-		if (unlikely(err)) {
-			i915_vma_unpin(vma);
-			return err;
-		}
-
-		if (vma->fence)
-			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
-	}
-
-	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
-	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
-
-	return 0;
-}
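On the userspace side, an object escapes the 4GiB clamp above by setting EXEC_OBJECT_SUPPORTS_48B_ADDRESS, and an exact placement is requested through execobject.offset plus EXEC_OBJECT_PINNED. A hedged sketch (the helper name is editorial; the offset must already be in canonical form once it crosses bit 47):

	#include <stdint.h>
	#include <drm/i915_drm.h>

	/* ask for a softpinned, 48b-capable placement for this execobject */
	static void request_fixed_placement(struct drm_i915_gem_exec_object2 *obj,
					    uint64_t gpu_addr)
	{
		obj->offset = gpu_addr; /* canonical, cf. gen8_canonical_addr() */
		obj->flags |= EXEC_OBJECT_PINNED |
			      EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
	}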
-
-static int eb_reserve(struct i915_execbuffer *eb)
-{
-	const unsigned int count = eb->buffer_count;
-	struct list_head last;
-	struct i915_vma *vma;
-	unsigned int i, pass;
-	int err;
-
-	/*
-	 * Attempt to pin all of the buffers into the GTT.
-	 * This is done in 3 phases:
-	 *
-	 * 1a. Unbind all objects that do not match the GTT constraints for
-	 *     the execbuffer (fenceable, mappable, alignment etc).
-	 * 1b. Increment pin count for already bound objects.
-	 * 2. Bind new objects.
-	 * 3. Decrement pin count.
-	 *
-	 * This avoids unnecessary unbinding of later objects in order to make
-	 * room for the earlier objects *unless* we need to defragment.
-	 */
-
-	pass = 0;
-	err = 0;
-	do {
-		list_for_each_entry(vma, &eb->unbound, exec_link) {
-			err = eb_reserve_vma(eb, vma);
-			if (err)
-				break;
-		}
-		if (err != -ENOSPC)
-			return err;
-
-		/* Resort *all* the objects into priority order */
-		INIT_LIST_HEAD(&eb->unbound);
-		INIT_LIST_HEAD(&last);
-		for (i = 0; i < count; i++) {
-			unsigned int flags = eb->flags[i];
-			struct i915_vma *vma = eb->vma[i];
-
-			if (flags & EXEC_OBJECT_PINNED &&
-			    flags & __EXEC_OBJECT_HAS_PIN)
-				continue;
-
-			eb_unreserve_vma(vma, &eb->flags[i]);
-
-			if (flags & EXEC_OBJECT_PINNED)
-				/* Pinned must have their slot */
-				list_add(&vma->exec_link, &eb->unbound);
-			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
-				/* Mappable objects require the lowest 256MiB (aperture) */
-				list_add_tail(&vma->exec_link, &eb->unbound);
-			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-				/* Prioritise 4GiB region for restricted bo */
-				list_add(&vma->exec_link, &last);
-			else
-				list_add_tail(&vma->exec_link, &last);
-		}
-		list_splice_tail(&last, &eb->unbound);
-
-		switch (pass++) {
-		case 0:
-			break;
-
-		case 1:
-			/* Too fragmented, unbind everything and retry */
-			err = i915_gem_evict_vm(eb->vm);
-			if (err)
-				return err;
-			break;
-
-		default:
-			return -ENOSPC;
-		}
-	} while (1);
-}
-
-static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
-{
-	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
-		return 0;
-	else
-		return eb->buffer_count - 1;
-}
-
-static int eb_select_context(struct i915_execbuffer *eb)
-{
-	struct i915_gem_context *ctx;
-
-	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
-	if (unlikely(!ctx))
-		return -ENOENT;
-
-	eb->gem_context = ctx;
-	if (ctx->ppgtt) {
-		eb->vm = &ctx->ppgtt->vm;
-		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
-	} else {
-		eb->vm = &eb->i915->ggtt.vm;
-	}
-
-	eb->context_flags = 0;
-	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
-		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
-	return 0;
-}
-
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
-	struct i915_request *rq;
-
-	/*
-	 * Completely unscientific finger-in-the-air estimates for suitable
-	 * maximum user request size (to avoid blocking) and then backoff.
-	 */
-	if (intel_ring_update_space(ring) >= PAGE_SIZE)
-		return NULL;
-
-	/*
-	 * Find a request that after waiting upon, there will be at least half
-	 * the ring available. The hysteresis allows us to compete for the
-	 * shared ring and should mean that we sleep less often prior to
-	 * claiming our resources, but not so long that the ring completely
-	 * drains before we can submit our next request.
-	 */
-	list_for_each_entry(rq, &ring->request_list, ring_link) {
-		if (__intel_ring_space(rq->postfix,
-				       ring->emit, ring->size) > ring->size / 2)
-			break;
-	}
-	if (&rq->ring_link == &ring->request_list)
-		return NULL; /* weird, we will check again later for real */
-
-	return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
-	struct i915_request *rq;
-	int ret = 0;
-
-	/*
-	 * Apply a light amount of backpressure to prevent excessive hogs
-	 * from blocking waiting for space whilst holding struct_mutex and
-	 * keeping all of their resources pinned.
- */ - - rq = __eb_wait_for_ring(eb->context->ring); - if (rq) { - mutex_unlock(&eb->i915->drm.struct_mutex); - - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) - ret = -EINTR; - - i915_request_put(rq); - - mutex_lock(&eb->i915->drm.struct_mutex); - } - - return ret; -} - -static int eb_lookup_vmas(struct i915_execbuffer *eb) -{ - struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; - struct drm_i915_gem_object *obj; - unsigned int i, batch; - int err; - - if (unlikely(i915_gem_context_is_closed(eb->gem_context))) - return -ENOENT; - - if (unlikely(i915_gem_context_is_banned(eb->gem_context))) - return -EIO; - - INIT_LIST_HEAD(&eb->relocs); - INIT_LIST_HEAD(&eb->unbound); - - batch = eb_batch_index(eb); - - for (i = 0; i < eb->buffer_count; i++) { - u32 handle = eb->exec[i].handle; - struct i915_lut_handle *lut; - struct i915_vma *vma; - - vma = radix_tree_lookup(handles_vma, handle); - if (likely(vma)) - goto add_vma; - - obj = i915_gem_object_lookup(eb->file, handle); - if (unlikely(!obj)) { - err = -ENOENT; - goto err_vma; - } - - vma = i915_vma_instance(obj, eb->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - lut = i915_lut_handle_alloc(); - if (unlikely(!lut)) { - err = -ENOMEM; - goto err_obj; - } - - err = radix_tree_insert(handles_vma, handle, vma); - if (unlikely(err)) { - i915_lut_handle_free(lut); - goto err_obj; - } - - /* transfer ref to ctx */ - if (!vma->open_count++) - i915_vma_reopen(vma); - list_add(&lut->obj_link, &obj->lut_list); - list_add(&lut->ctx_link, &eb->gem_context->handles_list); - lut->ctx = eb->gem_context; - lut->handle = handle; - -add_vma: - err = eb_add_vma(eb, i, batch, vma); - if (unlikely(err)) - goto err_vma; - - GEM_BUG_ON(vma != eb->vma[i]); - GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); - GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && - eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i])); - } - - eb->args->flags |= __EXEC_VALIDATED; - return eb_reserve(eb); - -err_obj: - i915_gem_object_put(obj); -err_vma: - eb->vma[i] = NULL; - return err; -} - -static struct i915_vma * -eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) -{ - if (eb->lut_size < 0) { - if (handle >= -eb->lut_size) - return NULL; - return eb->vma[handle]; - } else { - struct hlist_head *head; - struct i915_vma *vma; - - head = &eb->buckets[hash_32(handle, eb->lut_size)]; - hlist_for_each_entry(vma, head, exec_node) { - if (vma->exec_handle == handle) - return vma; - } - return NULL; - } -} - -static void eb_release_vmas(const struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - - for (i = 0; i < count; i++) { - struct i915_vma *vma = eb->vma[i]; - unsigned int flags = eb->flags[i]; - - if (!vma) - break; - - GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); - vma->exec_flags = NULL; - eb->vma[i] = NULL; - - if (flags & __EXEC_OBJECT_HAS_PIN) - __eb_unreserve_vma(vma, flags); - - if (flags & __EXEC_OBJECT_HAS_REF) - i915_vma_put(vma); - } -} - -static void eb_reset_vmas(const struct i915_execbuffer *eb) -{ - eb_release_vmas(eb); - if (eb->lut_size > 0) - memset(eb->buckets, 0, - sizeof(struct hlist_head) << eb->lut_size); -} - -static void eb_destroy(const struct i915_execbuffer *eb) -{ - GEM_BUG_ON(eb->reloc_cache.rq); - - if (eb->lut_size > 0) - kfree(eb->buckets); -} - -static inline u64 -relocation_target(const struct drm_i915_gem_relocation_entry *reloc, - const struct i915_vma *target) -{ - return gen8_canonical_addr((int)reloc->delta + 
target->node.start); -} - -static void reloc_cache_init(struct reloc_cache *cache, - struct drm_i915_private *i915) -{ - cache->page = -1; - cache->vaddr = 0; - /* Must be a variable in the struct to allow GCC to unroll. */ - cache->gen = INTEL_GEN(i915); - cache->has_llc = HAS_LLC(i915); - cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); - cache->has_fence = cache->gen < 4; - cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; - cache->node.allocated = false; - cache->rq = NULL; - cache->rq_size = 0; -} - -static inline void *unmask_page(unsigned long p) -{ - return (void *)(uintptr_t)(p & PAGE_MASK); -} - -static inline unsigned int unmask_flags(unsigned long p) -{ - return p & ~PAGE_MASK; -} - -#define KMAP 0x4 /* after CLFLUSH_FLAGS */ - -static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) -{ - struct drm_i915_private *i915 = - container_of(cache, struct i915_execbuffer, reloc_cache)->i915; - return &i915->ggtt; -} - -static void reloc_gpu_flush(struct reloc_cache *cache) -{ - GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - - __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); - i915_gem_object_unpin_map(cache->rq->batch->obj); - - i915_gem_chipset_flush(cache->rq->i915); - - i915_request_add(cache->rq); - cache->rq = NULL; -} - -static void reloc_cache_reset(struct reloc_cache *cache) -{ - void *vaddr; - - if (cache->rq) - reloc_gpu_flush(cache); - - if (!cache->vaddr) - return; - - vaddr = unmask_page(cache->vaddr); - if (cache->vaddr & KMAP) { - if (cache->vaddr & CLFLUSH_AFTER) - mb(); - - kunmap_atomic(vaddr); - i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); - } else { - wmb(); - io_mapping_unmap_atomic((void __iomem *)vaddr); - if (cache->node.allocated) { - struct i915_ggtt *ggtt = cache_to_ggtt(cache); - - ggtt->vm.clear_range(&ggtt->vm, - cache->node.start, - cache->node.size); - drm_mm_remove_node(&cache->node); - } else { - i915_vma_unpin((struct i915_vma *)cache->node.mm); - } - } - - cache->vaddr = 0; - cache->page = -1; -} - -static void *reloc_kmap(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - void *vaddr; - - if (cache->vaddr) { - kunmap_atomic(unmask_page(cache->vaddr)); - } else { - unsigned int flushes; - int err; - - err = i915_gem_object_prepare_write(obj, &flushes); - if (err) - return ERR_PTR(err); - - BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); - BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); - - cache->vaddr = flushes | KMAP; - cache->node.mm = (void *)obj; - if (flushes) - mb(); - } - - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); - cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; - cache->page = page; - - return vaddr; -} - -static void *reloc_iomap(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - struct i915_ggtt *ggtt = cache_to_ggtt(cache); - unsigned long offset; - void *vaddr; - - if (cache->vaddr) { - io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); - } else { - struct i915_vma *vma; - int err; - - if (use_cpu_reloc(cache, obj)) - return NULL; - - err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return ERR_PTR(err); - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); - if (IS_ERR(vma)) { - memset(&cache->node, 0, sizeof(cache->node)); - err = drm_mm_insert_node_in_range - 
(&ggtt->vm.mm, &cache->node,
-							  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
-							  0, ggtt->mappable_end,
-							  DRM_MM_INSERT_LOW);
-			if (err) /* no inactive aperture space, use cpu reloc */
-				return NULL;
-		} else {
-			err = i915_vma_put_fence(vma);
-			if (err) {
-				i915_vma_unpin(vma);
-				return ERR_PTR(err);
-			}
-
-			cache->node.start = vma->node.start;
-			cache->node.mm = (void *)vma;
-		}
-	}
-
-	offset = cache->node.start;
-	if (cache->node.allocated) {
-		wmb();
-		ggtt->vm.insert_page(&ggtt->vm,
-				     i915_gem_object_get_dma_address(obj, page),
-				     offset, I915_CACHE_NONE, 0);
-	} else {
-		offset += page << PAGE_SHIFT;
-	}
-
-	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
-							 offset);
-	cache->page = page;
-	cache->vaddr = (unsigned long)vaddr;
-
-	return vaddr;
-}
-
-static void *reloc_vaddr(struct drm_i915_gem_object *obj,
-			 struct reloc_cache *cache,
-			 unsigned long page)
-{
-	void *vaddr;
-
-	if (cache->page == page) {
-		vaddr = unmask_page(cache->vaddr);
-	} else {
-		vaddr = NULL;
-		if ((cache->vaddr & KMAP) == 0)
-			vaddr = reloc_iomap(obj, cache, page);
-		if (!vaddr)
-			vaddr = reloc_kmap(obj, cache, page);
-	}
-
-	return vaddr;
-}
-
-static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
-{
-	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
-		if (flushes & CLFLUSH_BEFORE) {
-			clflushopt(addr);
-			mb();
-		}
-
-		*addr = value;
-
-		/*
-		 * Writes to the same cacheline are serialised by the CPU
-		 * (including clflush). On the write path, we only require
-		 * that it hits memory in an orderly fashion and place
-		 * mb barriers at the start and end of the relocation phase
-		 * to ensure ordering of clflush with respect to the system.
-		 */
-		if (flushes & CLFLUSH_AFTER)
-			clflushopt(addr);
-	} else
-		*addr = value;
-}
-
-static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
-			     struct i915_vma *vma,
-			     unsigned int len)
-{
-	struct reloc_cache *cache = &eb->reloc_cache;
-	struct drm_i915_gem_object *obj;
-	struct i915_request *rq;
-	struct i915_vma *batch;
-	u32 *cmd;
-	int err;
-
-	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) {
-		obj = vma->obj;
-		if (obj->cache_dirty & ~obj->cache_coherent)
-			i915_gem_clflush_object(obj, 0);
-		obj->write_domain = 0;
-	}
-
-	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);
-
-	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	cmd = i915_gem_object_pin_map(obj,
-				      cache->has_llc ?
-				      I915_MAP_FORCE_WB :
-				      I915_MAP_FORCE_WC);
-	i915_gem_object_unpin_pages(obj);
-	if (IS_ERR(cmd))
-		return PTR_ERR(cmd);
-
-	batch = i915_vma_instance(obj, vma->vm, NULL);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto err_unmap;
-	}
-
-	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
-	if (err)
-		goto err_unmap;
-
-	rq = i915_request_create(eb->context);
-	if (IS_ERR(rq)) {
-		err = PTR_ERR(rq);
-		goto err_unpin;
-	}
-
-	err = i915_request_await_object(rq, vma->obj, true);
-	if (err)
-		goto err_request;
-
-	err = eb->engine->emit_bb_start(rq,
-					batch->node.start, PAGE_SIZE,
-					cache->gen > 5 ?
0 : I915_DISPATCH_SECURE); - if (err) - goto err_request; - - GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - rq->batch = batch; - i915_vma_unpin(batch); - - cache->rq = rq; - cache->rq_cmd = cmd; - cache->rq_size = 0; - - /* Return with batch mapping (cmd) still pinned */ - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(batch); -err_unmap: - i915_gem_object_unpin_map(obj); - return err; -} - -static u32 *reloc_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(cache); - - if (unlikely(!cache->rq)) { - int err; - - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - - if (!intel_engine_can_store_dword(eb->engine)) - return ERR_PTR(-ENODEV); - - err = __reloc_gpu_alloc(eb, vma, len); - if (unlikely(err)) - return ERR_PTR(err); - } - - cmd = cache->rq_cmd + cache->rq_size; - cache->rq_size += len; - - return cmd; -} - -static u64 -relocate_entry(struct i915_vma *vma, - const struct drm_i915_gem_relocation_entry *reloc, - struct i915_execbuffer *eb, - const struct i915_vma *target) -{ - u64 offset = reloc->offset; - u64 target_offset = relocation_target(reloc, target); - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; - - if (!eb->reloc_cache.vaddr && - (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - !reservation_object_test_signaled_rcu(vma->resv, true))) { - const unsigned int gen = eb->reloc_cache.gen; - unsigned int len; - u32 *batch; - u64 addr; - - if (wide) - len = offset & 7 ? 
8 : 5;
-		else if (gen >= 4)
-			len = 4;
-		else
-			len = 3;
-
-		batch = reloc_gpu(eb, vma, len);
-		if (IS_ERR(batch))
-			goto repeat;
-
-		addr = gen8_canonical_addr(vma->node.start + offset);
-		if (wide) {
-			if (offset & 7) {
-				*batch++ = MI_STORE_DWORD_IMM_GEN4;
-				*batch++ = lower_32_bits(addr);
-				*batch++ = upper_32_bits(addr);
-				*batch++ = lower_32_bits(target_offset);
-
-				addr = gen8_canonical_addr(addr + 4);
-
-				*batch++ = MI_STORE_DWORD_IMM_GEN4;
-				*batch++ = lower_32_bits(addr);
-				*batch++ = upper_32_bits(addr);
-				*batch++ = upper_32_bits(target_offset);
-			} else {
-				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
-				*batch++ = lower_32_bits(addr);
-				*batch++ = upper_32_bits(addr);
-				*batch++ = lower_32_bits(target_offset);
-				*batch++ = upper_32_bits(target_offset);
-			}
-		} else if (gen >= 6) {
-			*batch++ = MI_STORE_DWORD_IMM_GEN4;
-			*batch++ = 0;
-			*batch++ = addr;
-			*batch++ = target_offset;
-		} else if (gen >= 4) {
-			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-			*batch++ = 0;
-			*batch++ = addr;
-			*batch++ = target_offset;
-		} else {
-			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-			*batch++ = addr;
-			*batch++ = target_offset;
-		}
-
-		goto out;
-	}
-
-repeat:
-	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
-	if (IS_ERR(vaddr))
-		return PTR_ERR(vaddr);
-
-	clflush_write32(vaddr + offset_in_page(offset),
-			lower_32_bits(target_offset),
-			eb->reloc_cache.vaddr);
-
-	if (wide) {
-		offset += sizeof(u32);
-		target_offset >>= 32;
-		wide = false;
-		goto repeat;
-	}
-
-out:
-	return target->node.start | UPDATE;
-}
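The CPU fallback in relocate_entry() performs the same split as the wide GPU path above: one dword write per half of the 64-bit pointer. A runnable editorial illustration (little-endian assumed, as on the platforms in question; patch_qword() is a hypothetical stand-in, not driver code):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* patch an 8-byte GPU pointer as two dword writes, low half first */
	static void patch_qword(void *batch, uint64_t offset, uint64_t target)
	{
		uint32_t lo = (uint32_t)target;
		uint32_t hi = (uint32_t)(target >> 32);

		memcpy((char *)batch + offset, &lo, sizeof(lo));
		memcpy((char *)batch + offset + 4, &hi, sizeof(hi));
	}

	int main(void)
	{
		uint64_t page[512] = { 0 };

		patch_qword(page, 16, 0xffff800012345678ull);
		assert(page[2] == 0xffff800012345678ull); /* little-endian */
		printf("patched: %#llx\n", (unsigned long long)page[2]);
		return 0;
	}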
-
-static u64
-eb_relocate_entry(struct i915_execbuffer *eb,
-		  struct i915_vma *vma,
-		  const struct drm_i915_gem_relocation_entry *reloc)
-{
-	struct i915_vma *target;
-	int err;
-
-	/* we already hold a reference to all valid objects */
-	target = eb_get_vma(eb, reloc->target_handle);
-	if (unlikely(!target))
-		return -ENOENT;
-
-	/* Validate that the target is in a valid r/w GPU domain */
-	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
-		DRM_DEBUG("reloc with multiple write domains: "
-			  "target %d offset %d "
-			  "read %08x write %08x",
-			  reloc->target_handle,
-			  (int) reloc->offset,
-			  reloc->read_domains,
-			  reloc->write_domain);
-		return -EINVAL;
-	}
-	if (unlikely((reloc->write_domain | reloc->read_domains)
-		     & ~I915_GEM_GPU_DOMAINS)) {
-		DRM_DEBUG("reloc with read/write non-GPU domains: "
-			  "target %d offset %d "
-			  "read %08x write %08x",
-			  reloc->target_handle,
-			  (int) reloc->offset,
-			  reloc->read_domains,
-			  reloc->write_domain);
-		return -EINVAL;
-	}
-
-	if (reloc->write_domain) {
-		*target->exec_flags |= EXEC_OBJECT_WRITE;
-
-		/*
-		 * Sandybridge PPGTT errata: We need a global gtt mapping
-		 * for MI and pipe_control writes because the gpu doesn't
-		 * properly redirect them through the ppgtt for non_secure
-		 * batchbuffers.
-		 */
-		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
-		    IS_GEN(eb->i915, 6)) {
-			err = i915_vma_bind(target, target->obj->cache_level,
-					    PIN_GLOBAL);
-			if (WARN_ONCE(err,
-				      "Unexpected failure to bind target VMA!"))
-				return err;
-		}
-	}
-
-	/*
-	 * If the relocation already has the right value in it, no
-	 * more work needs to be done.
-	 */
-	if (!DBG_FORCE_RELOC &&
-	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
-		return 0;
-
-	/* Check that the relocation address is valid... */
-	if (unlikely(reloc->offset >
-		     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
-		DRM_DEBUG("Relocation beyond object bounds: "
-			  "target %d offset %d size %d.\n",
-			  reloc->target_handle,
-			  (int)reloc->offset,
-			  (int)vma->size);
-		return -EINVAL;
-	}
-	if (unlikely(reloc->offset & 3)) {
-		DRM_DEBUG("Relocation not 4-byte aligned: "
-			  "target %d offset %d.\n",
-			  reloc->target_handle,
-			  (int)reloc->offset);
-		return -EINVAL;
-	}
-
-	/*
-	 * If we write into the object, we need to force the synchronisation
-	 * barrier, either with an asynchronous clflush or if we executed the
-	 * patching using the GPU (though that should be serialised by the
-	 * timeline). To be completely sure, and since we are required to
-	 * do relocations we are already stalling, disable the user's opt
-	 * out of our synchronisation.
-	 */
-	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
-
-	/* and update the user's relocation entry */
-	return relocate_entry(vma, reloc, eb, target);
-}
-
-static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
-{
-#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
-	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
-	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
-	unsigned int remain;
-
-	urelocs = u64_to_user_ptr(entry->relocs_ptr);
-	remain = entry->relocation_count;
-	if (unlikely(remain > N_RELOC(ULONG_MAX)))
-		return -EINVAL;
-
-	/*
-	 * We must check that the entire relocation array is safe
-	 * to read. However, if the array is not writable the user loses
-	 * the updated relocation values.
-	 */
-	if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
-		return -EFAULT;
-
-	do {
-		struct drm_i915_gem_relocation_entry *r = stack;
-		unsigned int count =
-			min_t(unsigned int, remain, ARRAY_SIZE(stack));
-		unsigned int copied;
-
-		/*
-		 * This is the fast path and we cannot handle a pagefault
-		 * whilst holding the struct mutex lest the user pass in the
-		 * relocations contained within a mmapped bo, for in such a
-		 * case the page fault handler would call i915_gem_fault() and
-		 * we would try to acquire the struct mutex again. Obviously
-		 * this is bad and so lockdep complains vehemently.
-		 */
-		pagefault_disable();
-		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
-		pagefault_enable();
-		if (unlikely(copied)) {
-			remain = -EFAULT;
-			goto out;
-		}
-
-		remain -= count;
-		do {
-			u64 offset = eb_relocate_entry(eb, vma, r);
-
-			if (likely(offset == 0)) {
-			} else if ((s64)offset < 0) {
-				remain = (int)offset;
-				goto out;
-			} else {
-				/*
-				 * Note that reporting an error now
-				 * leaves everything in an inconsistent
-				 * state as we have *already* changed
-				 * the relocation value inside the
-				 * object. As we have not changed the
-				 * reloc.presumed_offset and will not
-				 * change the execobject.offset, on the
-				 * next call we may not rewrite the value
-				 * inside the object, leaving it
-				 * dangling and causing a GPU hang - unless
-				 * userspace dynamically rebuilds the
-				 * relocations on each execbuf rather than
-				 * presuming a static tree.
-				 *
-				 * We did previously check if the relocations
-				 * were writable (access_ok), an error now
-				 * would be a strange race with mprotect,
-				 * having already demonstrated that we
-				 * can read from this userspace address.
- */ - offset = gen8_canonical_addr(offset & ~UPDATE); - if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) { - remain = -EFAULT; - goto out; - } - } - } while (r++, --count); - urelocs += ARRAY_SIZE(stack); - } while (remain); -out: - reloc_cache_reset(&eb->reloc_cache); - return remain; -} - -static int -eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) -{ - const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); - struct drm_i915_gem_relocation_entry *relocs = - u64_to_ptr(typeof(*relocs), entry->relocs_ptr); - unsigned int i; - int err; - - for (i = 0; i < entry->relocation_count; i++) { - u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); - - if ((s64)offset < 0) { - err = (int)offset; - goto err; - } - } - err = 0; -err: - reloc_cache_reset(&eb->reloc_cache); - return err; -} - -static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) -{ - const char __user *addr, *end; - unsigned long size; - char __maybe_unused c; - - size = entry->relocation_count; - if (size == 0) - return 0; - - if (size > N_RELOC(ULONG_MAX)) - return -EINVAL; - - addr = u64_to_user_ptr(entry->relocs_ptr); - size *= sizeof(struct drm_i915_gem_relocation_entry); - if (!access_ok(addr, size)) - return -EFAULT; - - end = addr + size; - for (; addr < end; addr += PAGE_SIZE) { - int err = __get_user(c, addr); - if (err) - return err; - } - return __get_user(c, end - 1); -} - -static int eb_copy_relocations(const struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - int err; - - for (i = 0; i < count; i++) { - const unsigned int nreloc = eb->exec[i].relocation_count; - struct drm_i915_gem_relocation_entry __user *urelocs; - struct drm_i915_gem_relocation_entry *relocs; - unsigned long size; - unsigned long copied; - - if (nreloc == 0) - continue; - - err = check_relocations(&eb->exec[i]); - if (err) - goto err; - - urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); - size = nreloc * sizeof(*relocs); - - relocs = kvmalloc_array(size, 1, GFP_KERNEL); - if (!relocs) { - err = -ENOMEM; - goto err; - } - - /* copy_from_user is limited to < 4GiB */ - copied = 0; - do { - unsigned int len = - min_t(u64, BIT_ULL(31), size - copied); - - if (__copy_from_user((char *)relocs + copied, - (char __user *)urelocs + copied, - len)) { -end_user: - user_access_end(); -end: - kvfree(relocs); - err = -EFAULT; - goto err; - } - - copied += len; - } while (copied < size); - - /* - * As we do not update the known relocation offsets after - * relocating (due to the complexities in lock handling), - * we need to mark them as invalid now so that we force the - * relocation processing next time. Just in case the target - * object is evicted and then rebound into its old - * presumed_offset before the next execbuffer - if that - * happened we would make the mistake of assuming that the - * relocations were valid. 
- */
- if (!user_access_begin(urelocs, size))
- goto end;
-
- for (copied = 0; copied < nreloc; copied++)
- unsafe_put_user(-1,
- &urelocs[copied].presumed_offset,
- end_user);
- user_access_end();
-
- eb->exec[i].relocs_ptr = (uintptr_t)relocs;
- }
-
- return 0;
-
-err:
- while (i--) {
- struct drm_i915_gem_relocation_entry *relocs =
- u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
- if (eb->exec[i].relocation_count)
- kvfree(relocs);
- }
- return err;
-}
-
-static int eb_prefault_relocations(const struct i915_execbuffer *eb)
-{
- const unsigned int count = eb->buffer_count;
- unsigned int i;
-
- if (unlikely(i915_modparams.prefault_disable))
- return 0;
-
- for (i = 0; i < count; i++) {
- int err;
-
- err = check_relocations(&eb->exec[i]);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
-{
- struct drm_device *dev = &eb->i915->drm;
- bool have_copy = false;
- struct i915_vma *vma;
- int err = 0;
-
-repeat:
- if (signal_pending(current)) {
- err = -ERESTARTSYS;
- goto out;
- }
-
- /* We may process another execbuffer during the unlock... */
- eb_reset_vmas(eb);
- mutex_unlock(&dev->struct_mutex);
-
- /*
- * We take 3 passes through the slowpath.
- *
- * 1 - we try to just prefault all the user relocation entries and
- * then attempt to reuse the atomic pagefault disabled fast path again.
- *
- * 2 - we copy the user entries to a local buffer here outside of the
- * lock and allow ourselves to wait upon any rendering before
- * relocations
- *
- * 3 - we already have a local copy of the relocation entries, but
- * were interrupted (EAGAIN) whilst waiting for the objects, try again.
- */
- if (!err) {
- err = eb_prefault_relocations(eb);
- } else if (!have_copy) {
- err = eb_copy_relocations(eb);
- have_copy = err == 0;
- } else {
- cond_resched();
- err = 0;
- }
- if (err) {
- mutex_lock(&dev->struct_mutex);
- goto out;
- }
-
- /* A frequent cause for EAGAIN is currently unavailable client pages */
- flush_workqueue(eb->i915->mm.userptr_wq);
-
- err = i915_mutex_lock_interruptible(dev);
- if (err) {
- mutex_lock(&dev->struct_mutex);
- goto out;
- }
-
- /* reacquire the objects */
- err = eb_lookup_vmas(eb);
- if (err)
- goto err;
-
- GEM_BUG_ON(!eb->batch);
-
- list_for_each_entry(vma, &eb->relocs, reloc_link) {
- if (!have_copy) {
- pagefault_disable();
- err = eb_relocate_vma(eb, vma);
- pagefault_enable();
- if (err)
- goto repeat;
- } else {
- err = eb_relocate_vma_slow(eb, vma);
- if (err)
- goto err;
- }
- }
-
- /*
- * Leave the user relocations as they are, this is the painfully slow
- * path, and we want to avoid the complication of dropping the lock
- * whilst having buffers reserved in the aperture and so causing
- * spurious ENOSPC for random operations.
- */
-
-err:
- if (err == -EAGAIN)
- goto repeat;
-
-out:
- if (have_copy) {
- const unsigned int count = eb->buffer_count;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- const struct drm_i915_gem_exec_object2 *entry =
- &eb->exec[i];
- struct drm_i915_gem_relocation_entry *relocs;
-
- if (!entry->relocation_count)
- continue;
-
- relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
- kvfree(relocs);
- }
- }
-
- return err;
-}
-
-static int eb_relocate(struct i915_execbuffer *eb)
-{
- if (eb_lookup_vmas(eb))
- goto slow;
-
- /* The objects are in their final locations, apply the relocations.
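The __copy_from_user() loop in eb_copy_relocations() above is chunked because a single user copy cannot move 4GiB or more at once, so large relocation arrays are pulled in BIT_ULL(31)-sized pieces. A standalone sketch of the same pattern, with memcpy standing in for the user copy and all names here illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHUNK (1ull << 31)	/* BIT_ULL(31), safely below the copy limit */

/* Copy 'size' bytes in bounded pieces, as eb_copy_relocations() does;
 * memcpy stands in for __copy_from_user(), which additionally reports
 * how many bytes were left uncopied on a fault. */
static int copy_in_chunks(void *dst, const void *src, uint64_t size)
{
	uint64_t copied = 0;

	while (copied < size) {
		uint64_t len = size - copied;

		if (len > CHUNK)
			len = CHUNK;
		memcpy((char *)dst + copied, (const char *)src + copied, len);
		copied += len;
	}
	return 0;
}

int main(void)
{
	char src[64] = "relocation entries", dst[64] = { 0 };

	copy_in_chunks(dst, src, sizeof(src));
	printf("%s\n", dst);
	return 0;
}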
*/ - if (eb->args->flags & __EXEC_HAS_RELOC) { - struct i915_vma *vma; - - list_for_each_entry(vma, &eb->relocs, reloc_link) { - if (eb_relocate_vma(eb, vma)) - goto slow; - } - } - - return 0; - -slow: - return eb_relocate_slow(eb); -} - -static int eb_move_to_gpu(struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - int err; - - for (i = 0; i < count; i++) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; - struct drm_i915_gem_object *obj = vma->obj; - - if (flags & EXEC_OBJECT_CAPTURE) { - struct i915_capture_list *capture; - - capture = kmalloc(sizeof(*capture), GFP_KERNEL); - if (unlikely(!capture)) - return -ENOMEM; - - capture->next = eb->request->capture_list; - capture->vma = eb->vma[i]; - eb->request->capture_list = capture; - } - - /* - * If the GPU is not _reading_ through the CPU cache, we need - * to make sure that any writes (both previous GPU writes from - * before a change in snooping levels and normal CPU writes) - * caught in that cache are flushed to main memory. - * - * We want to say - * obj->cache_dirty && - * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) - * but gcc's optimiser doesn't handle that as well and emits - * two jumps instead of one. Maybe one day... - */ - if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { - if (i915_gem_clflush_object(obj, 0)) - flags &= ~EXEC_OBJECT_ASYNC; - } - - if (flags & EXEC_OBJECT_ASYNC) - continue; - - err = i915_request_await_object - (eb->request, obj, flags & EXEC_OBJECT_WRITE); - if (err) - return err; - } - - for (i = 0; i < count; i++) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; - - err = i915_vma_move_to_active(vma, eb->request, flags); - if (unlikely(err)) { - i915_request_skip(eb->request, err); - return err; - } - - __eb_unreserve_vma(vma, flags); - vma->exec_flags = NULL; - - if (unlikely(flags & __EXEC_OBJECT_HAS_REF)) - i915_vma_put(vma); - } - eb->exec = NULL; - - /* Unconditionally flush any chipset caches (for streaming writes). 
*/ - i915_gem_chipset_flush(eb->i915); - - return 0; -} - -static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) -{ - if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) - return false; - - /* Kernel clipping was a DRI1 misfeature */ - if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { - if (exec->num_cliprects || exec->cliprects_ptr) - return false; - } - - if (exec->DR4 == 0xffffffff) { - DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); - exec->DR4 = 0; - } - if (exec->DR1 || exec->DR4) - return false; - - if ((exec->batch_start_offset | exec->batch_len) & 0x7) - return false; - - return true; -} - -static int i915_reset_gen7_sol_offsets(struct i915_request *rq) -{ - u32 *cs; - int i; - - if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { - DRM_DEBUG("sol reset is gen7/rcs only\n"); - return -EINVAL; - } - - cs = intel_ring_begin(rq, 4 * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(4); - for (i = 0; i < 4; i++) { - *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); - *cs++ = 0; - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) -{ - struct drm_i915_gem_object *shadow_batch_obj; - struct i915_vma *vma; - int err; - - shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, - PAGE_ALIGN(eb->batch_len)); - if (IS_ERR(shadow_batch_obj)) - return ERR_CAST(shadow_batch_obj); - - err = intel_engine_cmd_parser(eb->engine, - eb->batch->obj, - shadow_batch_obj, - eb->batch_start_offset, - eb->batch_len, - is_master); - if (err) { - if (err == -EACCES) /* unhandled chained batch */ - vma = NULL; - else - vma = ERR_PTR(err); - goto out; - } - - vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto out; - - eb->vma[eb->buffer_count] = i915_vma_get(vma); - eb->flags[eb->buffer_count] = - __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; - vma->exec_flags = &eb->flags[eb->buffer_count]; - eb->buffer_count++; - -out: - i915_gem_object_unpin_pages(shadow_batch_obj); - return vma; -} - -static void -add_to_client(struct i915_request *rq, struct drm_file *file) -{ - rq->file_priv = file->driver_priv; - list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); -} - -static int eb_submit(struct i915_execbuffer *eb) -{ - int err; - - err = eb_move_to_gpu(eb); - if (err) - return err; - - if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { - err = i915_reset_gen7_sol_offsets(eb->request); - if (err) - return err; - } - - /* - * After we completed waiting for other engines (using HW semaphores) - * then we can signal that this request/batch is ready to run. This - * allows us to determine if the batch is still waiting on the GPU - * or actually running by checking the breadcrumb. - */ - if (eb->engine->emit_init_breadcrumb) { - err = eb->engine->emit_init_breadcrumb(eb->request); - if (err) - return err; - } - - err = eb->engine->emit_bb_start(eb->request, - eb->batch->node.start + - eb->batch_start_offset, - eb->batch_len, - eb->batch_flags); - if (err) - return err; - - return 0; -} - -/* - * Find one BSD ring to dispatch the corresponding BSD command. - * The engine index is returned. - */ -static unsigned int -gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - /* Check whether the file_priv has already selected one ring. 
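i915_reset_gen7_sol_offsets() above emits one MI_LOAD_REGISTER_IMM packet carrying four register/value pairs, padded with an MI_NOOP so the emission stays an even number of dwords. A userspace sketch of assembling that stream; the MI encoding used (opcode in bits 28:23, total dword count minus two in the low bits) follows the usual MI format, and the register offset below is a placeholder for GEN7_SO_WRITE_OFFSET(i):

#include <stdint.h>
#include <stdio.h>

#define MI_NOOP			0u
/* MI opcode in bits 28:23; low bits hold (total dwords - 2). */
#define MI_INSTR(op, flags)	((((uint32_t)(op)) << 23) | (flags))
#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2 * (x) - 1)

int main(void)
{
	uint32_t cmds[4 * 2 + 2], *cs = cmds;
	uint32_t reg = 0x5280;	/* placeholder for GEN7_SO_WRITE_OFFSET(0) */
	int i;

	*cs++ = MI_LOAD_REGISTER_IMM(4);
	for (i = 0; i < 4; i++) {
		*cs++ = reg + 4 * i;	/* register offset */
		*cs++ = 0;		/* value: reset the SOL offset */
	}
	*cs++ = MI_NOOP;

	for (i = 0; i < (int)(sizeof(cmds) / sizeof(cmds[0])); i++)
		printf("0x%08x\n", cmds[i]);
	return 0;
}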
*/ - if ((int)file_priv->bsd_engine < 0) - file_priv->bsd_engine = atomic_fetch_xor(1, - &dev_priv->mm.bsd_engine_dispatch_index); - - return file_priv->bsd_engine; -} - -static const enum intel_engine_id user_ring_map[] = { - [I915_EXEC_DEFAULT] = RCS0, - [I915_EXEC_RENDER] = RCS0, - [I915_EXEC_BLT] = BCS0, - [I915_EXEC_BSD] = VCS0, - [I915_EXEC_VEBOX] = VECS0 -}; - -static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - int err; - - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - err = i915_terminally_wedged(eb->i915); - if (err) - return err; - - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. - */ - err = intel_context_pin(ce); - if (err) - return err; - - eb->engine = ce->engine; - eb->context = ce; - return 0; -} - -static void eb_unpin_context(struct i915_execbuffer *eb) -{ - intel_context_unpin(eb->context); -} - -static unsigned int -eb_select_legacy_ring(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) -{ - struct drm_i915_private *i915 = eb->i915; - unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; - - if (user_ring_id != I915_EXEC_BSD && - (args->flags & I915_EXEC_BSD_MASK)) { - DRM_DEBUG("execbuf with non bsd ring but with invalid " - "bsd dispatch flags: %d\n", (int)(args->flags)); - return -1; - } - - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { - unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; - - if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(i915, file); - } else if (bsd_idx >= I915_EXEC_BSD_RING1 && - bsd_idx <= I915_EXEC_BSD_RING2) { - bsd_idx >>= I915_EXEC_BSD_SHIFT; - bsd_idx--; - } else { - DRM_DEBUG("execbuf with unknown bsd ring: %u\n", - bsd_idx); - return -1; - } - - return _VCS(bsd_idx); - } - - if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -1; - } - - return user_ring_map[user_ring_id]; -} - -static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) -{ - struct intel_context *ce; - unsigned int idx; - int err; - - if (i915_gem_context_user_engines(eb->gem_context)) - idx = args->flags & I915_EXEC_RING_MASK; - else - idx = eb_select_legacy_ring(eb, file, args); - - ce = i915_gem_context_get_engine(eb->gem_context, idx); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - err = eb_pin_context(eb, ce); - intel_context_put(ce); - - return err; -} - -static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) -{ - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); - kvfree(fences); -} - -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) -{ - const unsigned long nfences = args->num_cliprects; - struct drm_i915_gem_exec_fence __user *user; - struct drm_syncobj **fences; - unsigned long n; - int err; - - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) - return NULL; - - /* Check multiplication overflow for access_ok() and kvmalloc_array() */ - BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); - if (nfences > min_t(unsigned long, - ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) - return ERR_PTR(-EINVAL); - - user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(user, nfences * sizeof(*user))) - return ERR_PTR(-EFAULT); - - fences 
= kvmalloc_array(nfences, sizeof(*fences), - __GFP_NOWARN | GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - - for (n = 0; n < nfences; n++) { - struct drm_i915_gem_exec_fence fence; - struct drm_syncobj *syncobj; - - if (__copy_from_user(&fence, user++, sizeof(fence))) { - err = -EFAULT; - goto err; - } - - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { - err = -EINVAL; - goto err; - } - - syncobj = drm_syncobj_find(file, fence.handle); - if (!syncobj) { - DRM_DEBUG("Invalid syncobj handle provided\n"); - err = -ENOENT; - goto err; - } - - BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & - ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); - } - - return fences; - -err: - __free_fence_array(fences, n); - return ERR_PTR(err); -} - -static void -put_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_syncobj **fences) -{ - if (fences) - __free_fence_array(fences, args->num_cliprects); -} - -static int -await_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) -{ - const unsigned int nfences = eb->args->num_cliprects; - unsigned int n; - int err; - - for (n = 0; n < nfences; n++) { - struct drm_syncobj *syncobj; - struct dma_fence *fence; - unsigned int flags; - - syncobj = ptr_unpack_bits(fences[n], &flags, 2); - if (!(flags & I915_EXEC_FENCE_WAIT)) - continue; - - fence = drm_syncobj_fence_get(syncobj); - if (!fence) - return -EINVAL; - - err = i915_request_await_dma_fence(eb->request, fence); - dma_fence_put(fence); - if (err < 0) - return err; - } - - return 0; -} - -static void -signal_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) -{ - const unsigned int nfences = eb->args->num_cliprects; - struct dma_fence * const fence = &eb->request->fence; - unsigned int n; - - for (n = 0; n < nfences; n++) { - struct drm_syncobj *syncobj; - unsigned int flags; - - syncobj = ptr_unpack_bits(fences[n], &flags, 2); - if (!(flags & I915_EXEC_FENCE_SIGNAL)) - continue; - - drm_syncobj_replace_fence(syncobj, fence); - } -} - -static int -i915_gem_do_execbuffer(struct drm_device *dev, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct drm_syncobj **fences) -{ - struct i915_execbuffer eb; - struct dma_fence *in_fence = NULL; - struct dma_fence *exec_fence = NULL; - struct sync_file *out_fence = NULL; - int out_fence_fd = -1; - int err; - - BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); - BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & - ~__EXEC_OBJECT_UNKNOWN_FLAGS); - - eb.i915 = to_i915(dev); - eb.file = file; - eb.args = args; - if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) - args->flags |= __EXEC_HAS_RELOC; - - eb.exec = exec; - eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1); - eb.vma[0] = NULL; - eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1); - - eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; - reloc_cache_init(&eb.reloc_cache, eb.i915); - - eb.buffer_count = args->buffer_count; - eb.batch_start_offset = args->batch_start_offset; - eb.batch_len = args->batch_len; - - eb.batch_flags = 0; - if (args->flags & I915_EXEC_SECURE) { - if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; - - eb.batch_flags |= I915_DISPATCH_SECURE; - } - if (args->flags & I915_EXEC_IS_PINNED) - eb.batch_flags |= I915_DISPATCH_PINNED; - - if (args->flags & I915_EXEC_FENCE_IN) { - in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) - return -EINVAL; - } 
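get_fence_array() above packs the two user flag bits into the low bits of each drm_syncobj pointer; the BUILD_BUG_ON verifies that kmalloc's minimum alignment really leaves those bits free. A standalone sketch of the ptr_pack_bits()/ptr_unpack_bits() idiom, reimplemented here for illustration:

#include <stdint.h>
#include <stdio.h>

/* Pack 'bits'-worth of flags into the low bits of an aligned pointer,
 * loosely mirroring the kernel's ptr_pack_bits()/ptr_unpack_bits(). */
static void *ptr_pack_bits(void *ptr, unsigned int flags, unsigned int bits)
{
	uintptr_t mask = ((uintptr_t)1 << bits) - 1;

	return (void *)(((uintptr_t)ptr & ~mask) | (flags & mask));
}

static void *ptr_unpack_bits(void *ptr, unsigned int *flags, unsigned int bits)
{
	uintptr_t mask = ((uintptr_t)1 << bits) - 1;

	*flags = (unsigned int)((uintptr_t)ptr & mask);
	return (void *)((uintptr_t)ptr & ~mask);
}

int main(void)
{
	/* Any allocation aligned to at least 4 bytes has 2 free low bits. */
	static int syncobj;	/* stand-in for a struct drm_syncobj */
	unsigned int flags;
	void *packed = ptr_pack_bits(&syncobj, 0x3, 2);
	void *ptr = ptr_unpack_bits(packed, &flags, 2);

	printf("ptr ok: %d, flags: %#x\n", ptr == (void *)&syncobj, flags);
	return 0;
}

Storing the wait/signal flags in the pointer itself lets the fence array stay a flat array of pointers with no parallel flags allocation.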
- - if (args->flags & I915_EXEC_FENCE_SUBMIT) { - if (in_fence) { - err = -EINVAL; - goto err_in_fence; - } - - exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!exec_fence) { - err = -EINVAL; - goto err_in_fence; - } - } - - if (args->flags & I915_EXEC_FENCE_OUT) { - out_fence_fd = get_unused_fd_flags(O_CLOEXEC); - if (out_fence_fd < 0) { - err = out_fence_fd; - goto err_exec_fence; - } - } - - err = eb_create(&eb); - if (err) - goto err_out_fence; - - GEM_BUG_ON(!eb.lut_size); - - err = eb_select_context(&eb); - if (unlikely(err)) - goto err_destroy; - - /* - * Take a local wakeref for preparing to dispatch the execbuf as - * we expect to access the hardware fairly frequently in the - * process. Upon first dispatch, we acquire another prolonged - * wakeref that we hold until the GPU has been idle for at least - * 100ms. - */ - intel_gt_pm_get(eb.i915); - - err = i915_mutex_lock_interruptible(dev); - if (err) - goto err_rpm; - - err = eb_select_engine(&eb, file, args); - if (unlikely(err)) - goto err_unlock; - - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ - if (unlikely(err)) - goto err_engine; - - err = eb_relocate(&eb); - if (err) { - /* - * If the user expects the execobject.offset and - * reloc.presumed_offset to be an exact match, - * as for using NO_RELOC, then we cannot update - * the execobject.offset until we have completed - * relocation. - */ - args->flags &= ~__EXEC_HAS_RELOC; - goto err_vma; - } - - if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) { - DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); - err = -EINVAL; - goto err_vma; - } - if (eb.batch_start_offset > eb.batch->size || - eb.batch_len > eb.batch->size - eb.batch_start_offset) { - DRM_DEBUG("Attempting to use out-of-bounds batch\n"); - err = -EINVAL; - goto err_vma; - } - - if (eb_use_cmdparser(&eb)) { - struct i915_vma *vma; - - vma = eb_parse(&eb, drm_is_current_master(file)); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_vma; - } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } - } - - if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; - - /* - * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure - * batch" bit. Hence we need to pin secure batches into the global gtt. - * hsw should have this fixed, but bdw mucks it up again. */ - if (eb.batch_flags & I915_DISPATCH_SECURE) { - struct i915_vma *vma; - - /* - * So on first glance it looks freaky that we pin the batch here - * outside of the reservation loop. But: - * - The batch is already pinned into the relevant ppgtt, so we - * already have the backing storage fully allocated. - * - No other BO uses the global gtt (well contexts, but meh), - * so we don't really have issues with multiple objects not - * fitting due to fragmentation. - * So this is actually safe. - */ - vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_vma; - } - - eb.batch = vma; - } - - /* All GPU relocation batches must be submitted prior to the user rq */ - GEM_BUG_ON(eb.reloc_cache.rq); - - /* Allocate a request for this batch buffer nice and early. 
*/
- eb.request = i915_request_create(eb.context);
- if (IS_ERR(eb.request)) {
- err = PTR_ERR(eb.request);
- goto err_batch_unpin;
- }
-
- if (in_fence) {
- err = i915_request_await_dma_fence(eb.request, in_fence);
- if (err < 0)
- goto err_request;
- }
-
- if (exec_fence) {
- err = i915_request_await_execution(eb.request, exec_fence,
- eb.engine->bond_execute);
- if (err < 0)
- goto err_request;
- }
-
- if (fences) {
- err = await_fence_array(&eb, fences);
- if (err)
- goto err_request;
- }
-
- if (out_fence_fd != -1) {
- out_fence = sync_file_create(&eb.request->fence);
- if (!out_fence) {
- err = -ENOMEM;
- goto err_request;
- }
- }
-
- /*
- * Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when this
- * request is retired will the batch_obj be moved onto the
- * inactive_list and lose its active reference. Hence we do not need
- * to explicitly hold another reference here.
- */
- eb.request->batch = eb.batch;
-
- trace_i915_request_queue(eb.request, eb.batch_flags);
- err = eb_submit(&eb);
-err_request:
- add_to_client(eb.request, file);
- i915_request_add(eb.request);
-
- if (fences)
- signal_fence_array(&eb, fences);
-
- if (out_fence) {
- if (err == 0) {
- fd_install(out_fence_fd, out_fence->file);
- args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
- args->rsvd2 |= (u64)out_fence_fd << 32;
- out_fence_fd = -1;
- } else {
- fput(out_fence->file);
- }
- }
-
-err_batch_unpin:
- if (eb.batch_flags & I915_DISPATCH_SECURE)
- i915_vma_unpin(eb.batch);
-err_vma:
- if (eb.exec)
- eb_release_vmas(&eb);
-err_engine:
- eb_unpin_context(&eb);
-err_unlock:
- mutex_unlock(&dev->struct_mutex);
-err_rpm:
- intel_gt_pm_put(eb.i915);
- i915_gem_context_put(eb.gem_context);
-err_destroy:
- eb_destroy(&eb);
-err_out_fence:
- if (out_fence_fd != -1)
- put_unused_fd(out_fence_fd);
-err_exec_fence:
- dma_fence_put(exec_fence);
-err_in_fence:
- dma_fence_put(in_fence);
- return err;
-}
-
-static size_t eb_element_size(void)
-{
- return (sizeof(struct drm_i915_gem_exec_object2) +
- sizeof(struct i915_vma *) +
- sizeof(unsigned int));
-}
-
-static bool check_buffer_count(size_t count)
-{
- const size_t sz = eb_element_size();
-
- /*
- * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
- * array size (see eb_create()). Otherwise, we can accept an array as
- * large as can be addressed (though use large arrays at your peril)!
- */
-
- return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
-}
-
-/*
- * Legacy execbuffer just creates an exec2 list from the original exec object
- * list array and passes it to the real function.
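check_buffer_count() above bounds the later kvmalloc_array() against both the INT_MAX lookup-table limit and size_t overflow, with the "- 1" reserving the extra slot the command parser may append. The same arithmetic as a standalone check; the element size here is illustrative only:

#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* One exec entry plus its vma pointer and flags slot, loosely modelled
 * on eb_element_size(); the 64-byte struct size is a placeholder. */
static size_t eb_element_size(void)
{
	return 64 + sizeof(void *) + sizeof(unsigned int);
}

/* Reject zero, LUT-breaking (> INT_MAX) and multiplication-overflowing
 * counts; "- 1" leaves room for the command parser's extra slot. */
static bool check_buffer_count(size_t count)
{
	const size_t sz = eb_element_size();

	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
}

int main(void)
{
	printf("%d %d %d\n",
	       check_buffer_count(0),
	       check_buffer_count(1024),
	       check_buffer_count(SIZE_MAX / 8));
	return 0;
}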
- */ -int -i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_execbuffer2 exec2; - struct drm_i915_gem_exec_object *exec_list = NULL; - struct drm_i915_gem_exec_object2 *exec2_list = NULL; - const size_t count = args->buffer_count; - unsigned int i; - int err; - - if (!check_buffer_count(count)) { - DRM_DEBUG("execbuf2 with %zd buffers\n", count); - return -EINVAL; - } - - exec2.buffers_ptr = args->buffers_ptr; - exec2.buffer_count = args->buffer_count; - exec2.batch_start_offset = args->batch_start_offset; - exec2.batch_len = args->batch_len; - exec2.DR1 = args->DR1; - exec2.DR4 = args->DR4; - exec2.num_cliprects = args->num_cliprects; - exec2.cliprects_ptr = args->cliprects_ptr; - exec2.flags = I915_EXEC_RENDER; - i915_execbuffer2_set_context_id(exec2, 0); - - if (!i915_gem_check_execbuffer(&exec2)) - return -EINVAL; - - /* Copy in the exec list from userland */ - exec_list = kvmalloc_array(count, sizeof(*exec_list), - __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(count + 1, eb_element_size(), - __GFP_NOWARN | GFP_KERNEL); - if (exec_list == NULL || exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %d buffers\n", - args->buffer_count); - kvfree(exec_list); - kvfree(exec2_list); - return -ENOMEM; - } - err = copy_from_user(exec_list, - u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec_list) * count); - if (err) { - DRM_DEBUG("copy %d exec entries failed %d\n", - args->buffer_count, err); - kvfree(exec_list); - kvfree(exec2_list); - return -EFAULT; - } - - for (i = 0; i < args->buffer_count; i++) { - exec2_list[i].handle = exec_list[i].handle; - exec2_list[i].relocation_count = exec_list[i].relocation_count; - exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; - exec2_list[i].alignment = exec_list[i].alignment; - exec2_list[i].offset = exec_list[i].offset; - if (INTEL_GEN(to_i915(dev)) < 4) - exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; - else - exec2_list[i].flags = 0; - } - - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); - if (exec2.flags & __EXEC_HAS_RELOC) { - struct drm_i915_gem_exec_object __user *user_exec_list = - u64_to_user_ptr(args->buffers_ptr); - - /* Copy the new buffer offsets back to the user's exec list. 
*/ - for (i = 0; i < args->buffer_count; i++) { - if (!(exec2_list[i].offset & UPDATE)) - continue; - - exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); - exec2_list[i].offset &= PIN_OFFSET_MASK; - if (__copy_to_user(&user_exec_list[i].offset, - &exec2_list[i].offset, - sizeof(user_exec_list[i].offset))) - break; - } - } - - kvfree(exec_list); - kvfree(exec2_list); - return err; -} - -int -i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_execbuffer2 *args = data; - struct drm_i915_gem_exec_object2 *exec2_list; - struct drm_syncobj **fences = NULL; - const size_t count = args->buffer_count; - int err; - - if (!check_buffer_count(count)) { - DRM_DEBUG("execbuf2 with %zd buffers\n", count); - return -EINVAL; - } - - if (!i915_gem_check_execbuffer(args)) - return -EINVAL; - - /* Allocate an extra slot for use by the command parser */ - exec2_list = kvmalloc_array(count + 1, eb_element_size(), - __GFP_NOWARN | GFP_KERNEL); - if (exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", - count); - return -ENOMEM; - } - if (copy_from_user(exec2_list, - u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec2_list) * count)) { - DRM_DEBUG("copy %zd exec entries failed\n", count); - kvfree(exec2_list); - return -EFAULT; - } - - if (args->flags & I915_EXEC_FENCE_ARRAY) { - fences = get_fence_array(args, file); - if (IS_ERR(fences)) { - kvfree(exec2_list); - return PTR_ERR(fences); - } - } - - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); - - /* - * Now that we have begun execution of the batchbuffer, we ignore - * any new error after this point. Also given that we have already - * updated the associated relocations, we try to write out the current - * object locations irrespective of any error. - */ - if (args->flags & __EXEC_HAS_RELOC) { - struct drm_i915_gem_exec_object2 __user *user_exec_list = - u64_to_user_ptr(args->buffers_ptr); - unsigned int i; - - /* Copy the new buffer offsets back to the user's exec list. */ - /* - * Note: count * sizeof(*user_exec_list) does not overflow, - * because we checked 'count' in check_buffer_count(). - * - * And this range already got effectively checked earlier - * when we did the "copy_from_user()" above. 
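Both copy-back loops (the one above and the unsafe_put_user() variant in execbuffer2 that follows) only write back entries whose low UPDATE tag bit is set, stripping the tag with PIN_OFFSET_MASK and canonicalising the result. A sketch of that filter; UPDATE and PIN_OFFSET_MASK below are placeholder values standing in for the kernel's definitions:

#include <stdint.h>
#include <stdio.h>

#define UPDATE		1ull		/* placeholder tag bit */
#define PIN_OFFSET_MASK	(~4095ull)	/* placeholder page mask */

static uint64_t canonical_addr(uint64_t a)
{
	return (uint64_t)(((int64_t)(a << 16)) >> 16);
}

int main(void)
{
	uint64_t offsets[] = {
		0x0000100000ull | UPDATE,	/* relocated: write back */
		0x0000200000ull,		/* untouched: skip */
	};
	int i;

	for (i = 0; i < 2; i++) {
		if (!(offsets[i] & UPDATE))
			continue;
		printf("entry %d -> %#llx\n", i, (unsigned long long)
		       canonical_addr(offsets[i] & PIN_OFFSET_MASK));
	}
	return 0;
}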
- */ - if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list))) - goto end; - - for (i = 0; i < args->buffer_count; i++) { - if (!(exec2_list[i].offset & UPDATE)) - continue; - - exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); - unsafe_put_user(exec2_list[i].offset, - &user_exec_list[i].offset, - end_user); - } -end_user: - user_access_end(); -end:; - } - - args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; - put_fence_array(args, fences); - kvfree(exec2_list); - return err; -} diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c deleted file mode 100644 index 21662176819f..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright © 2014-2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include -#include "i915_drv.h" - -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -} - -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp; - - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif - - gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32; - } - -create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - - npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - do { - int order = min(fls(npages) - 1, max_order); - struct page *page; - - do { - page = alloc_pages(gfp | (order ? 
QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err; - - /* Limit subsequent allocations as well */ - max_order = order; - } while (1); - - sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++; - - npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while (1); - - if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; - } - - /* Mark the pages as dontneed whilst they are still pinned. As soon - * as they are unpinned they are allowed to be reaped by the shrinker, - * and the caller is expected to repopulate - the contents of this - * object are only valid whilst active and pinned. - */ - obj->mm.madv = I915_MADV_DONTNEED; - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); - - return -ENOMEM; -} - -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(obj, pages); - internal_free_pages(pages); - - obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; -} - -static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = i915_gem_object_get_pages_internal, - .put_pages = i915_gem_object_put_pages_internal, -}; - -/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ -struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) -{ - struct drm_i915_gem_object *obj; - unsigned int cache_level; - - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops); - - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->write_domain = I915_GEM_DOMAIN_CPU; - - cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE;
- i915_gem_object_set_cache_coherency(obj, cache_level);
-
- return obj;
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
deleted file mode 100644
index c0ad19605297..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#include "gt/intel_gt_pm.h"
-
-#include "i915_drv.h"
-#include "i915_gem_pm.h"
-#include "i915_globals.h"
-
-static void i915_gem_park(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- lockdep_assert_held(&i915->drm.struct_mutex);
-
- for_each_engine(engine, i915, id)
- i915_gem_batch_pool_fini(&engine->batch_pool);
-
- i915_timelines_park(i915);
- i915_vma_parked(i915);
-
- i915_globals_park();
-}
-
-static void idle_work_handler(struct work_struct *work)
-{
- struct drm_i915_private *i915 =
- container_of(work, typeof(*i915), gem.idle_work);
- bool restart = true;
-
- cancel_delayed_work(&i915->gem.retire_work);
- mutex_lock(&i915->drm.struct_mutex);
-
- intel_wakeref_lock(&i915->gt.wakeref);
- if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) {
- i915_gem_park(i915);
- restart = false;
- }
- intel_wakeref_unlock(&i915->gt.wakeref);
-
- mutex_unlock(&i915->drm.struct_mutex);
- if (restart)
- queue_delayed_work(i915->wq,
- &i915->gem.retire_work,
- round_jiffies_up_relative(HZ));
-}
-
-static void retire_work_handler(struct work_struct *work)
-{
- struct drm_i915_private *i915 =
- container_of(work, typeof(*i915), gem.retire_work.work);
-
- /* Come back later if the device is busy... */
- if (mutex_trylock(&i915->drm.struct_mutex)) {
- i915_retire_requests(i915);
- mutex_unlock(&i915->drm.struct_mutex);
- }
-
- queue_delayed_work(i915->wq,
- &i915->gem.retire_work,
- round_jiffies_up_relative(HZ));
-}
-
-static int pm_notifier(struct notifier_block *nb,
- unsigned long action,
- void *data)
-{
- struct drm_i915_private *i915 =
- container_of(nb, typeof(*i915), gem.pm_notifier);
-
- switch (action) {
- case INTEL_GT_UNPARK:
- i915_globals_unpark();
- queue_delayed_work(i915->wq,
- &i915->gem.retire_work,
- round_jiffies_up_relative(HZ));
- break;
-
- case INTEL_GT_PARK:
- queue_work(i915->wq, &i915->gem.idle_work);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
-{
- bool result = true;
-
- do {
- if (i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED |
- I915_WAIT_FOR_IDLE_BOOST,
- I915_GEM_IDLE_TIMEOUT) == -ETIME) {
- /* XXX hide warning from gem_eio */
- if (i915_modparams.reset) {
- dev_err(i915->drm.dev,
- "Failed to idle engines, declaring wedged!\n");
- GEM_TRACE_DUMP();
- }
-
- /*
- * Forcibly cancel outstanding work and leave
- * the gpu quiet.
- */
- i915_gem_set_wedged(i915);
- result = false;
- }
- } while (i915_retire_requests(i915) && result);
-
- GEM_BUG_ON(i915->gt.awake);
- return result;
-}
-
-bool i915_gem_load_power_context(struct drm_i915_private *i915)
-{
- return switch_to_kernel_context_sync(i915);
-}
-
-void i915_gem_suspend(struct drm_i915_private *i915)
-{
- GEM_TRACE("\n");
-
- intel_wakeref_auto(&i915->mm.userfault_wakeref, 0);
- flush_workqueue(i915->wq);
-
- mutex_lock(&i915->drm.struct_mutex);
-
- /*
- * We have to flush all the executing contexts to main memory so
- * that they can be saved in the hibernation image. To ensure the last
- * context image is coherent, we have to switch away from it. That
- * leaves the i915->kernel_context still active when
- * we actually suspend, and its image in memory may not match the GPU
- * state. Fortunately, the kernel_context is disposable and we do
- * not rely on its state.
- */
- switch_to_kernel_context_sync(i915);
-
- mutex_unlock(&i915->drm.struct_mutex);
-
- /*
- * Assert that we successfully flushed all the work and
- * reset the GPU back to its idle, low power state.
- */
- GEM_BUG_ON(i915->gt.awake);
- flush_work(&i915->gem.idle_work);
-
- cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
- i915_gem_drain_freed_objects(i915);
-
- intel_uc_suspend(i915);
-}
-
-void i915_gem_suspend_late(struct drm_i915_private *i915)
-{
- struct drm_i915_gem_object *obj;
- struct list_head *phases[] = {
- &i915->mm.unbound_list,
- &i915->mm.bound_list,
- NULL
- }, **phase;
-
- /*
- * Neither the BIOS, ourselves nor any other kernel
- * expects the system to be in execlists mode on startup,
- * so we need to reset the GPU back to legacy mode. And the only
- * known way to disable logical contexts is through a GPU reset.
- *
- * So in order to leave the system in a known default configuration,
- * always reset the GPU upon unload and suspend. Afterwards we then
- * clean up the GEM state tracking, flushing off the requests and
- * leaving the system in a known idle state.
- *
- * Note that it is of the utmost importance that the GPU is idle and
- * all stray writes are flushed *before* we dismantle the backing
- * storage for the pinned objects.
- *
- * However, since we are uncertain that resetting the GPU on older
- * machines is a good idea, we don't - just in case it leaves the
- * machine in an unusable condition.
- */
-
- mutex_lock(&i915->drm.struct_mutex);
- for (phase = phases; *phase; phase++) {
- list_for_each_entry(obj, *phase, mm.link)
- WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
- }
- mutex_unlock(&i915->drm.struct_mutex);
-
- intel_uc_sanitize(i915);
- i915_gem_sanitize(i915);
-}
-
-void i915_gem_resume(struct drm_i915_private *i915)
-{
- GEM_TRACE("\n");
-
- WARN_ON(i915->gt.awake);
-
- mutex_lock(&i915->drm.struct_mutex);
- intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
- i915_gem_restore_gtt_mappings(i915);
- i915_gem_restore_fences(i915);
-
- /*
- * As we didn't flush the kernel context before suspend, we cannot
- * guarantee that the context image is complete. So let's just reset
- * it and start again.
- */
- intel_gt_resume(i915);
-
- if (i915_gem_init_hw(i915))
- goto err_wedged;
-
- intel_uc_resume(i915);
-
- /* Always reload a context for powersaving.
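switch_to_kernel_context_sync() above loops because each retire pass may expose more outstanding work: wait for idle, wedge the GPU on timeout, and repeat until retiring finds nothing left. A stub-based sketch of that control flow; wait_for_idle() and retire_requests() below are simplified stand-ins for the real calls:

#include <stdbool.h>
#include <stdio.h>

static int pending = 3;	/* pretend three retire passes are needed */

static int wait_for_idle(void)	{ return 0; }	/* 0: idled, <0: timed out */
static bool retire_requests(void) { return --pending > 0; }

static bool switch_to_kernel_context_sync(void)
{
	bool result = true;

	do {
		if (wait_for_idle() != 0) {
			/* Timed out: forcibly cancel outstanding work,
			 * i.e. i915_gem_set_wedged() in the driver. */
			result = false;
		}
	} while (retire_requests() && result);

	return result;
}

int main(void)
{
	printf("idled: %d\n", switch_to_kernel_context_sync());
	return 0;
}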
*/ - if (!i915_gem_load_power_context(i915)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); - return; - -err_wedged: - if (!i915_reset_failed(i915)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); - } - goto out_unlock; -} - -void i915_gem_init__pm(struct drm_i915_private *i915) -{ - INIT_WORK(&i915->gem.idle_work, idle_work_handler); - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - - i915->gem.pm_notifier.notifier_call = pm_notifier; - blocking_notifier_chain_register(&i915->gt.pm_notifications, - &i915->gem.pm_notifier); -} diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h deleted file mode 100644 index 6f7d5d11ac3b..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_pm.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_GEM_PM_H__ -#define __I915_GEM_PM_H__ - -#include - -struct drm_i915_private; -struct work_struct; - -void i915_gem_init__pm(struct drm_i915_private *i915); - -bool i915_gem_load_power_context(struct drm_i915_private *i915); -void i915_gem_resume(struct drm_i915_private *i915); - -void i915_gem_idle_work_handler(struct work_struct *work); - -void i915_gem_suspend(struct drm_i915_private *i915); -void i915_gem_suspend_late(struct drm_i915_private *i915); - -#endif /* __I915_GEM_PM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c deleted file mode 100644 index 2c7aefb3e101..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- *
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "i915_drv.h"
-#include "i915_trace.h"
-
-static bool shrinker_lock(struct drm_i915_private *i915,
- unsigned int flags,
- bool *unlock)
-{
- struct mutex *m = &i915->drm.struct_mutex;
-
- switch (mutex_trylock_recursive(m)) {
- case MUTEX_TRYLOCK_RECURSIVE:
- *unlock = false;
- return true;
-
- case MUTEX_TRYLOCK_FAILED:
- *unlock = false;
- if (flags & I915_SHRINK_ACTIVE &&
- mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
- *unlock = true;
- return *unlock;
-
- case MUTEX_TRYLOCK_SUCCESS:
- *unlock = true;
- return true;
- }
-
- BUG();
-}
-
-static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
-{
- if (!unlock)
- return;
-
- mutex_unlock(&i915->drm.struct_mutex);
-}
-
-static bool swap_available(void)
-{
- return get_nr_swap_pages() > 0;
-}
-
-static bool can_release_pages(struct drm_i915_gem_object *obj)
-{
- /* Consider only shrinkable objects. */
- if (!i915_gem_object_is_shrinkable(obj))
- return false;
-
- /* Only report true if by unbinding the object and putting its pages
- * we can actually make forward progress towards freeing physical
- * pages.
- *
- * If the pages are pinned for any other reason than being bound
- * to the GPU, simply unbinding from the GPU is not going to succeed
- * in releasing our pin count on the pages themselves.
- */
- if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
- return false;
-
- /* If any vma are "permanently" pinned, it will prevent us from
- * reclaiming the obj->mm.pages. We only allow scanout objects to claim
- * a permanent pin, along with a few others like the context objects.
- * To simplify the scan, and to avoid walking the list of vma under the
- * object, we just check the count of its permanent pins.
- */
- if (READ_ONCE(obj->pin_global))
- return false;
-
- /* We can only return physical pages to the system if we can either
- * discard the contents (because the user has marked them as being
- * purgeable) or if we can move their contents out to swap.
- */
- return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
-}
-
-static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
-{
- if (i915_gem_object_unbind(obj) == 0)
- __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
- return !i915_gem_object_has_pages(obj);
-}
-
-static void try_to_writeback(struct drm_i915_gem_object *obj,
- unsigned int flags)
-{
- switch (obj->mm.madv) {
- case I915_MADV_DONTNEED:
- i915_gem_object_truncate(obj);
- case __I915_MADV_PURGED:
- return;
- }
-
- if (flags & I915_SHRINK_WRITEBACK)
- i915_gem_object_writeback(obj);
-}
-
-/**
- * i915_gem_shrink - Shrink buffer object caches
- * @i915: i915 device
- * @target: amount of memory to make available, in pages
- * @nr_scanned: optional output for number of pages scanned (incremental)
- * @flags: control flags for selecting cache types
- *
- * This function is the main interface to the shrinker. It will try to release
- * up to @target pages of main memory backing storage from buffer objects.
- * Selection of the specific caches can be done with @flags. This is e.g. useful
- * when purgeable objects should be removed from caches preferentially.
- *
- * Note that it's not guaranteed that the released amount is actually available
- * as free system memory - the pages might still be in use due to other reasons
- * (like cpu mmaps) or the mm core has reused them before we could grab them.
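can_release_pages() above is a pure predicate over the object's bookkeeping: releasing pages only helps if GPU bindings hold the last pin, nothing holds a permanent pin, and the contents are either purgeable or swappable. The same logic over a plain struct, with field names loosely mirroring the driver's:

#include <stdbool.h>
#include <stdio.h>

struct obj {
	bool shrinkable;
	int pages_pin_count;	/* total pins on the backing pages */
	int bind_count;		/* pins owed purely to GPU bindings */
	bool pin_global;	/* "permanent" pin, e.g. scanout */
	bool purgeable;		/* userspace marked the pages DONTNEED */
};

static bool swap_available(void) { return true; }	/* stub */

static bool can_release_pages(const struct obj *o)
{
	if (!o->shrinkable)
		return false;
	/* Unbinding only helps if the bindings hold the last pin. */
	if (o->pages_pin_count > o->bind_count)
		return false;
	if (o->pin_global)
		return false;
	/* Freed pages must be discardable or swappable to matter. */
	return swap_available() || o->purgeable;
}

int main(void)
{
	struct obj o = { .shrinkable = true, .pages_pin_count = 1,
			 .bind_count = 1 };

	printf("%d\n", can_release_pages(&o));
	return 0;
}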
- * Therefore code that needs to explicitly shrink buffer objects caches (e.g. to - * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all(). - * - * Also note that any kind of pinning (both per-vma address space pins and - * backing storage pins at the buffer object level) result in the shrinker code - * having to skip the object. - * - * Returns: - * The number of pages of backing storage actually released. - */ -unsigned long -i915_gem_shrink(struct drm_i915_private *i915, - unsigned long target, - unsigned long *nr_scanned, - unsigned flags) -{ - const struct { - struct list_head *list; - unsigned int bit; - } phases[] = { - { &i915->mm.unbound_list, I915_SHRINK_UNBOUND }, - { &i915->mm.bound_list, I915_SHRINK_BOUND }, - { NULL, 0 }, - }, *phase; - intel_wakeref_t wakeref = 0; - unsigned long count = 0; - unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, flags, &unlock)) - return 0; - - /* - * When shrinking the active list, also consider active contexts. - * Active contexts are pinned until they are retired, and so can - * not be simply unbound to retire and unpin their pages. To shrink - * the contexts, we must wait until the gpu is idle. - * - * We don't care about errors here; if we cannot wait upon the GPU, - * we will free as much as we can and hope to get a second chance. - */ - if (flags & I915_SHRINK_ACTIVE) - i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - trace_i915_gem_shrink(i915, target, flags); - i915_retire_requests(i915); - - /* - * Unbinding of objects will require HW access; Let us not wake the - * device just to recover a little memory. If absolutely necessary, - * we will force the wake during oom-notifier. - */ - if (flags & I915_SHRINK_BOUND) { - wakeref = intel_runtime_pm_get_if_in_use(i915); - if (!wakeref) - flags &= ~I915_SHRINK_BOUND; - } - - /* - * As we may completely rewrite the (un)bound list whilst unbinding - * (due to retiring requests) we have to strictly process only - * one element of the list at the time, and recheck the list - * on every iteration. - * - * In particular, we must hold a reference whilst removing the - * object as we may end up waiting for and/or retiring the objects. - * This might release the final reference (held by the active list) - * and result in the object being freed from under us. This is - * similar to the precautions the eviction code must take whilst - * removing objects. - * - * Also note that although these lists do not hold a reference to - * the object we can safely grab one here: The final object - * unreferencing and the bound_list are both protected by the - * dev->struct_mutex and so we won't ever be able to observe an - * object on the bound_list with a reference count equals 0. - */ - for (phase = phases; phase->list; phase++) { - struct list_head still_in_list; - struct drm_i915_gem_object *obj; - - if ((flags & phase->bit) == 0) - continue; - - INIT_LIST_HEAD(&still_in_list); - - /* - * We serialize our access to unreferenced objects through - * the use of the struct_mutex. While the objects are not - * yet freed (due to RCU then a workqueue) we still want - * to be able to shrink their pages, so they remain on - * the unbound/bound list until actually freed. 
- */
- spin_lock(&i915->mm.obj_lock);
- while (count < target &&
- (obj = list_first_entry_or_null(phase->list,
- typeof(*obj),
- mm.link))) {
- list_move_tail(&obj->mm.link, &still_in_list);
-
- if (flags & I915_SHRINK_PURGEABLE &&
- obj->mm.madv != I915_MADV_DONTNEED)
- continue;
-
- if (flags & I915_SHRINK_VMAPS &&
- !is_vmalloc_addr(obj->mm.mapping))
- continue;
-
- if (!(flags & I915_SHRINK_ACTIVE) &&
- (i915_gem_object_is_active(obj) ||
- i915_gem_object_is_framebuffer(obj)))
- continue;
-
- if (!can_release_pages(obj))
- continue;
-
- spin_unlock(&i915->mm.obj_lock);
-
- if (unsafe_drop_pages(obj)) {
- /* May arrive from get_pages on another bo */
- mutex_lock_nested(&obj->mm.lock,
- I915_MM_SHRINKER);
- if (!i915_gem_object_has_pages(obj)) {
- try_to_writeback(obj, flags);
- count += obj->base.size >> PAGE_SHIFT;
- }
- mutex_unlock(&obj->mm.lock);
- }
- scanned += obj->base.size >> PAGE_SHIFT;
-
- spin_lock(&i915->mm.obj_lock);
- }
- list_splice_tail(&still_in_list, phase->list);
- spin_unlock(&i915->mm.obj_lock);
- }
-
- if (flags & I915_SHRINK_BOUND)
- intel_runtime_pm_put(i915, wakeref);
-
- i915_retire_requests(i915);
-
- shrinker_unlock(i915, unlock);
-
- if (nr_scanned)
- *nr_scanned += scanned;
- return count;
-}
-
-/**
- * i915_gem_shrink_all - Shrink buffer object caches completely
- * @i915: i915 device
- *
- * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
- * caches completely. It also first waits for and retires all outstanding
- * requests to also be able to release backing storage for active objects.
- *
- * This should only be used in code to intentionally quiesce the gpu or as a
- * last-ditch effort when memory seems to have run out.
- *
- * Returns:
- * The number of pages of backing storage actually released.
- */
-unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
-{
- intel_wakeref_t wakeref;
- unsigned long freed = 0;
-
- with_intel_runtime_pm(i915, wakeref) {
- freed = i915_gem_shrink(i915, -1UL, NULL,
- I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_ACTIVE);
- }
-
- return freed;
-}
-
-static unsigned long
-i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
-{
- struct drm_i915_private *i915 =
- container_of(shrinker, struct drm_i915_private, mm.shrinker);
- struct drm_i915_gem_object *obj;
- unsigned long num_objects = 0;
- unsigned long count = 0;
-
- spin_lock(&i915->mm.obj_lock);
- list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
- if (can_release_pages(obj)) {
- count += obj->base.size >> PAGE_SHIFT;
- num_objects++;
- }
-
- list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
- if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
- count += obj->base.size >> PAGE_SHIFT;
- num_objects++;
- }
- spin_unlock(&i915->mm.obj_lock);
-
- /* Update our preferred vmscan batch size for the next pass.
- * Our rough guess for an effective batch size is roughly 2
- * available GEM objects worth of pages. That is, we don't want
- * the shrinker to fire until it is worth the cost of freeing an
- * entire GEM object.
- */ - if (num_objects) { - unsigned long avg = 2 * count / num_objects; - - i915->mm.shrinker.batch = - max((i915->mm.shrinker.batch + avg) >> 1, - 128ul /* default SHRINK_BATCH */); - } - - return count; -} - -static unsigned long -i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) -{ - struct drm_i915_private *i915 = - container_of(shrinker, struct drm_i915_private, mm.shrinker); - unsigned long freed; - bool unlock; - - sc->nr_scanned = 0; - - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - - freed = i915_gem_shrink(i915, - sc->nr_to_scan, - &sc->nr_scanned, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE | - I915_SHRINK_WRITEBACK); - if (sc->nr_scanned < sc->nr_to_scan) - freed += i915_gem_shrink(i915, - sc->nr_to_scan - sc->nr_scanned, - &sc->nr_scanned, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); - if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { - intel_wakeref_t wakeref; - - with_intel_runtime_pm(i915, wakeref) { - freed += i915_gem_shrink(i915, - sc->nr_to_scan - sc->nr_scanned, - &sc->nr_scanned, - I915_SHRINK_ACTIVE | - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); - } - } - - shrinker_unlock(i915, unlock); - - return sc->nr_scanned ? freed : SHRINK_STOP; -} - -static int -i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) -{ - struct drm_i915_private *i915 = - container_of(nb, struct drm_i915_private, mm.oom_notifier); - struct drm_i915_gem_object *obj; - unsigned long unevictable, bound, unbound, freed_pages; - intel_wakeref_t wakeref; - - freed_pages = 0; - with_intel_runtime_pm(i915, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); - - /* Because we may be allocating inside our own driver, we cannot - * assert that there are no objects with pinned pages that are not - * being pointed to by hardware. 
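The batch-size update closing i915_gem_shrinker_count() above blends the previous value with "two average objects' worth of pages" and clamps to the default SHRINK_BATCH, so vmscan does not invoke the shrinker for less than a whole object. The arithmetic as a standalone sketch:

#include <stdio.h>

#define SHRINK_BATCH 128ul	/* default vmscan batch size */

/* Blend the previous batch with 2 * (pages / objects), as the count
 * callback above does, never dropping below the default. */
static unsigned long update_batch(unsigned long batch,
				  unsigned long pages, unsigned long objects)
{
	unsigned long avg = 2 * pages / objects;
	unsigned long blended = (batch + avg) >> 1;

	return blended > SHRINK_BATCH ? blended : SHRINK_BATCH;
}

int main(void)
{
	unsigned long batch = 4096;	/* initial value from registration */

	/* e.g. 1GiB of releasable pages spread over 100 objects */
	batch = update_batch(batch, 1ul << 18, 100);
	printf("batch = %lu pages\n", batch);
	return 0;
}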
- */ - unbound = bound = unevictable = 0; - spin_lock(&i915->mm.obj_lock); - list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { - if (!can_release_pages(obj)) - unevictable += obj->base.size >> PAGE_SHIFT; - else - unbound += obj->base.size >> PAGE_SHIFT; - } - list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { - if (!can_release_pages(obj)) - unevictable += obj->base.size >> PAGE_SHIFT; - else - bound += obj->base.size >> PAGE_SHIFT; - } - spin_unlock(&i915->mm.obj_lock); - - if (freed_pages || unbound || bound) - pr_info("Purging GPU memory, %lu pages freed, " - "%lu pages still pinned.\n", - freed_pages, unevictable); - - *(unsigned long *)ptr += freed_pages; - return NOTIFY_DONE; -} - -static int -i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) -{ - struct drm_i915_private *i915 = - container_of(nb, struct drm_i915_private, mm.vmap_notifier); - struct i915_vma *vma, *next; - unsigned long freed_pages = 0; - intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; - - /* Force everything onto the inactive lists */ - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT)) - goto out; - - with_intel_runtime_pm(i915, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_VMAPS); - - /* We also want to clear any cached iomaps as they wrap vmap */ - mutex_lock(&i915->ggtt.vm.mutex); - list_for_each_entry_safe(vma, next, - &i915->ggtt.vm.bound_list, vm_link) { - unsigned long count = vma->node.size >> PAGE_SHIFT; - - if (!vma->iomap || i915_vma_is_active(vma)) - continue; - - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) - freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); - } - mutex_unlock(&i915->ggtt.vm.mutex); - -out: - shrinker_unlock(i915, unlock); - - *(unsigned long *)ptr += freed_pages; - return NOTIFY_DONE; -} - -/** - * i915_gem_shrinker_register - Register the i915 shrinker - * @i915: i915 device - * - * This function registers and sets up the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_register(struct drm_i915_private *i915) -{ - i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; - i915->mm.shrinker.count_objects = i915_gem_shrinker_count; - i915->mm.shrinker.seeks = DEFAULT_SEEKS; - i915->mm.shrinker.batch = 4096; - WARN_ON(register_shrinker(&i915->mm.shrinker)); - - i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; - WARN_ON(register_oom_notifier(&i915->mm.oom_notifier)); - - i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; - WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); -} - -/** - * i915_gem_shrinker_unregister - Unregisters the i915 shrinker - * @i915: i915 device - * - * This function unregisters the i915 shrinker and OOM handler. 
- */
-void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
-{
- WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
- WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
- unregister_shrinker(&i915->mm.shrinker);
-}
-
-void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
- struct mutex *mutex)
-{
- bool unlock = false;
-
- if (!IS_ENABLED(CONFIG_LOCKDEP))
- return;
-
- if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
- mutex_acquire(&i915->drm.struct_mutex.dep_map,
- I915_MM_NORMAL, 0, _RET_IP_);
- unlock = true;
- }
-
- fs_reclaim_acquire(GFP_KERNEL);
-
- /*
- * As we invariably rely on the struct_mutex within the shrinker,
- * but have a complicated recursion dance, taint all the mutexes used
- * within the shrinker with the struct_mutex. For completeness, we
- * taint with all subclasses of struct_mutex, even though we should
- * only need tainting by I915_MM_NORMAL to catch possible ABBA
- * deadlocks from using struct_mutex inside @mutex.
- */
- mutex_acquire(&i915->drm.struct_mutex.dep_map,
- I915_MM_SHRINKER, 0, _RET_IP_);
-
- mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
- mutex_release(&mutex->dep_map, 0, _RET_IP_);
-
- mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
-
- fs_reclaim_release(GFP_KERNEL);
-
- if (unlock)
- mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
deleted file mode 100644
index 0a8082cfc761..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ /dev/null
@@ -1,721 +0,0 @@
-/*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt
- * Chris Wilson
- *
- */
-
-#include
-#include "i915_drv.h"
-
-/*
- * The BIOS typically reserves some of the system's memory for the exclusive
- * use of the integrated graphics. This memory is no longer available for
- * use by the OS and so the user finds that his system has less memory
- * available than he put in. We refer to this memory as stolen.
- *
- * The BIOS will allocate its framebuffer from the stolen memory. Our
- * goal is to try to reuse that object for our own fbcon which must always
- * be available for panics. Anything else we can reuse the stolen memory
- * for is a boon.
- */
-
-int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
- struct drm_mm_node *node, u64 size,
- unsigned alignment, u64 start, u64 end)
-{
- int ret;
-
- if (!drm_mm_initialized(&dev_priv->mm.stolen))
- return -ENODEV;
-
- /* WaSkipStolenMemoryFirstPage:bdw+ */
- if (INTEL_GEN(dev_priv) >= 8 && start < 4096)
- start = 4096;
-
- mutex_lock(&dev_priv->mm.stolen_lock);
- ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
- size, alignment, 0,
- start, end, DRM_MM_INSERT_BEST);
- mutex_unlock(&dev_priv->mm.stolen_lock);
-
- return ret;
-}
-
-int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
- struct drm_mm_node *node, u64 size,
- unsigned alignment)
-{
- return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
- alignment, 0, U64_MAX);
-}
-
-void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
- struct drm_mm_node *node)
-{
- mutex_lock(&dev_priv->mm.stolen_lock);
- drm_mm_remove_node(node);
- mutex_unlock(&dev_priv->mm.stolen_lock);
-}
-
-static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
- struct resource *dsm)
-{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct resource *r;
-
- if (dsm->start == 0 || dsm->end <= dsm->start)
- return -EINVAL;
-
- /*
- * TODO: We have yet to encounter the case where the GTT wasn't at the
- * end of stolen. With that assumption we could simplify this.
- */
-
- /* Make sure we don't clobber the GTT if it's within stolen memory */
- if (INTEL_GEN(dev_priv) <= 4 &&
- !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
- struct resource stolen[2] = {*dsm, *dsm};
- struct resource ggtt_res;
- resource_size_t ggtt_start;
-
- ggtt_start = I915_READ(PGTBL_CTL);
- if (IS_GEN(dev_priv, 4))
- ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) |
- (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28;
- else
- ggtt_start &= PGTBL_ADDRESS_LO_MASK;
-
- ggtt_res =
- (struct resource) DEFINE_RES_MEM(ggtt_start,
- ggtt_total_entries(ggtt) * 4);
-
- if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end)
- stolen[0].end = ggtt_res.start;
- if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end)
- stolen[1].start = ggtt_res.end;
-
- /* Pick the larger of the two chunks */
- if (resource_size(&stolen[0]) > resource_size(&stolen[1]))
- *dsm = stolen[0];
- else
- *dsm = stolen[1];
-
- if (stolen[0].start != stolen[1].start ||
- stolen[0].end != stolen[1].end) {
- DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res);
- DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm);
- }
- }
-
- /*
- * Verify that nothing else uses this physical address. Stolen
- * memory should be reserved by the BIOS and hidden from the
- * kernel. So if the region is already marked as busy, something
- * is seriously wrong.
- */
- r = devm_request_mem_region(dev_priv->drm.dev, dsm->start,
- resource_size(dsm),
- "Graphics Stolen Memory");
- if (r == NULL) {
- /*
- * One more attempt but this time requesting region from
- * start + 1, as we have seen that this resolves the region
- * conflict with the PCI Bus.
- * This is a BIOS w/a: Some BIOS wrap stolen in the root
- * PCI bus, but have an off-by-one error. Hence retry the
- * reservation starting from 1 instead of 0.
- * There are also BIOSes with an off-by-one on the other end.
- */
- r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1,
- resource_size(dsm) - 2,
- "Graphics Stolen Memory");
- /*
- * GEN3 firmware likes to smash pci bridges into the stolen
- * range. Apparently this works.
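- * Hence gen3 is the one platform for which we tolerate the
- * failed reservation below and carry on regardless.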
- */ - if (r == NULL && !IS_GEN(dev_priv, 3)) { - DRM_ERROR("conflict detected with stolen region: %pR\n", - dsm); - - return -EBUSY; - } - } - - return 0; -} - -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) -{ - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return; - - drm_mm_takedown(&dev_priv->mm.stolen); -} - -static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(IS_GM45(dev_priv) ? - CTG_STOLEN_RESERVED : - ELK_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; - - DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", - IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); - - if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) - return; - - /* - * Whether ILK really reuses the ELK register for this is unclear. - * Let's see if we catch anyone with this supposedly enabled on ILK. - */ - WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n", - reg_val); - - if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) - return; - - *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; - WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); - - *size = stolen_top - *base; -} - -static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) { - case GEN6_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN6_STOLEN_RESERVED_512K: - *size = 512 * 1024; - break; - case GEN6_STOLEN_RESERVED_256K: - *size = 256 * 1024; - break; - case GEN6_STOLEN_RESERVED_128K: - *size = 128 * 1024; - break; - default: - *size = 1024 * 1024; - MISSING_CASE(reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK); - } -} - -static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { - default: - MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); - /* fall through */ - case GEN7_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - } - - /* - * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the - * reserved location as (top - size). 
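- * A worked example with made-up numbers: if stolen ends at
- * stolen_top = 0x80000000 and the reservation is 1MiB, the
- * computed base below is 0x80000000 - 0x100000 = 0x7ff00000.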
- */ - *base = stolen_top - *size; -} - -static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { - case GEN7_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN7_STOLEN_RESERVED_256K: - *size = 256 * 1024; - break; - default: - *size = 1024 * 1024; - MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); - } -} - -static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { - case GEN8_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_2M: - *size = 2 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_4M: - *size = 4 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_8M: - *size = 8 * 1024 * 1024; - break; - default: - *size = 8 * 1024 * 1024; - MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); - } -} - -static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK)) - return; - - *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - *size = stolen_top - *base; -} - -static void icl_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u64 reg_val = I915_READ64(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); - - *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { - case GEN8_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_2M: - *size = 2 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_4M: - *size = 4 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_8M: - *size = 8 * 1024 * 1024; - break; - default: - *size = 8 * 1024 * 1024; - MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); - } -} - -int i915_gem_init_stolen(struct drm_i915_private *dev_priv) -{ - resource_size_t reserved_base, stolen_top; - resource_size_t reserved_total, reserved_size; - - mutex_init(&dev_priv->mm.stolen_lock); - - if (intel_vgpu_active(dev_priv)) { - DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); - return 0; - } - - if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { - DRM_INFO("DMAR active, disabling use of stolen memory\n"); - return 0; - } - - if (resource_size(&intel_graphics_stolen_res) == 0) - return 0; - - dev_priv->dsm = intel_graphics_stolen_res; - - if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) - return 0; - - GEM_BUG_ON(dev_priv->dsm.start == 0); - GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); - - stolen_top = dev_priv->dsm.end + 1; - reserved_base = stolen_top; - reserved_size = 0; - - switch (INTEL_GEN(dev_priv)) { - 
case 2: - case 3: - break; - case 4: - if (!IS_G4X(dev_priv)) - break; - /* fall through */ - case 5: - g4x_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 6: - gen6_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 7: - if (IS_VALLEYVIEW(dev_priv)) - vlv_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - else - gen7_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 8: - case 9: - case 10: - if (IS_LP(dev_priv)) - chv_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - else - bdw_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 11: - default: - icl_get_stolen_reserved(dev_priv, &reserved_base, - &reserved_size); - break; - } - - /* - * Our expectation is that the reserved space is at the top of the - * stolen region and *never* at the bottom. If we see !reserved_base, - * it likely means we failed to read the registers correctly. - */ - if (!reserved_base) { - DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n", - &reserved_base, &reserved_size); - reserved_base = stolen_top; - reserved_size = 0; - } - - dev_priv->dsm_reserved = - (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); - - if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { - DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", - &dev_priv->dsm_reserved, &dev_priv->dsm); - return 0; - } - - /* It is possible for the reserved area to end before the end of stolen - * memory, so just consider the start. */ - reserved_total = stolen_top - reserved_base; - - DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&dev_priv->dsm) >> 10, - ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); - - dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total; - - /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); - - return 0; -} - -static struct sg_table * -i915_pages_create_for_stolen(struct drm_device *dev, - resource_size_t offset, resource_size_t size) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct sg_table *st; - struct scatterlist *sg; - - GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); - - /* We hide that we have no struct page backing our stolen object - * by wrapping the contiguous physical allocation with a fake - * dma mapping in a single scatterlist. 
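- * (The single sg entry below therefore carries only a dma
- * address and length; there is no struct page to look up.)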
- */ - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (st == NULL) - return ERR_PTR(-ENOMEM); - - if (sg_alloc_table(st, 1, GFP_KERNEL)) { - kfree(st); - return ERR_PTR(-ENOMEM); - } - - sg = st->sgl; - sg->offset = 0; - sg->length = size; - - sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; - sg_dma_len(sg) = size; - - return st; -} - -static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) -{ - struct sg_table *pages = - i915_pages_create_for_stolen(obj->base.dev, - obj->stolen->start, - obj->stolen->size); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - __i915_gem_object_set_pages(obj, pages, obj->stolen->size); - - return 0; -} - -static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - /* Should only be called from i915_gem_object_release_stolen() */ - sg_free_table(pages); - kfree(pages); -} - -static void -i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); - - GEM_BUG_ON(!stolen); - - __i915_gem_object_unpin_pages(obj); - - i915_gem_stolen_remove_node(dev_priv, stolen); - kfree(stolen); -} - -static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { - .get_pages = i915_gem_object_get_pages_stolen, - .put_pages = i915_gem_object_put_pages_stolen, - .release = i915_gem_object_release_stolen, -}; - -static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - struct drm_mm_node *stolen) -{ - struct drm_i915_gem_object *obj; - unsigned int cache_level; - - obj = i915_gem_object_alloc(); - if (obj == NULL) - return NULL; - - drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops); - - obj->stolen = stolen; - obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; - cache_level = HAS_LLC(dev_priv) ? 
I915_CACHE_LLC : I915_CACHE_NONE; - i915_gem_object_set_cache_coherency(obj, cache_level); - - if (i915_gem_object_pin_pages(obj)) - goto cleanup; - - return obj; - -cleanup: - i915_gem_object_free(obj); - return NULL; -} - -struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - resource_size_t size) -{ - struct drm_i915_gem_object *obj; - struct drm_mm_node *stolen; - int ret; - - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - if (size == 0) - return NULL; - - stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); - if (!stolen) - return NULL; - - ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); - if (ret) { - kfree(stolen); - return NULL; - } - - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj) - return obj; - - i915_gem_stolen_remove_node(dev_priv, stolen); - kfree(stolen); - return NULL; -} - -struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - resource_size_t stolen_offset, - resource_size_t gtt_offset, - resource_size_t size) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_object *obj; - struct drm_mm_node *stolen; - struct i915_vma *vma; - int ret; - - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", - &stolen_offset, >t_offset, &size); - - /* KISS and expect everything to be page-aligned */ - if (WARN_ON(size == 0) || - WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || - WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) - return NULL; - - stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); - if (!stolen) - return NULL; - - stolen->start = stolen_offset; - stolen->size = size; - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen); - mutex_unlock(&dev_priv->mm.stolen_lock); - if (ret) { - DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); - kfree(stolen); - return NULL; - } - - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj == NULL) { - DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); - i915_gem_stolen_remove_node(dev_priv, stolen); - kfree(stolen); - return NULL; - } - - /* Some objects just need physical mem from stolen space */ - if (gtt_offset == I915_GTT_OFFSET_NONE) - return obj; - - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto err; - - vma = i915_vma_instance(obj, &ggtt->vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_pages; - } - - /* To simplify the initialisation sequence between KMS and GTT, - * we allow construction of the stolen object prior to - * setting up the GTT space. The actual reservation will occur - * later. 
- */
- ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
- size, gtt_offset, obj->cache_level,
- 0);
- if (ret) {
- DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
- goto err_pages;
- }
-
- GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-
- vma->pages = obj->mm.pages;
- vma->flags |= I915_VMA_GLOBAL_BIND;
- __i915_vma_set_map_and_fenceable(vma);
-
- mutex_lock(&ggtt->vm.mutex);
- list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
- mutex_unlock(&ggtt->vm.mutex);
-
- spin_lock(&dev_priv->mm.obj_lock);
- list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
- obj->bind_count++;
- spin_unlock(&dev_priv->mm.obj_lock);
-
- return obj;
-
-err_pages:
- i915_gem_object_unpin_pages(obj);
-err:
- i915_gem_object_put(obj);
- return NULL;
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
deleted file mode 100644
index 86d6d92ccbc9..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ /dev/null
@@ -1,460 +0,0 @@
-/*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt
- *
- */
-
-#include
-#include
-#include
-
-#include "gem/i915_gem_ioctls.h"
-
-#include "i915_drv.h"
-
-/**
- * DOC: buffer object tiling
- *
- * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() are the userspace
- * interface to declare fence register requirements.
- *
- * In principle GEM doesn't care at all about the internal data layout of an
- * object, and hence it also doesn't care about tiling or swizzling. There are
- * two exceptions:
- *
- * - For X and Y tiling the hardware provides detilers for CPU access, so called
- * fences. Since there's only a limited amount of them the kernel must manage
- * these, and therefore userspace must tell the kernel the object tiling if it
- * wants to use fences for detiling.
- * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which
- * depends upon the physical page frame number. When swapping such objects the
- * page frame number might change and the kernel must be able to fix this up
- * and hence needs to know the tiling. Note that on a subset of platforms with
- * asymmetric memory channel population the swizzling pattern changes in an
- * unknown way, and for those the kernel simply forbids swapping completely.
- *
- * Since neither of these applies to the new tiling layouts on modern platforms,
- * like W, Ys and Yf tiling, GEM only allows object tiling to be set to X or Y
- * tiled. Anything else can be handled in userspace entirely without the
- * kernel's involvement.
- */
-
-/**
- * i915_gem_fence_size - required global GTT size for a fence
- * @i915: i915 device
- * @size: object size
- * @tiling: tiling mode
- * @stride: tiling stride
- *
- * Return the required global GTT size for a fence (view of a tiled object),
- * taking into account potential fence register mapping.
- */
-u32 i915_gem_fence_size(struct drm_i915_private *i915,
- u32 size, unsigned int tiling, unsigned int stride)
-{
- u32 ggtt_size;
-
- GEM_BUG_ON(!size);
-
- if (tiling == I915_TILING_NONE)
- return size;
-
- GEM_BUG_ON(!stride);
-
- if (INTEL_GEN(i915) >= 4) {
- stride *= i915_gem_tile_height(tiling);
- GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE));
- return roundup(size, stride);
- }
-
- /* Previous chips need a power-of-two fence region when tiling */
- if (IS_GEN(i915, 3))
- ggtt_size = 1024*1024;
- else
- ggtt_size = 512*1024;
-
- while (ggtt_size < size)
- ggtt_size <<= 1;
-
- return ggtt_size;
-}
-
-/**
- * i915_gem_fence_alignment - required global GTT alignment for a fence
- * @i915: i915 device
- * @size: object size
- * @tiling: tiling mode
- * @stride: tiling stride
- *
- * Return the required global GTT alignment for a fence (a view of a tiled
- * object), taking into account potential fence register mapping.
- */
-u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
- unsigned int tiling, unsigned int stride)
-{
- GEM_BUG_ON(!size);
-
- /*
- * Minimum alignment is 4k (GTT page size), but might be greater
- * if a fence register is needed for the object.
- */
- if (tiling == I915_TILING_NONE)
- return I915_GTT_MIN_ALIGNMENT;
-
- if (INTEL_GEN(i915) >= 4)
- return I965_FENCE_PAGE;
-
- /*
- * Previous chips need to be aligned to the size of the smallest
- * fence register that can contain the object.
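- * A worked example with hypothetical sizes: a 1.5MiB tiled
- * object on gen3 starts from a 1MiB fence region and doubles
- * until it fits, so i915_gem_fence_size() returns 2MiB and
- * that becomes the required alignment.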
- */
- return i915_gem_fence_size(i915, size, tiling, stride);
-}
-
-/* Check pitch constraints for all chips & tiling formats */
-static bool
-i915_tiling_ok(struct drm_i915_gem_object *obj,
- unsigned int tiling, unsigned int stride)
-{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- unsigned int tile_width;
-
- /* Linear is always fine */
- if (tiling == I915_TILING_NONE)
- return true;
-
- if (tiling > I915_TILING_LAST)
- return false;
-
- /* check maximum stride & object size */
- /* i965+ stores the end address of the gtt mapping in the fence
- * reg, so don't bother to check the size */
- if (INTEL_GEN(i915) >= 7) {
- if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
- return false;
- } else if (INTEL_GEN(i915) >= 4) {
- if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
- return false;
- } else {
- if (stride > 8192)
- return false;
-
- if (!is_power_of_2(stride))
- return false;
- }
-
- if (IS_GEN(i915, 2) ||
- (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
- tile_width = 128;
- else
- tile_width = 512;
-
- if (!stride || !IS_ALIGNED(stride, tile_width))
- return false;
-
- return true;
-}
-
-static bool i915_vma_fence_prepare(struct i915_vma *vma,
- int tiling_mode, unsigned int stride)
-{
- struct drm_i915_private *i915 = vma->vm->i915;
- u32 size, alignment;
-
- if (!i915_vma_is_map_and_fenceable(vma))
- return true;
-
- size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride);
- if (vma->node.size < size)
- return false;
-
- alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride);
- if (!IS_ALIGNED(vma->node.start, alignment))
- return false;
-
- return true;
-}
-
-/* Make the current GTT allocation valid for the change in tiling. */
-static int
-i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
- int tiling_mode, unsigned int stride)
-{
- struct i915_vma *vma;
- int ret;
-
- if (tiling_mode == I915_TILING_NONE)
- return 0;
-
- for_each_ggtt_vma(vma, obj) {
- if (i915_vma_fence_prepare(vma, tiling_mode, stride))
- continue;
-
- ret = i915_vma_unbind(vma);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-int
-i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
- unsigned int tiling, unsigned int stride)
-{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_vma *vma;
- int err;
-
- /* Make sure we don't cross-contaminate obj->tiling_and_stride */
- BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
-
- GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
- GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
- lockdep_assert_held(&i915->drm.struct_mutex);
-
- if ((tiling | stride) == obj->tiling_and_stride)
- return 0;
-
- if (i915_gem_object_is_framebuffer(obj))
- return -EBUSY;
-
- /* We need to rebind the object if its current allocation
- * no longer meets the alignment restrictions for its new
- * tiling mode. Otherwise we can just leave it alone, but
- * need to ensure that any fence register is updated before
- * the next fenced (either through the GTT or by the BLT unit
- * on older GPUs) access.
- *
- * After updating the tiling parameters, we then flag whether
- * we need to update an associated fence register. Note this
- * has to also include the unfenced register the GPU uses
- * whilst executing a fenced command for an untiled object.
- */
-
- err = i915_gem_object_fence_prepare(obj, tiling, stride);
- if (err)
- return err;
-
- i915_gem_object_lock(obj);
- if (i915_gem_object_is_framebuffer(obj)) {
- i915_gem_object_unlock(obj);
- return -EBUSY;
- }
-
- /* If the memory has unknown (i.e.
varying) swizzling, we pin the - * pages to prevent them being swapped out and causing corruption - * due to the change in swizzling. - */ - mutex_lock(&obj->mm.lock); - if (i915_gem_object_has_pages(obj) && - obj->mm.madv == I915_MADV_WILLNEED && - i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - if (tiling == I915_TILING_NONE) { - GEM_BUG_ON(!obj->mm.quirked); - __i915_gem_object_unpin_pages(obj); - obj->mm.quirked = false; - } - if (!i915_gem_object_is_tiled(obj)) { - GEM_BUG_ON(obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; - } - } - mutex_unlock(&obj->mm.lock); - - for_each_ggtt_vma(vma, obj) { - vma->fence_size = - i915_gem_fence_size(i915, vma->size, tiling, stride); - vma->fence_alignment = - i915_gem_fence_alignment(i915, - vma->size, tiling, stride); - - if (vma->fence) - vma->fence->dirty = true; - } - - obj->tiling_and_stride = tiling | stride; - i915_gem_object_unlock(obj); - - /* Force the fence to be reacquired for GTT access */ - i915_gem_object_release_mmap(obj); - - /* Try to preallocate memory required to save swizzling on put-pages */ - if (i915_gem_object_needs_bit17_swizzle(obj)) { - if (!obj->bit_17) { - obj->bit_17 = bitmap_zalloc(obj->base.size >> PAGE_SHIFT, - GFP_KERNEL); - } - } else { - bitmap_free(obj->bit_17); - obj->bit_17 = NULL; - } - - return 0; -} - -/** - * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode - * @dev: DRM device - * @data: data pointer for the ioctl - * @file: DRM file for the ioctl call - * - * Sets the tiling mode of an object, returning the required swizzling of - * bit 6 of addresses in the object. - * - * Called by the user via ioctl. - * - * Returns: - * Zero on success, negative errno on failure. - */ -int -i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_set_tiling *args = data; - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) - return -ENOENT; - - /* - * The tiling mode of proxy objects is handled by its generator, and - * not allowed to be changed by userspace. - */ - if (i915_gem_object_is_proxy(obj)) { - err = -ENXIO; - goto err; - } - - if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) { - err = -EINVAL; - goto err; - } - - if (args->tiling_mode == I915_TILING_NONE) { - args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - args->stride = 0; - } else { - if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; - else - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; - - /* Hide bit 17 swizzling from the user. This prevents old Mesa - * from aborting the application on sw fallbacks to bit 17, - * and we use the pread/pwrite bit17 paths to swizzle for it. - * If there was a user that was relying on the swizzle - * information for drm_intel_bo_map()ed reads/writes this would - * break it, but we don't have any of those. - */ - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9; - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - - /* If we can't handle the swizzling, make it untiled. 
*/ - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { - args->tiling_mode = I915_TILING_NONE; - args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - args->stride = 0; - } - } - - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); - - /* We have to maintain this existing ABI... */ - args->stride = i915_gem_object_get_stride(obj); - args->tiling_mode = i915_gem_object_get_tiling(obj); - -err: - i915_gem_object_put(obj); - return err; -} - -/** - * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode - * @dev: DRM device - * @data: data pointer for the ioctl - * @file: DRM file for the ioctl call - * - * Returns the current tiling mode and required bit 6 swizzling for the object. - * - * Called by the user via ioctl. - * - * Returns: - * Zero on success, negative errno on failure. - */ -int -i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_get_tiling *args = data; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - int err = -ENOENT; - - rcu_read_lock(); - obj = i915_gem_object_lookup_rcu(file, args->handle); - if (obj) { - args->tiling_mode = - READ_ONCE(obj->tiling_and_stride) & TILING_MASK; - err = 0; - } - rcu_read_unlock(); - if (unlikely(err)) - return err; - - switch (args->tiling_mode) { - case I915_TILING_X: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; - break; - case I915_TILING_Y: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; - break; - default: - case I915_TILING_NONE: - args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - break; - } - - /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ - if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) - args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN; - else - args->phys_swizzle_mode = args->swizzle_mode; - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9; - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - - return 0; -} diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c deleted file mode 100644 index 2c1b6bb7a040..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ /dev/null @@ -1,851 +0,0 @@ -/* - * Copyright © 2012-2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#include "gem/i915_gem_ioctls.h"
-
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-
-struct i915_mm_struct {
- struct mm_struct *mm;
- struct drm_i915_private *i915;
- struct i915_mmu_notifier *mn;
- struct hlist_node node;
- struct kref kref;
- struct work_struct work;
-};
-
-#if defined(CONFIG_MMU_NOTIFIER)
-#include
-
-struct i915_mmu_notifier {
- spinlock_t lock;
- struct hlist_node node;
- struct mmu_notifier mn;
- struct rb_root_cached objects;
- struct i915_mm_struct *mm;
-};
-
-struct i915_mmu_object {
- struct i915_mmu_notifier *mn;
- struct drm_i915_gem_object *obj;
- struct interval_tree_node it;
-};
-
-static void add_object(struct i915_mmu_object *mo)
-{
- GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
- interval_tree_insert(&mo->it, &mo->mn->objects);
-}
-
-static void del_object(struct i915_mmu_object *mo)
-{
- if (RB_EMPTY_NODE(&mo->it.rb))
- return;
-
- interval_tree_remove(&mo->it, &mo->mn->objects);
- RB_CLEAR_NODE(&mo->it.rb);
-}
-
-static void
-__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
-{
- struct i915_mmu_object *mo = obj->userptr.mmu_object;
-
- /*
- * During mm_invalidate_range we need to cancel any userptr that
- * overlaps the range being invalidated. Doing so requires the
- * struct_mutex, and that risks recursion. In order to cause
- * recursion, the user must alias the userptr address space with
- * a GTT mmapping (possible with a MAP_FIXED) - then when we have
- * to invalidate that mmapping, mm_invalidate_range is called with
- * the userptr address *and* the struct_mutex held. To prevent that
- * we set a flag under the i915_mmu_notifier spinlock to indicate
- * whether this object is valid.
- */
- if (!mo)
- return;
-
- spin_lock(&mo->mn->lock);
- if (value)
- add_object(mo);
- else
- del_object(mo);
- spin_unlock(&mo->mn->lock);
-}
-
-static int
-userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
- const struct mmu_notifier_range *range)
-{
- struct i915_mmu_notifier *mn =
- container_of(_mn, struct i915_mmu_notifier, mn);
- struct interval_tree_node *it;
- struct mutex *unlock = NULL;
- unsigned long end;
- int ret = 0;
-
- if (RB_EMPTY_ROOT(&mn->objects.rb_root))
- return 0;
-
- /* interval ranges are inclusive, but invalidate range is exclusive */
- end = range->end - 1;
-
- spin_lock(&mn->lock);
- it = interval_tree_iter_first(&mn->objects, range->start, end);
- while (it) {
- struct drm_i915_gem_object *obj;
-
- if (!mmu_notifier_range_blockable(range)) {
- ret = -EAGAIN;
- break;
- }
-
- /*
- * The mmu_object is released late when destroying the
- * GEM object so it is entirely possible to gain a
- * reference on an object in the process of being freed
- * since our serialisation is via the spinlock and not
- * the struct_mutex - and consequently use it after it
- * is freed and then double free it. To prevent that
- * use-after-free we only acquire a reference on the
- * object if it is not in the process of being destroyed.
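- * That is what the kref_get_unless_zero() below checks for: a
- * zero refcount means the object is already being freed, so we
- * simply skip it.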
- */ - obj = container_of(it, struct i915_mmu_object, it)->obj; - if (!kref_get_unless_zero(&obj->base.refcount)) { - it = interval_tree_iter_next(it, range->start, end); - continue; - } - spin_unlock(&mn->lock); - - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - - ret = i915_gem_object_unbind(obj); - if (ret == 0) - ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); - i915_gem_object_put(obj); - if (ret) - goto unlock; - - spin_lock(&mn->lock); - - /* - * As we do not (yet) protect the mmu from concurrent insertion - * over this range, there is no guarantee that this search will - * terminate given a pathologic workload. - */ - it = interval_tree_iter_first(&mn->objects, range->start, end); - } - spin_unlock(&mn->lock); - -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - - return ret; - -} - -static const struct mmu_notifier_ops i915_gem_userptr_notifier = { - .invalidate_range_start = userptr_mn_invalidate_range_start, -}; - -static struct i915_mmu_notifier * -i915_mmu_notifier_create(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn; - - mn = kmalloc(sizeof(*mn), GFP_KERNEL); - if (mn == NULL) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&mn->lock); - mn->mn.ops = &i915_gem_userptr_notifier; - mn->objects = RB_ROOT_CACHED; - mn->mm = mm; - - return mn; -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ - struct i915_mmu_object *mo; - - mo = fetch_and_zero(&obj->userptr.mmu_object); - if (!mo) - return; - - spin_lock(&mo->mn->lock); - del_object(mo); - spin_unlock(&mo->mn->lock); - kfree(mo); -} - -static struct i915_mmu_notifier * -i915_mmu_notifier_find(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn; - int err = 0; - - mn = mm->mn; - if (mn) - return mn; - - mn = i915_mmu_notifier_create(mm); - if (IS_ERR(mn)) - err = PTR_ERR(mn); - - down_write(&mm->mm->mmap_sem); - mutex_lock(&mm->i915->mm_lock); - if (mm->mn == NULL && !err) { - /* Protected by mmap_sem (write-lock) */ - err = __mmu_notifier_register(&mn->mn, mm->mm); - if (!err) { - /* Protected by mm_lock */ - mm->mn = fetch_and_zero(&mn); - } - } else if (mm->mn) { - /* - * Someone else raced and successfully installed the mmu - * notifier, we can cancel our own errors. - */ - err = 0; - } - mutex_unlock(&mm->i915->mm_lock); - up_write(&mm->mm->mmap_sem); - - if (mn && !IS_ERR(mn)) - kfree(mn); - - return err ? ERR_PTR(err) : mm->mn; -} - -static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) -{ - struct i915_mmu_notifier *mn; - struct i915_mmu_object *mo; - - if (flags & I915_USERPTR_UNSYNCHRONIZED) - return capable(CAP_SYS_ADMIN) ? 
0 : -EPERM; - - if (WARN_ON(obj->userptr.mm == NULL)) - return -EINVAL; - - mn = i915_mmu_notifier_find(obj->userptr.mm); - if (IS_ERR(mn)) - return PTR_ERR(mn); - - mo = kzalloc(sizeof(*mo), GFP_KERNEL); - if (!mo) - return -ENOMEM; - - mo->mn = mn; - mo->obj = obj; - mo->it.start = obj->userptr.ptr; - mo->it.last = obj->userptr.ptr + obj->base.size - 1; - RB_CLEAR_NODE(&mo->it.rb); - - obj->userptr.mmu_object = mo; - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ - if (mn == NULL) - return; - - mmu_notifier_unregister(&mn->mn, mm); - kfree(mn); -} - -#else - -static void -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) -{ -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ -} - -static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) -{ - if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0) - return -ENODEV; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ -} - -#endif - -static struct i915_mm_struct * -__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real) -{ - struct i915_mm_struct *mm; - - /* Protected by dev_priv->mm_lock */ - hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real) - if (mm->mm == real) - return mm; - - return NULL; -} - -static int -i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_mm_struct *mm; - int ret = 0; - - /* During release of the GEM object we hold the struct_mutex. This - * precludes us from calling mmput() at that time as that may be - * the last reference and so call exit_mmap(). exit_mmap() will - * attempt to reap the vma, and if we were holding a GTT mmap - * would then call drm_gem_vm_close() and attempt to reacquire - * the struct mutex. So in order to avoid that recursion, we have - * to defer releasing the mm reference until after we drop the - * struct_mutex, i.e. we need to schedule a worker to do the clean - * up. 
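- * (That is why __i915_mm_struct_free() below hands the final
- * release off to __i915_mm_struct_free__worker() on the
- * mm.userptr_wq workqueue.)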
- */ - mutex_lock(&dev_priv->mm_lock); - mm = __i915_mm_struct_find(dev_priv, current->mm); - if (mm == NULL) { - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (mm == NULL) { - ret = -ENOMEM; - goto out; - } - - kref_init(&mm->kref); - mm->i915 = to_i915(obj->base.dev); - - mm->mm = current->mm; - mmgrab(current->mm); - - mm->mn = NULL; - - /* Protected by dev_priv->mm_lock */ - hash_add(dev_priv->mm_structs, - &mm->node, (unsigned long)mm->mm); - } else - kref_get(&mm->kref); - - obj->userptr.mm = mm; -out: - mutex_unlock(&dev_priv->mm_lock); - return ret; -} - -static void -__i915_mm_struct_free__worker(struct work_struct *work) -{ - struct i915_mm_struct *mm = container_of(work, typeof(*mm), work); - i915_mmu_notifier_free(mm->mn, mm->mm); - mmdrop(mm->mm); - kfree(mm); -} - -static void -__i915_mm_struct_free(struct kref *kref) -{ - struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref); - - /* Protected by dev_priv->mm_lock */ - hash_del(&mm->node); - mutex_unlock(&mm->i915->mm_lock); - - INIT_WORK(&mm->work, __i915_mm_struct_free__worker); - queue_work(mm->i915->mm.userptr_wq, &mm->work); -} - -static void -i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj) -{ - if (obj->userptr.mm == NULL) - return; - - kref_put_mutex(&obj->userptr.mm->kref, - __i915_mm_struct_free, - &to_i915(obj->base.dev)->mm_lock); - obj->userptr.mm = NULL; -} - -struct get_pages_work { - struct work_struct work; - struct drm_i915_gem_object *obj; - struct task_struct *task; -}; - -static struct sg_table * -__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, - struct page **pvec, int num_pages) -{ - unsigned int max_segment = i915_sg_segment_size(); - struct sg_table *st; - unsigned int sg_page_sizes; - int ret; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); - -alloc_table: - ret = __sg_alloc_table_from_pages(st, pvec, num_pages, - 0, num_pages << PAGE_SHIFT, - max_segment, - GFP_KERNEL); - if (ret) { - kfree(st); - return ERR_PTR(ret); - } - - ret = i915_gem_gtt_prepare_pages(obj, st); - if (ret) { - sg_free_table(st); - - if (max_segment > PAGE_SIZE) { - max_segment = PAGE_SIZE; - goto alloc_table; - } - - kfree(st); - return ERR_PTR(ret); - } - - sg_page_sizes = i915_sg_page_sizes(st->sgl); - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return st; -} - -static void -__i915_gem_userptr_get_pages_worker(struct work_struct *_work) -{ - struct get_pages_work *work = container_of(_work, typeof(*work), work); - struct drm_i915_gem_object *obj = work->obj; - const int npages = obj->base.size >> PAGE_SHIFT; - struct page **pvec; - int pinned, ret; - - ret = -ENOMEM; - pinned = 0; - - pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (pvec != NULL) { - struct mm_struct *mm = obj->userptr.mm->mm; - unsigned int flags = 0; - - if (!i915_gem_object_is_readonly(obj)) - flags |= FOLL_WRITE; - - ret = -EFAULT; - if (mmget_not_zero(mm)) { - down_read(&mm->mmap_sem); - while (pinned < npages) { - ret = get_user_pages_remote - (work->task, mm, - obj->userptr.ptr + pinned * PAGE_SIZE, - npages - pinned, - flags, - pvec + pinned, NULL, NULL); - if (ret < 0) - break; - - pinned += ret; - } - up_read(&mm->mmap_sem); - mmput(mm); - } - } - - mutex_lock(&obj->mm.lock); - if (obj->userptr.work == &work->work) { - struct sg_table *pages = ERR_PTR(ret); - - if (pinned == npages) { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, - npages); - if (!IS_ERR(pages)) { - pinned = 0; - pages = NULL; - } - } - - obj->userptr.work = 
ERR_CAST(pages); - if (IS_ERR(pages)) - __i915_gem_userptr_set_active(obj, false); - } - mutex_unlock(&obj->mm.lock); - - release_pages(pvec, pinned); - kvfree(pvec); - - i915_gem_object_put(obj); - put_task_struct(work->task); - kfree(work); -} - -static struct sg_table * -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) -{ - struct get_pages_work *work; - - /* Spawn a worker so that we can acquire the - * user pages without holding our mutex. Access - * to the user pages requires mmap_sem, and we have - * a strict lock ordering of mmap_sem, struct_mutex - - * we already hold struct_mutex here and so cannot - * call gup without encountering a lock inversion. - * - * Userspace will keep on repeating the operation - * (thanks to EAGAIN) until either we hit the fast - * path or the worker completes. If the worker is - * cancelled or superseded, the task is still run - * but the results ignored. (This leads to - * complications that we may have a stray object - * refcount that we need to be wary of when - * checking for existing objects during creation.) - * If the worker encounters an error, it reports - * that error back to this function through - * obj->userptr.work = ERR_PTR. - */ - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (work == NULL) - return ERR_PTR(-ENOMEM); - - obj->userptr.work = &work->work; - - work->obj = i915_gem_object_get(obj); - - work->task = current; - get_task_struct(work->task); - - INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); - queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work); - - return ERR_PTR(-EAGAIN); -} - -static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) -{ - const int num_pages = obj->base.size >> PAGE_SHIFT; - struct mm_struct *mm = obj->userptr.mm->mm; - struct page **pvec; - struct sg_table *pages; - bool active; - int pinned; - - /* If userspace should engineer that these pages are replaced in - * the vma between us binding this page into the GTT and completion - * of rendering... Their loss. If they change the mapping of their - * pages they need to create a new bo to point to the new vma. - * - * However, that still leaves open the possibility of the vma - * being copied upon fork. Which falls under the same userspace - * synchronisation issue as a regular bo, except that this time - * the process may not be expecting that a particular piece of - * memory is tied to the GPU. - * - * Fortunately, we can hook into the mmu_notifier in order to - * discard the page references prior to anything nasty happening - * to the vma (discard or cloning) which should prevent the more - * egregious cases from causing harm. 
- */ - - if (obj->userptr.work) { - /* active flag should still be held for the pending work */ - if (IS_ERR(obj->userptr.work)) - return PTR_ERR(obj->userptr.work); - else - return -EAGAIN; - } - - pvec = NULL; - pinned = 0; - - if (mm == current->mm) { - pvec = kvmalloc_array(num_pages, sizeof(struct page *), - GFP_KERNEL | - __GFP_NORETRY | - __GFP_NOWARN); - if (pvec) /* defer to worker if malloc fails */ - pinned = __get_user_pages_fast(obj->userptr.ptr, - num_pages, - !i915_gem_object_is_readonly(obj), - pvec); - } - - active = false; - if (pinned < 0) { - pages = ERR_PTR(pinned); - pinned = 0; - } else if (pinned < num_pages) { - pages = __i915_gem_userptr_get_pages_schedule(obj); - active = pages == ERR_PTR(-EAGAIN); - } else { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages); - active = !IS_ERR(pages); - } - if (active) - __i915_gem_userptr_set_active(obj, true); - - if (IS_ERR(pages)) - release_pages(pvec, pinned); - kvfree(pvec); - - return PTR_ERR_OR_ZERO(pages); -} - -static void -i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - struct sgt_iter sgt_iter; - struct page *page; - - /* Cancel any inflight work and force them to restart their gup */ - obj->userptr.work = NULL; - __i915_gem_userptr_set_active(obj, false); - if (!pages) - return; - - __i915_gem_object_release_shmem(obj, pages, true); - i915_gem_gtt_finish_pages(obj, pages); - - for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) - set_page_dirty(page); - - mark_page_accessed(page); - put_page(page); - } - obj->mm.dirty = false; - - sg_free_table(pages); - kfree(pages); -} - -static void -i915_gem_userptr_release(struct drm_i915_gem_object *obj) -{ - i915_gem_userptr_release__mmu_notifier(obj); - i915_gem_userptr_release__mm_struct(obj); -} - -static int -i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) -{ - if (obj->userptr.mmu_object) - return 0; - - return i915_gem_userptr_init__mmu_notifier(obj, 0); -} - -static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE | - I915_GEM_OBJECT_ASYNC_CANCEL, - .get_pages = i915_gem_userptr_get_pages, - .put_pages = i915_gem_userptr_put_pages, - .dmabuf_export = i915_gem_userptr_dmabuf_export, - .release = i915_gem_userptr_release, -}; - -/* - * Creates a new mm object that wraps some normal memory from the process - * context - user memory. - * - * We impose several restrictions upon the memory being mapped - * into the GPU. - * 1. It must be page aligned (both start/end addresses, i.e ptr and size). - * 2. It must be normal system memory, not a pointer into another map of IO - * space (e.g. it must not be a GTT mmapping of another object). - * 3. We only allow a bo as large as we could in theory map into the GTT, - * that is we limit the size to the total size of the GTT. - * 4. The bo is marked as being snoopable. The backing pages are left - * accessible directly by the CPU, but reads and writes by the GPU may - * incur the cost of a snoop (unless you have an LLC architecture). - * - * Synchronisation between multiple users and the GPU is left to userspace - * through the normal set-domain-ioctl. The kernel will enforce that the - * GPU relinquishes the VMA before it is returned back to the system - * i.e. upon free(), munmap() or process termination. 
However, the userspace - * malloc() library may not immediately relinquish the VMA after free() and - * instead reuse it whilst the GPU is still reading and writing to the VMA. - * Caveat emptor. - * - * Also note, that the object created here is not currently a "first class" - * object, in that several ioctls are banned. These are the CPU access - * ioctls: mmap(), pwrite and pread. In practice, you are expected to use - * direct access via your pointer rather than use those ioctls. Another - * restriction is that we do not allow userptr surfaces to be pinned to the - * hardware and so we reject any attempt to create a framebuffer out of a - * userptr. - * - * If you think this is a good interface to use to pass GPU memory between - * drivers, please use dma-buf instead. In fact, wherever possible use - * dma-buf instead. - */ -int -i915_gem_userptr_ioctl(struct drm_device *dev, - void *data, - struct drm_file *file) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_userptr *args = data; - struct drm_i915_gem_object *obj; - int ret; - u32 handle; - - if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) { - /* We cannot support coherent userptr objects on hw without - * LLC and broken snooping. - */ - return -ENODEV; - } - - if (args->flags & ~(I915_USERPTR_READ_ONLY | - I915_USERPTR_UNSYNCHRONIZED)) - return -EINVAL; - - if (!args->user_size) - return -EINVAL; - - if (offset_in_page(args->user_ptr | args->user_size)) - return -EINVAL; - - if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size)) - return -EFAULT; - - if (args->flags & I915_USERPTR_READ_ONLY) { - struct i915_hw_ppgtt *ppgtt; - - /* - * On almost all of the older hw, we cannot tell the GPU that - * a page is readonly. - */ - ppgtt = dev_priv->kernel_context->ppgtt; - if (!ppgtt || !ppgtt->vm.has_read_only) - return -ENODEV; - } - - obj = i915_gem_object_alloc(); - if (obj == NULL) - return -ENOMEM; - - drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops); - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->write_domain = I915_GEM_DOMAIN_CPU; - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - obj->userptr.ptr = args->user_ptr; - if (args->flags & I915_USERPTR_READ_ONLY) - i915_gem_object_set_readonly(obj); - - /* And keep a pointer to the current->mm for resolving the user pages - * at binding. This means that we need to hook into the mmu_notifier - * in order to detect if the mmu is destroyed. 
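
A user-space sketch of the contract spelled out above, assuming libdrm and the i915 uapi header (the helper name create_userptr_bo is hypothetical): the pointer and size are page aligned per restriction 1, and the memory is ordinary anonymous system memory per restriction 2:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <drm/i915_drm.h>
#include <xf86drm.h>

/* Wrap `size` bytes (a multiple of the page size) of plain malloc memory
 * in a GEM handle; posix_memalign() satisfies the page-alignment rule for
 * the pointer, the caller satisfies it for the size. */
static int create_userptr_bo(int fd, size_t size, uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;
	void *ptr;

	if (posix_memalign(&ptr, sysconf(_SC_PAGESIZE), size))
		return -1;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	arg.flags = 0; /* or I915_USERPTR_READ_ONLY on hw that supports it */

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
		free(ptr);
		return -1;
	}

	*handle = arg.handle;
	return 0;
}
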
- */ - ret = i915_gem_userptr_init__mm_struct(obj); - if (ret == 0) - ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags); - if (ret == 0) - ret = drm_gem_handle_create(file, &obj->base, &handle); - - /* drop reference from allocate - handle holds it now */ - i915_gem_object_put(obj); - if (ret) - return ret; - - args->handle = handle; - return 0; -} - -int i915_gem_init_userptr(struct drm_i915_private *dev_priv) -{ - mutex_init(&dev_priv->mm_lock); - hash_init(dev_priv->mm_structs); - - dev_priv->mm.userptr_wq = - alloc_workqueue("i915-userptr-acquire", - WQ_HIGHPRI | WQ_UNBOUND, - 0); - if (!dev_priv->mm.userptr_wq) - return -ENOMEM; - - return 0; -} - -void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv) -{ - destroy_workqueue(dev_priv->mm.userptr_wq); -} diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c deleted file mode 100644 index 888b7d3f04c3..000000000000 --- a/drivers/gpu/drm/i915/i915_gemfs.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include <linux/fs.h> -#include <linux/mount.h> -#include <linux/pagemap.h> - -#include "i915_drv.h" -#include "i915_gemfs.h" - -int i915_gemfs_init(struct drm_i915_private *i915) -{ - struct file_system_type *type; - struct vfsmount *gemfs; - - type = get_fs_type("tmpfs"); - if (!type) - return -ENODEV; - - gemfs = kern_mount(type); - if (IS_ERR(gemfs)) - return PTR_ERR(gemfs); - - /* - * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most - * likely 2M. Note that within_size may overallocate huge-pages, if say - * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under - * memory pressure shmem should split any huge-pages which can be - * shrunk. - */ - - if (has_transparent_hugepage()) { - struct super_block *sb = gemfs->mnt_sb; - /* FIXME: Disabled until we get W/A for read BW issue.
*/ - char options[] = "huge=never"; - int flags = 0; - int err; - - err = sb->s_op->remount_fs(sb, &flags, options); - if (err) { - kern_unmount(gemfs); - return err; - } - } - - i915->mm.gemfs = gemfs; - - return 0; -} - -void i915_gemfs_fini(struct drm_i915_private *i915) -{ - kern_unmount(i915->mm.gemfs); -} diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h deleted file mode 100644 index cca8bdc5b93e..000000000000 --- a/drivers/gpu/drm/i915/i915_gemfs.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __I915_GEMFS_H__ -#define __I915_GEMFS_H__ - -struct drm_i915_private; - -int i915_gemfs_init(struct drm_i915_private *i915); - -void i915_gemfs_fini(struct drm_i915_private *i915); - -#endif diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index db52a58eadcc..2d5fcba98841 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -8,7 +8,7 @@ #include #include "i915_active.h" -#include "i915_gem_context.h" +#include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" #include "i915_globals.h" #include "i915_request.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4f85cbdddb0d..c86865a34972 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -36,6 +36,8 @@ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gpu_error.h" #include "intel_atomic.h" diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 379fd89a180f..2e33a9b4eae7 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -195,6 +195,8 @@ #include #include +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" #include "gt/intel_lrc_reg.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 18b34b0bf872..da1e6984a8cc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,9 @@ #include #include +#include "gem/i915_gem_context.h" +#include "gt/intel_context.h" + #include "i915_active.h" #include "i915_drv.h" #include "i915_globals.h" diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6a063d3fccee..f454cf2450b5 100644 --- 
a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -45,7 +45,6 @@ #include #include "i915_drv.h" -#include "i915_gem_clflush.h" #include "i915_trace.h" #include "intel_acpi.h" #include "intel_atomic.h" diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ffdab22db2b0..a4f98ccef0fe 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -26,6 +26,8 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_context.h" +#include "gem/i915_gem_context.h" #include "intel_guc_submission.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index b64b45d9b538..80dcd879fc58 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -29,6 +29,8 @@ #include #include +#include "gem/i915_gem_pm.h" + #include "i915_drv.h" #include "i915_reg.h" #include "intel_drv.h" diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c deleted file mode 100644 index 419fd4d6a8f0..000000000000 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include "huge_gem_object.h" - -static void huge_free_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - unsigned long nreal = obj->scratch / PAGE_SIZE; - struct scatterlist *sg; - - for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) - __free_page(sg_page(sg)); - - sg_free_table(pages); - kfree(pages); -} - -static int huge_get_pages(struct drm_i915_gem_object *obj) -{ -#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) - const unsigned long nreal = obj->scratch / PAGE_SIZE; - const unsigned long npages = obj->base.size / PAGE_SIZE; - struct scatterlist *sg, *src, *end; - struct sg_table *pages; - unsigned long n; - - pages = kmalloc(sizeof(*pages), GFP); - if (!pages) - return -ENOMEM; - - if (sg_alloc_table(pages, npages, GFP)) { - kfree(pages); - return -ENOMEM; - } - - sg = pages->sgl; - for (n = 0; n < nreal; n++) { - struct page *page; - - page = alloc_page(GFP | __GFP_HIGHMEM); - if (!page) { - sg_mark_end(sg); - goto err; - } - - sg_set_page(sg, page, PAGE_SIZE, 0); - sg = __sg_next(sg); - } - if (nreal < npages) { - for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { - sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); - src = __sg_next(src); - if (src == end) - src = pages->sgl; - } - } - - if (i915_gem_gtt_prepare_pages(obj, pages)) - goto err; - - __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); - - return 0; - -err: - huge_free_pages(obj, pages); - - return -ENOMEM; -#undef GFP -} - -static void huge_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(obj, pages); - huge_free_pages(obj, pages); - - obj->mm.dirty = false; -} - -static const struct drm_i915_gem_object_ops huge_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = huge_get_pages, - .put_pages = huge_put_pages, -}; - -struct drm_i915_gem_object * -huge_gem_object(struct drm_i915_private *i915, - phys_addr_t phys_size, - dma_addr_t dma_size) -{ - struct drm_i915_gem_object *obj; - unsigned int cache_level; - - GEM_BUG_ON(!phys_size || phys_size > dma_size); - GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); - GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); - - if (overflows_type(dma_size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops); - - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->write_domain = I915_GEM_DOMAIN_CPU; - cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; - i915_gem_object_set_cache_coherency(obj, cache_level); - obj->scratch = phys_size; - - return obj; -} diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h deleted file mode 100644 index a6133a9e8029..000000000000 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __HUGE_GEM_OBJECT_H -#define __HUGE_GEM_OBJECT_H - -struct drm_i915_gem_object * -huge_gem_object(struct drm_i915_private *i915, - phys_addr_t phys_size, - dma_addr_t dma_size); - -static inline phys_addr_t -huge_gem_object_phys_size(struct drm_i915_gem_object *obj) -{ - return obj->scratch; -} - -static inline dma_addr_t -huge_gem_object_dma_size(struct drm_i915_gem_object *obj) -{ - return obj->base.size; -} - -#endif /* !__HUGE_GEM_OBJECT_H */ diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c deleted file mode 100644 index b22b8249dfbd..000000000000 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ /dev/null @@ -1,1793 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include "../i915_selftest.h" - -#include - -#include "igt_gem_utils.h" -#include "mock_drm.h" -#include "i915_random.h" - -static const unsigned int page_sizes[] = { - I915_GTT_PAGE_SIZE_2M, - I915_GTT_PAGE_SIZE_64K, - I915_GTT_PAGE_SIZE_4K, -}; - -static unsigned int get_largest_page_size(struct drm_i915_private *i915, - u64 rem) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { - unsigned int page_size = page_sizes[i]; - - if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) - return page_size; - } - - return 0; -} - -static void huge_pages_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -} - -static int get_huge_pages(struct drm_i915_gem_object *obj) -{ -#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) - unsigned int page_mask = obj->mm.page_mask; - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - u64 rem; - - st = kmalloc(sizeof(*st), GFP); - if (!st) - return -ENOMEM; - - if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { - kfree(st); - return -ENOMEM; - } - - rem = obj->base.size; - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - /* - * Our goal here is simple, we want to greedily fill the object from - * largest to smallest page-size, while ensuring that we use *every* - * page-size as per the given page-mask. - */ - do { - unsigned int bit = ilog2(page_mask); - unsigned int page_size = BIT(bit); - int order = get_order(page_size); - - do { - struct page *page; - - GEM_BUG_ON(order >= MAX_ORDER); - page = alloc_pages(GFP | __GFP_ZERO, order); - if (!page) - goto err; - - sg_set_page(sg, page, page_size, 0); - sg_page_sizes |= page_size; - st->nents++; - - rem -= page_size; - if (!rem) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while ((rem - ((page_size-1) & page_mask)) >= page_size); - - page_mask &= (page_size-1); - } while (page_mask); - - if (i915_gem_gtt_prepare_pages(obj, st)) - goto err; - - obj->mm.madv = I915_MADV_DONTNEED; - - GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - huge_pages_free_pages(st); - - return -ENOMEM; -} - -static void put_huge_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(obj, pages); - huge_pages_free_pages(pages); - - obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; -} - -static const struct drm_i915_gem_object_ops huge_page_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = get_huge_pages, - .put_pages = put_huge_pages, -}; - -static struct drm_i915_gem_object * -huge_pages_object(struct drm_i915_private *i915, - u64 size, - unsigned int page_mask) -{ - struct drm_i915_gem_object *obj; - - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); - - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops); - - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; - - obj->mm.page_mask = page_mask; - - return 
obj; -} - -static int fake_get_huge_pages(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - const u64 max_len = rounddown_pow_of_two(UINT_MAX); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - u64 rem; - - st = kmalloc(sizeof(*st), GFP); - if (!st) - return -ENOMEM; - - if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { - kfree(st); - return -ENOMEM; - } - - /* Use optimal page sized chunks to fill in the sg table */ - rem = obj->base.size; - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - do { - unsigned int page_size = get_largest_page_size(i915, rem); - unsigned int len = min(page_size * div_u64(rem, page_size), - max_len); - - GEM_BUG_ON(!page_size); - - sg->offset = 0; - sg->length = len; - sg_dma_len(sg) = len; - sg_dma_address(sg) = page_size; - - sg_page_sizes |= len; - - st->nents++; - - rem -= len; - if (!rem) { - sg_mark_end(sg); - break; - } - - sg = sg_next(sg); - } while (1); - - i915_sg_trim(st); - - obj->mm.madv = I915_MADV_DONTNEED; - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; -} - -static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int page_size; - - st = kmalloc(sizeof(*st), GFP); - if (!st) - return -ENOMEM; - - if (sg_alloc_table(st, 1, GFP)) { - kfree(st); - return -ENOMEM; - } - - sg = st->sgl; - st->nents = 1; - - page_size = get_largest_page_size(i915, obj->base.size); - GEM_BUG_ON(!page_size); - - sg->offset = 0; - sg->length = obj->base.size; - sg_dma_len(sg) = obj->base.size; - sg_dma_address(sg) = page_size; - - obj->mm.madv = I915_MADV_DONTNEED; - - __i915_gem_object_set_pages(obj, st, sg->length); - - return 0; -#undef GFP -} - -static void fake_free_huge_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - sg_free_table(pages); - kfree(pages); -} - -static void fake_put_huge_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - fake_free_huge_pages(obj, pages); - obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; -} - -static const struct drm_i915_gem_object_ops fake_ops = { - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = fake_get_huge_pages, - .put_pages = fake_put_huge_pages, -}; - -static const struct drm_i915_gem_object_ops fake_ops_single = { - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = fake_get_huge_pages_single, - .put_pages = fake_put_huge_pages, -}; - -static struct drm_i915_gem_object * -fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) -{ - struct drm_i915_gem_object *obj; - - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - - if (size >> PAGE_SHIFT > UINT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - - if (single) - i915_gem_object_init(obj, &fake_ops_single); - else - i915_gem_object_init(obj, &fake_ops); - - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; - - return obj; -} - -static int igt_check_page_sizes(struct i915_vma *vma) -{ - struct drm_i915_private *i915 = vma->vm->i915; - unsigned int supported = INTEL_INFO(i915)->page_sizes; - struct drm_i915_gem_object *obj = vma->obj; - int err = 0; - - if 
(!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { - pr_err("unsupported page_sizes.sg=%u, supported=%u\n", - vma->page_sizes.sg & ~supported, supported); - err = -EINVAL; - } - - if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { - pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", - vma->page_sizes.gtt & ~supported, supported); - err = -EINVAL; - } - - if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { - pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", - vma->page_sizes.phys, obj->mm.page_sizes.phys); - err = -EINVAL; - } - - if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { - pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", - vma->page_sizes.sg, obj->mm.page_sizes.sg); - err = -EINVAL; - } - - if (obj->mm.page_sizes.gtt) { - pr_err("obj->page_sizes.gtt(%u) should never be set\n", - obj->mm.page_sizes.gtt); - err = -EINVAL; - } - - return err; -} - -static int igt_mock_exhaust_device_supported_pages(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int i, j, single; - int err; - - /* - * Sanity check creating objects with every valid page support - * combination for our mock device. - */ - - for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { - unsigned int combination = 0; - - for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { - if (i & BIT(j)) - combination |= page_sizes[j]; - } - - mkwrite_device_info(i915)->page_sizes = combination; - - for (single = 0; single <= 1; ++single) { - obj = fake_huge_pages_object(i915, combination, !!single); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_device; - } - - if (obj->base.size != combination) { - pr_err("obj->base.size=%zu, expected=%u\n", - obj->base.size, combination); - err = -EINVAL; - goto out_put; - } - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto out_close; - - err = igt_check_page_sizes(vma); - - if (vma->page_sizes.sg != combination) { - pr_err("page_sizes.sg=%u, expected=%u\n", - vma->page_sizes.sg, combination); - err = -EINVAL; - } - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_put(obj); - - if (err) - goto out_device; - } - } - - goto out_device; - -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); -out_device: - mkwrite_device_info(i915)->page_sizes = saved_mask; - - return err; -} - -static int igt_mock_ppgtt_misaligned_dma(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - unsigned long supported = INTEL_INFO(i915)->page_sizes; - struct drm_i915_gem_object *obj; - int bit; - int err; - - /* - * Sanity check dma misalignment for huge pages -- the dma addresses we - * insert into the paging structures need to always respect the page - * size alignment. 
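
The alignment invariant being checked here is easy to state outside the driver. A self-contained sketch (sizes hard-coded, usable_gtt_page_size is a hypothetical helper, illustration only) of which GTT page size a chunk may legally use, matching the test's expectation that a 4K-misaligned offset forces the 4K fall-back:

#include <assert.h>
#include <stdint.h>

#define SZ_4K	(1u << 12)
#define SZ_64K	(1u << 16)
#define SZ_2M	(1u << 21)

/* A mapping may only use a given GTT page size if both the offset and the
 * remaining length are aligned to it; anything misaligned must fall back
 * to the next smaller supported size. */
static uint32_t usable_gtt_page_size(uint64_t offset, uint64_t length)
{
	static const uint32_t sizes[] = { SZ_2M, SZ_64K, SZ_4K };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		if (!(offset % sizes[i]) && length >= sizes[i])
			return sizes[i];
	}
	return 0; /* sub-4K chunks cannot be mapped */
}

int main(void)
{
	assert(usable_gtt_page_size(0, SZ_2M) == SZ_2M);
	/* any 4K offset inside a 64K page forces the 4K fall-back,
	 * exactly what the loop over offsets above verifies */
	assert(usable_gtt_page_size(SZ_4K, SZ_64K) == SZ_4K);
	return 0;
}
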
- */ - - bit = ilog2(I915_GTT_PAGE_SIZE_64K); - - for_each_set_bit_from(bit, &supported, - ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { - IGT_TIMEOUT(end_time); - unsigned int page_size = BIT(bit); - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - unsigned int offset; - unsigned int size = - round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; - struct i915_vma *vma; - - obj = fake_huge_pages_object(i915, size, true); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - if (obj->base.size != size) { - pr_err("obj->base.size=%zu, expected=%u\n", - obj->base.size, size); - err = -EINVAL; - goto out_put; - } - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - /* Force the page size for this object */ - obj->mm.page_sizes.sg = page_size; - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_unpin; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - - err = igt_check_page_sizes(vma); - - if (vma->page_sizes.gtt != page_size) { - pr_err("page_sizes.gtt=%u, expected %u\n", - vma->page_sizes.gtt, page_size); - err = -EINVAL; - } - - i915_vma_unpin(vma); - - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - /* - * Try all the other valid offsets until the next - * boundary -- should always fall back to using 4K - * pages. - */ - for (offset = 4096; offset < page_size; offset += 4096) { - err = i915_vma_unbind(vma); - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - err = i915_vma_pin(vma, 0, 0, flags | offset); - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - err = igt_check_page_sizes(vma); - - if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { - pr_err("page_sizes.gtt=%u, expected %llu\n", - vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); - err = -EINVAL; - } - - i915_vma_unpin(vma); - - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - if (igt_timeout(end_time, - "%s timed out at offset %x with page-size %x\n", - __func__, offset, page_size)) - break; - } - - i915_vma_close(vma); - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static void close_object_list(struct list_head *objects, - struct i915_hw_ppgtt *ppgtt) -{ - struct drm_i915_gem_object *obj, *on; - - list_for_each_entry_safe(obj, on, objects, st_link) { - struct i915_vma *vma; - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (!IS_ERR(vma)) - i915_vma_close(vma); - - list_del(&obj->st_link); - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } -} - -static int igt_mock_ppgtt_huge_fill(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT; - unsigned long page_num; - bool single = false; - LIST_HEAD(objects); - IGT_TIMEOUT(end_time); - int err = -ENODEV; - - for_each_prime_number_from(page_num, 1, max_pages) { - struct drm_i915_gem_object *obj; - u64 size = page_num << PAGE_SHIFT; - struct i915_vma *vma; - unsigned int expected_gtt = 0; - int i; - - obj = fake_huge_pages_object(i915, size, single); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - break; - } - - if (obj->base.size != size) { - pr_err("obj->base.size=%zd, expected=%llu\n", - obj->base.size, size); - i915_gem_object_put(obj); - err = -EINVAL; - break; - } 
- - err = i915_gem_object_pin_pages(obj); - if (err) { - i915_gem_object_put(obj); - break; - } - - list_add(&obj->st_link, &objects); - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - break; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - break; - - err = igt_check_page_sizes(vma); - if (err) { - i915_vma_unpin(vma); - break; - } - - /* - * Figure out the expected gtt page size knowing that we go from - * largest to smallest page size sg chunks, and that we align to - * the largest page size. - */ - for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { - unsigned int page_size = page_sizes[i]; - - if (HAS_PAGE_SIZES(i915, page_size) && - size >= page_size) { - expected_gtt |= page_size; - size &= page_size-1; - } - } - - GEM_BUG_ON(!expected_gtt); - GEM_BUG_ON(size); - - if (expected_gtt & I915_GTT_PAGE_SIZE_4K) - expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; - - i915_vma_unpin(vma); - - if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { - if (!IS_ALIGNED(vma->node.start, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.start(%llx) not aligned to 2M\n", - vma->node.start); - err = -EINVAL; - break; - } - - if (!IS_ALIGNED(vma->node.size, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.size(%llx) not aligned to 2M\n", - vma->node.size); - err = -EINVAL; - break; - } - } - - if (vma->page_sizes.gtt != expected_gtt) { - pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", - vma->page_sizes.gtt, expected_gtt, - obj->base.size, yesno(!!single)); - err = -EINVAL; - break; - } - - if (igt_timeout(end_time, - "%s timed out at size %zd\n", - __func__, obj->base.size)) - break; - - single = !single; - } - - close_object_list(&objects, ppgtt); - - if (err == -ENOMEM || err == -ENOSPC) - err = 0; - - return err; -} - -static int igt_mock_ppgtt_64K(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - struct drm_i915_gem_object *obj; - const struct object_info { - unsigned int size; - unsigned int gtt; - unsigned int offset; - } objects[] = { - /* Cases with forced padding/alignment */ - { - .size = SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - { - .size = SZ_64K + SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_64K - SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_2M, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - { - .size = SZ_2M - SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_2M + SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_2M + SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - { - .size = SZ_2M - SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - /* Try without any forced padding/alignment */ - { - .size = SZ_64K, - .offset = SZ_2M, - .gtt = I915_GTT_PAGE_SIZE_4K, - }, - { - .size = SZ_128K, - .offset = SZ_2M - SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_4K, - }, - }; - struct i915_vma *vma; - int i, single; - int err; - - /* - * Sanity check some of the trickiness with 64K pages -- either we can - * safely mark the whole page-table(2M block) as 64K, or we have to - * always fallback to 4K. 
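
A rough model of the rule behind the table above (illustration only; expected_gtt_page_sizes is a hypothetical helper covering the forced-padding rows, not the fixed-offset ones): each 2M page-table block may be marked as using 64K pages only when the object's slice of that block is a whole number of 64K chunks; otherwise every entry in that block degrades to 4K:

#include <assert.h>
#include <stdint.h>

#define SZ_4K	(1u << 12)
#define SZ_64K	(1u << 16)
#define SZ_2M	(1u << 21)

static uint32_t expected_gtt_page_sizes(uint64_t size)
{
	uint32_t mask = 0;

	while (size) {
		/* walk the object one 2M page-table block at a time */
		uint64_t chunk = size < SZ_2M ? size : SZ_2M;

		/* a block that is not a whole multiple of 64K must use 4K */
		mask |= (chunk % SZ_64K) ? SZ_4K : SZ_64K;
		size -= chunk;
	}
	return mask;
}

int main(void)
{
	/* these reproduce the forced-padding rows of the table above */
	assert(expected_gtt_page_sizes(SZ_64K) == SZ_64K);
	assert(expected_gtt_page_sizes(SZ_64K + SZ_4K) == SZ_4K);
	assert(expected_gtt_page_sizes(SZ_2M + SZ_4K) == (SZ_64K | SZ_4K));
	assert(expected_gtt_page_sizes(SZ_2M - SZ_64K) == SZ_64K);
	return 0;
}
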
- */ - - if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) - return 0; - - for (i = 0; i < ARRAY_SIZE(objects); ++i) { - unsigned int size = objects[i].size; - unsigned int expected_gtt = objects[i].gtt; - unsigned int offset = objects[i].offset; - unsigned int flags = PIN_USER; - - for (single = 0; single <= 1; single++) { - obj = fake_huge_pages_object(i915, size, !!single); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_object_put; - - /* - * Disable 2M pages -- We only want to use 64K/4K pages - * for this test. - */ - obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_object_unpin; - } - - if (offset) - flags |= PIN_OFFSET_FIXED | offset; - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_vma_close; - - err = igt_check_page_sizes(vma); - if (err) - goto out_vma_unpin; - - if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { - if (!IS_ALIGNED(vma->node.start, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.start(%llx) not aligned to 2M\n", - vma->node.start); - err = -EINVAL; - goto out_vma_unpin; - } - - if (!IS_ALIGNED(vma->node.size, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.size(%llx) not aligned to 2M\n", - vma->node.size); - err = -EINVAL; - goto out_vma_unpin; - } - } - - if (vma->page_sizes.gtt != expected_gtt) { - pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", - vma->page_sizes.gtt, expected_gtt, i, - yesno(!!single)); - err = -EINVAL; - goto out_vma_unpin; - } - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - } - - return 0; - -out_vma_unpin: - i915_vma_unpin(vma); -out_vma_close: - i915_vma_close(vma); -out_object_unpin: - i915_gem_object_unpin_pages(obj); -out_object_put: - i915_gem_object_put(obj); - - return err; -} - -static struct i915_vma * -gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) -{ - struct drm_i915_private *i915 = vma->vm->i915; - const int gen = INTEL_GEN(i915); - unsigned int count = vma->size >> PAGE_SHIFT; - struct drm_i915_gem_object *obj; - struct i915_vma *batch; - unsigned int size; - u32 *cmd; - int n; - int err; - - size = (1 + 4 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = val; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? 
MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = val; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = val; - } - - offset += PAGE_SIZE; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - batch = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (err) - goto err; - - return batch; - -err: - i915_gem_object_put(obj); - - return ERR_PTR(err); -} - -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u32 dword, - u32 value) -{ - struct i915_request *rq; - struct i915_vma *batch; - int err; - - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - err = i915_gem_object_set_to_gtt_domain(vma->obj, true); - if (err) - return err; - - batch = gpu_write_dw(vma, dword * sizeof(u32), value); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto err_request; - - i915_gem_object_set_active_reference(batch->obj); - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto err_request; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -err_request: - if (err) - i915_request_skip(rq, err); - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_close(batch); - - return err; -} - -static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) -{ - unsigned int needs_flush; - unsigned long n; - int err; - - err = i915_gem_object_prepare_read(obj, &needs_flush); - if (err) - return err; - - for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); - - if (needs_flush & CLFLUSH_BEFORE) - drm_clflush_virt_range(ptr, PAGE_SIZE); - - if (ptr[dword] != val) { - pr_err("n=%lu ptr[%u]=%u, val=%u\n", - n, dword, ptr[dword], val); - kunmap_atomic(ptr); - err = -EINVAL; - break; - } - - kunmap_atomic(ptr); - } - - i915_gem_object_finish_access(obj); - - return err; -} - -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct drm_i915_gem_object *obj, - u64 size, u64 offset, - u32 dword, u32 val) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_vma_unbind(vma); - if (err) - goto out_vma_close; - - err = i915_vma_pin(vma, size, 0, flags | offset); - if (err) { - /* - * The ggtt may have some pages reserved so - * refrain from erroring out. 
- */ - if (err == -ENOSPC && i915_is_ggtt(vm)) - err = 0; - - goto out_vma_close; - } - - err = igt_check_page_sizes(vma); - if (err) - goto out_vma_unpin; - - err = gpu_write(vma, ctx, engine, dword, val); - if (err) { - pr_err("gpu-write failed at offset=%llx\n", offset); - goto out_vma_unpin; - } - - err = cpu_check(obj, dword, val); - if (err) { - pr_err("cpu-check failed at offset=%llx\n", offset); - goto out_vma_unpin; - } - -out_vma_unpin: - i915_vma_unpin(vma); -out_vma_close: - i915_vma_destroy(vma); - - return err; -} - -static int igt_write_huge(struct i915_gem_context *ctx, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; - I915_RND_STATE(prng); - IGT_TIMEOUT(end_time); - unsigned int max_page_size; - unsigned int id; - u64 max; - u64 num; - u64 size; - int *order; - int i, n; - int err = 0; - - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - - size = obj->base.size; - if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) - size = round_up(size, I915_GTT_PAGE_SIZE_2M); - - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); - continue; - } - engines[n++] = engine; - } - - if (!n) - return 0; - - /* - * To keep things interesting when alternating between engines in our - * randomized order, let's also make feeding to the same engine a few - * times in succession a possibility by enlarging the permutation array. - */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); - if (!order) - return -ENOMEM; - - /* - * Try various offsets in an ascending/descending fashion until we - * time out -- we want to avoid issues hidden by effectively always using - * offset = 0. - */ - i = 0; - for_each_prime_number_from(num, 0, max) { - u64 offset_low = num * max_page_size; - u64 offset_high = (max - num) * max_page_size; - u32 dword = offset_in_page(num) / 4; - - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); - - /* - * In order to utilize 64K pages we need to both pad the vma - * size and ensure the vma offset is at the start of the pt - * boundary; however, to improve coverage we opt for testing both - * aligned and unaligned offsets. - */
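
A self-contained sketch of the probing scheme described above (tiny bounds substituted for vm->total, illustration only): prime multiples of the maximum page size are taken from both ends of the address space, so successive pins land at varied, non-zero offsets:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_prime(uint64_t n)
{
	uint64_t d;

	if (n < 2)
		return false;
	for (d = 2; d * d <= n; d++)
		if (n % d == 0)
			return false;
	return true;
}

int main(void)
{
	const uint64_t max_page_size = 1ull << 16; /* pretend 64K chunks */
	const uint64_t max = 32; /* stands in for (vm->total - size) / max_page_size */
	uint64_t num;

	/* walk primes only, as for_each_prime_number_from() does */
	for (num = 2; num < max; num++) {
		if (!is_prime(num))
			continue;

		/* one ascending probe and its descending mirror */
		printf("low=%#llx high=%#llx\n",
		       (unsigned long long)(num * max_page_size),
		       (unsigned long long)((max - num) * max_page_size));
	}
	return 0;
}
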
- if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) - offset_low = round_down(offset_low, - I915_GTT_PAGE_SIZE_2M); - - err = __igt_write_huge(ctx, engine, obj, size, offset_low, - dword, num + 1); - if (err) - break; - - err = __igt_write_huge(ctx, engine, obj, size, offset_high, - dword, num + 1); - if (err) - break; - - if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, - max_page_size)) - break; - } - - kfree(order); - - return err; -} - -static int igt_ppgtt_exhaust_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - unsigned long supported = INTEL_INFO(i915)->page_sizes; - static unsigned int pages[ARRAY_SIZE(page_sizes)]; - struct drm_i915_gem_object *obj; - unsigned int size_mask; - unsigned int page_mask; - int n, i; - int err = -ENODEV; - - if (supported == I915_GTT_PAGE_SIZE_4K) - return 0; - - /* - * Sanity check creating objects with a varying mix of page sizes -- - * ensuring that our writes land in the right place. - */ - - n = 0; - for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) - pages[n++] = BIT(i); - - for (size_mask = 2; size_mask < BIT(n); size_mask++) { - unsigned int size = 0; - - for (i = 0; i < n; i++) { - if (size_mask & BIT(i)) - size |= pages[i]; - } - - /* - * For our page mask we want to enumerate all the page-size - * combinations which will fit into our chosen object size. - */ - for (page_mask = 2; page_mask <= size_mask; page_mask++) { - unsigned int page_sizes = 0; - - for (i = 0; i < n; i++) { - if (page_mask & BIT(i)) - page_sizes |= pages[i]; - } - - /* - * Ensure that we can actually fill the given object - * with our chosen page mask. - */ - if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) - continue; - - obj = huge_pages_object(i915, size, page_sizes); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_device; - } - - err = i915_gem_object_pin_pages(obj); - if (err) { - i915_gem_object_put(obj); - - if (err == -ENOMEM) { - pr_info("unable to get pages, size=%u, pages=%u\n", - size, page_sizes); - err = 0; - break; - } - - pr_err("pin_pages failed, size=%u, pages=%u\n", - size_mask, page_mask); - - goto out_device; - } - - /* Force the page-size for the gtt insertion */ - obj->mm.page_sizes.sg = page_sizes; - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("exhaust write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - } - - goto out_device; - -out_unpin: - i915_gem_object_unpin_pages(obj); - i915_gem_object_put(obj); -out_device: - mkwrite_device_info(i915)->page_sizes = supported; - - return err; -} - -static int igt_ppgtt_internal_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_64K, - SZ_128K, - SZ_256K, - SZ_512K, - SZ_1M, - SZ_2M, - }; - int i; - int err; - - /* - * Sanity check that the HW uses huge pages correctly through internal - * -- ensure that our writes land in the right place.
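
The mask walk in igt_ppgtt_exhaust_huge() above compresses a lot into two loops; a hypothetical standalone restatement of the same enumeration (sizes hard-coded, illustration only): every mask over the supported page sizes is treated as a candidate object size, and every numerically smaller mask as a candidate backing-store mix, filtered by alignment:

#include <stdio.h>

int main(void)
{
	static const unsigned int pages[] = { 1u << 12, 1u << 16, 1u << 21 };
	const unsigned int n = 3;
	unsigned int size_mask, page_mask;

	for (size_mask = 2; size_mask < (1u << n); size_mask++) {
		unsigned int size = 0, i;

		for (i = 0; i < n; i++)
			if (size_mask & (1u << i))
				size |= pages[i];

		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
			unsigned int sizes = 0;

			for (i = 0; i < n; i++)
				if (page_mask & (1u << i))
					sizes |= pages[i];

			/* the object must be fillable with the smallest
			 * chosen page size (sizes & -sizes is the lowest
			 * set bit, i.e. BIT(__ffs(sizes))) */
			if (size % (sizes & -sizes))
				continue;

			printf("size=%u page-mix=%#x\n", size, sizes);
		}
	}
	return 0;
}
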
- */ - - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; - - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { - pr_info("internal unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("internal write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) -{ - return i915->mm.gemfs && has_transparent_hugepage(); -} - -static int igt_ppgtt_gemfs_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_2M, - SZ_4M, - SZ_8M, - SZ_16M, - SZ_32M, - }; - int i; - int err; - - /* - * Sanity check that the HW uses huge pages correctly through gemfs -- - * ensure that our writes land in the right place. - */ - - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; - } - - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; - - obj = i915_gem_object_create_shmem(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("gemfs write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static int igt_ppgtt_pin_update(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *dev_priv = ctx->i915; - unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - int first, last; - int err; - - /* - * Make sure there's no funny business when doing a PIN_UPDATE -- in the - * past we had a subtle issue with being able to incorrectly do multiple - * alloc va ranges on the same object when doing a PIN_UPDATE, which - * resulted in some pretty nasty bugs, though only when using - * huge-gtt-pages. 
- */ - - if (!ppgtt || !i915_vm_is_4lvl(&ppgtt->vm)) { - pr_info("48b PPGTT not supported, skipping\n"); - return 0; - } - - first = ilog2(I915_GTT_PAGE_SIZE_64K); - last = ilog2(I915_GTT_PAGE_SIZE_2M); - - for_each_set_bit_from(first, &supported, last + 1) { - unsigned int page_size = BIT(first); - - obj = i915_gem_object_create_internal(dev_priv, page_size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, SZ_2M, 0, flags); - if (err) - goto out_close; - - if (vma->page_sizes.sg < page_size) { - pr_info("Unable to allocate page-size %x, finishing test early\n", - page_size); - goto out_unpin; - } - - err = igt_check_page_sizes(vma); - if (err) - goto out_unpin; - - if (vma->page_sizes.gtt != page_size) { - dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); - - /* - * The only valid reason for this to ever fail would be - * if the dma-mapper screwed us over when we did the - * dma_map_sg(), since it has the final say over the dma - * address. - */ - if (IS_ALIGNED(addr, page_size)) { - pr_err("page_sizes.gtt=%u, expected=%u\n", - vma->page_sizes.gtt, page_size); - err = -EINVAL; - } else { - pr_info("dma address misaligned, finishing test early\n"); - } - - goto out_unpin; - } - - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); - if (err) - goto out_unpin; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_put(obj); - } - - obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_close; - - /* - * Make sure we don't end up with the pde still pointing to the 2M - * page while the pt we just filled in is left dangling -- we can check - * this by writing to the first page, where the write would then land - * in the now stale 2M page. - */ - - err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; - - err = cpu_check(obj, 0, 0xdeadbeaf); - -out_unpin: - i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static int igt_tmpfs_fallback(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 *vaddr; - int err = 0; - - /* - * Make sure that we don't burst into a ball of flames upon falling back - * to tmpfs, which we rely on if on the off-chance we encounter a failure - * when setting up gemfs.
- */ - - i915->mm.gemfs = NULL; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_restore; - } - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto out_put; - } - *vaddr = 0xdeadbeaf; - - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto out_close; - - err = igt_check_page_sizes(vma); - - i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); -out_restore: - i915->mm.gemfs = gemfs; - - return err; -} - -static int igt_shrink_thp(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - unsigned int flags = PIN_USER; - int err; - - /* - * Sanity check shrinking a huge-paged object -- make sure nothing blows - * up. - */ - - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; - } - - obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_close; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("failed to allocate THP, finishing test early\n"); - goto out_unpin; - } - - err = igt_check_page_sizes(vma); - if (err) - goto out_unpin; - - err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; - - i915_vma_unpin(vma); - - /* - * Now that the pages are *unpinned* shrink-all should invoke - * shmem to truncate our pages. - */
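
User space can ask for the same truncation that the shrinker exercises here by marking a buffer purgeable through the madvise ioctl; a sketch (assuming libdrm; the helper name mark_purgeable is hypothetical):

#include <stdint.h>
#include <drm/i915_drm.h>
#include <xf86drm.h>

/* Mark a bo DONTNEED so the shrinker may truncate its backing store under
 * memory pressure; `retained` reports whether the pages still existed at
 * the time of the call. */
static int mark_purgeable(int fd, uint32_t handle, int *retained)
{
	struct drm_i915_gem_madvise arg = {
		.handle = handle,
		.madv = I915_MADV_DONTNEED,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
		return -1;

	*retained = arg.retained;
	return 0;
}
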
- i915_gem_shrink_all(i915); - if (i915_gem_object_has_pages(obj)) { - pr_err("shrink-all didn't truncate the pages\n"); - err = -EINVAL; - goto out_close; - } - - if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { - pr_err("residual page-size bits left\n"); - err = -EINVAL; - goto out_close; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_close; - - err = cpu_check(obj, 0, 0xdeadbeaf); - -out_unpin: - i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); - - return err; -} - -int i915_gem_huge_page_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_mock_exhaust_device_supported_pages), - SUBTEST(igt_mock_ppgtt_misaligned_dma), - SUBTEST(igt_mock_ppgtt_huge_fill), - SUBTEST(igt_mock_ppgtt_64K), - }; - struct drm_i915_private *dev_priv; - struct i915_hw_ppgtt *ppgtt; - int err; - - dev_priv = mock_gem_device(); - if (!dev_priv) - return -ENOMEM; - - /* Pretend to be a device which supports the 48b PPGTT */ - mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; - mkwrite_device_info(dev_priv)->ppgtt_size = 48; - - mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt = i915_ppgtt_create(dev_priv); - if (IS_ERR(ppgtt)) { - err = PTR_ERR(ppgtt); - goto out_unlock; - } - - if (!i915_vm_is_4lvl(&ppgtt->vm)) { - pr_err("failed to create 48b PPGTT\n"); - err = -EINVAL; - goto out_close; - } - - /* If we ever hit this then it's time to mock the 64K scratch */ - if (!i915_vm_has_scratch_64K(&ppgtt->vm)) { - pr_err("PPGTT missing 64K scratch page\n"); - err = -EINVAL; - goto out_close; - } - - err = i915_subtests(tests, ppgtt); - -out_close: - i915_ppgtt_put(ppgtt); - -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); - drm_dev_put(&dev_priv->drm); - - return err; -} - -int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_shrink_thp), - SUBTEST(igt_ppgtt_pin_update), - SUBTEST(igt_tmpfs_fallback), - SUBTEST(igt_ppgtt_exhaust_huge), - SUBTEST(igt_ppgtt_gemfs_huge), - SUBTEST(igt_ppgtt_internal_huge), - }; - struct drm_file *file; - struct i915_gem_context *ctx; - intel_wakeref_t wakeref; - int err; - - if (!HAS_PPGTT(dev_priv)) { - pr_info("PPGTT not supported, skipping live-selftests\n"); - return 0; - } - - if (i915_terminally_wedged(dev_priv)) - return 0; - - file = mock_file(dev_priv); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(dev_priv); - - ctx = live_context(dev_priv, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_unlock; - } - - if (ctx->ppgtt) - ctx->ppgtt->vm.scrub_64K = true; - - err = i915_subtests(tests, ctx); - -out_unlock: - intel_runtime_pm_put(dev_priv, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); - - mock_file_free(dev_priv, file); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index eee838dc0634..cc1ca4be1a00 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -4,7 +4,9 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" + +#include "i915_selftest.h" #include "igt_flush_test.h" #include "lib_sw_fence.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index c6a9bff85311..83643929416c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++
b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -6,11 +6,13 @@ #include -#include "../i915_selftest.h" +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + +#include "i915_selftest.h" -#include "igt_gem_utils.h" #include "igt_flush_test.h" -#include "mock_context.h" +#include "mock_drm.h" static int switch_to_context(struct drm_i915_private *i915, struct i915_gem_context *ctx) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c deleted file mode 100644 index cb25b5fc8027..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include - -#include "../i915_selftest.h" -#include "i915_random.h" - -static int cpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - unsigned int needs_clflush; - struct page *page; - void *map; - u32 *cpu; - int err; - - err = i915_gem_object_prepare_write(obj, &needs_clflush); - if (err) - return err; - - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); - - if (needs_clflush & CLFLUSH_BEFORE) - drm_clflush_virt_range(cpu, sizeof(*cpu)); - - *cpu = v; - - if (needs_clflush & CLFLUSH_AFTER) - drm_clflush_virt_range(cpu, sizeof(*cpu)); - - kunmap_atomic(map); - i915_gem_object_finish_access(obj); - - return 0; -} - -static int cpu_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) -{ - unsigned int needs_clflush; - struct page *page; - void *map; - u32 *cpu; - int err; - - err = i915_gem_object_prepare_read(obj, &needs_clflush); - if (err) - return err; - - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); - - if (needs_clflush & CLFLUSH_BEFORE) - drm_clflush_virt_range(cpu, sizeof(*cpu)); - - *v = *cpu; - - kunmap_atomic(map); - i915_gem_object_finish_access(obj); - - return 0; -} - -static int gtt_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - struct i915_vma *vma; - u32 __iomem *map; - int err; - - err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return err; - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - map = i915_vma_pin_iomap(vma); - i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); - - iowrite32(v, &map[offset / sizeof(*map)]); - i915_vma_unpin_iomap(vma); - - return 0; -} - -static int gtt_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) -{ - struct i915_vma *vma; - u32 __iomem *map; - int err; - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - return err; - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - map = i915_vma_pin_iomap(vma); - i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); - - *v = ioread32(&map[offset / sizeof(*map)]); - i915_vma_unpin_iomap(vma); - - return 0; -} - -static int wc_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - u32 *map; - int err; - - err = i915_gem_object_set_to_wc_domain(obj, true); - if (err) - return err; - - map = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(map)) - return PTR_ERR(map); - - map[offset / sizeof(*map)] = v; - i915_gem_object_unpin_map(obj); - - return 0; -} - -static int wc_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) -{ - u32 *map; - int err; - - err = i915_gem_object_set_to_wc_domain(obj, false); - if (err) - return err; - - map = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(map)) - return PTR_ERR(map); - - *v = map[offset / sizeof(*map)]; - i915_gem_object_unpin_map(obj); - - return 0; -} - -static int gpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_request *rq; - struct i915_vma *vma; - u32 *cs; - int err; - - err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return err; - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - rq = 
i915_request_create(i915->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) { - i915_request_add(rq); - i915_vma_unpin(vma); - return PTR_ERR(cs); - } - - if (INTEL_GEN(i915) >= 8) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; - *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); - *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); - *cs++ = v; - } else if (INTEL_GEN(i915) >= 4) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = 0; - *cs++ = i915_ggtt_offset(vma) + offset; - *cs++ = v; - } else { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = i915_ggtt_offset(vma) + offset; - *cs++ = v; - *cs++ = MI_NOOP; - } - intel_ring_advance(rq, cs); - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unpin(vma); - - i915_request_add(rq); - - return err; -} - -static bool always_valid(struct drm_i915_private *i915) -{ - return true; -} - -static bool needs_fence_registers(struct drm_i915_private *i915) -{ - return !i915_terminally_wedged(i915); -} - -static bool needs_mi_store_dword(struct drm_i915_private *i915) -{ - if (i915_terminally_wedged(i915)) - return false; - - return intel_engine_can_store_dword(i915->engine[RCS0]); -} - -static const struct igt_coherency_mode { - const char *name; - int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); - int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); - bool (*valid)(struct drm_i915_private *i915); -} igt_coherency_mode[] = { - { "cpu", cpu_set, cpu_get, always_valid }, - { "gtt", gtt_set, gtt_get, needs_fence_registers }, - { "wc", wc_set, wc_get, always_valid }, - { "gpu", gpu_set, NULL, needs_mi_store_dword }, - { }, -}; - -static int igt_gem_coherency(void *arg) -{ - const unsigned int ncachelines = PAGE_SIZE/64; - I915_RND_STATE(prng); - struct drm_i915_private *i915 = arg; - const struct igt_coherency_mode *read, *write, *over; - struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; - unsigned long count, n; - u32 *offsets, *values; - int err = 0; - - /* We repeatedly write, overwrite and read from a sequence of - * cachelines in order to try and detect incoherency (unflushed writes - * from either the CPU or GPU). Each setter/getter uses our cache - * domain API which should prevent incoherency. 
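- * Offsets are spread one per 64-byte cacheline, with a sliding 4-byte
- * skew (count * 64 + 4 * (count % 16)), so neighbouring values never
- * share a cacheline and a single missed flush shows up in isolation.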
- */ - - offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); - if (!offsets) - return -ENOMEM; - for (count = 0; count < ncachelines; count++) - offsets[count] = count * 64 + 4 * (count % 16); - - values = offsets + ncachelines; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - for (over = igt_coherency_mode; over->name; over++) { - if (!over->set) - continue; - - if (!over->valid(i915)) - continue; - - for (write = igt_coherency_mode; write->name; write++) { - if (!write->set) - continue; - - if (!write->valid(i915)) - continue; - - for (read = igt_coherency_mode; read->name; read++) { - if (!read->get) - continue; - - if (!read->valid(i915)) - continue; - - for_each_prime_number_from(count, 1, ncachelines) { - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto unlock; - } - - i915_random_reorder(offsets, ncachelines, &prng); - for (n = 0; n < count; n++) - values[n] = prandom_u32_state(&prng); - - for (n = 0; n < count; n++) { - err = over->set(obj, offsets[n], ~values[n]); - if (err) { - pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", - n, count, over->name, err); - goto put_object; - } - } - - for (n = 0; n < count; n++) { - err = write->set(obj, offsets[n], values[n]); - if (err) { - pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", - n, count, write->name, err); - goto put_object; - } - } - - for (n = 0; n < count; n++) { - u32 found; - - err = read->get(obj, offsets[n], &found); - if (err) { - pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", - n, count, read->name, err); - goto put_object; - } - - if (found != values[n]) { - pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", - n, count, over->name, - write->name, values[n], - read->name, found, - ~values[n], offsets[n]); - err = -EINVAL; - goto put_object; - } - } - - __i915_gem_object_release_unless_active(obj); - } - } - } - } -unlock: - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kfree(offsets); - return err; - -put_object: - __i915_gem_object_release_unless_active(obj); - goto unlock; -} - -int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gem_coherency), - }; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c deleted file mode 100644 index c69c6d9a998b..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ /dev/null @@ -1,1752 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include - -#include "gt/intel_reset.h" -#include "i915_selftest.h" - -#include "i915_random.h" -#include "igt_flush_test.h" -#include "igt_gem_utils.h" -#include "igt_live_test.h" -#include "igt_reset.h" -#include "igt_spinner.h" - -#include "mock_drm.h" -#include "mock_gem_device.h" -#include "huge_gem_object.h" - -#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) - -static int live_nop_switch(void *arg) -{ - const unsigned int nctx = 1024; - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context **ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; - struct igt_live_test t; - struct drm_file *file; - unsigned long n; - int err = -ENODEV; - - /* - * Create as many contexts as we can feasibly get away with - * and check we can switch between them rapidly. - * - * Serves as very simple stress test for submission and HW switching - * between contexts. - */ - - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return 0; - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - err = -ENOMEM; - goto out_unlock; - } - - for (n = 0; n < nctx; n++) { - ctx[n] = live_context(i915, file); - if (IS_ERR(ctx[n])) { - err = PTR_ERR(ctx[n]); - goto out_unlock; - } - } - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - unsigned long end_time, prime; - ktime_t times[2] = {}; - - times[0] = ktime_get_raw(); - for (n = 0; n < nctx; n++) { - rq = igt_request_alloc(ctx[n], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; - } - i915_request_add(rq); - } - if (i915_request_wait(rq, - I915_WAIT_LOCKED, - HZ / 5) < 0) { - pr_err("Failed to populated %d contexts\n", nctx); - i915_gem_set_wedged(i915); - err = -EIO; - goto out_unlock; - } - - times[1] = ktime_get_raw(); - - pr_info("Populated %d contexts on %s in %lluns\n", - nctx, engine->name, ktime_to_ns(times[1] - times[0])); - - err = igt_live_test_begin(&t, i915, __func__, engine->name); - if (err) - goto out_unlock; - - end_time = jiffies + i915_selftest.timeout_jiffies; - for_each_prime_number_from(prime, 2, 8192) { - times[1] = ktime_get_raw(); - - for (n = 0; n < prime; n++) { - rq = igt_request_alloc(ctx[n % nctx], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; - } - - /* - * This space is left intentionally blank. - * - * We do not actually want to perform any - * action with this request, we just want - * to measure the latency in allocation - * and submission of our breadcrumbs - - * ensuring that the bare request is sufficient - * for the system to work (i.e. proper HEAD - * tracking of the rings, interrupt handling, - * etc). It also gives us the lowest bounds - * for latency. 
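- * (times[1] below accumulates the cost of prime switches; dividing
- * by prime - 1 at the end reports the amortised per-switch latency.)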
- */ - - i915_request_add(rq); - } - if (i915_request_wait(rq, - I915_WAIT_LOCKED, - HZ / 5) < 0) { - pr_err("Switching between %ld contexts timed out\n", - prime); - i915_gem_set_wedged(i915); - break; - } - - times[1] = ktime_sub(ktime_get_raw(), times[1]); - if (prime == 2) - times[0] = times[1]; - - if (__igt_timeout(end_time, NULL)) - break; - } - - err = igt_live_test_end(&t); - if (err) - goto out_unlock; - - pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", - engine->name, - ktime_to_ns(times[0]), - prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); - } - -out_unlock: - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); - return err; -} - -static struct i915_vma * -gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) -{ - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(vma->vm->i915); - unsigned long n, size; - u32 *cmd; - int err; - - size = (4 * count + 1) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(vma->vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = value; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = value; - } - offset += PAGE_SIZE; - } - *cmd = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - goto err; - - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static unsigned long real_page_count(struct drm_i915_gem_object *obj) -{ - return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; -} - -static unsigned long fake_page_count(struct drm_i915_gem_object *obj) -{ - return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; -} - -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - unsigned int dw) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - struct i915_request *rq; - struct i915_vma *vma; - struct i915_vma *batch; - unsigned int flags; - int err; - - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - return err; - - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); - if (err) - return err; - - /* Within the GTT the huge objects maps every page onto - * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 
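- * (e.g. with 1024 real pages, fake pages 0, 1024, 2048, ... all
- * alias real page 0.)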
- * We set the nth dword within the page using the nth - * mapping via the GTT - this should exercise the GTT mapping - * whilst checking that each context provides a unique view - * into the object. - */ - batch = gpu_fill_dw(vma, - (dw * real_page_count(obj)) << PAGE_SHIFT | - (dw * sizeof(u32)), - real_page_count(obj), - dw); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - flags = 0; - if (INTEL_GEN(vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - i915_gem_object_set_active_reference(batch->obj); - i915_vma_unpin(batch); - i915_vma_close(batch); - - i915_vma_unpin(vma); - - i915_request_add(rq); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -err_vma: - i915_vma_unpin(vma); - return err; -} - -static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) -{ - const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); - unsigned int n, m, need_flush; - int err; - - err = i915_gem_object_prepare_write(obj, &need_flush); - if (err) - return err; - - for (n = 0; n < real_page_count(obj); n++) { - u32 *map; - - map = kmap_atomic(i915_gem_object_get_page(obj, n)); - for (m = 0; m < DW_PER_PAGE; m++) - map[m] = value; - if (!has_llc) - drm_clflush_virt_range(map, PAGE_SIZE); - kunmap_atomic(map); - } - - i915_gem_object_finish_access(obj); - obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; - obj->write_domain = 0; - return 0; -} - -static noinline int cpu_check(struct drm_i915_gem_object *obj, - unsigned int idx, unsigned int max) -{ - unsigned int n, m, needs_flush; - int err; - - err = i915_gem_object_prepare_read(obj, &needs_flush); - if (err) - return err; - - for (n = 0; n < real_page_count(obj); n++) { - u32 *map; - - map = kmap_atomic(i915_gem_object_get_page(obj, n)); - if (needs_flush & CLFLUSH_BEFORE) - drm_clflush_virt_range(map, PAGE_SIZE); - - for (m = 0; m < max; m++) { - if (map[m] != m) { - pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", - __builtin_return_address(0), idx, - n, real_page_count(obj), m, max, - map[m], m); - err = -EINVAL; - goto out_unmap; - } - } - - for (; m < DW_PER_PAGE; m++) { - if (map[m] != STACK_MAGIC) { - pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", - __builtin_return_address(0), idx, n, m, - map[m], STACK_MAGIC); - err = -EINVAL; - goto out_unmap; - } - } - -out_unmap: - kunmap_atomic(map); - if (err) - break; - } - - i915_gem_object_finish_access(obj); - return err; -} - -static int file_add_object(struct drm_file *file, - struct drm_i915_gem_object *obj) -{ - int err; - - GEM_BUG_ON(obj->base.handle_count); - - /* tie the object to the drm_file for easy reaping */ - err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); - if (err < 0) - return err; - - i915_gem_object_get(obj); - obj->base.handle_count++; - return 0; -} - -static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, - struct drm_file *file, - struct list_head *objects) -{ - struct drm_i915_gem_object *obj; - struct 
i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm; - u64 size; - int err; - - size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); - size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); - if (IS_ERR(obj)) - return obj; - - err = file_add_object(file, obj); - i915_gem_object_put(obj); - if (err) - return ERR_PTR(err); - - err = cpu_fill(obj, STACK_MAGIC); - if (err) { - pr_err("Failed to fill object with cpu, err=%d\n", - err); - return ERR_PTR(err); - } - - list_add_tail(&obj->st_link, objects); - return obj; -} - -static unsigned long max_dwords(struct drm_i915_gem_object *obj) -{ - unsigned long npages = fake_page_count(obj); - - GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); - return npages / DW_PER_PAGE; -} - -static int igt_ctx_exec(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = -ENODEV; - - /* - * Create a few different contexts (with different mm) and write - * through each ctx/mm using the GPU making sure those writes end - * up in the expected pages of our obj. - */ - - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return 0; - - for_each_engine(engine, i915, id) { - struct drm_i915_gem_object *obj = NULL; - unsigned long ncontexts, ndwords, dw; - struct igt_live_test t; - struct drm_file *file; - IGT_TIMEOUT(end_time); - LIST_HEAD(objects); - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (!engine->context_size) - continue; /* No logical context support in HW */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - err = igt_live_test_begin(&t, i915, __func__, engine->name); - if (err) - goto out_unlock; - - ncontexts = 0; - ndwords = 0; - dw = 0; - while (!time_after(jiffies, end_time)) { - struct i915_gem_context *ctx; - intel_wakeref_t wakeref; - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_unlock; - } - - if (!obj) { - obj = create_test_object(ctx, file, &objects); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } - } - - with_intel_runtime_pm(i915, wakeref) - err = gpu_fill(obj, ctx, engine, dw); - if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", - ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->ppgtt), err); - goto out_unlock; - } - - if (++dw == max_dwords(obj)) { - obj = NULL; - dw = 0; - } - - ndwords++; - ncontexts++; - } - - pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", - ncontexts, engine->name, ndwords); - - ncontexts = dw = 0; - list_for_each_entry(obj, &objects, st_link) { - unsigned int rem = - min_t(unsigned int, ndwords - dw, max_dwords(obj)); - - err = cpu_check(obj, ncontexts++, rem); - if (err) - break; - - dw += rem; - } - -out_unlock: - if (igt_live_test_end(&t)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - if (err) - return err; - } - - return 0; -} - -static int igt_shared_ctx_exec(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *parent; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_live_test t; - struct drm_file *file; - int err = 0; - - /* - * Create a few different contexts with the same mm and write - * through each ctx using the GPU making sure those writes end - * up in the expected pages of our obj. 
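- * Unlike igt_ctx_exec above, each context here is a kernel_context()
- * that borrows the parent's ppgtt via __assign_ppgtt(), so every
- * write goes through one shared address space.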
- */ - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return 0; - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - parent = live_context(i915, file); - if (IS_ERR(parent)) { - err = PTR_ERR(parent); - goto out_unlock; - } - - if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */ - err = 0; - goto out_unlock; - } - - err = igt_live_test_begin(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - for_each_engine(engine, i915, id) { - unsigned long ncontexts, ndwords, dw; - struct drm_i915_gem_object *obj = NULL; - IGT_TIMEOUT(end_time); - LIST_HEAD(objects); - - if (!intel_engine_can_store_dword(engine)) - continue; - - dw = 0; - ndwords = 0; - ncontexts = 0; - while (!time_after(jiffies, end_time)) { - struct i915_gem_context *ctx; - intel_wakeref_t wakeref; - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_test; - } - - __assign_ppgtt(ctx, parent->ppgtt); - - if (!obj) { - obj = create_test_object(parent, file, &objects); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - kernel_context_close(ctx); - goto out_test; - } - } - - err = 0; - with_intel_runtime_pm(i915, wakeref) - err = gpu_fill(obj, ctx, engine, dw); - if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", - ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->ppgtt), err); - kernel_context_close(ctx); - goto out_test; - } - - if (++dw == max_dwords(obj)) { - obj = NULL; - dw = 0; - } - - ndwords++; - ncontexts++; - - kernel_context_close(ctx); - } - pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", - ncontexts, engine->name, ndwords); - - ncontexts = dw = 0; - list_for_each_entry(obj, &objects, st_link) { - unsigned int rem = - min_t(unsigned int, ndwords - dw, max_dwords(obj)); - - err = cpu_check(obj, ncontexts++, rem); - if (err) - goto out_test; - - dw += rem; - } - } -out_test: - if (igt_live_test_end(&t)) - err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - return err; -} - -static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj; - u32 *cmd; - int err; - - if (INTEL_GEN(vma->vm->i915) < 8) - return ERR_PTR(-EINVAL); - - obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *cmd++ = MI_STORE_REGISTER_MEM_GEN8; - *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); - *cmd++ = lower_32_bits(vma->node.start); - *cmd++ = upper_32_bits(vma->node.start); - *cmd = MI_BATCH_BUFFER_END; - - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static int -emit_rpcs_query(struct drm_i915_gem_object *obj, - struct intel_context *ce, - struct i915_request **rq_out) -{ - struct i915_request *rq; - struct i915_vma *batch; - struct i915_vma *vma; - int err; - - GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - - vma = i915_vma_instance(obj, &ce->gem_context->ppgtt->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - return err; - - err 
= i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; - - batch = rpcs_query_batch(vma); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - if (err) - goto err_request; - - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - i915_gem_object_set_active_reference(batch->obj); - i915_vma_unpin(batch); - i915_vma_close(batch); - - i915_vma_unpin(vma); - - *rq_out = i915_request_get(rq); - - i915_request_add(rq); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); -err_vma: - i915_vma_unpin(vma); - - return err; -} - -#define TEST_IDLE BIT(0) -#define TEST_BUSY BIT(1) -#define TEST_RESET BIT(2) - -static int -__sseu_prepare(struct drm_i915_private *i915, - const char *name, - unsigned int flags, - struct intel_context *ce, - struct igt_spinner **spin) -{ - struct i915_request *rq; - int ret; - - *spin = NULL; - if (!(flags & (TEST_BUSY | TEST_RESET))) - return 0; - - *spin = kzalloc(sizeof(**spin), GFP_KERNEL); - if (!*spin) - return -ENOMEM; - - ret = igt_spinner_init(*spin, i915); - if (ret) - goto err_free; - - rq = igt_spinner_create_request(*spin, - ce->gem_context, - ce->engine, - MI_NOOP); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto err_fini; - } - - i915_request_add(rq); - - if (!igt_wait_for_spinner(*spin, rq)) { - pr_err("%s: Spinner failed to start!\n", name); - ret = -ETIMEDOUT; - goto err_end; - } - - return 0; - -err_end: - igt_spinner_end(*spin); -err_fini: - igt_spinner_fini(*spin); -err_free: - kfree(fetch_and_zero(spin)); - return ret; -} - -static int -__read_slice_count(struct drm_i915_private *i915, - struct intel_context *ce, - struct drm_i915_gem_object *obj, - struct igt_spinner *spin, - u32 *rpcs) -{ - struct i915_request *rq = NULL; - u32 s_mask, s_shift; - unsigned int cnt; - u32 *buf, val; - long ret; - - ret = emit_rpcs_query(obj, ce, &rq); - if (ret) - return ret; - - if (spin) - igt_spinner_end(spin); - - ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); - if (ret < 0) - return ret; - - buf = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - return ret; - } - - if (INTEL_GEN(i915) >= 11) { - s_mask = GEN11_RPCS_S_CNT_MASK; - s_shift = GEN11_RPCS_S_CNT_SHIFT; - } else { - s_mask = GEN8_RPCS_S_CNT_MASK; - s_shift = GEN8_RPCS_S_CNT_SHIFT; - } - - val = *buf; - cnt = (val & s_mask) >> s_shift; - *rpcs = val; - - i915_gem_object_unpin_map(obj); - - return cnt; -} - -static int -__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, - const char *prefix, const char *suffix) -{ - if (slices == expected) - return 0; - - if (slices < 0) { - pr_err("%s: %s read slice count failed with %d%s\n", - name, prefix, slices, suffix); - return slices; - } - - pr_err("%s: %s slice count %d is not %u%s\n", - name, prefix, slices, expected, suffix); - - pr_info("RPCS=0x%x; %u%sx%u%s\n", - rpcs, slices, - (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", - (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, - (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? 
"*" : ""); - - return -EINVAL; -} - -static int -__sseu_finish(struct drm_i915_private *i915, - const char *name, - unsigned int flags, - struct intel_context *ce, - struct drm_i915_gem_object *obj, - unsigned int expected, - struct igt_spinner *spin) -{ - unsigned int slices = hweight32(ce->engine->sseu.slice_mask); - u32 rpcs = 0; - int ret = 0; - - if (flags & TEST_RESET) { - ret = i915_reset_engine(ce->engine, "sseu"); - if (ret) - goto out; - } - - ret = __read_slice_count(i915, ce, obj, - flags & TEST_RESET ? NULL : spin, &rpcs); - ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); - if (ret) - goto out; - - ret = __read_slice_count(i915, ce->engine->kernel_context, obj, - NULL, &rpcs); - ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); - -out: - if (spin) - igt_spinner_end(spin); - - if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); - ret = __check_rpcs(name, rpcs, ret, expected, - "Context", " after idle!"); - } - - return ret; -} - -static int -__sseu_test(struct drm_i915_private *i915, - const char *name, - unsigned int flags, - struct intel_context *ce, - struct drm_i915_gem_object *obj, - struct intel_sseu sseu) -{ - struct igt_spinner *spin = NULL; - int ret; - - ret = __sseu_prepare(i915, name, flags, ce, &spin); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - if (ret) - goto out_spin; - - ret = __sseu_finish(i915, name, flags, ce, obj, - hweight32(sseu.slice_mask), spin); - -out_spin: - if (spin) { - igt_spinner_end(spin); - igt_spinner_fini(spin); - kfree(spin); - } - return ret; -} - -static int -__igt_ctx_sseu(struct drm_i915_private *i915, - const char *name, - unsigned int flags) -{ - struct intel_engine_cs *engine = i915->engine[RCS0]; - struct intel_sseu default_sseu = engine->sseu; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct intel_sseu pg_sseu; - intel_wakeref_t wakeref; - struct drm_file *file; - int ret; - - if (INTEL_GEN(i915) < 9) - return 0; - - if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) - return 0; - - if (hweight32(default_sseu.slice_mask) < 2) - return 0; - - /* - * Gen11 VME friendly power-gated configuration with half enabled - * sub-slices. - */ - pg_sseu = default_sseu; - pg_sseu.slice_mask = 1; - pg_sseu.subslice_mask = - ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); - - pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(default_sseu.slice_mask), - hweight32(pg_sseu.slice_mask)); - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - if (flags & TEST_RESET) - igt_global_reset_lock(i915); - - mutex_lock(&i915->drm.struct_mutex); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } - i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto out_unlock; - } - - wakeref = intel_runtime_pm_get(i915); - - ce = i915_gem_context_get_engine(ctx, RCS0); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto out_rpm; - } - - ret = intel_context_pin(ce); - if (ret) - goto out_context; - - /* First set the default mask. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; - - /* Then set a power-gated configuration. 
*/ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; - - /* Back to defaults. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; - - /* One last power-gated configuration for the road. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; - -out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - ret = -EIO; - - intel_context_unpin(ce); -out_context: - intel_context_put(ce); -out_rpm: - intel_runtime_pm_put(i915, wakeref); - i915_gem_object_put(obj); - -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - if (flags & TEST_RESET) - igt_global_reset_unlock(i915); - - mock_file_free(i915, file); - - if (ret) - pr_err("%s: Failed with %d!\n", name, ret); - - return ret; -} - -static int igt_ctx_sseu(void *arg) -{ - struct { - const char *name; - unsigned int flags; - } *phase, phases[] = { - { .name = "basic", .flags = 0 }, - { .name = "idle", .flags = TEST_IDLE }, - { .name = "busy", .flags = TEST_BUSY }, - { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, - { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, - { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, - }; - unsigned int i; - int ret = 0; - - for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); - i++, phase++) - ret = __igt_ctx_sseu(arg, phase->name, phase->flags); - - return ret; -} - -static int igt_ctx_readonly(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj = NULL; - struct i915_gem_context *ctx; - struct i915_hw_ppgtt *ppgtt; - unsigned long idx, ndwords, dw; - struct igt_live_test t; - struct drm_file *file; - I915_RND_STATE(prng); - IGT_TIMEOUT(end_time); - LIST_HEAD(objects); - int err = -ENODEV; - - /* - * Create a few read-only objects (with the occasional writable object) - * and try to write into these object checking that the GPU discards - * any write to a read-only object. - */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - err = igt_live_test_begin(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_unlock; - } - - ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt; - if (!ppgtt || !ppgtt->vm.has_read_only) { - err = 0; - goto out_unlock; - } - - ndwords = 0; - dw = 0; - while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; - - for_each_engine(engine, i915, id) { - intel_wakeref_t wakeref; - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (!obj) { - obj = create_test_object(ctx, file, &objects); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } - - if (prandom_u32_state(&prng) & 1) - i915_gem_object_set_readonly(obj); - } - - err = 0; - with_intel_runtime_pm(i915, wakeref) - err = gpu_fill(obj, ctx, engine, dw); - if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? 
%s], err=%d\n", - ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->ppgtt), err); - goto out_unlock; - } - - if (++dw == max_dwords(obj)) { - obj = NULL; - dw = 0; - } - ndwords++; - } - } - pr_info("Submitted %lu dwords (across %u engines)\n", - ndwords, RUNTIME_INFO(i915)->num_engines); - - dw = 0; - idx = 0; - list_for_each_entry(obj, &objects, st_link) { - unsigned int rem = - min_t(unsigned int, ndwords - dw, max_dwords(obj)); - unsigned int num_writes; - - num_writes = rem; - if (i915_gem_object_is_readonly(obj)) - num_writes = 0; - - err = cpu_check(obj, idx++, num_writes); - if (err) - break; - - dw += rem; - } - -out_unlock: - if (igt_live_test_end(&t)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - return err; -} - -static int check_scratch(struct i915_gem_context *ctx, u64 offset) -{ - struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->ppgtt->vm.mm, - offset, offset + sizeof(u32) - 1); - if (!node || node->start > offset) - return 0; - - GEM_BUG_ON(offset >= node->start + node->size); - - pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", - upper_32_bits(offset), lower_32_bits(offset)); - return -EINVAL; -} - -static int write_to_scratch(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, u32 value) -{ - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - struct i915_request *rq; - struct i915_vma *vma; - u32 *cmd; - int err; - - GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - if (INTEL_GEN(i915) >= 8) { - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - } else { - *cmd++ = 0; - *cmd++ = offset; - } - *cmd++ = value; - *cmd = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); - if (err) - goto err; - - err = check_scratch(ctx, offset); - if (err) - goto err_unpin; - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); - if (err) - goto err_request; - - err = i915_vma_move_to_active(vma, rq, 0); - if (err) - goto skip_request; - - i915_gem_object_set_active_reference(obj); - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_request_add(rq); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(vma); -err: - i915_gem_object_put(obj); - return err; -} - -static int read_from_scratch(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, u32 *value) -{ - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! 
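- * so we always load and store through the RCS GPR0 mmio offset,
- * whichever engine executes the batch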
*/ - const u32 result = 0x100; - struct i915_request *rq; - struct i915_vma *vma; - u32 *cmd; - int err; - - GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - memset(cmd, POISON_INUSE, PAGE_SIZE); - if (INTEL_GEN(i915) >= 8) { - *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; - *cmd++ = RCS_GPR0; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = MI_STORE_REGISTER_MEM_GEN8; - *cmd++ = RCS_GPR0; - *cmd++ = result; - *cmd++ = 0; - } else { - *cmd++ = MI_LOAD_REGISTER_MEM; - *cmd++ = RCS_GPR0; - *cmd++ = offset; - *cmd++ = MI_STORE_REGISTER_MEM; - *cmd++ = RCS_GPR0; - *cmd++ = result; - } - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); - if (err) - goto err; - - err = check_scratch(ctx, offset); - if (err) - goto err_unpin; - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); - if (err) - goto err_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_request_add(rq); - - err = i915_gem_object_set_to_cpu_domain(obj, false); - if (err) - goto err; - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *value = cmd[result / sizeof(*cmd)]; - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(vma); -err: - i915_gem_object_put(obj); - return err; -} - -static int igt_vm_isolation(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx_a, *ctx_b; - struct intel_engine_cs *engine; - intel_wakeref_t wakeref; - struct igt_live_test t; - struct drm_file *file; - I915_RND_STATE(prng); - unsigned long count; - unsigned int id; - u64 vm_total; - int err; - - if (INTEL_GEN(i915) < 7) - return 0; - - /* - * The simple goal here is that a write into one context is not - * observed in a second (separate page tables and scratch). 
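- * We poke a dword into a random offset in ctx_a's vm and read the
- * same offset back through ctx_b; a leak shows up as anything other
- * than zero (reads of unpopulated addresses should land in the
- * scratch page).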
- */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - err = igt_live_test_begin(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - ctx_a = live_context(i915, file); - if (IS_ERR(ctx_a)) { - err = PTR_ERR(ctx_a); - goto out_unlock; - } - - ctx_b = live_context(i915, file); - if (IS_ERR(ctx_b)) { - err = PTR_ERR(ctx_b); - goto out_unlock; - } - - /* We can only test vm isolation, if the vm are distinct */ - if (ctx_a->ppgtt == ctx_b->ppgtt) - goto out_unlock; - - vm_total = ctx_a->ppgtt->vm.total; - GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total); - vm_total -= I915_GTT_PAGE_SIZE; - - wakeref = intel_runtime_pm_get(i915); - - count = 0; - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - unsigned long this = 0; - - if (!intel_engine_can_store_dword(engine)) - continue; - - while (!__igt_timeout(end_time, NULL)) { - u32 value = 0xc5c5c5c5; - u64 offset; - - div64_u64_rem(i915_prandom_u64_state(&prng), - vm_total, &offset); - offset &= -sizeof(u32); - offset += I915_GTT_PAGE_SIZE; - - err = write_to_scratch(ctx_a, engine, - offset, 0xdeadbeef); - if (err == 0) - err = read_from_scratch(ctx_b, engine, - offset, &value); - if (err) - goto out_rpm; - - if (value) { - pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", - engine->name, value, - upper_32_bits(offset), - lower_32_bits(offset), - this); - err = -EINVAL; - goto out_rpm; - } - - this++; - } - count += this; - } - pr_info("Checked %lu scratch offsets across %d engines\n", - count, RUNTIME_INFO(i915)->num_engines); - -out_rpm: - intel_runtime_pm_put(i915, wakeref); -out_unlock: - if (igt_live_test_end(&t)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - return err; -} - -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - -static void mock_barrier_task(void *data) -{ - unsigned int *counter = data; - - ++*counter; -} - -static int mock_context_barrier(void *arg) -{ -#undef pr_fmt -#define pr_fmt(x) "context_barrier_task():" # x - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct i915_request *rq; - unsigned int counter; - int err; - - /* - * The context barrier provides us with a callback after it emits - * a request; useful for retiring old state after loading new. 
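- * We expect the callback to run immediately when no engines hold the
- * context, and only after the outstanding request completes
- * otherwise.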
- */ - - mutex_lock(&i915->drm.struct_mutex); - - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } - - counter = 0; - err = context_barrier_task(ctx, 0, - NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately with 0 engines\n"); - err = -EINVAL; - goto out; - } - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately for all unused engines\n"); - err = -EINVAL; - goto out; - } - - rq = igt_request_alloc(ctx, i915->engine[RCS0]); - if (IS_ERR(rq)) { - pr_err("Request allocation failed!\n"); - goto out; - } - i915_request_add(rq); - - counter = 0; - context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, mock_barrier_task, &counter); - context_barrier_inject_fault = 0; - if (err == -ENXIO) - err = 0; - else - pr_err("Did not hit fault injection!\n"); - if (counter != 0) { - pr_err("Invoked callback on error!\n"); - err = -EIO; - } - if (err) - goto out; - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - mock_device_flush(i915); - if (counter == 0) { - pr_err("Did not retire on each active engines\n"); - err = -EINVAL; - goto out; - } - -out: - mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -#undef pr_fmt -#define pr_fmt(x) x -} - -int i915_gem_context_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_context_barrier), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - drm_dev_put(&i915->drm); - return err; -} - -int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_nop_switch), - SUBTEST(igt_ctx_exec), - SUBTEST(igt_ctx_readonly), - SUBTEST(igt_ctx_sseu), - SUBTEST(igt_shared_ctx_exec), - SUBTEST(igt_vm_isolation), - }; - - if (i915_terminally_wedged(dev_priv)) - return 0; - - return i915_subtests(tests, dev_priv); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c deleted file mode 100644 index cc65a503e2f0..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" - -#include "mock_gem_device.h" -#include "mock_dmabuf.h" - -static int igt_dmabuf_export(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - i915_gem_object_put(obj); - if (IS_ERR(dmabuf)) { - pr_err("i915_gem_prime_export failed with err=%d\n", - (int)PTR_ERR(dmabuf)); - return PTR_ERR(dmabuf); - } - - dma_buf_put(dmabuf); - return 0; -} - -static int igt_dmabuf_import_self(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct drm_gem_object *import; - struct dma_buf *dmabuf; - int err; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - if (IS_ERR(dmabuf)) { - pr_err("i915_gem_prime_export failed with err=%d\n", - (int)PTR_ERR(dmabuf)); - err = PTR_ERR(dmabuf); - goto out; - } - - import = i915_gem_prime_import(&i915->drm, dmabuf); - if (IS_ERR(import)) { - pr_err("i915_gem_prime_import failed with err=%d\n", - (int)PTR_ERR(import)); - err = PTR_ERR(import); - goto out_dmabuf; - } - - if (import != &obj->base) { - pr_err("i915_gem_prime_import created a new object!\n"); - err = -EINVAL; - goto out_import; - } - - err = 0; -out_import: - i915_gem_object_put(to_intel_bo(import)); -out_dmabuf: - dma_buf_put(dmabuf); -out: - i915_gem_object_put(obj); - return err; -} - -static int igt_dmabuf_import(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *obj_map, *dma_map; - u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; - int err, i; - - dmabuf = mock_dmabuf(1); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - - obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); - if (IS_ERR(obj)) { - pr_err("i915_gem_prime_import failed with err=%d\n", - (int)PTR_ERR(obj)); - err = PTR_ERR(obj); - goto out_dmabuf; - } - - if (obj->base.dev != &i915->drm) { - pr_err("i915_gem_prime_import created a non-i915 object!\n"); - err = -EINVAL; - goto out_obj; - } - - if (obj->base.size != PAGE_SIZE) { - pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", - (long long)obj->base.size, PAGE_SIZE); - err = -EINVAL; - goto out_obj; - } - - dma_map = dma_buf_vmap(dmabuf); - if (!dma_map) { - pr_err("dma_buf_vmap failed\n"); - err = -ENOMEM; - goto out_obj; - } - - if (0) { /* Can not yet map dmabuf */ - obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(obj_map)) { - err = PTR_ERR(obj_map); - pr_err("i915_gem_object_pin_map failed with err=%d\n", err); - goto out_dma_map; - } - - for (i = 0; i < ARRAY_SIZE(pattern); i++) { - memset(dma_map, pattern[i], PAGE_SIZE); - if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { - err = -EINVAL; - pr_err("imported vmap not all set to %x!\n", pattern[i]); - 
i915_gem_object_unpin_map(obj); - goto out_dma_map; - } - } - - for (i = 0; i < ARRAY_SIZE(pattern); i++) { - memset(obj_map, pattern[i], PAGE_SIZE); - if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { - err = -EINVAL; - pr_err("exported vmap not all set to %x!\n", pattern[i]); - i915_gem_object_unpin_map(obj); - goto out_dma_map; - } - } - - i915_gem_object_unpin_map(obj); - } - - err = 0; -out_dma_map: - dma_buf_vunmap(dmabuf, dma_map); -out_obj: - i915_gem_object_put(obj); -out_dmabuf: - dma_buf_put(dmabuf); - return err; -} - -static int igt_dmabuf_import_ownership(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - dmabuf = mock_dmabuf(1); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - - ptr = dma_buf_vmap(dmabuf); - if (!ptr) { - pr_err("dma_buf_vmap failed\n"); - err = -ENOMEM; - goto err_dmabuf; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_vunmap(dmabuf, ptr); - - obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); - if (IS_ERR(obj)) { - pr_err("i915_gem_prime_import failed with err=%d\n", - (int)PTR_ERR(obj)); - err = PTR_ERR(obj); - goto err_dmabuf; - } - - dma_buf_put(dmabuf); - - err = i915_gem_object_pin_pages(obj); - if (err) { - pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); - goto out_obj; - } - - err = 0; - i915_gem_object_unpin_pages(obj); -out_obj: - i915_gem_object_put(obj); - return err; - -err_dmabuf: - dma_buf_put(dmabuf); - return err; -} - -static int igt_dmabuf_export_vmap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - if (IS_ERR(dmabuf)) { - pr_err("i915_gem_prime_export failed with err=%d\n", - (int)PTR_ERR(dmabuf)); - err = PTR_ERR(dmabuf); - goto err_obj; - } - i915_gem_object_put(obj); - - ptr = dma_buf_vmap(dmabuf); - if (!ptr) { - pr_err("dma_buf_vmap failed\n"); - err = -ENOMEM; - goto out; - } - - if (memchr_inv(ptr, 0, dmabuf->size)) { - pr_err("Exported object not initialiased to zero!\n"); - err = -EINVAL; - goto out; - } - - memset(ptr, 0xc5, dmabuf->size); - - err = 0; - dma_buf_vunmap(dmabuf, ptr); -out: - dma_buf_put(dmabuf); - return err; - -err_obj: - i915_gem_object_put(obj); - return err; -} - -static int igt_dmabuf_export_kmap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - i915_gem_object_put(obj); - if (IS_ERR(dmabuf)) { - err = PTR_ERR(dmabuf); - pr_err("i915_gem_prime_export failed with err=%d\n", err); - return err; - } - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] not initialiased to zero!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - pr_err("i915_gem_object_pin_map failed with err=%d\n", err); - goto err; - } - memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE); - i915_gem_object_flush_map(obj); - 
i915_gem_object_unpin_map(obj); - - ptr = dma_buf_kmap(dmabuf, 1); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 1, ptr); - pr_err("Exported page[1] not set to 0xaa!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 1, ptr); - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] did not retain 0xc5!\n"); - err = -EINVAL; - goto err; - } - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = dma_buf_kmap(dmabuf, 2); - if (ptr) { - pr_err("Erroneously kmapped beyond the end of the object!\n"); - dma_buf_kunmap(dmabuf, 2, ptr); - err = -EINVAL; - goto err; - } - - ptr = dma_buf_kmap(dmabuf, -1); - if (ptr) { - pr_err("Erroneously kmapped before the start of the object!\n"); - dma_buf_kunmap(dmabuf, -1, ptr); - err = -EINVAL; - goto err; - } - - err = 0; -err: - dma_buf_put(dmabuf); - return err; -} - -int i915_gem_dmabuf_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_dmabuf_export), - SUBTEST(igt_dmabuf_import_self), - SUBTEST(igt_dmabuf_import), - SUBTEST(igt_dmabuf_import_ownership), - SUBTEST(igt_dmabuf_export_vmap), - SUBTEST(igt_dmabuf_export_kmap), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - drm_dev_put(&i915->drm); - return err; -} - -int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_dmabuf_export), - }; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 4fc6e5445dd1..1d8235303edf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -22,11 +22,13 @@ * */ -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + +#include "i915_selftest.h" -#include "igt_gem_utils.h" #include "lib_sw_fence.h" -#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 9cca66e4420a..f1e95eaf6923 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -25,10 +25,11 @@ #include #include -#include "../i915_selftest.h" +#include "gem/selftests/mock_context.h" + #include "i915_random.h" +#include "i915_selftest.h" -#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c deleted file mode 100644 index a3dd2f1be95b..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit 
persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" - -#include "igt_flush_test.h" -#include "mock_gem_device.h" -#include "huge_gem_object.h" - -static int igt_gem_object(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - int err = -ENOMEM; - - /* Basic test to ensure we can create an object */ - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - pr_err("i915_gem_object_create failed, err=%d\n", err); - goto out; - } - - err = 0; - i915_gem_object_put(obj); -out: - return err; -} - -static int igt_gem_huge(void *arg) -{ - const unsigned int nreal = 509; /* just to be awkward */ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - unsigned int n; - int err; - - /* Basic sanitycheck of our huge fake object allocation */ - - obj = huge_gem_object(i915, - nreal * PAGE_SIZE, - i915->ggtt.vm.total + PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) { - pr_err("Failed to allocate %u pages (%lu total), err=%d\n", - nreal, obj->base.size / PAGE_SIZE, err); - goto out; - } - - for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { - if (i915_gem_object_get_page(obj, n) != - i915_gem_object_get_page(obj, n % nreal)) { - pr_err("Page lookup mismatch at index %u [%u]\n", - n, n % nreal); - err = -EINVAL; - goto out_unpin; - } - } - -out_unpin: - i915_gem_object_unpin_pages(obj); -out: - i915_gem_object_put(obj); - return err; -} - -int i915_gem_object_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gem_object), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - drm_dev_put(&i915->drm); - return err; -} - -int i915_gem_object_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gem_huge), - }; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index b60591531e4a..4fd5356c6577 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -24,12 +24,14 @@ #include -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" +#include "gem/selftests/mock_context.h" + #include "i915_random.h" +#include "i915_selftest.h" #include "igt_live_test.h" #include "lib_sw_fence.h" -#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index ff9ebe50fae8..acb2cc5136b7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ 
b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -6,8 +6,10 @@ #include -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" + #include "i915_random.h" +#include "i915_selftest.h" #include "igt_flush_test.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 0027c1fac336..425b76133850 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -24,10 +24,11 @@ #include -#include "../i915_selftest.h" +#include "gem/selftests/mock_context.h" + +#include "i915_selftest.h" #include "mock_gem_device.h" -#include "mock_context.h" #include "mock_gtt.h" static bool assert_vma(struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index e42f3c58536a..5bfd1b2626a2 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -4,9 +4,11 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_drv.h" +#include "gem/i915_gem_context.h" + +#include "i915_drv.h" +#include "i915_selftest.h" -#include "../i915_selftest.h" #include "igt_flush_test.h" int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/selftests/igt_gem_utils.c deleted file mode 100644 index 16891b1a3e50..000000000000 --- a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#include "igt_gem_utils.h" - -#include "gt/intel_context.h" - -#include "../i915_gem_context.h" -#include "../i915_gem_pm.h" -#include "../i915_request.h" - -struct i915_request * -igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) -{ - struct intel_context *ce; - struct i915_request *rq; - - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. 
- */ - ce = i915_gem_context_get_engine(ctx, engine->id); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - rq = intel_context_create_request(ce); - intel_context_put(ce); - - return rq; -} diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/selftests/igt_gem_utils.h deleted file mode 100644 index 0f17251cf75d..000000000000 --- a/drivers/gpu/drm/i915/selftests/igt_gem_utils.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#ifndef __IGT_GEM_UTILS_H__ -#define __IGT_GEM_UTILS_H__ - -struct i915_request; -struct i915_gem_context; -struct intel_engine_cs; - -struct i915_request * -igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); - -#endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index ece8a8a0d3b0..38d6f1b10c54 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -4,7 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "igt_gem_utils.h" +#include "gem/selftests/igt_gem_utils.h" + #include "igt_spinner.h" int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h index d312e7cdab68..34a88ac9b47a 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h @@ -7,13 +7,12 @@ #ifndef __I915_SELFTESTS_IGT_SPINNER_H__ #define __I915_SELFTESTS_IGT_SPINNER_H__ -#include "../i915_selftest.h" - +#include "gem/i915_gem_context.h" #include "gt/intel_engine.h" -#include "../i915_drv.h" -#include "../i915_request.h" -#include "../i915_gem_context.h" +#include "i915_drv.h" +#include "i915_request.h" +#include "i915_selftest.h" struct igt_spinner { struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index b05a21eaa8f4..7fd0321e0947 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -22,7 +22,8 @@ * */ -#include "../i915_selftest.h" +#include "i915_selftest.h" +#include "gem/i915_gem_pm.h" /* max doorbell number + negative test for each client type */ #define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c deleted file mode 100644 index 10e67c931ed1..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "mock_context.h" -#include "mock_gtt.h" - -struct i915_gem_context * -mock_context(struct drm_i915_private *i915, - const char *name) -{ - struct i915_gem_context *ctx; - struct i915_gem_engines *e; - int ret; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - kref_init(&ctx->ref); - INIT_LIST_HEAD(&ctx->link); - ctx->i915 = i915; - - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) - goto err_free; - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->handles_list); - INIT_LIST_HEAD(&ctx->hw_id_link); - mutex_init(&ctx->mutex); - - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - - if (name) { - struct i915_hw_ppgtt *ppgtt; - - ctx->name = kstrdup(name, GFP_KERNEL); - if (!ctx->name) - goto err_put; - - ppgtt = mock_ppgtt(i915, name); - if (!ppgtt) - goto err_put; - - __set_ppgtt(ctx, ppgtt); - } - - return ctx; - -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); -err_free: - kfree(ctx); - return NULL; - -err_put: - i915_gem_context_set_closed(ctx); - i915_gem_context_put(ctx); - return NULL; -} - -void mock_context_close(struct i915_gem_context *ctx) -{ - context_close(ctx); -} - -void mock_init_contexts(struct drm_i915_private *i915) -{ - init_contexts(i915); -} - -struct i915_gem_context * -live_context(struct drm_i915_private *i915, struct drm_file *file) -{ - struct i915_gem_context *ctx; - int err; - - lockdep_assert_held(&i915->drm.struct_mutex); - - ctx = i915_gem_create_context(i915, 0); - if (IS_ERR(ctx)) - return ctx; - - err = gem_context_register(ctx, file->driver_priv); - if (err < 0) - goto err_ctx; - - return ctx; - -err_ctx: - context_close(ctx); - return ERR_PTR(err); -} - -struct i915_gem_context * -kernel_context(struct drm_i915_private *i915) -{ - return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL); -} - -void kernel_context_close(struct i915_gem_context *ctx) -{ - context_close(ctx); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h deleted file mode 100644 index 29b9d60a158b..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_context.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __MOCK_CONTEXT_H -#define __MOCK_CONTEXT_H - -void mock_init_contexts(struct drm_i915_private *i915); - -struct i915_gem_context * -mock_context(struct drm_i915_private *i915, - const char *name); - -void mock_context_close(struct i915_gem_context *ctx); - -struct i915_gem_context * -live_context(struct drm_i915_private *i915, struct drm_file *file); - -struct i915_gem_context *kernel_context(struct drm_i915_private *i915); -void kernel_context_close(struct i915_gem_context *ctx); - -#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c deleted file mode 100644 index ca682caf1062..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include "mock_dmabuf.h" - -static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, - enum dma_data_direction dir) -{ - struct mock_dmabuf *mock = to_mock(attachment->dmabuf); - struct sg_table *st; - struct scatterlist *sg; - int i, err; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); - - err = sg_alloc_table(st, mock->npages, GFP_KERNEL); - if (err) - goto err_free; - - sg = st->sgl; - for (i = 0; i < mock->npages; i++) { - sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); - sg = sg_next(sg); - } - - if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { - err = -ENOMEM; - goto err_st; - } - - return st; - -err_st: - sg_free_table(st); -err_free: - kfree(st); - return ERR_PTR(err); -} - -static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, - struct sg_table *st, - enum dma_data_direction dir) -{ - dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); - sg_free_table(st); - kfree(st); -} - -static void mock_dmabuf_release(struct dma_buf *dma_buf) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - int i; - - for (i = 0; i < mock->npages; i++) - put_page(mock->pages[i]); - - kfree(mock); -} - -static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); -} - -static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - vm_unmap_ram(vaddr, mock->npages); -} - -static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - return kmap(mock->pages[page_num]); -} - -static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - return kunmap(mock->pages[page_num]); -} - -static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) -{ - return -ENODEV; -} - -static const struct dma_buf_ops mock_dmabuf_ops = { - .map_dma_buf = mock_map_dma_buf, - .unmap_dma_buf = mock_unmap_dma_buf, - .release = mock_dmabuf_release, - .map = mock_dmabuf_kmap, - .unmap = mock_dmabuf_kunmap, - .mmap = mock_dmabuf_mmap, - .vmap = mock_dmabuf_vmap, - .vunmap = mock_dmabuf_vunmap, -}; - -static struct dma_buf *mock_dmabuf(int npages) -{ - struct mock_dmabuf *mock; - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - struct dma_buf *dmabuf; - int i; - - mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), - GFP_KERNEL); - if (!mock) - return ERR_PTR(-ENOMEM); - - mock->npages = npages; - for (i = 0; i < npages; i++) { - mock->pages[i] = alloc_page(GFP_KERNEL); - if (!mock->pages[i]) - goto err; - } - - exp_info.ops = &mock_dmabuf_ops; - exp_info.size = npages * PAGE_SIZE; - exp_info.flags = O_CLOEXEC; - exp_info.priv = mock; - - dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(dmabuf)) - goto err; - - return dmabuf; - -err: - while (i--) - put_page(mock->pages[i]); - kfree(mock); - return ERR_PTR(-ENOMEM); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h deleted file mode 100644 index ec80613159b9..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h +++ /dev/null @@ -1,41 +0,0 @@ - -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without 
restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __MOCK_DMABUF_H__ -#define __MOCK_DMABUF_H__ - -#include - -struct mock_dmabuf { - int npages; - struct page *pages[]; -}; - -static struct mock_dmabuf *to_mock(struct dma_buf *buf) -{ - return buf->priv; -} - -#endif /* !__MOCK_DMABUF_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 9fd02025d382..e25b74a27f83 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -27,13 +27,14 @@ #include "gt/mock_engine.h" -#include "mock_context.h" #include "mock_request.h" #include "mock_gem_device.h" -#include "mock_gem_object.h" #include "mock_gtt.h" #include "mock_uncore.h" +#include "gem/selftests/mock_context.h" +#include "gem/selftests/mock_gem_object.h" + void mock_device_flush(struct drm_i915_private *i915) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h deleted file mode 100644 index 20acdbee7bd0..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_gem_object.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __MOCK_GEM_OBJECT_H__ -#define __MOCK_GEM_OBJECT_H__ - -struct mock_object { - struct drm_i915_gem_object base; -}; - -#endif /* !__MOCK_GEM_OBJECT_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index b99f7576153c..9390fc09984b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -22,9 +22,9 @@ * */ +#include "gem/selftests/igt_gem_utils.h" #include "gt/mock_engine.h" -#include "igt_gem_utils.h" #include "mock_request.h" struct i915_request * -- cgit v1.2.3 From dbc651836372a7b3aaebe2f924db872c5d0804df Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 7 Jun 2019 09:45:20 +0100 Subject: drm/i915: Convert some more bits to use engine mmio accessors Remove a couple dev_priv locals as a consequence. 
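For illustration, a minimal before/after sketch of the conversion, distilled from the hunks below (enable_ring_old()/enable_ring_new() are hypothetical wrappers, not functions added by this patch):

/* Before: the per-device accessor needs a dev_priv local, and the
 * register offset is computed by hand from engine->mmio_base.
 */
static void enable_ring_old(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE(RING_MI_MODE(engine->mmio_base),
		   _MASKED_BIT_DISABLE(STOP_RING));
}

/* After: the engine accessor takes the engine itself and applies
 * engine->mmio_base internally, so the dev_priv local disappears.
 */
static void enable_ring_new(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}

The same pattern recurs in every hunk of this patch: callers hand ENGINE_READ/ENGINE_WRITE the register macro itself (e.g. RING_MODE_GEN7) rather than a pre-expanded offset, which is also why RING_MODE_GEN7 below is changed to take a plain base instead of an engine pointer.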
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190607084521.16845-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 27 +++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++-- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_guc_submission.c | 4 ++-- 5 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index fed704802c57..f27b6c002627 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2021,31 +2021,30 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) static void enable_execlists(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ - if (INTEL_GEN(dev_priv) >= 11) - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + if (INTEL_GEN(engine->i915) >= 11) + ENGINE_WRITE(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); else - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + ENGINE_WRITE(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); - I915_WRITE(RING_MI_MODE(engine->mmio_base), - _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); - I915_WRITE(RING_HWS_PGA(engine->mmio_base), - i915_ggtt_offset(engine->status_page.vma)); - POSTING_READ(RING_HWS_PGA(engine->mmio_base)); + ENGINE_WRITE(engine, + RING_HWS_PGA, + i915_ggtt_offset(engine->status_page.vma)); + ENGINE_POSTING_READ(engine, RING_HWS_PGA); } static bool unexpected_starting_state(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; bool unexpected = false; - if (I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING) { + if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) { DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); unexpected = true; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 550cf4b63586..87be9c1b6021 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1713,8 +1713,9 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { /* GFX_MODE is per-ring on gen7+ */ - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + ENGINE_WRITE(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); } } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2f85de034d8f..193a93857d99 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1219,7 +1219,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, if (HAS_PPGTT(dev_priv)) { int i; - ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); + ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7); if (IS_GEN(dev_priv, 6)) { ee->vm_info.pp_dir_base = diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b7c13d5deb15..8778f56a034c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2698,7 +2698,7 @@ enum i915_power_well_id { #define GFX_MODE _MMIO(0x2520) #define 
GFX_MODE_GEN7 _MMIO(0x229c) -#define RING_MODE_GEN7(engine) _MMIO((engine)->mmio_base + 0x29c) +#define RING_MODE_GEN7(base) _MMIO((base) + 0x29c) #define GFX_RUN_LIST_ENABLE (1 << 15) #define GFX_INTERRUPT_STEERING (1 << 14) #define GFX_TLB_INVALIDATE_EXPLICIT (1 << 13) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index a4f98ccef0fe..89592ef778b8 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1306,7 +1306,7 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv) */ irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MODE_GEN7(engine), irqs); + ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route USER_INTERRUPT to Host, all others are sent to GuC. */ irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | @@ -1353,7 +1353,7 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MODE_GEN7(engine), irqs); + ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route all GT interrupts to the host */ I915_WRITE(GUC_BCS_RCS_IER, 0); -- cgit v1.2.3 From e33a4be83a64cd4f127f002abbe7f62b833fa3ac Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 11 Jun 2019 11:45:44 +0100 Subject: drm/i915: Remove I915_POSTING_READ_FW Only a few call sites remain which have been converted to uncore mmio accessors and so the macro can be removed. Signed-off-by: Tvrtko Ursulin Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190611104548.30545-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 9 +++++---- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_guc_submission.c | 4 ++-- drivers/gpu/drm/i915/intel_pm.c | 31 +++++++++++++++-------------- 5 files changed, 24 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f90bab84ac52..2cbd60c4a5dc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2877,7 +2877,6 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, */ #define I915_READ_FW(reg__) __I915_REG_OP(read_fw, dev_priv, (reg__)) #define I915_WRITE_FW(reg__, val__) __I915_REG_OP(write_fw, dev_priv, (reg__), (val__)) -#define POSTING_READ_FW(reg__) __I915_REG_OP(posting_read_fw, dev_priv, (reg__)) /* "Broadcast RGB" property */ #define INTEL_BROADCAST_RGB_AUTO 0 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 17e8809c5312..4017ecf561f6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -263,11 +263,12 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) i915_gem_chipset_flush(dev_priv); with_intel_runtime_pm(dev_priv, wakeref) { - spin_lock_irq(&dev_priv->uncore.lock); + struct intel_uncore *uncore = &dev_priv->uncore; - POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); - - spin_unlock_irq(&dev_priv->uncore.lock); + spin_lock_irq(&uncore->lock); + intel_uncore_posting_read_fw(uncore, + RING_HEAD(RENDER_RING_BASE)); + spin_unlock_irq(&uncore->lock); } } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 
11c451358fb8..9db9fbd0e70c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -387,7 +387,7 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) { ilk_update_gt_irq(dev_priv, mask, mask); - POSTING_READ_FW(GTIMR); + intel_uncore_posting_read_fw(&dev_priv->uncore, GTIMR); } void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 89592ef778b8..97f6970d8da8 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -557,10 +557,10 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) */ static void flush_ggtt_writes(struct i915_vma *vma) { - struct drm_i915_private *dev_priv = vma->vm->i915; + struct drm_i915_private *i915 = vma->vm->i915; if (i915_vma_is_map_and_fenceable(vma)) - POSTING_READ_FW(GUC_STATUS); + intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS); } static void inject_preempt_context(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 93e411e6ad19..84588ff8732f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1949,6 +1949,7 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_uncore *uncore = &dev_priv->uncore; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; int sprite0_start, sprite1_start, fifo_size; @@ -1974,13 +1975,13 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, * intel_pipe_update_start() has already disabled interrupts * for us, so a plain spin_lock() is sufficient here. 
*/ - spin_lock(&dev_priv->uncore.lock); + spin_lock(&uncore->lock); switch (crtc->pipe) { u32 dsparb, dsparb2, dsparb3; case PIPE_A: - dsparb = I915_READ_FW(DSPARB); - dsparb2 = I915_READ_FW(DSPARB2); + dsparb = intel_uncore_read_fw(uncore, DSPARB); + dsparb2 = intel_uncore_read_fw(uncore, DSPARB2); dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) | VLV_FIFO(SPRITEB, 0xff)); @@ -1992,12 +1993,12 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) | VLV_FIFO(SPRITEB_HI, sprite1_start >> 8)); - I915_WRITE_FW(DSPARB, dsparb); - I915_WRITE_FW(DSPARB2, dsparb2); + intel_uncore_write_fw(uncore, DSPARB, dsparb); + intel_uncore_write_fw(uncore, DSPARB2, dsparb2); break; case PIPE_B: - dsparb = I915_READ_FW(DSPARB); - dsparb2 = I915_READ_FW(DSPARB2); + dsparb = intel_uncore_read_fw(uncore, DSPARB); + dsparb2 = intel_uncore_read_fw(uncore, DSPARB2); dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) | VLV_FIFO(SPRITED, 0xff)); @@ -2009,12 +2010,12 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) | VLV_FIFO(SPRITED_HI, sprite1_start >> 8)); - I915_WRITE_FW(DSPARB, dsparb); - I915_WRITE_FW(DSPARB2, dsparb2); + intel_uncore_write_fw(uncore, DSPARB, dsparb); + intel_uncore_write_fw(uncore, DSPARB2, dsparb2); break; case PIPE_C: - dsparb3 = I915_READ_FW(DSPARB3); - dsparb2 = I915_READ_FW(DSPARB2); + dsparb3 = intel_uncore_read_fw(uncore, DSPARB3); + dsparb2 = intel_uncore_read_fw(uncore, DSPARB2); dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) | VLV_FIFO(SPRITEF, 0xff)); @@ -2026,16 +2027,16 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) | VLV_FIFO(SPRITEF_HI, sprite1_start >> 8)); - I915_WRITE_FW(DSPARB3, dsparb3); - I915_WRITE_FW(DSPARB2, dsparb2); + intel_uncore_write_fw(uncore, DSPARB3, dsparb3); + intel_uncore_write_fw(uncore, DSPARB2, dsparb2); break; default: break; } - POSTING_READ_FW(DSPARB); + intel_uncore_posting_read_fw(uncore, DSPARB); - spin_unlock(&dev_priv->uncore.lock); + spin_unlock(&uncore->lock); } #undef VLV_FIFO -- cgit v1.2.3 From 422d7df4f090bbbc4d49e66d533a259ba63ec70d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 14 Jun 2019 17:46:06 +0100 Subject: drm/i915: Replace engine->timeline with a plain list To continue the onslaught of removing the assumption of a global execution ordering, another casualty is the engine->timeline. Without an actual timeline to track, it is overkill and we can replace it with a much less grand plain list. We still need a list of requests inflight, for the simple purpose of finding inflight requests (for retiring, resetting, preemption etc). 
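As a rough sketch of the idiom the hunks below converge on (the helper and its callback are hypothetical, shown only to illustrate the new locking and list discipline):

/* Walk the requests still in flight on an engine. The plain list
 * engine->active.requests replaces the old engine->timeline.requests,
 * is guarded by engine->active.lock instead of the timeline lock, and
 * links requests through rq->sched.link rather than rq->link.
 */
static void for_each_inflight_request(struct intel_engine_cs *engine,
				      void (*fn)(struct i915_request *rq))
{
	struct i915_request *rq;
	unsigned long flags;

	spin_lock_irqsave(&engine->active.lock, flags);
	list_for_each_entry(rq, &engine->active.requests, sched.link) {
		if (i915_request_completed(rq))
			continue; /* skip requests that already completed */
		fn(rq);
	}
	spin_unlock_irqrestore(&engine->active.lock, flags);
}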
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190614164606.15633-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine.h | 6 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 62 ++++++++--------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 6 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 95 ++++++++++++++------------ drivers/gpu/drm/i915/gt/intel_reset.c | 10 +-- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 15 ++-- drivers/gpu/drm/i915/gt/mock_engine.c | 18 ++--- drivers/gpu/drm/i915/i915_gpu_error.c | 5 +- drivers/gpu/drm/i915/i915_request.c | 43 ++++-------- drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 38 +++++------ drivers/gpu/drm/i915/i915_timeline.c | 1 - drivers/gpu/drm/i915/i915_timeline.h | 19 ------ drivers/gpu/drm/i915/i915_timeline_types.h | 4 -- drivers/gpu/drm/i915/intel_guc_submission.c | 16 ++--- drivers/gpu/drm/i915/selftests/mock_timeline.c | 1 - 16 files changed, 153 insertions(+), 188 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 3b5a6d152997..2f1c6871ee95 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -565,4 +565,10 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) #endif +void intel_engine_init_active(struct intel_engine_cs *engine, + unsigned int subclass); +#define ENGINE_PHYSICAL 0 +#define ENGINE_MOCK 1 +#define ENGINE_VIRTUAL 2 + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 22242e927baa..898692989313 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -617,14 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) if (err) return err; - err = i915_timeline_init(engine->i915, - &engine->timeline, - engine->status_page.vma); - if (err) - goto err_hwsp; - - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - + intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init_hangcheck(engine); @@ -637,10 +630,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); return 0; - -err_hwsp: - cleanup_status_page(engine); - return err; } /** @@ -797,6 +786,27 @@ static int pin_context(struct i915_gem_context *ctx, return 0; } +void +intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) +{ + INIT_LIST_HEAD(&engine->active.requests); + + spin_lock_init(&engine->active.lock); + lockdep_set_subclass(&engine->active.lock, subclass); + + /* + * Due to an interesting quirk in lockdep's internal debug tracking, + * after setting a subclass we must ensure the lock is used. Otherwise, + * nr_unused_locks is incremented once too often. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + local_irq_disable(); + lock_map_acquire(&engine->active.lock.dep_map); + lock_map_release(&engine->active.lock.dep_map); + local_irq_enable(); +#endif +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. 
@@ -860,6 +870,8 @@ err_unpin: */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { + GEM_BUG_ON(!list_empty(&engine->active.requests)); + cleanup_status_page(engine); intel_engine_fini_breadcrumbs(engine); @@ -874,8 +886,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_context_unpin(engine->kernel_context); GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); - i915_timeline_fini(&engine->timeline); - intel_wa_list_free(&engine->ctx_wa_list); intel_wa_list_free(&engine->wa_list); intel_wa_list_free(&engine->whitelist); @@ -1482,16 +1492,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) - print_request(m, rq, "\t\tlast "); - rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1572,7 +1572,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); write_seqlock(&engine->stats.lock); if (unlikely(engine->stats.enabled == ~0)) { @@ -1598,7 +1598,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) unlock: write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); return err; } @@ -1683,22 +1683,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->timeline.lock, flags); - list_for_each_entry(request, &engine->timeline.requests, link) { + spin_lock_irqsave(&engine->active.lock, flags); + list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; if (!i915_request_started(request)) - break; + continue; /* More than one preemptible request may match! */ if (!match_ring(request)) - break; + continue; active = request; break; } - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 33a31aa2d2ae..b2faca8e5dec 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -288,7 +288,11 @@ struct intel_engine_cs { struct intel_ring *buffer; - struct i915_timeline timeline; + struct { + spinlock_t lock; + struct list_head requests; + } active; + struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d0a51752386f..c400c66d0ee5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -298,8 +298,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * Check against the first request in ELSP[1], it will, thanks to the * power of PI, be the highest priority of that context. 
*/ - if (!list_is_last(&rq->link, &engine->timeline.requests) && - rq_prio(list_next_entry(rq, link)) > last_prio) + if (!list_is_last(&rq->sched.link, &engine->active.requests) && + rq_prio(list_next_entry(rq, sched.link)) > last_prio) return true; if (rb) { @@ -434,11 +434,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct list_head *uninitialized_var(pl); int prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline.requests, - link) { + &engine->active.requests, + sched.link) { struct intel_engine_cs *owner; if (i915_request_completed(rq)) @@ -465,7 +465,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) } GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - list_add(&rq->sched.link, pl); + list_move(&rq->sched.link, pl); active = rq; } else { rq->engine = owner; @@ -933,11 +933,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) rb_entry(rb, typeof(*ve), nodes[engine->id].rb); struct i915_request *rq; - spin_lock(&ve->base.timeline.lock); + spin_lock(&ve->base.active.lock); rq = ve->request; if (unlikely(!rq)) { /* lost the race to a sibling */ - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); rb = rb_first_cached(&execlists->virtual); @@ -950,13 +950,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (rq_prio(rq) >= queue_prio(execlists)) { if (!virtual_matches(ve, rq, engine)) { - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); rb = rb_next(rb); continue; } if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); return; /* leave this rq for another engine */ } @@ -1011,7 +1011,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = rq; } - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); break; } @@ -1068,8 +1068,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - list_del_init(&rq->sched.link); - __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); @@ -1170,7 +1168,7 @@ static void process_csb(struct intel_engine_cs *engine) const u8 num_entries = execlists->csb_size; u8 head, tail; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); /* * Note that csb_write, csb_status may be either in HWSP or mmio. 
@@ -1330,7 +1328,7 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); process_csb(engine); if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) @@ -1351,15 +1349,16 @@ static void execlists_submission_tasklet(unsigned long data) !!intel_wakeref_active(&engine->wakeref), engine->execlists.active); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void queue_request(struct intel_engine_cs *engine, struct i915_sched_node *node, int prio) { + GEM_BUG_ON(!list_empty(&node->link)); list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); } @@ -1390,7 +1389,7 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); queue_request(engine, &request->sched, rq_prio(request)); @@ -1399,7 +1398,7 @@ static void execlists_submit_request(struct i915_request *request) submit_queue(engine, rq_prio(request)); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void __execlists_context_fini(struct intel_context *ce) @@ -2050,8 +2049,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) intel_engine_stop_cs(engine); /* And flush any current direct submission. */ - spin_lock_irqsave(&engine->timeline.lock, flags); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static bool lrc_regs_ok(const struct i915_request *rq) @@ -2094,11 +2093,11 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists) static struct i915_request *active_request(struct i915_request *rq) { - const struct list_head * const list = &rq->engine->timeline.requests; + const struct list_head * const list = &rq->engine->active.requests; const struct intel_context * const context = rq->hw_context; struct i915_request *active = NULL; - list_for_each_entry_from_reverse(rq, list, link) { + list_for_each_entry_from_reverse(rq, list, sched.link) { if (i915_request_completed(rq)) break; @@ -2215,11 +2214,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) GEM_TRACE("%s\n", engine->name); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __execlists_reset(engine, stalled); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void nop_submission_tasklet(unsigned long data) @@ -2250,12 +2249,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __execlists_reset(engine, true); /* Mark all executing requests as skipped. 
*/ - list_for_each_entry(rq, &engine->timeline.requests, link) { + list_for_each_entry(rq, &engine->active.requests, sched.link) { if (!i915_request_signaled(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -2286,7 +2285,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); - spin_lock(&ve->base.timeline.lock); + spin_lock(&ve->base.active.lock); if (ve->request) { ve->request->engine = engine; __i915_request_submit(ve->request); @@ -2295,7 +2294,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) ve->base.execlists.queue_priority_hint = INT_MIN; ve->request = NULL; } - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -2307,7 +2306,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -3010,12 +3009,18 @@ error_deref_obj: return ret; } +static struct list_head *virtual_queue(struct virtual_engine *ve) +{ + return &ve->base.execlists.default_priolist.requests[0]; +} + static void virtual_context_destroy(struct kref *kref) { struct virtual_engine *ve = container_of(kref, typeof(*ve), context.ref); unsigned int n; + GEM_BUG_ON(!list_empty(virtual_queue(ve))); GEM_BUG_ON(ve->request); GEM_BUG_ON(ve->context.inflight); @@ -3026,13 +3031,13 @@ static void virtual_context_destroy(struct kref *kref) if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->timeline.lock); + spin_lock_irq(&sibling->active.lock); /* Detachment is lazily performed in the execlists tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->timeline.lock); + spin_unlock_irq(&sibling->active.lock); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); @@ -3040,8 +3045,6 @@ static void virtual_context_destroy(struct kref *kref) __execlists_context_fini(&ve->context); kfree(ve->bonds); - - i915_timeline_fini(&ve->base.timeline); kfree(ve); } @@ -3161,16 +3164,16 @@ static void virtual_submission_tasklet(unsigned long data) if (unlikely(!(mask & sibling->mask))) { if (!RB_EMPTY_NODE(&node->rb)) { - spin_lock(&sibling->timeline.lock); + spin_lock(&sibling->active.lock); rb_erase_cached(&node->rb, &sibling->execlists.virtual); RB_CLEAR_NODE(&node->rb); - spin_unlock(&sibling->timeline.lock); + spin_unlock(&sibling->active.lock); } continue; } - spin_lock(&sibling->timeline.lock); + spin_lock(&sibling->active.lock); if (!RB_EMPTY_NODE(&node->rb)) { /* @@ -3214,7 +3217,7 @@ submit_engine: tasklet_hi_schedule(&sibling->execlists.tasklet); } - spin_unlock(&sibling->timeline.lock); + spin_unlock(&sibling->active.lock); } local_irq_enable(); } @@ -3231,9 +3234,13 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); GEM_BUG_ON(ve->request); + GEM_BUG_ON(!list_empty(virtual_queue(ve))); + ve->base.execlists.queue_priority_hint = rq_prio(rq); WRITE_ONCE(ve->request, rq); + list_move_tail(&rq->sched.link, virtual_queue(ve)); + tasklet_schedule(&ve->base.execlists.tasklet); } @@ -3297,10 +3304,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, snprintf(ve->base.name, 
sizeof(ve->base.name), "virtual"); - err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL); - if (err) - goto err_put; - i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL); + intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_execlists(&ve->base); @@ -3311,6 +3315,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.submit_request = virtual_submit_request; ve->base.bond_execute = virtual_bond_execute; + INIT_LIST_HEAD(virtual_queue(ve)); ve->base.execlists.queue_priority_hint = INT_MIN; tasklet_init(&ve->base.execlists.tasklet, virtual_submission_tasklet, @@ -3465,11 +3470,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int count; struct rb_node *rb; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); last = NULL; count = 0; - list_for_each_entry(rq, &engine->timeline.requests, link) { + list_for_each_entry(rq, &engine->active.requests, sched.link) { if (count++ < max - 1) show_request(m, rq, "\t\tE "); else @@ -3532,7 +3537,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\tV "); } - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } void intel_lr_context_reset(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 6e6807a3f748..84c670bdb081 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -49,12 +49,12 @@ static void engine_skip_context(struct i915_request *rq) struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); if (!i915_request_is_active(rq)) return; - list_for_each_entry_continue(rq, &engine->timeline.requests, link) + list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->gem_context == hung_ctx) i915_request_skip(rq, -EIO); } @@ -130,7 +130,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty) rq->fence.seqno, yesno(guilty)); - lockdep_assert_held(&rq->engine->timeline.lock); + lockdep_assert_held(&rq->engine->active.lock); GEM_BUG_ON(i915_request_completed(rq)); if (guilty) { @@ -785,10 +785,10 @@ static void nop_submit_request(struct i915_request *request) engine->name, request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __i915_request_submit(request); i915_request_mark_complete(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); intel_engine_queue_breadcrumbs(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index cc901edec09a..019bf039f616 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -730,14 +730,13 @@ static void reset_prepare(struct intel_engine_cs *engine) static void reset_ring(struct intel_engine_cs *engine, bool stalled) { - struct i915_timeline *tl = &engine->timeline; struct i915_request *pos, *rq; unsigned long flags; u32 head; rq = NULL; - spin_lock_irqsave(&tl->lock, flags); - list_for_each_entry(pos, &tl->requests, link) { + spin_lock_irqsave(&engine->active.lock, flags); + 
list_for_each_entry(pos, &engine->active.requests, sched.link) { if (!i915_request_completed(pos)) { rq = pos; break; @@ -791,7 +790,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } engine->buffer->head = intel_ring_wrap(engine->buffer, head); - spin_unlock_irqrestore(&tl->lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -877,10 +876,10 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline.requests, link) { + list_for_each_entry(request, &engine->active.requests, sched.link) { if (!i915_request_signaled(request)) dma_fence_set_error(&request->fence, -EIO); @@ -889,7 +888,7 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -1267,8 +1266,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - GEM_BUG_ON(timeline == &engine->timeline); - lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index d1ef515bac8d..086801b51441 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -229,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); /* Mark all submitted requests as skipped. 
-	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
 		i915_request_mark_complete(request);
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -285,28 +285,23 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	int err;
 
+	intel_engine_init_active(engine, ENGINE_MOCK);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);
 
-	if (i915_timeline_init(i915, &engine->timeline, NULL))
-		goto err_breadcrumbs;
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
 	engine->kernel_context =
 		i915_gem_context_get_engine(i915->kernel_context, engine->id);
 	if (IS_ERR(engine->kernel_context))
-		goto err_timeline;
+		goto err_breadcrumbs;
 
 	err = intel_context_pin(engine->kernel_context);
 	intel_context_put(engine->kernel_context);
 	if (err)
-		goto err_timeline;
+		goto err_breadcrumbs;
 
 	return 0;
 
-err_timeline:
-	i915_timeline_fini(&engine->timeline);
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 	return -ENOMEM;
@@ -340,7 +335,6 @@ void mock_engine_free(struct intel_engine_cs *engine)
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
-	i915_timeline_fini(&engine->timeline);
 
 	kfree(engine);
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 26c9c0595bdf..f411e3244208 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1275,7 +1275,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link)
+	list_for_each_entry_from(request, &engine->active.requests, sched.link)
 		count++;
 	if (!count)
 		return;
@@ -1288,7 +1288,8 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link) {
+	list_for_each_entry_from(request,
+				 &engine->active.requests, sched.link) {
 		if (count >= ee->num_requests) {
 			/*
 			 * If the ring request list was changed in
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c99136f78af9..9819483d1b5d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -232,9 +232,9 @@ static bool i915_request_retire(struct i915_request *rq)
 
 	local_irq_disable();
 
-	spin_lock(&rq->engine->timeline.lock);
-	list_del(&rq->link);
-	spin_unlock(&rq->engine->timeline.lock);
+	spin_lock(&rq->engine->active.lock);
+	list_del(&rq->sched.link);
+	spin_unlock(&rq->engine->active.lock);
 
 	spin_lock(&rq->lock);
 	i915_request_mark_complete(rq);
@@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
 	intel_context_unpin(rq->hw_context);
 
 	i915_request_remove_from_client(rq);
+	list_del(&rq->link);
 
 	free_capture_list(rq);
 	i915_sched_node_fini(&rq->sched);
@@ -373,28 +374,17 @@ __i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-static void move_to_timeline(struct i915_request *request,
-			     struct i915_timeline *timeline)
-{
-	GEM_BUG_ON(request->timeline == &request->engine->timeline);
-	lockdep_assert_held(&request->engine->timeline.lock);
-
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
@@ -422,6 +412,8 @@ void __i915_request_submit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
+	list_move_tail(&request->sched.link, &engine->active.requests);
+
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
@@ -437,9 +429,6 @@ void __i915_request_submit(struct i915_request *request)
 	engine->emit_fini_breadcrumb(request,
 				     request->ring->vaddr + request->postfix);
 
-	/* Transfer from per-context onto the global per-engine timeline */
-	move_to_timeline(request, &engine->timeline);
-
 	engine->serial++;
 
 	trace_i915_request_execute(request);
@@ -451,11 +440,11 @@ void i915_request_submit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
@@ -468,7 +457,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Only unwind in reverse order, required so that the per-context list
@@ -486,9 +475,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	spin_unlock(&request->lock);
 
-	/* Transfer back from the global per-engine timeline to per-context */
-	move_to_timeline(request, request->timeline);
-
 	/* We've already spun, don't charge on resubmitting. */
 	if (request->sched.semaphores && i915_request_started(request)) {
 		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
@@ -510,11 +496,11 @@ void i915_request_unsubmit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static int __i915_sw_fence_call
@@ -669,7 +655,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -1136,9 +1121,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 							 0);
 	}
 
-	spin_lock_irq(&timeline->lock);
 	list_add_tail(&rq->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
 
 	/*
 	 * Make sure that no request gazumped us - if it was allocated after
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c9f7d07991c8..edbbdfec24ab 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -217,7 +217,7 @@ struct i915_request {
 
 	bool waitboost;
 
-	/** engine->request_list entry for this request */
+	/** timeline->requests entry for this request */
 	struct list_head link;
 
 	/** ring->request_list entry for this request */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 78ceb56d7801..2e9b38bdc33c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -77,7 +77,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	bool first = true;
 	int idx, i;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 	assert_priolists(execlists);
 
 	/* buckets sorted from highest [in slot 0] to lowest priority */
@@ -162,9 +162,9 @@ sched_lock_engine(const struct i915_sched_node *node,
 	 * check that the rq still belongs to the newly locked engine.
 	 */
 	while (locked != (engine = READ_ONCE(rq->engine))) {
-		spin_unlock(&locked->timeline.lock);
+		spin_unlock(&locked->active.lock);
 		memset(cache, 0, sizeof(*cache));
-		spin_lock(&engine->timeline.lock);
+		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
@@ -189,7 +189,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio)
 	 * tasklet, i.e. we have not changed the priority queue
 	 * sufficiently to oust the running context.
 	 */
-	if (inflight && !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
+	if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
 		return;
 
 	tasklet_hi_schedule(&engine->execlists.tasklet);
@@ -278,7 +278,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 	memset(&cache, 0, sizeof(cache));
 	engine = node_to_request(node)->engine;
-	spin_lock(&engine->timeline.lock);
+	spin_lock(&engine->active.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	engine = sched_lock_engine(node, engine, &cache);
@@ -287,7 +287,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 		node = dep->signaler;
 		engine = sched_lock_engine(node, engine, &cache);
-		lockdep_assert_held(&engine->timeline.lock);
+		lockdep_assert_held(&engine->active.lock);
 
 		/* Recheck after acquiring the engine->active.lock */
 		if (prio <= node->attr.priority || node_signaled(node))
@@ -296,14 +296,8 @@ static void __i915_schedule(struct i915_sched_node *node,
 		GEM_BUG_ON(node_to_request(node)->engine != engine);
 
 		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			GEM_BUG_ON(intel_engine_is_virtual(engine));
-			if (!cache.priolist)
-				cache.priolist =
-					i915_sched_lookup_priolist(engine,
-								   prio);
-			list_move_tail(&node->link, cache.priolist);
-		} else {
+
+		if (list_empty(&node->link)) {
 			/*
 			 * If the request is not in the priolist queue because
 			 * it is not yet runnable, then it doesn't contribute
@@ -312,8 +306,16 @@ static void __i915_schedule(struct i915_sched_node *node,
 			 * queue; but in that case we may still need to reorder
 			 * the inflight requests.
 			 */
-			if (!i915_sw_fence_done(&node_to_request(node)->submit))
-				continue;
+			continue;
+		}
+
+		if (!intel_engine_is_virtual(engine) &&
+		    !i915_request_is_active(node_to_request(node))) {
+			if (!cache.priolist)
+				cache.priolist =
+					i915_sched_lookup_priolist(engine,
+								   prio);
+			list_move_tail(&node->link, cache.priolist);
 		}
 
 		if (prio <= engine->execlists.queue_priority_hint)
@@ -325,7 +327,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 		kick_submission(engine, prio);
 	}
 
-	spin_unlock(&engine->timeline.lock);
+	spin_unlock(&engine->active.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
@@ -439,8 +441,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 {
 	struct i915_dependency *dep, *tmp;
 
-	GEM_BUG_ON(!list_empty(&node->link));
-
 	spin_lock_irq(&schedule_lock);
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 000e1a9b6750..c311ce9c6f9d 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -251,7 +251,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	timeline->fence_context = dma_fence_context_alloc(1);
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 27668a1a69a3..36e5e5a65155 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_vma *hwsp);
 void i915_timeline_fini(struct i915_timeline *tl);
 
-static inline void
-i915_timeline_set_subclass(struct i915_timeline *timeline,
-			   unsigned int subclass)
-{
-	lockdep_set_subclass(&timeline->lock, subclass);
-
-	/*
-	 * Due to an interesting quirk in lockdep's internal debug tracking,
-	 * after setting a subclass we must ensure the lock is used. Otherwise,
-	 * nr_unused_locks is incremented once too often.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	local_irq_disable();
-	lock_map_acquire(&timeline->lock.dep_map);
-	lock_map_release(&timeline->lock.dep_map);
-	local_irq_enable();
-#endif
-}
-
 struct i915_timeline *
 i915_timeline_create(struct drm_i915_private *i915,
 		     struct i915_vma *global_hwsp);
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 1688705f4a2b..fce5cb4f1090 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -23,10 +23,6 @@ struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
 
-	spinlock_t lock;
-#define TIMELINE_CLIENT 0 /* default subclass */
-#define TIMELINE_ENGINE 1
-#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 97f6970d8da8..db531ebc7704 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -740,7 +740,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	bool submit = false;
 	struct rb_node *rb;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (port_isset(port)) {
 		if (intel_engine_has_preemption(engine)) {
@@ -822,7 +822,7 @@ static void guc_submission_tasklet(unsigned long data)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -847,7 +847,7 @@ static void guc_submission_tasklet(unsigned long data)
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_prepare(struct intel_engine_cs *engine)
@@ -884,7 +884,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	execlists_cancel_port_requests(execlists);
 
@@ -900,7 +900,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
 
 out_unlock:
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_cancel_requests(struct intel_engine_cs *engine)
@@ -926,13 +926,13 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -961,7 +961,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	execlists->queue = RB_ROOT_CACHED;
 	GEM_BUG_ON(port_isset(execlists->port));
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_finish(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index e084476469ef..65b52be23d42 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 	timeline->i915 = NULL;
 	timeline->fence_context = context;
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
-- 
cgit v1.2.3
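
The pattern that repeats through every hunk above is worth spelling out once: each engine now owns a { spinlock, list } pair, engine->active, and a request is linked onto engine->active.requests via rq->sched.link for exactly as long as it is in flight on that engine. Below is a condensed sketch of that discipline; the type and function names are simplified stand-ins for illustration, not the driver's real definitions.

/*
 * Illustrative stand-ins only, not the i915 structures themselves.
 * The point is the lock/list discipline: every reader or writer of
 * the per-engine request list holds engine->active.lock.
 */
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/spinlock.h>

struct engine_active {
	spinlock_t lock;		/* protects .requests */
	struct list_head requests;	/* in-flight requests, submission order */
};

struct engine {
	struct engine_active active;
};

struct request {
	struct engine *engine;
	struct list_head sched_link;	/* entry in engine->active.requests */
	int error;
};

static void engine_init_active(struct engine *engine)
{
	spin_lock_init(&engine->active.lock);
	INIT_LIST_HEAD(&engine->active.requests);
}

static void request_init(struct request *rq, struct engine *engine)
{
	rq->engine = engine;
	rq->error = 0;
	INIT_LIST_HEAD(&rq->sched_link);	/* self-linked, so list_move_tail() below is safe */
}

/* __i915_request_submit() analogue: caller already holds active.lock */
static void submit_request(struct request *rq)
{
	lockdep_assert_held(&rq->engine->active.lock);
	list_move_tail(&rq->sched_link, &rq->engine->active.requests);
}

/* i915_request_retire() analogue: unlink under the same lock */
static void retire_request(struct request *rq)
{
	spin_lock_irq(&rq->engine->active.lock);
	list_del(&rq->sched_link);
	spin_unlock_irq(&rq->engine->active.lock);
}

/* cancel_requests()/guc_cancel_requests() analogue */
static void cancel_requests(struct engine *engine)
{
	struct request *rq;
	unsigned long flags;

	spin_lock_irqsave(&engine->active.lock, flags);
	list_for_each_entry(rq, &engine->active.requests, sched_link)
		rq->error = -EIO;	/* mark as skipped */
	spin_unlock_irqrestore(&engine->active.lock, flags);
}

The other half of the change reads straight out of the i915_request.c and i915_timeline.c hunks: the per-context list (rq->link on timeline->requests) loses its private spinlock entirely, since request construction and retirement already serialise on timeline->mutex, leaving engine->active.lock as the only irq-safe lock on the submission path.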
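
One subtlety in the i915_scheduler.c hunks deserves a closing note. sched_lock_engine() cannot lock rq->engine once and be done: virtual-engine submission may rewrite rq->engine while the scheduler is waiting on the old lock, so the code re-reads the pointer after every acquisition and chases it until the lock it holds belongs to the request's current engine. A sketch of the idiom, reusing the stand-in types above (irq handling elided; the driver enters this loop with interrupts already disabled, and also resets its priolist cache whenever the engine changes):

/* sched_lock_engine() analogue: chase rq->engine until lock and owner agree */
static struct engine *lock_request_engine(struct request *rq)
{
	struct engine *locked, *engine;

	locked = READ_ONCE(rq->engine);
	spin_lock(&locked->active.lock);

	while (locked != (engine = READ_ONCE(rq->engine))) {
		/* rq moved engines while we waited: drop and retake */
		spin_unlock(&locked->active.lock);
		spin_lock(&engine->active.lock);
		locked = engine;
	}

	/*
	 * Once rq->engine matches the lock we hold, it can no longer change
	 * underneath us: moving a request requires that same lock.
	 */
	return locked;	/* caller releases locked->active.lock */
}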