Diffstat (limited to 'drivers/gpu/drm/i915/gem')
42 files changed, 3829 insertions, 4119 deletions
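The i915_gem_context.c changes below move context configuration (VM, engine set, SSEU, priority, persistence) into a proto-context that is filled in via set_proto_ctx_param() before the real context is finalized, while the ctx_setparam() hunk further down now rejects I915_CONTEXT_PARAM_VM and I915_CONTEXT_PARAM_ENGINES on a live context. For reference, a minimal userspace sketch of the create-time path this serves, using the existing DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT uAPI; the already-open drm_fd, the priority value and the helper name are illustrative assumptions, not part of this patch:

/*
 * Sketch only: create a GEM context and configure it at creation time via
 * I915_CONTEXT_CREATE_EXT_SETPARAM, which lands in set_proto_ctx_param()
 * below. Header path (kernel uAPI vs. libdrm install) may differ per system.
 */
#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int create_ctx_with_priority(int drm_fd, __s64 priority, __u32 *ctx_id)
{
	struct drm_i915_gem_context_create_ext_setparam setparam = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		.param = {
			.ctx_id = 0,	/* must be 0 for create-time setparam */
			.param = I915_CONTEXT_PARAM_PRIORITY,
			.value = priority,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (__u64)(uintptr_t)&setparam,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return -errno;

	*ctx_id = create.ctx_id;
	return 0;
}

Anything that needs a non-default VM or engine map has to be supplied the same way at creation, since after this series those parameters fall through to -EINVAL in ctx_setparam().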
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..f0435c6feb68 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -24,13 +24,11 @@ static void __do_clflush(struct drm_i915_gem_object *obj) i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); } -static int clflush_work(struct dma_fence_work *base) +static void clflush_work(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); __do_clflush(clflush->obj); - - return 0; } static void clflush_release(struct dma_fence_work *base) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c deleted file mode 100644 index 44821d94544f..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ /dev/null @@ -1,355 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_gem_client_blt.h" -#include "i915_gem_object_blt.h" - -struct i915_sleeve { - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - struct sg_table *pages; - struct i915_page_sizes page_sizes; -}; - -static int vma_set_pages(struct i915_vma *vma) -{ - struct i915_sleeve *sleeve = vma->private; - - vma->pages = sleeve->pages; - vma->page_sizes = sleeve->page_sizes; - - return 0; -} - -static void vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - vma->pages = NULL; -} - -static void vma_bind(struct i915_address_space *vm, - struct i915_vm_pt_stash *stash, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags); -} - -static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) -{ - vm->vma_ops.unbind_vma(vm, vma); -} - -static const struct i915_vma_ops proxy_vma_ops = { - .set_pages = vma_set_pages, - .clear_pages = vma_clear_pages, - .bind_vma = vma_bind, - .unbind_vma = vma_unbind, -}; - -static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, - struct drm_i915_gem_object *obj, - struct sg_table *pages, - struct i915_page_sizes *page_sizes) -{ - struct i915_sleeve *sleeve; - struct i915_vma *vma; - int err; - - sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL); - if (!sleeve) - return ERR_PTR(-ENOMEM); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_free; - } - - vma->private = sleeve; - vma->ops = &proxy_vma_ops; - - sleeve->vma = vma; - sleeve->pages = pages; - sleeve->page_sizes = *page_sizes; - - return sleeve; - -err_free: - kfree(sleeve); - return ERR_PTR(err); -} - -static void destroy_sleeve(struct i915_sleeve *sleeve) -{ - kfree(sleeve); -} - -struct clear_pages_work { - struct dma_fence dma; - struct dma_fence_cb cb; - struct i915_sw_fence wait; - struct work_struct work; - struct irq_work irq_work; - struct i915_sleeve *sleeve; - struct intel_context *ce; - u32 value; -}; - -static const char *clear_pages_work_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *clear_pages_work_timeline_name(struct dma_fence *fence) -{ - return "clear"; -} - -static void clear_pages_work_release(struct dma_fence *fence) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), dma); - - destroy_sleeve(w->sleeve); - - i915_sw_fence_fini(&w->wait); - - BUILD_BUG_ON(offsetof(typeof(*w), dma)); - 
dma_fence_free(&w->dma); -} - -static const struct dma_fence_ops clear_pages_work_ops = { - .get_driver_name = clear_pages_work_driver_name, - .get_timeline_name = clear_pages_work_timeline_name, - .release = clear_pages_work_release, -}; - -static void clear_pages_signal_irq_worker(struct irq_work *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), irq_work); - - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); -} - -static void clear_pages_dma_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct clear_pages_work *w = container_of(cb, typeof(*w), cb); - - if (fence->error) - dma_fence_set_error(&w->dma, fence->error); - - /* - * Push the signalling of the fence into yet another worker to avoid - * the nightmare locking around the fence spinlock. - */ - irq_work_queue(&w->irq_work); -} - -static void clear_pages_worker(struct work_struct *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_gem_object *obj = w->sleeve->vma->obj; - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - int err = w->dma.error; - - if (unlikely(err)) - goto out_signal; - - if (obj->cache_dirty) { - if (i915_gem_object_has_struct_page(obj)) - drm_clflush_sg(w->sleeve->pages); - obj->cache_dirty = false; - } - obj->read_domains = I915_GEM_GPU_DOMAINS; - obj->write_domain = 0; - - i915_gem_ww_ctx_init(&ww, false); - intel_engine_pm_get(w->ce->engine); -retry: - err = intel_context_pin_ww(w->ce, &ww); - if (err) - goto out_signal; - - batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_ctx; - } - - rq = i915_request_create(w->ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - /* There's no way the fence has signalled */ - if (dma_fence_add_callback(&rq->fence, &w->cb, - clear_pages_dma_fence_cb)) - GEM_BUG_ON(1); - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - /* - * w->dma is already exported via (vma|obj)->resv we need only - * keep track of the GPU activity within this vma/request, and - * propagate the signal from the request to w->dma. 
- */ - err = __i915_vma_move_to_active(vma, rq); - if (err) - goto out_request; - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -out_request: - if (unlikely(err)) { - i915_request_set_error_once(rq, err); - err = 0; - } - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(w->ce, batch); -out_ctx: - intel_context_unpin(w->ce); -out_signal: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - i915_vma_unpin(w->sleeve->vma); - intel_engine_pm_put(w->ce->engine); - - if (unlikely(err)) { - dma_fence_set_error(&w->dma, err); - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); - } -} - -static int pin_wait_clear_pages_work(struct clear_pages_work *w, - struct intel_context *ce) -{ - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - int err; - - i915_gem_ww_ctx_init(&ww, false); -retry: - err = i915_gem_object_lock(vma->obj, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out; - - err = i915_sw_fence_await_reservation(&w->wait, - vma->obj->base.resv, NULL, - true, 0, I915_FENCE_GFP); - if (err) - goto err_unpin_vma; - - dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma); - -err_unpin_vma: - if (err) - i915_vma_unpin(vma); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - return err; -} - -static int __i915_sw_fence_call -clear_pages_work_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), wait); - - switch (state) { - case FENCE_COMPLETE: - schedule_work(&w->work); - break; - - case FENCE_FREE: - dma_fence_put(&w->dma); - break; - } - - return NOTIFY_DONE; -} - -static DEFINE_SPINLOCK(fence_lock); - -/* XXX: better name please */ -int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value) -{ - struct clear_pages_work *work; - struct i915_sleeve *sleeve; - int err; - - sleeve = create_sleeve(ce->vm, obj, pages, page_sizes); - if (IS_ERR(sleeve)) - return PTR_ERR(sleeve); - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - destroy_sleeve(sleeve); - return -ENOMEM; - } - - work->value = value; - work->sleeve = sleeve; - work->ce = ce; - - INIT_WORK(&work->work, clear_pages_worker); - - init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); - - dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); - i915_sw_fence_init(&work->wait, clear_pages_work_notify); - - err = pin_wait_clear_pages_work(work, ce); - if (err < 0) - dma_fence_set_error(&work->dma, err); - - dma_fence_get(&work->dma); - i915_sw_fence_commit(&work->wait); - - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_client_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h deleted file mode 100644 index 3dbd28c22ff5..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ -#ifndef __I915_GEM_CLIENT_BLT_H__ -#define __I915_GEM_CLIENT_BLT_H__ - -#include 
<linux/types.h> - -struct drm_i915_gem_object; -struct i915_page_sizes; -struct intel_context; -struct sg_table; - -int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value); - -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7720b8c22c81..cff72679ad7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -67,35 +67,32 @@ #include <linux/log2.h> #include <linux/nospec.h> +#include <drm/drm_syncobj.h> + #include "gt/gen6_ppgtt.h" #include "gt/intel_context.h" #include "gt/intel_context_param.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" -#include "gt/intel_execlists_submission.h" /* virtual_engine */ #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "i915_gem_context.h" -#include "i915_globals.h" #include "i915_trace.h" #include "i915_user_extensions.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static struct i915_global_gem_context { - struct i915_global base; - struct kmem_cache *slab_luts; -} global; +static struct kmem_cache *slab_luts; struct i915_lut_handle *i915_lut_handle_alloc(void) { - return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); + return kmem_cache_alloc(slab_luts, GFP_KERNEL); } void i915_lut_handle_free(struct i915_lut_handle *lut) { - return kmem_cache_free(global.slab_luts, lut); + return kmem_cache_free(slab_luts, lut); } static void lut_close(struct i915_gem_context *ctx) @@ -167,6 +164,577 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } +static int validate_priority(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + return 0; +} + +static void proto_context_close(struct i915_gem_proto_context *pc) +{ + int i; + + if (pc->vm) + i915_vm_put(pc->vm); + if (pc->user_engines) { + for (i = 0; i < pc->num_user_engines; i++) + kfree(pc->user_engines[i].siblings); + kfree(pc->user_engines); + } + kfree(pc); +} + +static int proto_context_set_persistence(struct drm_i915_private *i915, + struct i915_gem_proto_context *pc, + bool persist) +{ + if (persist) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915->params.enable_hangcheck) + return -EINVAL; + + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; + + /* + * If the cancel fails, we then need to reset, cleanly! + * + * If the per-engine reset fails, all hope is lost! We resort + * to a full GPU reset in that unlikely case, but realistically + * if the engine could not reset, the full reset does not fare + * much better. The damage has been done. 
+ * + * However, if we cannot reset an engine by itself, we cannot + * cleanup a hanging persistent context without causing + * colateral damage, and we should not pretend we can by + * exposing the interface. + */ + if (!intel_has_reset_engine(&i915->gt)) + return -ENODEV; + + pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE); + } + + return 0; +} + +static struct i915_gem_proto_context * +proto_context_create(struct drm_i915_private *i915, unsigned int flags) +{ + struct i915_gem_proto_context *pc, *err; + + pc = kzalloc(sizeof(*pc), GFP_KERNEL); + if (!pc) + return ERR_PTR(-ENOMEM); + + pc->num_user_engines = -1; + pc->user_engines = NULL; + pc->user_flags = BIT(UCONTEXT_BANNABLE) | + BIT(UCONTEXT_RECOVERABLE); + if (i915->params.enable_hangcheck) + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + pc->sched.priority = I915_PRIORITY_NORMAL; + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + if (!HAS_EXECLISTS(i915)) { + err = ERR_PTR(-EINVAL); + goto proto_close; + } + pc->single_timeline = true; + } + + return pc; + +proto_close: + proto_context_close(pc); + return err; +} + +static int proto_context_register_locked(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + void *old; + + lockdep_assert_held(&fpriv->proto_context_lock); + + ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL); + if (xa_is_err(old)) { + xa_erase(&fpriv->context_xa, *id); + return xa_err(old); + } + WARN_ON(old); + + return 0; +} + +static int proto_context_register(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + + mutex_lock(&fpriv->proto_context_lock); + ret = proto_context_register_locked(fpriv, pc, id); + mutex_unlock(&fpriv->proto_context_lock); + + return ret; +} + +static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct i915_address_space *vm; + + if (args->size) + return -EINVAL; + + if (!HAS_FULL_PPGTT(i915)) + return -ENODEV; + + if (upper_32_bits(args->value)) + return -ENOENT; + + vm = i915_gem_vm_lookup(fpriv, args->value); + if (!vm) + return -ENOENT; + + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = vm; + + return 0; +} + +struct set_proto_ctx_engines { + struct drm_i915_private *i915; + unsigned num_engines; + struct i915_gem_proto_engine *engines; +}; + +static int +set_proto_ctx_engines_balance(struct i915_user_extension __user *base, + void *data) +{ + struct i915_context_engines_load_balance __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct intel_engine_cs **siblings; + u16 num_siblings, idx; + unsigned int n; + int err; + + if (!HAS_EXECLISTS(i915)) + return -ENODEV; + + if (get_user(idx, &ext->engine_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, + "Invalid placement[%d], already occupied\n", idx); + return -EEXIST; + } + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + 
+ err = check_user_mbz(&ext->mbz64); + if (err) + return err; + + if (num_siblings == 0) + return 0; + + siblings = kmalloc_array(num_siblings, sizeof(*siblings), GFP_KERNEL); + if (!siblings) + return -ENOMEM; + + for (n = 0; n < num_siblings; n++) { + struct i915_engine_class_instance ci; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = -EFAULT; + goto err_siblings; + } + + siblings[n] = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + drm_dbg(&i915->drm, + "Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto err_siblings; + } + } + + if (num_siblings == 1) { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set->engines[idx].engine = siblings[0]; + kfree(siblings); + } else { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_BALANCED; + set->engines[idx].num_siblings = num_siblings; + set->engines[idx].siblings = siblings; + } + + return 0; + +err_siblings: + kfree(siblings); + + return err; +} + +static int +set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_bond __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance ci; + struct intel_engine_cs *master; + u16 idx, num_bonds; + int err, n; + + if (get_user(idx, &ext->virtual_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, + "Invalid index for virtual engine: %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type == I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); + return -EINVAL; + } + + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_PHYSICAL) { + drm_dbg(&i915->drm, + "Bonding with virtual engines not allowed\n"); + return -EINVAL; + } + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if (copy_from_user(&ci, &ext->master, sizeof(ci))) + return -EFAULT; + + master = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!master) { + drm_dbg(&i915->drm, + "Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + if (intel_engine_uses_guc(master)) { + DRM_DEBUG("bonding extension not supported with GuC submission"); + return -ENODEV; + } + + if (get_user(num_bonds, &ext->num_bonds)) + return -EFAULT; + + for (n = 0; n < num_bonds; n++) { + struct intel_engine_cs *bond; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) + return -EFAULT; + + bond = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!bond) { + drm_dbg(&i915->drm, + "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); + return -EINVAL; + } + } + + return 0; +} + +static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond, +}; + +static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private 
*i915 = fpriv->dev_priv; + struct set_proto_ctx_engines set = { .i915 = i915 }; + struct i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + unsigned int n; + u64 extensions; + int err; + + if (pc->num_user_engines >= 0) { + drm_dbg(&i915->drm, "Cannot set engines twice"); + return -EINVAL; + } + + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) { + drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", + args->size); + return -EINVAL; + } + + set.num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + /* RING_MASK has no shift so we can use it directly here */ + if (set.num_engines > I915_EXEC_RING_MASK + 1) + return -EINVAL; + + set.engines = kmalloc_array(set.num_engines, sizeof(*set.engines), GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + for (n = 0; n < set.num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + kfree(set.engines); + return -EFAULT; + } + + memset(&set.engines[n], 0, sizeof(set.engines[n])); + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) + continue; + + engine = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + drm_dbg(&i915->drm, + "Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + kfree(set.engines); + return -ENOENT; + } + + set.engines[n].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set.engines[n].engine = engine; + } + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = i915_user_extensions(u64_to_user_ptr(extensions), + set_proto_ctx_engines_extensions, + ARRAY_SIZE(set_proto_ctx_engines_extensions), + &set); + if (err) { + kfree(set.engines); + return err; + } + + pc->num_user_engines = set.num_engines; + pc->user_engines = set.engines; + + return 0; +} + +static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_sseu *sseu; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (GRAPHICS_VER(i915) != 11) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + if (!!(user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) != (pc->num_user_engines >= 0)) + return -EINVAL; + + if (pc->num_user_engines >= 0) { + int idx = user_sseu.engine.engine_instance; + struct i915_gem_proto_engine *pe; + + if (idx >= pc->num_user_engines) + return -EINVAL; + + pe = &pc->user_engines[idx]; + + /* Only render engine supports RPCS configuration. */ + if (pe->engine->class != RENDER_CLASS) + return -EINVAL; + + sseu = &pe->sseu; + } else { + /* Only render engine supports RPCS configuration. 
*/ + if (user_sseu.engine.engine_class != I915_ENGINE_CLASS_RENDER) + return -EINVAL; + + /* There is only one render engine */ + if (user_sseu.engine.engine_instance != 0) + return -EINVAL; + + sseu = &pc->legacy_rcs_sseu; + } + + ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu); + if (ret) + return ret; + + args->size = sizeof(user_sseu); + + return 0; +} + +static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + int ret = 0; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_NO_ERROR_CAPTURE); + else + pc->user_flags &= ~BIT(UCONTEXT_NO_ERROR_CAPTURE); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + if (args->size) + ret = -EINVAL; + else if (!capable(CAP_SYS_ADMIN) && !args->value) + ret = -EPERM; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_BANNABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_RECOVERABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_RECOVERABLE); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + ret = validate_priority(fpriv->dev_priv, args); + if (!ret) + pc->sched.priority = args->value; + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = set_proto_ctx_sseu(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = set_proto_ctx_vm(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = set_proto_ctx_engines(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_PERSISTENCE: + if (args->size) + ret = -EINVAL; + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); + break; + + case I915_CONTEXT_PARAM_NO_ZEROMAP: + case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: + default: + ret = -EINVAL; + break; + } + + return ret; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -205,14 +773,16 @@ context_get_vm_rcu(struct i915_gem_context *ctx) } while (1); } -static void intel_context_set_gem(struct intel_context *ce, - struct i915_gem_context *ctx) +static int intel_context_set_gem(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_sseu sseu) { + int ret = 0; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; if (rcu_access_pointer(ctx->vm)) { struct i915_address_space *vm; @@ -225,15 +795,23 @@ static void intel_context_set_gem(struct intel_context *ce, ce->vm = vm; } - GEM_BUG_ON(ce->timeline); - if (ctx->timeline) - ce->timeline = intel_timeline_get(ctx->timeline); - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_timeslices(ce->engine)) + intel_engine_has_timeslices(ce->engine) && + intel_engine_has_semaphores(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); - intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us); + if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) && + ctx->i915->params.request_timeout_ms) { + unsigned int timeout_ms = ctx->i915->params.request_timeout_ms; + + intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000); + } + + /* A valid SSEU has no zero fields */ + if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) + ret = 
intel_context_reconfigure_sseu(ce, sseu); + + return ret; } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -301,11 +879,12 @@ static struct i915_gem_engines *alloc_engines(unsigned int count) return e; } -static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, + struct intel_sseu rcs_sseu) { const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; - struct i915_gem_engines *e; + struct i915_gem_engines *e, *err; enum intel_engine_id id; e = alloc_engines(I915_NUM_ENGINES); @@ -314,6 +893,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) for_each_engine(engine, gt, id) { struct intel_context *ce; + struct intel_sseu sseu = {}; + int ret; if (engine->legacy_idx == INVALID_ENGINE) continue; @@ -323,18 +904,79 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) ce = intel_context_create(engine); if (IS_ERR(ce)) { - __free_engines(e, e->num_engines + 1); - return ERR_CAST(ce); + err = ERR_CAST(ce); + goto free_engines; } - intel_context_set_gem(ce, ctx); - e->engines[engine->legacy_idx] = ce; - e->num_engines = max(e->num_engines, engine->legacy_idx); + e->num_engines = max(e->num_engines, engine->legacy_idx + 1); + + if (engine->class == RENDER_CLASS) + sseu = rcs_sseu; + + ret = intel_context_set_gem(ce, ctx, sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + + } + + return e; + +free_engines: + free_engines(e); + return err; +} + +static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, + unsigned int num_engines, + struct i915_gem_proto_engine *pe) +{ + struct i915_gem_engines *e, *err; + unsigned int n; + + e = alloc_engines(num_engines); + for (n = 0; n < num_engines; n++) { + struct intel_context *ce; + int ret; + + switch (pe[n].type) { + case I915_GEM_ENGINE_TYPE_PHYSICAL: + ce = intel_context_create(pe[n].engine); + break; + + case I915_GEM_ENGINE_TYPE_BALANCED: + ce = intel_engine_create_virtual(pe[n].siblings, + pe[n].num_siblings); + break; + + case I915_GEM_ENGINE_TYPE_INVALID: + default: + GEM_WARN_ON(pe[n].type != I915_GEM_ENGINE_TYPE_INVALID); + continue; + } + + if (IS_ERR(ce)) { + err = ERR_CAST(ce); + goto free_engines; + } + + e->engines[n] = ce; + + ret = intel_context_set_gem(ce, ctx, pe->sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } } - e->num_engines++; + e->num_engines = num_engines; return e; + +free_engines: + free_engines(e); + return err; } void i915_gem_context_release(struct kref *ref) @@ -347,9 +989,6 @@ void i915_gem_context_release(struct kref *ref) mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); - if (ctx->timeline) - intel_timeline_put(ctx->timeline); - put_pid(ctx->pid); mutex_destroy(&ctx->mutex); @@ -441,7 +1080,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; - if (ban && intel_context_set_banned(ce)) + if (ban && intel_context_ban(ce, NULL)) continue; /* @@ -566,6 +1205,9 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + ctx->file_priv = ERR_PTR(-EBADF); /* @@ -635,57 +1277,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static struct i915_gem_context * -__create_context(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx; - 
struct i915_gem_engines *e; - int err; - int i; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); - - kref_init(&ctx->ref); - ctx->i915 = i915; - ctx->sched.priority = I915_PRIORITY_NORMAL; - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->link); - - spin_lock_init(&ctx->stale.lock); - INIT_LIST_HEAD(&ctx->stale.engines); - - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) { - err = PTR_ERR(e); - goto err_free; - } - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - - /* NB: Mark all slices as needing a remap so that when the context first - * loads it will restore whatever remap state already exists. If there - * is no remap info, it will be a NOP. */ - ctx->remap_slice = ALL_L3_SLICES(i915); - - i915_gem_context_set_bannable(ctx); - i915_gem_context_set_recoverable(ctx); - __context_set_persistence(ctx, true /* cgroup hook? */); - - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) - ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - - return ctx; - -err_free: - kfree(ctx); - return ERR_PTR(err); -} - static inline struct i915_gem_engines * __context_engines_await(const struct i915_gem_context *ctx, bool *user_engines) @@ -714,168 +1305,112 @@ __context_engines_await(const struct i915_gem_context *ctx, return engines; } -static int +static void context_apply_all(struct i915_gem_context *ctx, - int (*fn)(struct intel_context *ce, void *data), + void (*fn)(struct intel_context *ce, void *data), void *data) { struct i915_gem_engines_iter it; struct i915_gem_engines *e; struct intel_context *ce; - int err = 0; e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) { - err = fn(ce, data); - if (err) - break; - } + for_each_gem_engine(ce, e, it) + fn(ce, data); i915_sw_fence_complete(&e->fence); - - return err; -} - -static int __apply_ppgtt(struct intel_context *ce, void *vm) -{ - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(vm); - return 0; -} - -static struct i915_address_space * -__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) -{ - struct i915_address_space *old; - - old = rcu_replace_pointer(ctx->vm, - i915_vm_open(vm), - lockdep_is_held(&ctx->mutex)); - GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - - context_apply_all(ctx, __apply_ppgtt, vm); - - return old; -} - -static void __assign_ppgtt(struct i915_gem_context *ctx, - struct i915_address_space *vm) -{ - if (vm == rcu_access_pointer(ctx->vm)) - return; - - vm = __set_ppgtt(ctx, vm); - if (vm) - i915_vm_close(vm); -} - -static void __set_timeline(struct intel_timeline **dst, - struct intel_timeline *src) -{ - struct intel_timeline *old = *dst; - - *dst = src ? 
intel_timeline_get(src) : NULL; - - if (old) - intel_timeline_put(old); -} - -static int __apply_timeline(struct intel_context *ce, void *timeline) -{ - __set_timeline(&ce->timeline, timeline); - return 0; -} - -static void __assign_timeline(struct i915_gem_context *ctx, - struct intel_timeline *timeline) -{ - __set_timeline(&ctx->timeline, timeline); - context_apply_all(ctx, __apply_timeline, timeline); -} - -static int __apply_watchdog(struct intel_context *ce, void *timeout_us) -{ - return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us); -} - -static int -__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us) -{ - int ret; - - ret = context_apply_all(ctx, __apply_watchdog, - (void *)(uintptr_t)timeout_us); - if (!ret) - ctx->watchdog.timeout_us = timeout_us; - - return ret; -} - -static void __set_default_fence_expiry(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int ret; - - if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) || - !i915->params.request_timeout_ms) - return; - - /* Default expiry for user fences. */ - ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000); - if (ret) - drm_notice(&i915->drm, - "Failed to configure default fence expiry! (%d)", - ret); } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, + const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; + struct i915_address_space *vm = NULL; + struct i915_gem_engines *e; + int err; + int i; - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(i915)) - return ERR_PTR(-EINVAL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); - ctx = __create_context(i915); - if (IS_ERR(ctx)) - return ctx; + kref_init(&ctx->ref); + ctx->i915 = i915; + ctx->sched = pc->sched; + mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); - if (HAS_FULL_PPGTT(i915)) { + spin_lock_init(&ctx->stale.lock); + INIT_LIST_HEAD(&ctx->stale.engines); + + if (pc->vm) { + vm = i915_vm_get(pc->vm); + } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) { drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - context_close(ctx); - return ERR_CAST(ppgtt); + err = PTR_ERR(ppgtt); + goto err_ctx; } + vm = &ppgtt->vm; + } + if (vm) { + RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); + /* i915_vm_open() takes a reference */ + i915_vm_put(vm); + } - i915_vm_put(&ppgtt->vm); + mutex_init(&ctx->engines_mutex); + if (pc->num_user_engines >= 0) { + i915_gem_context_set_user_engines(ctx); + e = user_engines(ctx, pc->num_user_engines, pc->user_engines); + } else { + i915_gem_context_clear_user_engines(ctx); + e = default_engines(ctx, pc->legacy_rcs_sseu); + } + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_vm; } + RCU_INIT_POINTER(ctx->engines, e); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct intel_timeline *timeline; + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); - timeline = intel_timeline_create(&i915->gt); - if (IS_ERR(timeline)) { - context_close(ctx); - return ERR_CAST(timeline); - } + /* NB: Mark all slices as needing a remap so that when the context first + * loads it will restore whatever remap state already exists. If there + * is no remap info, it will be a NOP. 
*/ + ctx->remap_slice = ALL_L3_SLICES(i915); - __assign_timeline(ctx, timeline); - intel_timeline_put(timeline); - } + ctx->user_flags = pc->user_flags; - __set_default_fence_expiry(ctx); + for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) + ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + + if (pc->single_timeline) { + err = drm_syncobj_create(&ctx->syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) + goto err_engines; + } trace_i915_context_create(ctx); return ctx; + +err_engines: + free_engines(e); +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); +err_ctx: + kfree(ctx); + return ERR_PTR(err); } static void init_contexts(struct i915_gem_contexts *gc) @@ -889,83 +1424,83 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } -static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv, - u32 *id) +static void gem_context_register(struct i915_gem_context *ctx, + struct drm_i915_file_private *fpriv, + u32 id) { struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm; - int ret; + void *old; ctx->file_priv = fpriv; - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); - if (vm) - WRITE_ONCE(vm->file, fpriv); /* XXX */ - mutex_unlock(&ctx->mutex); - ctx->pid = get_task_pid(current, PIDTYPE_PID); snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ - ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); - if (ret) - goto err_pid; + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); - - return 0; - -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); - return ret; } int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; - u32 id; - xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); + mutex_init(&file_priv->proto_context_lock); + xa_init_flags(&file_priv->proto_context_xa, XA_FLAGS_ALLOC); + + /* 0 reserved for the default context */ + xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC1); /* 0 reserved for invalid/unassigned ppgtt */ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) { + err = PTR_ERR(pc); + goto err; + } + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; } - err = gem_context_register(ctx, file_priv, &id); - if (err < 0) - goto err_ctx; + gem_context_register(ctx, file_priv, 0); - GEM_BUG_ON(id); return 0; -err_ctx: - context_close(ctx); err: xa_destroy(&file_priv->vm_xa); xa_destroy(&file_priv->context_xa); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); return err; } void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx; + xa_for_each(&file_priv->proto_context_xa, idx, pc) + proto_context_close(pc); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); + xa_for_each(&file_priv->context_xa, idx, ctx) 
context_close(ctx); xa_destroy(&file_priv->context_xa); @@ -995,8 +1530,6 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); - ppgtt->vm.file = file_priv; - if (args->extensions) { err = i915_user_extensions(u64_to_user_ptr(args->extensions), NULL, 0, @@ -1040,120 +1573,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -struct context_barrier_task { - struct i915_active base; - void (*task)(void *data); - void *data; -}; - -static void cb_retire(struct i915_active *base) -{ - struct context_barrier_task *cb = container_of(base, typeof(*cb), base); - - if (cb->task) - cb->task(cb->data); - - i915_active_fini(&cb->base); - kfree(cb); -} - -I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); -static int context_barrier_task(struct i915_gem_context *ctx, - intel_engine_mask_t engines, - bool (*skip)(struct intel_context *ce, void *data), - int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data), - int (*emit)(struct i915_request *rq, void *data), - void (*task)(void *data), - void *data) -{ - struct context_barrier_task *cb; - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct i915_gem_ww_ctx ww; - struct intel_context *ce; - int err = 0; - - GEM_BUG_ON(!task); - - cb = kmalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - i915_active_init(&cb->base, NULL, cb_retire, 0); - err = i915_active_acquire(&cb->base); - if (err) { - kfree(cb); - return err; - } - - e = __context_engines_await(ctx, NULL); - if (!e) { - i915_active_release(&cb->base); - return -ENOENT; - } - - for_each_gem_engine(ce, e, it) { - struct i915_request *rq; - - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - ce->engine->mask)) { - err = -ENXIO; - break; - } - - if (!(ce->engine->mask & engines)) - continue; - - if (skip && skip(ce, data)) - continue; - - i915_gem_ww_ctx_init(&ww, true); -retry: - err = intel_context_pin_ww(ce, &ww); - if (err) - goto err; - - if (pin) - err = pin(ce, &ww, data); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = 0; - if (emit) - err = emit(rq, data); - if (err == 0) - err = i915_active_add_request(&cb->base, rq); - - i915_request_add(rq); -err_unpin: - intel_context_unpin(ce); -err: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - if (err) - break; - } - i915_sw_fence_complete(&e->fence); - - cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ - cb->data = data; - - i915_active_release(&cb->base); - - return err; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1186,211 +1605,6 @@ err_put: return err; } -static void set_ppgtt_barrier(void *data) -{ - struct i915_address_space *old = data; - - if (GRAPHICS_VER(old->i915) < 8) - gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - - i915_vm_close(old); -} - -static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data) -{ - struct i915_address_space *vm = ce->vm; - - if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915)) - /* ppGTT is not part of the legacy context image */ - return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww); - - return 0; -} - -static int emit_ppgtt_update(struct i915_request *rq, void *data) -{ - struct i915_address_space *vm = rq->context->vm; - struct intel_engine_cs *engine = rq->engine; - u32 base = engine->mmio_base; - u32 *cs; - int i; - - if (i915_vm_is_4lvl(vm)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - const dma_addr_t pd_daddr = px_dma(ppgtt->pd); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - *cs++ = lower_32_bits(pd_daddr); - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } - - return 0; -} - -static bool skip_ppgtt_update(struct intel_context *ce, void *data) -{ - if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return !ce->state; - else - return !atomic_read(&ce->pin_count); -} - -static int set_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_address_space *vm, *old; - int err; - - if (args->size) - return -EINVAL; - - if (!rcu_access_pointer(ctx->vm)) - return -ENODEV; - - if (upper_32_bits(args->value)) - return -ENOENT; - - rcu_read_lock(); - vm = xa_load(&file_priv->vm_xa, args->value); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); - if (!vm) - return -ENOENT; - - err = mutex_lock_interruptible(&ctx->mutex); - if (err) - goto out; - - if (i915_gem_context_is_closed(ctx)) { - err = -ENOENT; - goto unlock; - } - - if (vm == rcu_access_pointer(ctx->vm)) - goto unlock; - - old = __set_ppgtt(ctx, vm); - - /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - lut_close(ctx); - - /* - * We need to flush any requests using the current ppgtt before - * we release it as the requests do not hold a reference themselves, - * only indirectly through the context. 
- */ - err = context_barrier_task(ctx, ALL_ENGINES, - skip_ppgtt_update, - pin_ppgtt_update, - emit_ppgtt_update, - set_ppgtt_barrier, - old); - if (err) { - i915_vm_close(__set_ppgtt(ctx, old)); - i915_vm_close(old); - lut_close(ctx); /* force a rebuild of the old obj:vma cache */ - } - -unlock: - mutex_unlock(&ctx->mutex); -out: - i915_vm_put(vm); - return err; -} - -static int __apply_ringsize(struct intel_context *ce, void *sz) -{ - return intel_context_set_ring_size(ce, (unsigned long)sz); -} - -static int set_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE)) - return -EINVAL; - - if (args->value < I915_GTT_PAGE_SIZE) - return -EINVAL; - - if (args->value > 128 * I915_GTT_PAGE_SIZE) - return -EINVAL; - - return context_apply_all(ctx, - __apply_ringsize, - __intel_context_ring_size(args->value)); -} - -static int __get_ringsize(struct intel_context *ce, void *arg) -{ - long sz; - - sz = intel_context_get_ring_size(ce); - GEM_BUG_ON(sz > INT_MAX); - - return sz; /* stop on first engine */ -} - -static int get_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - int sz; - - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - sz = context_apply_all(ctx, __get_ringsize, NULL); - if (sz < 0) - return sz; - - args->value = sz; - return 0; -} - int i915_gem_user_to_context_sseu(struct intel_gt *gt, const struct drm_i915_gem_context_param_sseu *user, @@ -1545,382 +1759,6 @@ out_ce: return ret; } -struct set_engines { - struct i915_gem_context *ctx; - struct i915_gem_engines *engines; -}; - -static int -set_engines__load_balance(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_load_balance __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct intel_engine_cs *stack[16]; - struct intel_engine_cs **siblings; - struct intel_context *ce; - u16 num_siblings, idx; - unsigned int n; - int err; - - if (!HAS_EXECLISTS(i915)) - return -ENODEV; - - if (intel_uc_uses_guc_submission(&i915->gt.uc)) - return -ENODEV; /* not implement yet */ - - if (get_user(idx, &ext->engine_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (set->engines->engines[idx]) { - drm_dbg(&i915->drm, - "Invalid placement[%d], already occupied\n", idx); - return -EEXIST; - } - - if (get_user(num_siblings, &ext->num_siblings)) - return -EFAULT; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - err = check_user_mbz(&ext->mbz64); - if (err) - return err; - - siblings = stack; - if (num_siblings > ARRAY_SIZE(stack)) { - siblings = kmalloc_array(num_siblings, - sizeof(*siblings), - GFP_KERNEL); - if (!siblings) - return -ENOMEM; - } - - for (n = 0; n < num_siblings; n++) { - struct i915_engine_class_instance ci; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { - err = -EFAULT; - goto out_siblings; - } - - siblings[n] = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!siblings[n]) { - drm_dbg(&i915->drm, - "Invalid sibling[%d]: { class:%d, inst:%d }\n", - n, 
ci.engine_class, ci.engine_instance); - err = -EINVAL; - goto out_siblings; - } - } - - ce = intel_execlists_create_virtual(siblings, n); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out_siblings; - } - - intel_context_set_gem(ce, set->ctx); - - if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { - intel_context_put(ce); - err = -EEXIST; - goto out_siblings; - } - -out_siblings: - if (siblings != stack) - kfree(siblings); - - return err; -} - -static int -set_engines__bond(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_bond __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct i915_engine_class_instance ci; - struct intel_engine_cs *virtual; - struct intel_engine_cs *master; - u16 idx, num_bonds; - int err, n; - - if (get_user(idx, &ext->virtual_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, - "Invalid index for virtual engine: %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (!set->engines->engines[idx]) { - drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); - return -EINVAL; - } - virtual = set->engines->engines[idx]->engine; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { - err = check_user_mbz(&ext->mbz64[n]); - if (err) - return err; - } - - if (copy_from_user(&ci, &ext->master, sizeof(ci))) - return -EFAULT; - - master = intel_engine_lookup_user(i915, - ci.engine_class, ci.engine_instance); - if (!master) { - drm_dbg(&i915->drm, - "Unrecognised master engine: { class:%u, instance:%u }\n", - ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - if (get_user(num_bonds, &ext->num_bonds)) - return -EFAULT; - - for (n = 0; n < num_bonds; n++) { - struct intel_engine_cs *bond; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) - return -EFAULT; - - bond = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!bond) { - drm_dbg(&i915->drm, - "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", - n, ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - /* - * A non-virtual engine has no siblings to choose between; and - * a submit fence will always be directed to the one engine. 
- */ - if (intel_engine_is_virtual(virtual)) { - err = intel_virtual_engine_attach_bond(virtual, - master, - bond); - if (err) - return err; - } - } - - return 0; -} - -static const i915_user_extension_fn set_engines__extensions[] = { - [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, - [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, -}; - -static int -set_engines(struct i915_gem_context *ctx, - const struct drm_i915_gem_context_param *args) -{ - struct drm_i915_private *i915 = ctx->i915; - struct i915_context_param_engines __user *user = - u64_to_user_ptr(args->value); - struct set_engines set = { .ctx = ctx }; - unsigned int num_engines, n; - u64 extensions; - int err; - - if (!args->size) { /* switch back to legacy user_ring_map */ - if (!i915_gem_context_user_engines(ctx)) - return 0; - - set.engines = default_engines(ctx); - if (IS_ERR(set.engines)) - return PTR_ERR(set.engines); - - goto replace; - } - - BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); - if (args->size < sizeof(*user) || - !IS_ALIGNED(args->size, sizeof(*user->engines))) { - drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", - args->size); - return -EINVAL; - } - - /* - * Note that I915_EXEC_RING_MASK limits execbuf to only using the - * first 64 engines defined here. - */ - num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); - set.engines = alloc_engines(num_engines); - if (!set.engines) - return -ENOMEM; - - for (n = 0; n < num_engines; n++) { - struct i915_engine_class_instance ci; - struct intel_engine_cs *engine; - struct intel_context *ce; - - if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { - __free_engines(set.engines, n); - return -EFAULT; - } - - if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && - ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { - set.engines->engines[n] = NULL; - continue; - } - - engine = intel_engine_lookup_user(ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!engine) { - drm_dbg(&i915->drm, - "Invalid engine[%d]: { class:%d, instance:%d }\n", - n, ci.engine_class, ci.engine_instance); - __free_engines(set.engines, n); - return -ENOENT; - } - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - __free_engines(set.engines, n); - return PTR_ERR(ce); - } - - intel_context_set_gem(ce, ctx); - - set.engines->engines[n] = ce; - } - set.engines->num_engines = num_engines; - - err = -EFAULT; - if (!get_user(extensions, &user->extensions)) - err = i915_user_extensions(u64_to_user_ptr(extensions), - set_engines__extensions, - ARRAY_SIZE(set_engines__extensions), - &set); - if (err) { - free_engines(set.engines); - return err; - } - -replace: - mutex_lock(&ctx->engines_mutex); - if (i915_gem_context_is_closed(ctx)) { - mutex_unlock(&ctx->engines_mutex); - free_engines(set.engines); - return -ENOENT; - } - if (args->size) - i915_gem_context_set_user_engines(ctx); - else - i915_gem_context_clear_user_engines(ctx); - set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1); - mutex_unlock(&ctx->engines_mutex); - - /* Keep track of old engine sets for kill_context() */ - engines_idle_release(ctx, set.engines); - - return 0; -} - -static int -get_engines(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user; - struct i915_gem_engines *e; - size_t n, count, size; - bool user_engines; - int err = 0; - - e = __context_engines_await(ctx, &user_engines); - if (!e) - return -ENOENT; - - if (!user_engines) { - 
i915_sw_fence_complete(&e->fence); - args->size = 0; - return 0; - } - - count = e->num_engines; - - /* Be paranoid in case we have an impedance mismatch */ - if (!check_struct_size(user, engines, count, &size)) { - err = -EINVAL; - goto err_free; - } - if (overflows_type(size, args->size)) { - err = -EINVAL; - goto err_free; - } - - if (!args->size) { - args->size = size; - goto err_free; - } - - if (args->size < size) { - err = -EINVAL; - goto err_free; - } - - user = u64_to_user_ptr(args->value); - if (put_user(0, &user->extensions)) { - err = -EFAULT; - goto err_free; - } - - for (n = 0; n < count; n++) { - struct i915_engine_class_instance ci = { - .engine_class = I915_ENGINE_CLASS_INVALID, - .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, - }; - - if (e->engines[n]) { - ci.engine_class = e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->uabi_instance; - } - - if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { - err = -EFAULT; - goto err_free; - } - } - - args->size = size; - -err_free: - i915_sw_fence_complete(&e->fence); - return err; -} - static int set_persistence(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) @@ -1931,41 +1769,30 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static int __apply_priority(struct intel_context *ce, void *arg) +static void __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context *ctx = arg; if (!intel_engine_has_timeslices(ce->engine)) - return 0; + return; - if (ctx->sched.priority >= I915_PRIORITY_NORMAL) + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) intel_context_set_use_semaphores(ce); else intel_context_clear_use_semaphores(ce); - - return 0; } static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { - s64 priority = args->value; - - if (args->size) - return -EINVAL; - - if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - return -ENODEV; - - if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - return -EINVAL; + int err; - if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - return -EPERM; + err = validate_priority(ctx->i915, args); + if (err) + return err; - ctx->sched.priority = priority; + ctx->sched.priority = args->value; context_apply_all(ctx, __apply_priority, ctx); return 0; @@ -1978,15 +1805,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, int ret = 0; switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - if (args->size) - ret = -EINVAL; - else if (args->value) - set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - else - clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: if (args->size) ret = -EINVAL; @@ -2024,23 +1842,15 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_sseu(ctx, args); break; - case I915_CONTEXT_PARAM_VM: - ret = set_ppgtt(fpriv, ctx, args); - break; - - case I915_CONTEXT_PARAM_ENGINES: - ret = set_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: ret = set_persistence(ctx, args); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = set_ringsize(ctx, args); - break; - + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_VM: + case I915_CONTEXT_PARAM_ENGINES: default: ret = -EINVAL; break; @@ -2050,7 
+1860,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, } struct create_ext { - struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; struct drm_i915_file_private *fpriv; }; @@ -2065,233 +1875,84 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) if (local.param.ctx_id) return -EINVAL; - return ctx_setparam(arg->fpriv, arg->ctx, &local.param); + return set_proto_ctx_param(arg->fpriv, arg->pc, &local.param); } -static int copy_ring_size(struct intel_context *dst, - struct intel_context *src) +static int invalid_ext(struct i915_user_extension __user *ext, void *data) { - long sz; - - sz = intel_context_get_ring_size(src); - if (sz < 0) - return sz; - - return intel_context_set_ring_size(dst, sz); + return -EINVAL; } -static int clone_engines(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *clone, *e; - bool user_engines; - unsigned long n; - - e = __context_engines_await(src, &user_engines); - if (!e) - return -ENOENT; - - clone = alloc_engines(e->num_engines); - if (!clone) - goto err_unlock; - - for (n = 0; n < e->num_engines; n++) { - struct intel_engine_cs *engine; - - if (!e->engines[n]) { - clone->engines[n] = NULL; - continue; - } - engine = e->engines[n]->engine; - - /* - * Virtual engines are singletons; they can only exist - * inside a single context, because they embed their - * HW context... As each virtual context implies a single - * timeline (each engine can only dequeue a single request - * at any time), it would be surprising for two contexts - * to use the same engine. So let's create a copy of - * the virtual engine instead. - */ - if (intel_engine_is_virtual(engine)) - clone->engines[n] = - intel_execlists_clone_virtual(engine); - else - clone->engines[n] = intel_context_create(engine); - if (IS_ERR_OR_NULL(clone->engines[n])) { - __free_engines(clone, n); - goto err_unlock; - } - - intel_context_set_gem(clone->engines[n], dst); - - /* Copy across the preferred ringsize */ - if (copy_ring_size(clone->engines[n], e->engines[n])) { - __free_engines(clone, n + 1); - goto err_unlock; - } - } - clone->num_engines = n; - i915_sw_fence_complete(&e->fence); - - /* Serialised by constructor */ - engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); - if (user_engines) - i915_gem_context_set_user_engines(dst); - else - i915_gem_context_clear_user_engines(dst); - return 0; - -err_unlock: - i915_sw_fence_complete(&e->fence); - return -ENOMEM; -} - -static int clone_flags(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->user_flags = src->user_flags; - return 0; -} - -static int clone_schedattr(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->sched = src->sched; - return 0; -} - -static int clone_sseu(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - unsigned long n; - int err; - - /* no locking required; sole access under constructor*/ - clone = __context_engines_static(dst); - if (e->num_engines != clone->num_engines) { - err = -EINVAL; - goto unlock; - } - - for (n = 0; n < e->num_engines; n++) { - struct intel_context *ce = e->engines[n]; - - if (clone->engines[n]->engine->class != ce->engine->class) { - /* Must have compatible engine maps! 
*/ - err = -EINVAL; - goto unlock; - } - - /* serialises with set_sseu */ - err = intel_context_lock_pinned(ce); - if (err) - goto unlock; - - clone->engines[n]->sseu = ce->sseu; - intel_context_unlock_pinned(ce); - } - - err = 0; -unlock: - i915_gem_context_unlock_engines(src); - return err; -} +static const i915_user_extension_fn create_extensions[] = { + [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, + [I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext, +}; -static int clone_timeline(struct i915_gem_context *dst, - struct i915_gem_context *src) +static bool client_is_banned(struct drm_i915_file_private *file_priv) { - if (src->timeline) - __assign_timeline(dst, src->timeline); - - return 0; + return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; } -static int clone_vm(struct i915_gem_context *dst, - struct i915_gem_context *src) +static inline struct i915_gem_context * +__context_lookup(struct drm_i915_file_private *file_priv, u32 id) { - struct i915_address_space *vm; - int err = 0; - - if (!rcu_access_pointer(src->vm)) - return 0; + struct i915_gem_context *ctx; rcu_read_lock(); - vm = context_get_vm_rcu(src); + ctx = xa_load(&file_priv->context_xa, id); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; rcu_read_unlock(); - if (!mutex_lock_interruptible(&dst->mutex)) { - __assign_ppgtt(dst, vm); - mutex_unlock(&dst->mutex); - } else { - err = -EINTR; - } - - i915_vm_put(vm); - return err; + return ctx; } -static int create_clone(struct i915_user_extension __user *ext, void *data) -{ - static int (* const fn[])(struct i915_gem_context *dst, - struct i915_gem_context *src) = { -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y - MAP(ENGINES, clone_engines), - MAP(FLAGS, clone_flags), - MAP(SCHEDATTR, clone_schedattr), - MAP(SSEU, clone_sseu), - MAP(TIMELINE, clone_timeline), - MAP(VM, clone_vm), -#undef MAP - }; - struct drm_i915_gem_context_create_ext_clone local; - const struct create_ext *arg = data; - struct i915_gem_context *dst = arg->ctx; - struct i915_gem_context *src; - int err, bit; +static struct i915_gem_context * +finalize_create_context_locked(struct drm_i915_file_private *file_priv, + struct i915_gem_proto_context *pc, u32 id) +{ + struct i915_gem_context *ctx; + void *old; - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; + lockdep_assert_held(&file_priv->proto_context_lock); - BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != - I915_CONTEXT_CLONE_UNKNOWN); + ctx = i915_gem_create_context(file_priv->dev_priv, pc); + if (IS_ERR(ctx)) + return ctx; - if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) - return -EINVAL; + gem_context_register(ctx, file_priv, id); - if (local.rsvd) - return -EINVAL; + old = xa_erase(&file_priv->proto_context_xa, id); + GEM_BUG_ON(old != pc); + proto_context_close(pc); - rcu_read_lock(); - src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); - rcu_read_unlock(); - if (!src) - return -ENOENT; + /* One for the xarray and one for the caller */ + return i915_gem_context_get(ctx); +} - GEM_BUG_ON(src == dst); +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) +{ + struct i915_gem_proto_context *pc; + struct i915_gem_context *ctx; - for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { - if (!(local.flags & BIT(bit))) - continue; + ctx = __context_lookup(file_priv, id); + if (ctx) + return ctx; - err = fn[bit](dst, src); - if (err) - return err; + mutex_lock(&file_priv->proto_context_lock); + /* Try one more time under the lock 
*/ + ctx = __context_lookup(file_priv, id); + if (!ctx) { + pc = xa_load(&file_priv->proto_context_xa, id); + if (!pc) + ctx = ERR_PTR(-ENOENT); + else + ctx = finalize_create_context_locked(file_priv, pc, id); } + mutex_unlock(&file_priv->proto_context_lock); - return 0; -} - -static const i915_user_extension_fn create_extensions[] = { - [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, - [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, -}; - -static bool client_is_banned(struct drm_i915_file_private *file_priv) -{ - return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; + return ctx; } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, @@ -2321,9 +1982,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ext_data.ctx = i915_gem_create_context(i915, args->flags); - if (IS_ERR(ext_data.ctx)) - return PTR_ERR(ext_data.ctx); + ext_data.pc = proto_context_create(i915, args->flags); + if (IS_ERR(ext_data.pc)) + return PTR_ERR(ext_data.pc); if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { ret = i915_user_extensions(u64_to_user_ptr(args->extensions), @@ -2331,20 +1992,39 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, ARRAY_SIZE(create_extensions), &ext_data); if (ret) - goto err_ctx; + goto err_pc; } - ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id); - if (ret < 0) - goto err_ctx; + if (GRAPHICS_VER(i915) > 12) { + struct i915_gem_context *ctx; + + /* Get ourselves a context ID */ + ret = xa_alloc(&ext_data.fpriv->context_xa, &id, NULL, + xa_limit_32b, GFP_KERNEL); + if (ret) + goto err_pc; + + ctx = i915_gem_create_context(i915, ext_data.pc); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto err_pc; + } + + proto_context_close(ext_data.pc); + gem_context_register(ctx, ext_data.fpriv, id); + } else { + ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id); + if (ret < 0) + goto err_pc; + } args->ctx_id = id; drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); return 0; -err_ctx: - context_close(ext_data.ctx); +err_pc: + proto_context_close(ext_data.pc); return ret; } @@ -2353,6 +2033,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_context_destroy *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; if (args->pad != 0) @@ -2361,11 +2042,24 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!args->ctx_id) return -ENOENT; + /* We need to hold the proto-context lock here to prevent races + * with finalize_create_context_locked(). 
+ */ + mutex_lock(&file_priv->proto_context_lock); ctx = xa_erase(&file_priv->context_xa, args->ctx_id); - if (!ctx) + pc = xa_erase(&file_priv->proto_context_xa, args->ctx_id); + mutex_unlock(&file_priv->proto_context_lock); + + if (!ctx && !pc) return -ENOENT; + GEM_WARN_ON(ctx && pc); + + if (pc) + proto_context_close(pc); + + if (ctx) + context_close(ctx); - context_close(ctx); return 0; } @@ -2433,15 +2127,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - args->size = 0; - args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; rcu_read_lock(); @@ -2480,20 +2169,15 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ppgtt(file_priv, ctx, args); break; - case I915_CONTEXT_PARAM_ENGINES: - ret = get_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: args->size = 0; args->value = i915_gem_context_is_persistent(ctx); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = get_ringsize(ctx, args); - break; - + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_ENGINES: + case I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; break; @@ -2508,16 +2192,32 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; - int ret; + int ret = 0; - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + mutex_lock(&file_priv->proto_context_lock); + ctx = __context_lookup(file_priv, args->ctx_id); + if (!ctx) { + pc = xa_load(&file_priv->proto_context_xa, args->ctx_id); + if (pc) { + /* Contexts should be finalized inside + * GEM_CONTEXT_CREATE starting with graphics + * version 13. + */ + WARN_ON(GRAPHICS_VER(file_priv->dev_priv) > 12); + ret = set_proto_ctx_param(file_priv, pc, args); + } else { + ret = -ENOENT; + } + } + mutex_unlock(&file_priv->proto_context_lock); - ret = ctx_setparam(file_priv, ctx, args); + if (ctx) { + ret = ctx_setparam(file_priv, ctx, args); + i915_gem_context_put(ctx); + } - i915_gem_context_put(ctx); return ret; } @@ -2527,16 +2227,13 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; - int ret; if (args->flags || args->pad) return -EINVAL; - ret = -ENOENT; - rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); - if (!ctx) - goto out; + ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); /* * We opt for unserialised reads here. 
This may result in tearing @@ -2553,10 +2250,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, args->batch_active = atomic_read(&ctx->guilty_count); args->batch_pending = atomic_read(&ctx->active_count); - ret = 0; -out: - rcu_read_unlock(); - return ret; + i915_gem_context_put(ctx); + return 0; } /* GEM context-engines iterator: for_each_gem_engine() */ @@ -2584,27 +2279,16 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) #include "selftests/i915_gem_context.c" #endif -static void i915_global_gem_context_shrink(void) +void i915_gem_context_module_exit(void) { - kmem_cache_shrink(global.slab_luts); + kmem_cache_destroy(slab_luts); } -static void i915_global_gem_context_exit(void) -{ - kmem_cache_destroy(global.slab_luts); -} - -static struct i915_global_gem_context global = { { - .shrink = i915_global_gem_context_shrink, - .exit = i915_global_gem_context_exit, -} }; - -int __init i915_global_gem_context_init(void) +int __init i915_gem_context_module_init(void) { - global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); - if (!global.slab_luts) + slab_luts = KMEM_CACHE(i915_lut_handle, 0); + if (!slab_luts) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index b5c908f3f4f2..18060536b0c2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -133,6 +133,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { @@ -221,6 +224,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); for (i915_gem_engines_iter_init(&(it), (engines)); \ ((ce) = i915_gem_engines_iter_next(&(it)));) +void i915_gem_context_module_exit(void); +int i915_gem_context_module_init(void); + struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 340473aa70de..94c03a97cb77 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -30,33 +30,187 @@ struct i915_address_space; struct intel_timeline; struct intel_ring; +/** + * struct i915_gem_engines - A set of engines + */ struct i915_gem_engines { union { + /** @link: Link in i915_gem_context::stale::engines */ struct list_head link; + + /** @rcu: RCU to use when freeing */ struct rcu_head rcu; }; + + /** @fence: Fence used for delayed destruction of engines */ struct i915_sw_fence fence; + + /** @ctx: i915_gem_context backpointer */ struct i915_gem_context *ctx; + + /** @num_engines: Number of engines in this set */ unsigned int num_engines; + + /** @engines: Array of engines */ struct intel_context *engines[]; }; +/** + * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set + */ struct i915_gem_engines_iter { + /** @idx: Index into i915_gem_engines::engines */ unsigned int idx; + + /** @engines: Engine set being iterated */ const struct i915_gem_engines *engines; }; /** + * enum i915_gem_engine_type - Describes the type of an i915_gem_proto_engine + */ +enum i915_gem_engine_type { + /** 
@I915_GEM_ENGINE_TYPE_INVALID: An invalid engine */ + I915_GEM_ENGINE_TYPE_INVALID = 0, + + /** @I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine */ + I915_GEM_ENGINE_TYPE_PHYSICAL, + + /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */ + I915_GEM_ENGINE_TYPE_BALANCED, +}; + +/** + * struct i915_gem_proto_engine - prototype engine + * + * This struct describes an engine that a context may contain. Engines + * have three types: + * + * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they + * show up as a NULL in i915_gem_engines::engines[i] and any attempt to + * use them by the user results in -EINVAL. They are also useful during + * proto-context construction because the client may create invalid + * engines and then set them up later as virtual engines. + * + * - I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine, described by + * i915_gem_proto_engine::engine. + * + * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described by + * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings. + */ +struct i915_gem_proto_engine { + /** @type: Type of this engine */ + enum i915_gem_engine_type type; + + /** @engine: Engine, for physical */ + struct intel_engine_cs *engine; + + /** @num_siblings: Number of balanced siblings */ + unsigned int num_siblings; + + /** @siblings: Balanced siblings */ + struct intel_engine_cs **siblings; + + /** @sseu: Client-set SSEU parameters */ + struct intel_sseu sseu; +}; + +/** + * struct i915_gem_proto_context - prototype context + * + * The struct i915_gem_proto_context represents the creation parameters for + * a struct i915_gem_context. This is used to gather parameters provided + * either through creation flags or via SET_CONTEXT_PARAM so that, when we + * create the final i915_gem_context, those parameters can be immutable. + * + * The context uAPI allows for two methods of setting context parameters: + * SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is + * allowed to be called at any time while the latter happens as part of + * GEM_CONTEXT_CREATE. When these were initially added, everything + * settable via one was also settable via the other. While some + * params are fairly simple and setting them on a live context is harmless, + * such as the context priority, others are far trickier such as the VM or the + * set of engines. To avoid some truly nasty race conditions, we don't + * allow setting the VM or the set of engines on live contexts. + * + * The way we dealt with this without breaking older userspace that sets + * the VM or engine set via SET_CONTEXT_PARAM is to delay the creation of + * the actual context until after the client is done configuring it with + * SET_CONTEXT_PARAM. From the perspective of the client, it has the same + * u32 context ID the whole time. From the perspective of i915, however, + * it's an i915_gem_proto_context right up until the point where we attempt + * to do something which the proto-context can't handle, at which point the + * real context gets created. + * + * This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE + * is called, we create a proto-context, reserve a slot in context_xa but + * leave it NULL, and put the proto-context in the corresponding slot in + * proto_context_xa. Then, whenever we go to look up a context, we first + * check context_xa. If it's there, we return the i915_gem_context and + * we're done. 
If it's not, we look in proto_context_xa and, if we find it + * there, we create the actual context and kill the proto-context. + * + * At the time we made this change (April, 2021), we did a fairly complete + * audit of existing userspace to ensure this wouldn't break anything: + * + * - Mesa/i965 didn't use the engines or VM APIs at all + * + * - Mesa/ANV used the engines API but via CONTEXT_CREATE_EXT_SETPARAM and + * didn't use the VM API. + * + * - Mesa/iris didn't use the engines or VM APIs at all + * + * - The open-source compute-runtime didn't yet use the engines API but + * did use the VM API via SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM + * was always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE. + * + * - The media driver sets engines and bonding/balancing via + * SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM to set the VM was + * always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE and setting engines immediately followed that. + * + * In order for this dance to work properly, any modification to an + * i915_gem_proto_context that is exposed to the client via + * drm_i915_file_private::proto_context_xa must be guarded by + * drm_i915_file_private::proto_context_lock. The exception is when a + * proto-context has not yet been exposed, such as when handling + * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE. + */ +struct i915_gem_proto_context { + /** @vm: See &i915_gem_context.vm */ + struct i915_address_space *vm; + + /** @user_flags: See &i915_gem_context.user_flags */ + unsigned long user_flags; + + /** @sched: See &i915_gem_context.sched */ + struct i915_sched_attr sched; + + /** @num_user_engines: Number of user-specified engines or -1 */ + int num_user_engines; + + /** @user_engines: User-specified engines */ + struct i915_gem_proto_engine *user_engines; + + /** @legacy_rcs_sseu: Client-set SSEU parameters for the legacy RCS */ + struct intel_sseu legacy_rcs_sseu; + + /** @single_timeline: See &i915_gem_context.syncobj */ + bool single_timeline; +}; + +/** * struct i915_gem_context - client state * * The struct i915_gem_context represents the combined view of the driver and * logical hardware state for a particular client. */ struct i915_gem_context { - /** i915: i915 device backpointer */ + /** @i915: i915 device backpointer */ struct drm_i915_private *i915; - /** file_priv: owning file descriptor */ + /** @file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; /** @@ -81,9 +235,23 @@ struct i915_gem_context { * CONTEXT_USER_ENGINES flag is set). */ struct i915_gem_engines __rcu *engines; - struct mutex engines_mutex; /* guards writes to engines */ - struct intel_timeline *timeline; + /** @engines_mutex: guards writes to engines */ + struct mutex engines_mutex; + + /** + * @syncobj: Shared timeline syncobj + * + * When the SHARED_TIMELINE flag is set on context creation, we + * emulate a single timeline across all engines using this syncobj. + * For every execbuffer2 call, this syncobj is used as both an in- + * and out-fence. Unlike the real intel_timeline, this doesn't + * provide perfect atomic in-order guarantees if the client races + * with itself by calling execbuffer2 twice concurrently. However, + * if userspace races with itself, that's not likely to yield well- + * defined results anyway so we choose to not care. 
+ */ + struct drm_syncobj *syncobj; /** * @vm: unique address space (GTT) @@ -106,7 +274,7 @@ struct i915_gem_context { */ struct pid *pid; - /** link: place with &drm_i915_private.context_list */ + /** @link: place with &drm_i915_private.context_list */ struct list_head link; /** @@ -129,7 +297,6 @@ struct i915_gem_context { * @user_flags: small set of booleans controlled by the user */ unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP 0 #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 @@ -142,11 +309,13 @@ struct i915_gem_context { #define CONTEXT_CLOSED 0 #define CONTEXT_USER_ENGINES 1 + /** @mutex: guards everything that isn't engines or handles_vma */ struct mutex mutex; + /** @sched: scheduler parameters */ struct i915_sched_attr sched; - /** guilty_count: How many times this context has caused a GPU hang. */ + /** @guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** * @active_count: How many times this context was active during a GPU @@ -154,25 +323,23 @@ struct i915_gem_context { */ atomic_t active_count; - struct { - u64 timeout_us; - } watchdog; - /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ unsigned long hang_timestamp[2]; #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - /** remap_slice: Bitmask of cache lines that need remapping */ + /** @remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; /** - * handles_vma: rbtree to look up our context specific obj/vma for + * @handles_vma: rbtree to look up our context specific obj/vma for * the user handle. (user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** @lut_mutex: Locks handles_vma */ struct mutex lut_mutex; /** @@ -184,8 +351,11 @@ struct i915_gem_context { */ char name[TASK_COMM_LEN + 8]; + /** @stale: tracks stale engines to be destroyed */ struct { + /** @lock: guards engines */ spinlock_t lock; + /** @engines: list of stale engines */ struct list_head engines; } stale; }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 548ddf39d853..23fee13a3384 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -11,13 +11,14 @@ #include "i915_trace.h" #include "i915_user_extensions.h" -static u32 object_max_page_size(struct drm_i915_gem_object *obj) +static u32 object_max_page_size(struct intel_memory_region **placements, + unsigned int n_placements) { u32 max_page_size = 0; int i; - for (i = 0; i < obj->mm.n_placements; i++) { - struct intel_memory_region *mr = obj->mm.placements[i]; + for (i = 0; i < n_placements; i++) { + struct intel_memory_region *mr = placements[i]; GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); max_page_size = max_t(u32, max_page_size, mr->min_page_size); @@ -27,10 +28,13 @@ static u32 object_max_page_size(struct drm_i915_gem_object *obj) return max_page_size; } -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) +static int object_set_placements(struct drm_i915_gem_object *obj, + struct intel_memory_region **placements, + unsigned int n_placements) { + struct intel_memory_region **arr; + unsigned int i; + GEM_BUG_ON(!n_placements); /* @@ -44,9 +48,20 @@ static void object_set_placements(struct drm_i915_gem_object *obj, 
obj->mm.placements = &i915->mm.regions[mr->id]; obj->mm.n_placements = 1; } else { - obj->mm.placements = placements; + arr = kmalloc_array(n_placements, + sizeof(struct intel_memory_region *), + GFP_KERNEL); + if (!arr) + return -ENOMEM; + + for (i = 0; i < n_placements; i++) + arr[i] = placements[i]; + + obj->mm.placements = arr; obj->mm.n_placements = n_placements; } + + return 0; } static int i915_gem_publish(struct drm_i915_gem_object *obj, @@ -67,40 +82,67 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj, return 0; } -static int -i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) +/** + * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT + * @i915: i915 private + * @size: size of the buffer, in bytes + * @placements: possible placement regions, in priority order + * @n_placements: number of possible placement regions + * + * This function is exposed primarily for selftests and does very little + * error checking. It is assumed that the set of placement regions has + * already been verified to be valid. + */ +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements) { - struct intel_memory_region *mr = obj->mm.placements[0]; + struct intel_memory_region *mr = placements[0]; + struct drm_i915_gem_object *obj; unsigned int flags; int ret; - size = round_up(size, object_max_page_size(obj)); + i915_gem_flush_free_objects(i915); + + size = round_up(size, object_max_page_size(placements, n_placements)); if (size == 0) - return -EINVAL; + return ERR_PTR(-EINVAL); /* For most of the ABI (e.g. mmap) we think in system pages */ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); if (i915_gem_object_size_2big(size)) - return -E2BIG; + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + ret = object_set_placements(obj, placements, n_placements); + if (ret) + goto object_free; /* - * For now resort to CPU based clearing for device local-memory, in the - * near future this will use the blitter engine for accelerated, GPU - * based clearing. + * I915_BO_ALLOC_USER will make sure the object is cleared before + * any user access. 
*/ - flags = 0; - if (mr->type == INTEL_MEMORY_LOCAL) - flags = I915_BO_ALLOC_CPU_CLEAR; + flags = I915_BO_ALLOC_USER; - ret = mr->ops->init_object(mr, obj, size, flags); + ret = mr->ops->init_object(mr, obj, size, 0, flags); if (ret) - return ret; + goto object_free; GEM_BUG_ON(size != obj->base.size); trace_i915_gem_object_create(obj); - return 0; + return obj; + +object_free: + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); + i915_gem_object_free(obj); + return ERR_PTR(ret); } int @@ -113,7 +155,6 @@ i915_gem_dumb_create(struct drm_file *file, enum intel_memory_type mem_type; int cpp = DIV_ROUND_UP(args->bpp, 8); u32 format; - int ret; switch (cpp) { case 1: @@ -146,22 +187,13 @@ i915_gem_dumb_create(struct drm_file *file, if (HAS_LMEM(to_i915(dev))) mem_type = INTEL_MEMORY_LOCAL; - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - mr = intel_memory_region_by_type(to_i915(dev), mem_type); - object_set_placements(obj, &mr, 1); - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(to_i915(dev), args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } /** @@ -178,31 +210,20 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; struct drm_i915_gem_object *obj; struct intel_memory_region *mr; - int ret; - - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - object_set_placements(obj, &mr, 1); - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } struct create_ext { struct drm_i915_private *i915; - struct drm_i915_gem_object *vanilla_object; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; + unsigned int n_placements; }; static void repr_placements(char *buf, size_t size, @@ -233,8 +254,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, struct drm_i915_private *i915 = ext_data->i915; struct drm_i915_gem_memory_class_instance __user *uregions = u64_to_user_ptr(args->regions); - struct drm_i915_gem_object *obj = ext_data->vanilla_object; - struct intel_memory_region **placements; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; u32 mask; int i, ret = 0; @@ -248,6 +268,8 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ret = -EINVAL; } + BUILD_BUG_ON(ARRAY_SIZE(i915->mm.regions) != ARRAY_SIZE(placements)); + BUILD_BUG_ON(ARRAY_SIZE(ext_data->placements) != ARRAY_SIZE(placements)); if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) { drm_dbg(&i915->drm, "num_regions is too large\n"); ret = -EINVAL; @@ -256,21 +278,13 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, if (ret) return ret; - placements = kmalloc_array(args->num_regions, - sizeof(struct intel_memory_region *), - GFP_KERNEL); - if (!placements) - return -ENOMEM; - mask = 0; for (i = 0; i < args->num_regions; i++) { struct drm_i915_gem_memory_class_instance region; struct intel_memory_region *mr; - if (copy_from_user(®ion, uregions, sizeof(region))) { - ret = -EFAULT; - goto out_free; - } + if 
(copy_from_user(®ion, uregions, sizeof(region))) + return -EFAULT; mr = intel_memory_region_lookup(i915, region.memory_class, @@ -296,14 +310,14 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ++uregions; } - if (obj->mm.placements) { + if (ext_data->n_placements) { ret = -EINVAL; goto out_dump; } - object_set_placements(obj, placements, args->num_regions); - if (args->num_regions == 1) - kfree(placements); + ext_data->n_placements = args->num_regions; + for (i = 0; i < args->num_regions; i++) + ext_data->placements[i] = placements[i]; return 0; @@ -311,11 +325,11 @@ out_dump: if (1) { char buf[256]; - if (obj->mm.placements) { + if (ext_data->n_placements) { repr_placements(buf, sizeof(buf), - obj->mm.placements, - obj->mm.n_placements); + ext_data->placements, + ext_data->n_placements); drm_dbg(&i915->drm, "Placements were already set in previous EXT. Existing placements: %s\n", buf); @@ -325,8 +339,6 @@ out_dump: drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf); } -out_free: - kfree(placements); return ret; } @@ -361,44 +373,30 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_create_ext *args = data; struct create_ext ext_data = { .i915 = i915 }; - struct intel_memory_region **placements_ext; struct drm_i915_gem_object *obj; int ret; if (args->flags) return -EINVAL; - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - - ext_data.vanilla_object = obj; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), &ext_data); - placements_ext = obj->mm.placements; if (ret) - goto object_free; + return ret; - if (!placements_ext) { - struct intel_memory_region *mr = + if (!ext_data.n_placements) { + ext_data.placements[0] = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - - object_set_placements(obj, &mr, 1); + ext_data.n_placements = 1; } - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, + ext_data.placements, + ext_data.n_placements); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - if (obj->mm.n_placements > 1) - kfree(placements_ext); - i915_gem_object_free(obj); - return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 616c3a2f1baf..afa34111de02 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -12,6 +12,8 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" +I915_SELFTEST_DECLARE(static bool force_different_devices;) + static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) { return to_intel_bo(buf->priv); @@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages_unlocked(obj); - if (ret) - goto err; - /* Copy sg so that we make an independent mapping */ st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (st == NULL) { ret = -ENOMEM; - goto err_unpin_pages; + goto err; } ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); @@ -58,8 +56,6 @@ err_free_sg: sg_free_table(st); err_free: kfree(st); -err_unpin_pages: - i915_gem_object_unpin_pages(obj); err: return ERR_PTR(ret); } @@ -68,13 +64,9 @@ static void 
i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sg); kfree(sg); - - i915_gem_object_unpin_pages(obj); } static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map) @@ -168,7 +160,46 @@ retry: return err; } +static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + struct i915_gem_ww_ctx ww; + int err; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) + return -EOPNOTSUPP; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM); + if (err) + continue; + + err = i915_gem_object_wait_migration(obj, 0); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + } + + return err; +} + +static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + + i915_gem_object_unpin_pages(obj); +} + static const struct dma_buf_ops i915_dmabuf_ops = { + .attach = i915_gem_dmabuf_attach, + .detach = i915_gem_dmabuf_detach, .map_dma_buf = i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, @@ -204,6 +235,8 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) struct sg_table *pages; unsigned int sg_page_sizes; + assert_object_held(obj); + pages = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(pages)) @@ -241,7 +274,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, if (dma_buf->ops == &i915_dmabuf_ops) { obj = dma_buf_to_obj(dma_buf); /* is it from our device? */ - if (obj->base.dev == dev) { + if (obj->base.dev == dev && + !I915_SELFTEST_ONLY(force_different_devices)) { /* * Importing dmabuf exported from our own gem increases * refcount on gem itself instead of f_count of dmabuf. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..b684a62bf3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = 0; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (!obj) { @@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, enum i915_cache_level level; int ret = 0; + if (IS_DGFX(i915)) + return -ENODEV; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; @@ -375,7 +381,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - /* Frame buffer must be in LMEM (no migration yet) */ + /* Frame buffer must be in LMEM */ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); @@ -484,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, u32 write_domain = args->write_domain; int err; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + /* Only handle setting domains to types used by the CPU. 
*/ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index a8abc9af5ff4..1aa249908b64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,10 +25,8 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" -#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -279,18 +277,9 @@ struct i915_execbuffer { bool has_llc : 1; bool has_fence : 1; bool needs_unfenced : 1; - - struct i915_request *rq; - u32 *rq_cmd; - unsigned int rq_size; - struct intel_gt_buffer_pool_node *pool; } reloc_cache; - struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ - struct intel_context *reloc_context; - u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u64 batch_len; /** Length of batch within object */ u32 batch_start_offset; /** Location within object of batch */ @@ -541,9 +530,6 @@ eb_validate_vma(struct i915_execbuffer *eb, entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; } - if (!(entry->flags & EXEC_OBJECT_PINNED)) - entry->flags |= eb->context_flags; - return 0; } @@ -743,17 +729,13 @@ static int eb_select_context(struct i915_execbuffer *eb) struct i915_gem_context *ctx; ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); - if (unlikely(!ctx)) - return -ENOENT; + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); eb->gem_context = ctx; if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - eb->context_flags = 0; - if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) - eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; - return 0; } @@ -922,21 +904,38 @@ err: return err; } -static int eb_validate_vmas(struct i915_execbuffer *eb) +static int eb_lock_vmas(struct i915_execbuffer *eb) { unsigned int i; int err; - INIT_LIST_HEAD(&eb->unbound); - for (i = 0; i < eb->buffer_count; i++) { - struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; err = i915_gem_object_lock(vma->obj, &eb->ww); if (err) return err; + } + + return 0; +} + +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + err = eb_lock_vmas(eb); + if (err) + return err; + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; err = eb_pin_vma(eb, entry, ev); if (err == -EDEADLK) @@ -994,7 +993,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr) +static void eb_release_vmas(struct i915_execbuffer *eb, bool final) { const unsigned int count = eb->buffer_count; unsigned int i; @@ -1008,11 +1007,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release eb_unreserve_vma(ev); - if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) { - ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT; - i915_gem_object_userptr_submit_fini(vma->obj); - } - if (final) i915_vma_put(vma); } @@ -1022,8 +1016,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, 
bool release static void eb_destroy(const struct i915_execbuffer *eb) { - GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -1035,14 +1027,6 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target->node.start); } -static void reloc_cache_clear(struct reloc_cache *cache) -{ - cache->rq = NULL; - cache->rq_cmd = NULL; - cache->pool = NULL; - cache->rq_size = 0; -} - static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { @@ -1055,7 +1039,6 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->graphics_ver < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - reloc_cache_clear(cache); } static inline void *unmask_page(unsigned long p) @@ -1077,48 +1060,10 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - if (!cache->pool) - return; - - /* - * This is a bit nasty, normally we keep objects locked until the end - * of execbuffer, but we already submit this, and have to unlock before - * dropping the reference. Fortunately we can only hold 1 pool node at - * a time, so this should be harmless. - */ - i915_gem_ww_unlock_single(cache->pool->obj); - intel_gt_buffer_pool_put(cache->pool); - cache->pool = NULL; -} - -static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - struct drm_i915_gem_object *obj = cache->rq->batch->obj; - - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - intel_gt_chipset_flush(cache->rq->engine->gt); - - i915_request_add(cache->rq); - reloc_cache_put_pool(eb, cache); - reloc_cache_clear(cache); - - eb->reloc_pool = NULL; -} - static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; - if (cache->rq) - reloc_gpu_flush(eb, cache); - if (!cache->vaddr) return; @@ -1300,291 +1245,6 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } -static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - int err; - - assert_vma_held(vma); - - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - obj->write_domain = 0; - - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - - return err; -} - -static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct intel_engine_cs *engine, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; - struct i915_request *rq; - struct i915_vma *batch; - u32 *cmd; - int err; - - if (!pool) { - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE, - cache->has_llc ? 
- I915_MAP_WB : - I915_MAP_WC); - if (IS_ERR(pool)) - return PTR_ERR(pool); - } - eb->reloc_pool = NULL; - - err = i915_gem_object_lock(pool->obj, &eb->ww); - if (err) - goto err_pool; - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err_pool; - } - intel_gt_buffer_pool_mark_used(pool); - - memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); - - batch = i915_vma_instance(pool->obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_unmap; - } - - err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto err_unmap; - - if (engine == eb->context->engine) { - rq = i915_request_create(eb->context); - } else { - struct intel_context *ce = eb->reloc_context; - - if (!ce) { - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err_unpin; - } - - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(eb->context->vm); - eb->reloc_context = ce; - } - - err = intel_context_pin_ww(ce, &eb->ww); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - intel_context_unpin(ce); - } - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = intel_gt_buffer_pool_mark_active(pool, rq); - if (err) - goto err_request; - - err = reloc_move_to_gpu(rq, vma); - if (err) - goto err_request; - - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->graphics_ver > 5 ? 0 : I915_DISPATCH_SECURE); - if (err) - goto skip_request; - - assert_vma_held(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - rq->batch = batch; - i915_vma_unpin(batch); - - cache->rq = rq; - cache->rq_cmd = cmd; - cache->rq_size = 0; - cache->pool = pool; - - /* Return with batch mapping (cmd) still pinned */ - return 0; - -skip_request: - i915_request_set_error_once(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(batch); -err_unmap: - i915_gem_object_unpin_map(pool->obj); -err_pool: - eb->reloc_pool = pool; - return err; -} - -static bool reloc_can_use_engine(const struct intel_engine_cs *engine) -{ - return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6; -} - -static u32 *reloc_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(eb, cache); - - if (unlikely(!cache->rq)) { - int err; - struct intel_engine_cs *engine = eb->engine; - - if (!reloc_can_use_engine(engine)) { - engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; - if (!engine) - return ERR_PTR(-ENODEV); - } - - err = __reloc_gpu_alloc(eb, engine, vma, len); - if (unlikely(err)) - return ERR_PTR(err); - } - - cmd = cache->rq_cmd + cache->rq_size; - cache->rq_size += len; - - return cmd; -} - -static inline bool use_reloc_gpu(struct i915_vma *vma) -{ - if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) - return true; - - if (DBG_FORCE_RELOC) - return false; - - return !dma_resv_test_signaled(vma->resv, true); -} - -static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) -{ - struct page *page; - unsigned long addr; - - GEM_BUG_ON(vma->pages != vma->obj->mm.pages); - - page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); - addr = PFN_PHYS(page_to_pfn(page)); - GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ - - return addr + offset_in_page(offset); -} - 
-static int __reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - const unsigned int ver = eb->reloc_cache.graphics_ver; - unsigned int len; - u32 *batch; - u64 addr; - - if (ver >= 8) - len = offset & 7 ? 8 : 5; - else if (ver >= 4) - len = 4; - else - len = 3; - - batch = reloc_gpu(eb, vma, len); - if (batch == ERR_PTR(-EDEADLK)) - return -EDEADLK; - else if (IS_ERR(batch)) - return false; - - addr = gen8_canonical_addr(vma->node.start + offset); - if (ver >= 8) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_addr); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - *batch++ = upper_32_bits(target_addr); - } - } else if (ver >= 6) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (IS_I965G(eb->i915)) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } else if (ver >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (ver >= 3 && - !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_addr; - } else { - *batch++ = MI_STORE_DWORD_IMM; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } - - return true; -} - -static int reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - if (eb->reloc_cache.vaddr) - return false; - - if (!use_reloc_gpu(vma)) - return false; - - return __reloc_entry_gpu(eb, vma, offset, target_addr); -} - static u64 relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, @@ -1593,32 +1253,25 @@ relocate_entry(struct i915_vma *vma, { u64 target_addr = relocation_target(reloc, target); u64 offset = reloc->offset; - int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); - - if (reloc_gpu < 0) - return reloc_gpu; - - if (!reloc_gpu) { - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; repeat: - vaddr = reloc_vaddr(vma->obj, eb, - offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - - GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_addr), - eb->reloc_cache.vaddr); - - if (wide) { - offset += sizeof(u32); - target_addr >>= 32; - wide = false; - goto repeat; - } + vaddr = reloc_vaddr(vma->obj, eb, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); + + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; } return target->node.start | UPDATE; @@ -1990,7 +1643,7 @@ repeat: } /* We may process another execbuffer during the unlock... 
*/ - eb_release_vmas(eb, false, true); + eb_release_vmas(eb, false); i915_gem_ww_ctx_fini(&eb->ww); if (rq) { @@ -2059,9 +1712,7 @@ repeat_validate: list_for_each_entry(ev, &eb->relocs, reloc_link) { if (!have_copy) { - pagefault_disable(); err = eb_relocate_vma(eb, ev); - pagefault_enable(); if (err) break; } else { @@ -2094,7 +1745,7 @@ repeat_validate: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto repeat_validate; @@ -2191,7 +1842,7 @@ retry: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto retry; @@ -2268,7 +1919,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) #ifdef CONFIG_MMU_NOTIFIER if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { - spin_lock(&eb->i915->mm.notifier_lock); + read_lock(&eb->i915->mm.notifier_lock); /* * count is always at least 1, otherwise __EXEC_USERPTR_USED @@ -2286,7 +1937,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) break; } - spin_unlock(&eb->i915->mm.notifier_lock); + read_unlock(&eb->i915->mm.notifier_lock); } #endif @@ -2372,217 +2023,6 @@ shadow_batch_pin(struct i915_execbuffer *eb, return vma; } -struct eb_parse_work { - struct dma_fence_work base; - struct intel_engine_cs *engine; - struct i915_vma *batch; - struct i915_vma *shadow; - struct i915_vma *trampoline; - unsigned long batch_offset; - unsigned long batch_length; - unsigned long *jump_whitelist; - const void *batch_map; - void *shadow_map; -}; - -static int __eb_parse(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - int ret; - bool cookie; - - cookie = dma_fence_begin_signalling(); - ret = intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->jump_whitelist, - pw->shadow_map, - pw->batch_map); - dma_fence_end_signalling(cookie); - - return ret; -} - -static void __eb_parse_release(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - - if (!IS_ERR_OR_NULL(pw->jump_whitelist)) - kfree(pw->jump_whitelist); - - if (pw->batch_map) - i915_gem_object_unpin_map(pw->batch->obj); - else - i915_gem_object_unpin_pages(pw->batch->obj); - - i915_gem_object_unpin_map(pw->shadow->obj); - - if (pw->trampoline) - i915_active_release(&pw->trampoline->active); - i915_active_release(&pw->shadow->active); - i915_active_release(&pw->batch->active); -} - -static const struct dma_fence_work_ops eb_parse_ops = { - .name = "eb_parse", - .work = __eb_parse, - .release = __eb_parse_release, -}; - -static inline int -__parser_mark_active(struct i915_vma *vma, - struct intel_timeline *tl, - struct dma_fence *fence) -{ - struct intel_gt_buffer_pool_node *node = vma->private; - - return i915_active_ref(&node->active, tl->fence_context, fence); -} - -static int -parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) -{ - int err; - - mutex_lock(&tl->mutex); - - err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); - if (err) - goto unlock; - - if (pw->trampoline) { - err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); - if (err) - goto unlock; - } - -unlock: - mutex_unlock(&tl->mutex); - return err; -} - -static int eb_parse_pipeline(struct i915_execbuffer *eb, - struct i915_vma *shadow, - struct i915_vma *trampoline) -{ - struct eb_parse_work *pw; - struct drm_i915_gem_object *batch = eb->batch->vma->obj; - 
bool needs_clflush; - int err; - - GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); - GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); - - pw = kzalloc(sizeof(*pw), GFP_KERNEL); - if (!pw) - return -ENOMEM; - - err = i915_active_acquire(&eb->batch->vma->active); - if (err) - goto err_free; - - err = i915_active_acquire(&shadow->active); - if (err) - goto err_batch; - - if (trampoline) { - err = i915_active_acquire(&trampoline->active); - if (err) - goto err_shadow; - } - - pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); - if (IS_ERR(pw->shadow_map)) { - err = PTR_ERR(pw->shadow_map); - goto err_trampoline; - } - - needs_clflush = - !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - pw->batch_map = ERR_PTR(-ENODEV); - if (needs_clflush && i915_has_memcpy_from_wc()) - pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); - - if (IS_ERR(pw->batch_map)) { - err = i915_gem_object_pin_pages(batch); - if (err) - goto err_unmap_shadow; - pw->batch_map = NULL; - } - - pw->jump_whitelist = - intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, - trampoline); - if (IS_ERR(pw->jump_whitelist)) { - err = PTR_ERR(pw->jump_whitelist); - goto err_unmap_batch; - } - - dma_fence_work_init(&pw->base, &eb_parse_ops); - - pw->engine = eb->engine; - pw->batch = eb->batch->vma; - pw->batch_offset = eb->batch_start_offset; - pw->batch_length = eb->batch_len; - pw->shadow = shadow; - pw->trampoline = trampoline; - - /* Mark active refs early for this worker, in case we get interrupted */ - err = parser_mark_active(pw, eb->context->timeline); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(pw->batch->resv, 1); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(shadow->resv, 1); - if (err) - goto err_commit; - - /* Wait for all writes (and relocs) into the batch to complete */ - err = i915_sw_fence_await_reservation(&pw->base.chain, - pw->batch->resv, NULL, false, - 0, I915_FENCE_GFP); - if (err < 0) - goto err_commit; - - /* Keep the batch alive and unwritten as we parse */ - dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - - /* Force execution to wait for completion of the parser */ - dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - - dma_fence_work_commit_imm(&pw->base); - return 0; - -err_commit: - i915_sw_fence_set_error_once(&pw->base.chain, err); - dma_fence_work_commit_imm(&pw->base); - return err; - -err_unmap_batch: - if (pw->batch_map) - i915_gem_object_unpin_map(batch); - else - i915_gem_object_unpin_pages(batch); -err_unmap_shadow: - i915_gem_object_unpin_map(shadow->obj); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); -err_shadow: - i915_active_release(&shadow->active); -err_batch: - i915_active_release(&eb->batch->vma->active); -err_free: - kfree(pw); - return err; -} - static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) { /* @@ -2672,7 +2112,15 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = eb_parse_pipeline(eb, shadow, trampoline); + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_trampoline; + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->vma, + eb->batch_start_offset, + eb->batch_len, + shadow, trampoline); if (err) goto err_unpin_batch; @@ -2983,7 +2431,7 @@ __free_fence_array(struct eb_fence *fences, unsigned int n) while (n--) { drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); dma_fence_put(fences[n].dma_fence); - 
kfree(fences[n].chain_fence); + dma_fence_chain_free(fences[n].chain_fence); } kvfree(fences); } @@ -3097,9 +2545,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb, return -EINVAL; } - f->chain_fence = - kmalloc(sizeof(*f->chain_fence), - GFP_KERNEL); + f->chain_fence = dma_fence_chain_alloc(); if (!f->chain_fence) { drm_syncobj_put(syncobj); dma_fence_put(fence); @@ -3359,8 +2805,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.exec = exec; eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); eb.vma[0].vma = NULL; - eb.reloc_pool = eb.batch_pool = NULL; - eb.reloc_context = NULL; + eb.batch_pool = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -3435,7 +2880,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_lookup_vmas(&eb); if (err) { - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); goto err_engine; } @@ -3458,9 +2903,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, batch = eb.batch->vma; - /* All GPU relocation batches must be submitted prior to the user rq */ - GEM_BUG_ON(eb.reloc_cache.rq); - /* Allocate a request for this batch buffer nice and early. */ eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { @@ -3468,11 +2910,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (unlikely(eb.gem_context->syncobj)) { + struct dma_fence *fence; + + fence = drm_syncobj_fence_get(eb.gem_context->syncobj); + err = i915_request_await_dma_fence(eb.request, fence); + dma_fence_put(fence); + if (err) + goto err_ext; + } + if (in_fence) { if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, - in_fence, - eb.engine->bond_execute); + in_fence); else err = i915_request_await_dma_fence(eb.request, in_fence); @@ -3525,10 +2976,16 @@ err_request: fput(out_fence->file); } } + + if (unlikely(eb.gem_context->syncobj)) { + drm_syncobj_replace_fence(eb.gem_context->syncobj, + &eb.request->fence); + } + i915_request_put(eb.request); err_vma: - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); WARN_ON(err == -EDEADLK); @@ -3536,10 +2993,6 @@ err_vma: if (eb.batch_pool) intel_gt_buffer_pool_put(eb.batch_pool); - if (eb.reloc_pool) - intel_gt_buffer_pool_put(eb.reloc_pool); - if (eb.reloc_context) - intel_context_put(eb.reloc_context); err_engine: eb_put_engine(&eb); err_context: @@ -3653,7 +3106,3 @@ end:; kvfree(exec2_list); return err; } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_execbuffer.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ce6b664b10aa..13b217f75055 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -177,8 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 3b4aa28a076d..eb345305dc52 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -4,74 +4,10 @@ */ 
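
The timeline-fence conversion above swaps the open-coded kmalloc()/kfree() of the chain node for the dedicated dma-fence-chain helpers. As a rough illustration of the lifetime those helpers imply (not taken from the patch; this is the generic dma-fence-chain/drm_syncobj usage), an unused node can simply be handed back with dma_fence_chain_free(), while drm_syncobj_add_point() consumes it:

#include <linux/dma-fence-chain.h>
#include <drm/drm_syncobj.h>

static int example_add_timeline_point(struct drm_syncobj *syncobj,
				      struct dma_fence *fence, u64 point)
{
	struct dma_fence_chain *chain;

	chain = dma_fence_chain_alloc();
	if (!chain)
		return -ENOMEM;

	if (!fence) {
		/* Never initialised: the node can simply be freed again. */
		dma_fence_chain_free(chain);
		return -EINVAL;
	}

	/*
	 * The syncobj takes over the chain node and grabs its own
	 * reference on @fence; the caller keeps its reference.
	 */
	drm_syncobj_add_point(syncobj, chain, fence, point);
	return 0;
}
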
#include "intel_memory_region.h" -#include "intel_region_ttm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_lmem.h" #include "i915_drv.h" -static void lmem_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - obj->mm.dirty = false; - sg_free_table(pages); - kfree(pages); -} - -static int lmem_get_pages(struct drm_i915_gem_object *obj) -{ - unsigned int flags; - struct sg_table *pages; - - flags = I915_ALLOC_MIN_PAGE_SIZE; - if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; - - obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, - obj->base.size, - flags); - if (IS_ERR(obj->mm.st_mm_node)) - return PTR_ERR(obj->mm.st_mm_node); - - /* Range manager is always contigous */ - if (obj->mm.region->is_range_manager) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); - if (IS_ERR(pages)) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - return PTR_ERR(pages); - } - - __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); - - if (obj->flags & I915_BO_ALLOC_CPU_CLEAR) { - void __iomem *vaddr = - i915_gem_object_lmem_io_map(obj, 0, obj->base.size); - - if (!vaddr) { - struct sg_table *pages = - __i915_gem_object_unset_pages(obj); - - if (!IS_ERR_OR_NULL(pages)) - lmem_put_pages(obj, pages); - } - - memset_io(vaddr, 0, obj->base.size); - io_mapping_unmap(vaddr); - } - - return 0; -} - -const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { - .name = "i915_gem_object_lmem", - .flags = I915_GEM_OBJECT_HAS_IOMEM, - - .get_pages = lmem_get_pages, - .put_pages = lmem_put_pages, - .release = i915_gem_object_release_memory_region, -}; - void __iomem * i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, unsigned long n, @@ -87,39 +23,91 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); } +/** + * i915_gem_object_is_lmem - Whether the object is resident in + * lmem + * @obj: The object to check. + * + * Even if an object is allowed to migrate and change memory region, + * this function checks whether it will always be present in lmem when + * valid *or* if that's not the case, whether it's currently resident in lmem. + * For migratable and evictable objects, the latter only makes sense when + * the object is locked. + * + * Return: Whether the object migratable but resident in lmem, or not + * migratable and will be present in lmem when valid. + */ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) { - struct intel_memory_region *mr = obj->mm.region; + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); +#ifdef CONFIG_LOCKDEP + if (i915_gem_object_migratable(obj) && + i915_gem_object_evictable(obj)) + assert_object_held(obj); +#endif return mr && (mr->type == INTEL_MEMORY_LOCAL || mr->type == INTEL_MEMORY_STOLEN_LOCAL); } +/** + * __i915_gem_object_is_lmem - Whether the object is resident in + * lmem while in the fence signaling critical path. + * @obj: The object to check. + * + * This function is intended to be called from within the fence signaling + * path where the fence keeps the object from being migrated. For example + * during gpu reset or similar. + * + * Return: Whether the object is resident in lmem. 
+ */ +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + +#ifdef CONFIG_LOCKDEP + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true)); +#endif + return mr && (mr->type == INTEL_MEMORY_LOCAL || + mr->type == INTEL_MEMORY_STOLEN_LOCAL); +} + +/** + * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the + * minimum page size for the backing pages. + * @i915: The i915 instance. + * @size: The size in bytes for the object. Note that we need to round the size + * up depending on the @page_size. The final object size can be fished out from + * the drm GEM object. + * @page_size: The requested minimum page size in bytes for this object. This is + * useful if we need something bigger than the regions min_page_size due to some + * hw restriction, or in some very specialised cases where it needs to be + * smaller, where the internal fragmentation cost is too great when rounding up + * the object size. + * @flags: The optional BO allocation flags. + * + * Note that this interface assumes you know what you are doing when forcing the + * @page_size. If this is smaller than the regions min_page_size then it can + * never be inserted into any GTT, otherwise it might lead to undefined + * behaviour. + * + * Return: The object pointer, which might be an ERR_PTR in the case of failure. + */ struct drm_i915_gem_object * -i915_gem_object_create_lmem(struct drm_i915_private *i915, - resource_size_t size, - unsigned int flags) +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], - size, flags); + size, page_size, flags); } -int __i915_gem_lmem_object_init(struct intel_memory_region *mem, - struct drm_i915_gem_object *obj, - resource_size_t size, - unsigned int flags) +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags) { - static struct lock_class_key lock_class; - struct drm_i915_private *i915 = mem->i915; - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class, flags); - - obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - - i915_gem_object_init_memory_region(obj, mem); - - return 0; + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, 0, flags); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index fac6bc5a5ebb..4ee81fc66302 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -21,14 +21,16 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags); -int __i915_gem_lmem_object_init(struct intel_memory_region *mem, - struct drm_i915_gem_object *obj, - resource_size_t size, - unsigned int flags); - #endif /* !__I915_GEM_LMEM_H */ diff --git 
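
A minimal usage sketch for the two lmem helpers documented above (illustrative only, not part of the patch): create an object with a forced minimum page size, then check residency under the object lock as the kernel-doc asks. The 2M/64K sizes are arbitrary.

#include "i915_drv.h"
#include "gem/i915_gem_lmem.h"

static int example_lmem_probe(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	int err;

	/* Force 64K backing pages; the final size is rounded up as needed. */
	obj = __i915_gem_object_create_lmem_with_ps(i915, SZ_2M, SZ_64K, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (!err) {
		/* Per the kernel-doc, check residency with the lock held. */
		if (!i915_gem_object_is_lmem(obj))
			err = -ENODEV;
		i915_gem_object_unlock(obj);
	}

	i915_gem_object_put(obj);
	return err;
}
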
a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 215326764606..5130e8ed9564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -19,6 +19,7 @@ #include "i915_gem_mman.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_gem_ttm.h" #include "i915_vma.h" static inline bool @@ -624,6 +625,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo; int err; + GEM_BUG_ON(obj->ops->mmap_offset || obj->ops->mmap_ops); + mmo = lookup_mmo(obj, mmap_type); if (mmo) goto out; @@ -642,7 +645,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto insert; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, + NULL); if (err) goto err; @@ -666,40 +670,58 @@ err: } static int -__assign_mmap_offset(struct drm_file *file, - u32 handle, +__assign_mmap_offset(struct drm_i915_gem_object *obj, enum i915_mmap_type mmap_type, - u64 *offset) + u64 *offset, struct drm_file *file) { - struct drm_i915_gem_object *obj; struct i915_mmap_offset *mmo; - int err; - obj = i915_gem_object_lookup(file, handle); - if (!obj) - return -ENOENT; + if (i915_gem_object_never_mmap(obj)) + return -ENODEV; - if (i915_gem_object_never_mmap(obj)) { - err = -ENODEV; - goto out; + if (obj->ops->mmap_offset) { + if (mmap_type != I915_MMAP_TYPE_FIXED) + return -ENODEV; + + *offset = obj->ops->mmap_offset(obj); + return 0; } + if (mmap_type == I915_MMAP_TYPE_FIXED) + return -ENODEV; + if (mmap_type != I915_MMAP_TYPE_GTT && !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) { - err = -ENODEV; - goto out; - } + !i915_gem_object_has_iomem(obj)) + return -ENODEV; mmo = mmap_offset_attach(obj, mmap_type, file); - if (IS_ERR(mmo)) { - err = PTR_ERR(mmo); - goto out; - } + if (IS_ERR(mmo)) + return PTR_ERR(mmo); *offset = drm_vma_node_offset_addr(&mmo->vma_node); - err = 0; -out: + return 0; +} + +static int +__assign_mmap_offset_handle(struct drm_file *file, + u32 handle, + enum i915_mmap_type mmap_type, + u64 *offset) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_lookup(file, handle); + if (!obj) + return -ENOENT; + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; + err = __assign_mmap_offset(obj, mmap_type, offset, file); + i915_gem_object_unlock(obj); +out_put: i915_gem_object_put(obj); return err; } @@ -712,14 +734,16 @@ i915_gem_dumb_mmap_offset(struct drm_file *file, { enum i915_mmap_type mmap_type; - if (boot_cpu_has(X86_FEATURE_PAT)) + if (HAS_LMEM(to_i915(dev))) + mmap_type = I915_MMAP_TYPE_FIXED; + else if (boot_cpu_has(X86_FEATURE_PAT)) mmap_type = I915_MMAP_TYPE_WC; else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) return -ENODEV; else mmap_type = I915_MMAP_TYPE_GTT; - return __assign_mmap_offset(file, handle, mmap_type, offset); + return __assign_mmap_offset_handle(file, handle, mmap_type, offset); } /** @@ -783,11 +807,15 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, type = I915_MMAP_TYPE_UC; break; + case I915_MMAP_OFFSET_FIXED: + type = I915_MMAP_TYPE_FIXED; + break; + default: return -EINVAL; } - return __assign_mmap_offset(file, args->handle, type, &args->offset); + return __assign_mmap_offset_handle(file, args->handle, type, &args->offset); } static void vm_open(struct vm_area_struct *vma) @@ 
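
For reference, the new I915_MMAP_OFFSET_FIXED path handled in the ioctl above can be exercised from userspace roughly as follows. This is a hypothetical sketch, not part of the patch; the I915_MMAP_OFFSET_FIXED define and struct drm_i915_gem_mmap_offset come from the i915 uapi header, and error handling is abbreviated.

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

static void *example_map_fixed(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_offset arg = {
		.handle = handle,
		.flags = I915_MMAP_OFFSET_FIXED,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
		return MAP_FAILED;

	/* The kernel picks the caching mode; userspace only maps the offset. */
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    drm_fd, (off_t)arg.offset);
}
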
-891,8 +919,18 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) * destroyed and will be invalid when the vma manager lock * is released. */ - mmo = container_of(node, struct i915_mmap_offset, vma_node); - obj = i915_gem_object_get_rcu(mmo->obj); + if (!node->driver_private) { + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); + + GEM_BUG_ON(obj && obj->ops->mmap_ops); + } else { + obj = i915_gem_object_get_rcu + (container_of(node, struct drm_i915_gem_object, + base.vma_node)); + + GEM_BUG_ON(obj && !obj->ops->mmap_ops); + } } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); rcu_read_unlock(); @@ -913,8 +951,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) return PTR_ERR(anon); } - vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_private_data = mmo; + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; /* * We keep the ref on mmo->obj, not vm_file, but we require @@ -928,6 +965,15 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) /* Drop the initial creation reference, the vma is now holding one. */ fput(anon); + if (obj->ops->mmap_ops) { + vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = obj->ops->mmap_ops; + vma->vm_private_data = node->driver_private; + return 0; + } + + vma->vm_private_data = mmo; + switch (mmo->mmap_type) { case I915_MMAP_TYPE_WC: vma->vm_page_prot = @@ -935,6 +981,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &vm_ops_cpu; break; + case I915_MMAP_TYPE_FIXED: + GEM_WARN_ON(1); + fallthrough; case I915_MMAP_TYPE_WB: vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &vm_ops_cpu; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 5706d471692d..6fb9afb65034 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -30,14 +30,10 @@ #include "i915_gem_context.h" #include "i915_gem_mman.h" #include "i915_gem_object.h" -#include "i915_globals.h" #include "i915_memcpy.h" #include "i915_trace.h" -static struct i915_global_object { - struct i915_global base; - struct kmem_cache *slab_objects; -} global; +static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; @@ -45,7 +41,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; - obj = kmem_cache_zalloc(global.slab_objects, GFP_KERNEL); + obj = kmem_cache_zalloc(slab_objects, GFP_KERNEL); if (!obj) return NULL; obj->base.funcs = &i915_gem_object_funcs; @@ -55,7 +51,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) void i915_gem_object_free(struct drm_i915_gem_object *obj) { - return kmem_cache_free(global.slab_objects, obj); + return kmem_cache_free(slab_objects, obj); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -172,7 +168,7 @@ static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *f } } -static void __i915_gem_free_object_rcu(struct rcu_head *head) +void __i915_gem_free_object_rcu(struct rcu_head *head) { struct drm_i915_gem_object *obj = container_of(head, typeof(*obj), rcu); @@ -208,59 +204,69 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) } } -static void __i915_gem_free_objects(struct drm_i915_private *i915, - struct llist_node *freed) +void __i915_gem_free_object(struct drm_i915_gem_object *obj) { - struct 
drm_i915_gem_object *obj, *on; + trace_i915_gem_object_destroy(obj); - llist_for_each_entry_safe(obj, on, freed, freed) { - trace_i915_gem_object_destroy(obj); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs insits it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); - if (!list_empty(&obj->vma.list)) { - struct i915_vma *vma; + __i915_vma_put(vma); - /* - * Note that the vma keeps an object reference while - * it is active, so it *should* not sleep while we - * destroy it. Our debug code errs insits it *might*. - * For the moment, play along. - */ spin_lock(&obj->vma.lock); - while ((vma = list_first_entry_or_null(&obj->vma.list, - struct i915_vma, - obj_link))) { - GEM_BUG_ON(vma->obj != obj); - spin_unlock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); + } - __i915_vma_put(vma); + __i915_gem_object_free_mmaps(obj); - spin_lock(&obj->vma.lock); - } - spin_unlock(&obj->vma.lock); - } + GEM_BUG_ON(!list_empty(&obj->lut_list)); - __i915_gem_object_free_mmaps(obj); + atomic_set(&obj->mm.pages_pin_count, 0); + __i915_gem_object_put_pages(obj); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); + bitmap_free(obj->bit_17); - GEM_BUG_ON(!list_empty(&obj->lut_list)); + if (obj->base.import_attach) + drm_prime_gem_destroy(&obj->base, NULL); - atomic_set(&obj->mm.pages_pin_count, 0); - __i915_gem_object_put_pages(obj); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - bitmap_free(obj->bit_17); + drm_gem_free_mmap_offset(&obj->base); - if (obj->base.import_attach) - drm_prime_gem_destroy(&obj->base, NULL); + if (obj->ops->release) + obj->ops->release(obj); - drm_gem_free_mmap_offset(&obj->base); + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); - if (obj->ops->release) - obj->ops->release(obj); + if (obj->shares_resv_from) + i915_vm_resv_put(obj->shares_resv_from); +} - if (obj->mm.n_placements > 1) - kfree(obj->mm.placements); +static void __i915_gem_free_objects(struct drm_i915_private *i915, + struct llist_node *freed) +{ + struct drm_i915_gem_object *obj, *on; - if (obj->shares_resv_from) - i915_vm_resv_put(obj->shares_resv_from); + llist_for_each_entry_safe(obj, on, freed, freed) { + might_sleep(); + if (obj->ops->delayed_free) { + obj->ops->delayed_free(obj); + continue; + } + __i915_gem_free_object(obj); /* But keep the pointer alive for RCU-protected lookups */ call_rcu(&obj->rcu, __i915_gem_free_object_rcu); @@ -318,6 +324,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) * worker and performing frees directly from subsequent allocations for * crude but effective memory throttling. */ + if (llist_add(&obj->freed, &i915->mm.free_list)) queue_work(i915->wq, &i915->mm.free_work); } @@ -410,34 +417,254 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, return 0; } -void i915_gem_init__objects(struct drm_i915_private *i915) +/** + * i915_gem_object_evictable - Whether object is likely evictable after unbind. + * @obj: The object to check + * + * This function checks whether the object is likely unvictable after unbind. + * If the object is not locked when checking, the result is only advisory. 
+ * If the object is locked when checking, and the function returns true, + * then an eviction should indeed be possible. But since unlocked vma + * unpinning and unbinding is currently possible, the object can actually + * become evictable even if this function returns false. + * + * Return: true if the object may be evictable. False otherwise. + */ +bool i915_gem_object_evictable(struct drm_i915_gem_object *obj) { - INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + struct i915_vma *vma; + int pin_count = atomic_read(&obj->mm.pages_pin_count); + + if (!pin_count) + return true; + + spin_lock(&obj->vma.lock); + list_for_each_entry(vma, &obj->vma.list, obj_link) { + if (i915_vma_is_pinned(vma)) { + spin_unlock(&obj->vma.lock); + return false; + } + if (atomic_read(&vma->pages_count)) + pin_count--; + } + spin_unlock(&obj->vma.lock); + GEM_WARN_ON(pin_count < 0); + + return pin_count == 0; +} + +/** + * i915_gem_object_migratable - Whether the object is migratable out of the + * current region. + * @obj: Pointer to the object. + * + * Return: Whether the object is allowed to be resident in other + * regions than the current while pages are present. + */ +bool i915_gem_object_migratable(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + + if (!mr) + return false; + + return obj->mm.n_placements > 1; } -static void i915_global_objects_shrink(void) +/** + * i915_gem_object_has_struct_page - Whether the object is page-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the page backing may change under the caller. + * + * Return: True if page-backed, false otherwise. + */ +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - kmem_cache_shrink(global.slab_objects); +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE; } -static void i915_global_objects_exit(void) +/** + * i915_gem_object_has_iomem - Whether the object is iomem-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the iomem backing may change under the caller. + * + * Return: True if iomem-backed, false otherwise. + */ +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) { - kmem_cache_destroy(global.slab_objects); +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_IOMEM; } -static struct i915_global_object global = { { - .shrink = i915_global_objects_shrink, - .exit = i915_global_objects_exit, -} }; +/** + * i915_gem_object_can_migrate - Whether an object likely can be migrated + * + * @obj: The object to migrate + * @id: The region intended to migrate to + * + * Check whether the object backend supports migration to the + * given region. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate objects and might be slightly less permissive + * than i915_gem_object_migrate() when it comes to objects with the + * I915_BO_ALLOC_USER flag set. + * + * Return: true if migration is possible, false otherwise. 
+ */ +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int num_allowed = obj->mm.n_placements; + struct intel_memory_region *mr; + unsigned int i; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + + mr = i915->mm.regions[id]; + if (!mr) + return false; + + if (obj->mm.region == mr) + return true; + + if (!i915_gem_object_evictable(obj)) + return false; + + if (!obj->ops->migrate) + return false; + + if (!(obj->flags & I915_BO_ALLOC_USER)) + return true; + + if (num_allowed == 0) + return false; + + for (i = 0; i < num_allowed; ++i) { + if (mr == obj->mm.placements[i]) + return true; + } + + return false; +} + +/** + * i915_gem_object_migrate - Migrate an object to the desired region id + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. + * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * Note: the @ww parameter is not used yet, but included to make sure + * callers put some effort into obtaining a valid ww ctx if one is + * available. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. + */ +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + assert_object_held(obj); + + mr = i915->mm.regions[id]; + GEM_BUG_ON(!mr); + + if (!i915_gem_object_can_migrate(obj, id)) + return -EINVAL; + + if (!obj->ops->migrate) { + if (GEM_WARN_ON(obj->mm.region != mr)) + return -EINVAL; + return 0; + } + + return obj->ops->migrate(obj, mr); +} + +/** + * i915_gem_object_placement_possible - Check whether the object can be + * placed at certain memory type + * @obj: Pointer to the object + * @type: The memory type to check + * + * Return: True if the object can be placed in @type. False otherwise. 
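
Putting the migration kernel-doc above together, a caller would drive i915_gem_object_migrate() under a ww context roughly like this. Illustrative sketch only, not part of the patch: i915_gem_object_wait_migration() is declared in the header hunk further below, and the 0 flags value passed to it is an assumption.

#include "i915_drv.h"
#include "gem/i915_gem_object.h"

static int example_migrate_to_lmem(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM))
		err = -EOPNOTSUPP;
	if (!err)
		err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM);
	if (!err)
		/* The copy may still be in flight; wait for it to finish. */
		err = i915_gem_object_wait_migration(obj, 0);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
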
+ */ +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type) +{ + unsigned int i; + + if (!obj->mm.n_placements) { + switch (type) { + case INTEL_MEMORY_LOCAL: + return i915_gem_object_has_iomem(obj); + case INTEL_MEMORY_SYSTEM: + return i915_gem_object_has_pages(obj); + default: + /* Ignore stolen for now */ + GEM_BUG_ON(1); + return false; + } + } + + for (i = 0; i < obj->mm.n_placements; i++) { + if (obj->mm.placements[i]->type == type) + return true; + } + + return false; +} + +void i915_gem_init__objects(struct drm_i915_private *i915) +{ + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); +} + +void i915_objects_module_exit(void) +{ + kmem_cache_destroy(slab_objects); +} -int __init i915_global_objects_init(void) +int __init i915_objects_module_init(void) { - global.slab_objects = - KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); - if (!global.slab_objects) + slab_objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); + if (!slab_objects) return -ENOMEM; - i915_global_register(&global.base); return 0; } @@ -450,6 +677,7 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" +#include "selftests/i915_gem_migrate.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7c0eb425cb3b..48112b9d76df 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -12,10 +12,14 @@ #include <drm/drm_device.h> #include "display/intel_frontbuffer.h" +#include "intel_memory_region.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" +#include "i915_gem_ww.h" #include "i915_vma_types.h" +enum intel_region_id; + /* * XXX: There is a prevalence of the assumption that we fit the * object's page count inside a 32bit _signed_ variable. Let's document @@ -44,6 +48,9 @@ static inline bool i915_gem_object_size_2big(u64 size) void i915_gem_init__objects(struct drm_i915_private *i915); +void i915_objects_module_exit(void); +int i915_objects_module_init(void); + struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); @@ -57,6 +64,10 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, const void *data, resource_size_t size); +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; @@ -147,7 +158,7 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) /* * If more than one potential simultaneous locker, assert held. */ -static inline void assert_object_held_shared(struct drm_i915_gem_object *obj) +static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj) { /* * Note mm list lookup is protected by @@ -169,13 +180,17 @@ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, else ret = dma_resv_lock(obj->base.resv, ww ? 
&ww->ctx : NULL); - if (!ret && ww) + if (!ret && ww) { + i915_gem_object_get(obj); list_add_tail(&obj->obj_link, &ww->obj_list); + } if (ret == -EALREADY) ret = 0; - if (ret == -EDEADLK) + if (ret == -EDEADLK) { + i915_gem_object_get(obj); ww->contended = obj; + } return ret; } @@ -200,6 +215,9 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) { + if (obj->ops->adjust_lru) + obj->ops->adjust_lru(obj); + dma_resv_unlock(obj->base.resv); } @@ -258,17 +276,9 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj, return obj->ops->flags & flags; } -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->flags & I915_BO_ALLOC_STRUCT_PAGE; -} +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj); -static inline bool -i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); -} +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) @@ -339,22 +349,22 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset, bool allow_alloc); + unsigned int *offset, bool dma); static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc); + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, false); } static inline struct scatterlist * i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc); + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, true); } struct page * @@ -587,6 +597,27 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); +void __i915_gem_free_object_rcu(struct rcu_head *head); + +void __i915_gem_free_object(struct drm_i915_gem_object *obj); + +bool i915_gem_object_evictable(struct drm_i915_gem_object *obj); + +bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); + +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id); + +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id); + +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags); + +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) @@ -596,14 +627,12 @@ i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj); -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj); #else static inline bool i915_gem_object_is_userptr(struct 
drm_i915_gem_object *obj) { return false; } static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } -static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); } static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c deleted file mode 100644 index 3e28c68fda3e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ /dev/null @@ -1,461 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "gt/intel_gpu_commands.h" -#include "gt/intel_gt.h" -#include "gt/intel_gt_buffer_pool.h" -#include "gt/intel_ring.h" -#include "i915_gem_clflush.h" -#include "i915_gem_object_blt.h" - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 offset; - u64 count; - u64 rem; - u32 size; - u32 *cmd; - int err; - - GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(vma->size, block_size), block_size); - size = (1 + 8 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = vma->size; - offset = vma->node.start; - - do { - u32 size = min_t(u64, rem, block_size); - - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = offset; - *cmd++ = value; - } - - /* Allow ourselves to be preempted in between blocks. 
*/ - *cmd++ = MI_ARB_CHECK; - - offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) -{ - int err; - - err = i915_request_await_object(rq, vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, 0); - if (unlikely(err)) - return err; - - return intel_gt_buffer_pool_mark_active(vma->private, rq); -} - -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) -{ - i915_vma_unpin(vma); - intel_gt_buffer_pool_put(vma->private); - intel_engine_pm_put(ce->engine); -} - -static int -move_obj_to_gpu(struct drm_i915_gem_object *obj, - struct i915_request *rq, - bool write) -{ - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - - return i915_request_await_object(rq, obj, write); -} - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value) -{ - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, ce->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(obj, &ww); - if (err) - goto out; - - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - batch = intel_emit_vma_fill_blt(ce, vma, &ww, value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_vma; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - err = move_obj_to_gpu(vma->obj, rq, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (unlikely(err)) - goto out_request; - - if (ce->engine->emit_init_breadcrumb) - err = ce->engine->emit_init_breadcrumb(rq); - - if (likely(!err)) - err = ce->engine->emit_bb_start(rq, - batch->node.start, - batch->node.size, - 0); -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_vma: - i915_vma_unpin(vma); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -/* Wa_1209644611:icl,ehl */ -static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) -{ - u32 height = size >> PAGE_SHIFT; - - if (GRAPHICS_VER(i915) != 11) - return false; - - return height % 4 == 3 && height <= 8; -} - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - struct i915_vma *src, - struct i915_vma *dst) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 src_offset, dst_offset; - u64 count, rem; - u32 size, *cmd; - int err; - - GEM_BUG_ON(src->size != dst->size); - - 
GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(dst->size, block_size), block_size); - size = (1 + 11 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = src->size; - src_offset = src->node.start; - dst_offset = dst->node.start; - - do { - size = min_t(u64, rem, block_size); - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 9 && - !wa_1209644611_applies(i915, size)) { - *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); - *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else { - *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; - *cmd++ = dst_offset; - *cmd++ = PAGE_SIZE; - *cmd++ = src_offset; - } - - /* Allow ourselves to be preempted in between blocks. 
*/ - *cmd++ = MI_ARB_CHECK; - - src_offset += size; - dst_offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce) -{ - struct i915_address_space *vm = ce->vm; - struct i915_vma *vma[2], *batch; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - int err, i; - - vma[0] = i915_vma_instance(src, vm, NULL); - if (IS_ERR(vma[0])) - return PTR_ERR(vma[0]); - - vma[1] = i915_vma_instance(dst, vm, NULL); - if (IS_ERR(vma[1])) - return PTR_ERR(vma[1]); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(src, &ww); - if (!err) - err = i915_gem_object_lock(dst, &ww); - if (!err) - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_unpin_src; - - batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unpin_dst; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - err = move_obj_to_gpu(vma[i]->obj, rq, i); - if (unlikely(err)) - goto out_request; - } - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - unsigned int flags = i ? 
EXEC_OBJECT_WRITE : 0; - - err = i915_vma_move_to_active(vma[i], rq, flags); - if (unlikely(err)) - goto out_request; - } - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_unpin_dst: - i915_vma_unpin(vma[1]); -out_unpin_src: - i915_vma_unpin(vma[0]); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_object_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h deleted file mode 100644 index 2409fdcccf0e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_GEM_OBJECT_BLT_H__ -#define __I915_GEM_OBJECT_BLT_H__ - -#include <linux/types.h> - -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_vma.h" - -struct drm_i915_gem_object; -struct i915_gem_ww_ctx; - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value); - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - struct i915_vma *src, - struct i915_vma *dst); - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq); -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma); - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value); - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce); - -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d047ea126029..2471f36aaff3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -18,6 +18,7 @@ struct drm_i915_gem_object; struct intel_fronbuffer; +struct intel_memory_region; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -33,10 +34,9 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) -#define I915_GEM_OBJECT_IS_PROXY BIT(3) -#define I915_GEM_OBJECT_NO_MMAP BIT(4) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_NO_MMAP BIT(3) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set @@ -61,13 +61,117 @@ struct drm_i915_gem_object_ops { const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); + u64 (*mmap_offset)(struct drm_i915_gem_object *obj); int (*dmabuf_export)(struct drm_i915_gem_object *obj); + + /** + * adjust_lru - notify that the madvise value was updated + * @obj: The gem object + * + * The madvise value may have been updated, or object was recently + * referenced so act accordingly (Perhaps changing an LRU list etc). + */ + void (*adjust_lru)(struct drm_i915_gem_object *obj); + + /** + * delayed_free - Override the default delayed free implementation + */ + void (*delayed_free)(struct drm_i915_gem_object *obj); + + /** + * migrate - Migrate object to a different region either for + * pinning or for as long as the object lock is held. + */ + int (*migrate)(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr); + void (*release)(struct drm_i915_gem_object *obj); + const struct vm_operations_struct *mmap_ops; const char *name; /* friendly name for debug, e.g. lockdep classes */ }; +/** + * enum i915_cache_level - The supported GTT caching values for system memory + * pages. + * + * These translate to some special GTT PTE bits when binding pages into some + * address space. It also determines whether an object, or rather its pages are + * coherent with the GPU, when also reading or writing through the CPU cache + * with those pages. + * + * Userspace can also control this through struct drm_i915_gem_caching. + */ +enum i915_cache_level { + /** + * @I915_CACHE_NONE: + * + * GPU access is not coherent with the CPU cache. If the cache is dirty + * and we need the underlying pages to be coherent with some later GPU + * access then we need to manually flush the pages. + * + * On shared LLC platforms reads and writes through the CPU cache are + * still coherent even with this setting. See also + * &drm_i915_gem_object.cache_coherent for more details. Due to this we + * should only ever use uncached for scanout surfaces, otherwise we end + * up over-flushing in some places. + * + * This is the default on non-LLC platforms. + */ + I915_CACHE_NONE = 0, + /** + * @I915_CACHE_LLC: + * + * GPU access is coherent with the CPU cache. If the cache is dirty, + * then the GPU will ensure that access remains coherent, when both + * reading and writing through the CPU cache. GPU writes can dirty the + * CPU cache. + * + * Not used for scanout surfaces. + * + * Applies to both platforms with shared LLC(HAS_LLC), and snooping + * based platforms(HAS_SNOOP). + * + * This is the default on shared LLC platforms. The only exception is + * scanout objects, where the display engine is not coherent with the + * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is + * automatically applied by the kernel in pin_for_display, if userspace + * has not done so already. + */ + I915_CACHE_LLC, + /** + * @I915_CACHE_L3_LLC: + * + * Explicitly enable the Gfx L3 cache, with coherent LLC. + * + * The Gfx L3 sits between the domain specific caches, e.g + * sampler/render caches, and the larger LLC. LLC is coherent with the + * GPU, but L3 is only visible to the GPU, so likely needs to be flushed + * when the workload completes. + * + * Not used for scanout surfaces. + * + * Only exposed on some gen7 + GGTT. More recent hardware has dropped + * this explicit setting, where it should now be enabled by default. 
+ */ + I915_CACHE_L3_LLC, + /** + * @I915_CACHE_WT: + * + * Write-through. Used for scanout surfaces. + * + * The GPU can utilise the caches, while still having the display engine + * be coherent with GPU writes, as a result we don't need to flush the + * CPU caches when moving out of the render domain. This is the default + * setting chosen by the kernel, if supported by the HW, otherwise we + * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU + * cache still need to be flushed, to remain coherent with the display + * engine. + */ + I915_CACHE_WT, +}; + enum i915_map_type { I915_MAP_WB = 0, I915_MAP_WC, @@ -81,6 +185,7 @@ enum i915_mmap_type { I915_MMAP_TYPE_WC, I915_MMAP_TYPE_WB, I915_MMAP_TYPE_UC, + I915_MMAP_TYPE_FIXED, }; struct i915_mmap_offset { @@ -185,23 +290,138 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_STRUCT_PAGE BIT(2) -#define I915_BO_ALLOC_CPU_CLEAR BIT(3) +#define I915_BO_ALLOC_CPU_CLEAR BIT(2) +#define I915_BO_ALLOC_USER BIT(3) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ - I915_BO_ALLOC_STRUCT_PAGE | \ - I915_BO_ALLOC_CPU_CLEAR) + I915_BO_ALLOC_CPU_CLEAR | \ + I915_BO_ALLOC_USER) #define I915_BO_READONLY BIT(4) #define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */ - /* - * Is the object to be mapped as read-only to the GPU - * Only honoured if hardware has relevant pte bit + /** + * @mem_flags - Mutable placement-related flags + * + * These are flags that indicate specifics of the memory region + * the object is currently in. As such they are only stable + * either under the object lock or if the object is pinned. + */ + unsigned int mem_flags; +#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */ +#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */ + /** + * @cache_level: The desired GTT caching level. + * + * See enum i915_cache_level for possible values, along with what + * each does. */ unsigned int cache_level:3; - unsigned int cache_coherent:2; + /** + * @cache_coherent: + * + * Track whether the pages are coherent with the GPU if reading or + * writing through the CPU caches. The largely depends on the + * @cache_level setting. + * + * On platforms which don't have the shared LLC(HAS_SNOOP), like on Atom + * platforms, coherency must be explicitly requested with some special + * GTT caching bits(see enum i915_cache_level). When enabling coherency + * it does come at a performance and power cost on such platforms. On + * the flip side the kernel does not need to manually flush any buffers + * which need to be coherent with the GPU, if the object is not coherent + * i.e @cache_coherent is zero. + * + * On platforms that share the LLC with the CPU(HAS_LLC), all GT memory + * access will automatically snoop the CPU caches(even with CACHE_NONE). + * The one exception is when dealing with the display engine, like with + * scanout surfaces. To handle this the kernel will always flush the + * surface out of the CPU caches when preparing it for scanout. Also + * note that since scanout surfaces are only ever read by the display + * engine we only need to care about flushing any writes through the CPU + * cache, reads on the other hand will always be coherent. + * + * Something strange here is why @cache_coherent is not a simple + * boolean, i.e coherent vs non-coherent. 
The reasoning for this is back + * to the display engine not being fully coherent. As a result scanout + * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT. + * In the case of seeing I915_CACHE_NONE the kernel makes the assumption + * that this is likely a scanout surface, and will set @cache_coherent + * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared + * LLC. The kernel uses this to always flush writes through the CPU + * cache as early as possible, where it can, in effect keeping + * @cache_dirty clean, so we can potentially avoid stalling when + * flushing the surface just before doing the scanout. This does mean + * we might unnecessarily flush non-scanout objects in some places, but + * the default assumption is that all normal objects should be using + * I915_CACHE_LLC, at least on platforms with the shared LLC. + * + * Supported values: + * + * I915_BO_CACHE_COHERENT_FOR_READ: + * + * On shared LLC platforms, we use this for special scanout surfaces, + * where the display engine is not coherent with the CPU cache. As such + * we need to ensure we flush any writes before doing the scanout. As an + * optimisation we try to flush any writes as early as possible to avoid + * stalling later. + * + * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC + * platforms, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ + * + * While for normal objects that are fully coherent, including special + * scanout surfaces marked as I915_CACHE_WT, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ | + * I915_BO_CACHE_COHERENT_FOR_WRITE + * + * And then for objects that are not coherent at all we use: + * + * cache_coherent = 0 + * + * I915_BO_CACHE_COHERENT_FOR_WRITE: + * + * When writing through the CPU cache, the GPU is still coherent. Note + * that this also implies I915_BO_CACHE_COHERENT_FOR_READ. + */ #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) + unsigned int cache_coherent:2; + + /** + * @cache_dirty: + * + * Track if we are we dirty with writes through the CPU cache for this + * object. As a result reading directly from main memory might yield + * stale data. + * + * This also ties into whether the kernel is tracking the object as + * coherent with the GPU, as per @cache_coherent, as it determines if + * flushing might be needed at various points. + * + * Another part of @cache_dirty is managing flushing when first + * acquiring the pages for system memory, at this point the pages are + * considered foreign, so the default assumption is that the cache is + * dirty, for example the page zeroing done by the kernel might leave + * writes though the CPU cache, or swapping-in, while the actual data in + * main memory is potentially stale. Note that this is a potential + * security issue when dealing with userspace objects and zeroing. Now, + * whether we actually need apply the big sledgehammer of flushing all + * the pages on acquire depends on if @cache_coherent is marked as + * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent + * for both reads and writes though the CPU cache. + * + * Note that on shared LLC platforms we still apply the heavy flush for + * I915_CACHE_NONE objects, under the assumption that this is going to + * be used for scanout. 
+ * + * Update: On some hardware there is now also the 'Bypass LLC' MOCS + * entry, which defeats our @cache_coherent tracking, since userspace + * can freely bypass the CPU cache when touching the pages with the GPU, + * where the kernel is completely unaware. On such platform we need + * apply the sledgehammer-on-acquire regardless of the @cache_coherent. + */ unsigned int cache_dirty:1; /** @@ -247,9 +467,10 @@ struct drm_i915_gem_object { struct intel_memory_region *region; /** - * Memory manager node allocated for this object. + * Memory manager resource allocated for this object. Only + * needed for the mock region. */ - void *st_mm_node; + struct ttm_resource *res; /** * Element within memory_region->objects or region->purgeable @@ -310,6 +531,12 @@ struct drm_i915_gem_object { bool dirty:1; } mm; + struct { + struct sg_table *cached_io_st; + struct i915_gem_object_page_iter get_io_page; + bool created:1; + } ttm; + /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 6444e097016d..8eb1c3a6fc9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, dma_addr_t addr; void *vaddr; - if (type != I915_MAP_WC) - return ERR_PTR(-ENODEV); + GEM_BUG_ON(type != I915_MAP_WC); if (n_pfn > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ @@ -351,7 +350,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, int err; if (!i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_iomem(obj)) return ERR_PTR(-ENXIO); assert_object_held(obj); @@ -374,6 +373,34 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + /* + * For discrete our CPU mappings needs to be consistent in order to + * function correctly on !x86. When mapping things through TTM, we use + * the same rules to determine the caching type. + * + * The caching rules, starting from DG1: + * + * - If the object can be placed in device local-memory, then the + * pages should be allocated and mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, + * with the guarantee that everything is also coherent with the + * GPU. + * + * Internal users of lmem are already expected to get this right, so no + * fudging needed there. 
+ */ + if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) { + if (type != I915_MAP_WC && !obj->mm.n_placements) { + ptr = ERR_PTR(-ENODEV); + goto err_unpin; + } + + type = I915_MAP_WC; + } else if (IS_DGFX(to_i915(obj->base.dev))) { + type = I915_MAP_WB; + } + ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { @@ -467,9 +494,8 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, unsigned int *offset, - bool allow_alloc) + bool dma) { - const bool dma = iter == &obj->mm.get_dma_page; struct scatterlist *sg; unsigned int idx, count; @@ -490,9 +516,6 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (n < READ_ONCE(iter->sg_idx)) goto lookup; - if (!allow_alloc) - goto manual_lookup; - mutex_lock(&iter->lock); /* We prefer to reuse the last sg so that repeated lookup of this @@ -542,16 +565,7 @@ scan: if (unlikely(n < idx)) /* insertion completed by another thread */ goto lookup; - goto manual_walk; - -manual_lookup: - idx = 0; - sg = obj->mm.pages->sgl; - count = __sg_page_count(sg); - -manual_walk: - /* - * In case we failed to insert the entry into the radixtree, we need + /* In case we failed to insert the entry into the radixtree, we need * to look beyond the current sg. */ while (idx + count <= n) { @@ -598,7 +612,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - sg = i915_gem_object_get_sg(obj, n, &offset, true); + sg = i915_gem_object_get_sg(obj, n, &offset); return nth_page(sg_page(sg), offset); } @@ -624,7 +638,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg_dma(obj, n, &offset, true); + sg = i915_gem_object_get_sg_dma(obj, n, &offset); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index be72ad0634ba..7986612f48fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -76,7 +76,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); /* We're no longer struct page backed */ - obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE; + obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; __i915_gem_object_set_pages(obj, st, sg->length); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index f25e6646c5b7..1f557b2178ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -13,16 +13,8 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, { obj->mm.region = intel_memory_region_get(mem); - if (obj->base.size <= mem->min_page_size) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - mutex_lock(&mem->objects.lock); - - if (obj->flags & I915_BO_ALLOC_VOLATILE) - list_add(&obj->mm.region_link, &mem->objects.purgeable); - else - list_add(&obj->mm.region_link, &mem->objects.list); - + list_add(&obj->mm.region_link, &mem->objects.list); mutex_unlock(&mem->objects.lock); } @@ -40,9 +32,11 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct 
drm_i915_gem_object *obj; + resource_size_t default_page_size; int err; /* @@ -56,7 +50,14 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!mem) return ERR_PTR(-ENODEV); - size = round_up(size, mem->min_page_size); + default_page_size = mem->min_page_size; + if (page_size) + default_page_size = page_size; + + GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); + GEM_BUG_ON(default_page_size < PAGE_SIZE); + + size = round_up(size, default_page_size); GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); @@ -68,7 +69,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!obj) return ERR_PTR(-ENOMEM); - err = mem->ops->init_object(mem, obj, size, flags); + err = mem->ops->init_object(mem, obj, size, page_size, flags); if (err) goto err_object_free; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index 84fcb3297400..1008e580a89a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -19,6 +19,7 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d16c4462fda..11f072193f3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -182,6 +182,24 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); + /* + * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it + * possible for userspace to bypass the GTT caching bits set by the + * kernel, as per the given object cache_level. This is troublesome + * since the heavy flush we apply when first gathering the pages is + * skipped if the kernel thinks the object is coherent with the GPU. As + * a result it might be possible to bypass the cache and read the + * contents of the page directly, which could be stale data. If it's + * just a case of userspace shooting themselves in the foot then so be + * it, but since i915 takes the stance of always zeroing memory before + * handing it to userspace, we need to prevent this. + * + * By setting cache_dirty here we make the clflush in set_pages + * unconditional on such platforms. 
+ */ + if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER) + obj->cache_dirty = true; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -302,6 +320,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ struct pagevec pvec; struct page *page; + GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -444,7 +463,7 @@ shmem_pread(struct drm_i915_gem_object *obj, static void shmem_release(struct drm_i915_gem_object *obj) { - if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE) + if (i915_gem_object_has_struct_page(obj)) i915_gem_object_release_memory_region(obj); fput(obj->base.filp); @@ -489,6 +508,7 @@ static int __create_shmem(struct drm_i915_private *i915, static int shmem_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; @@ -513,9 +533,8 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -548,7 +567,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], - size, 0); + size, 0, 0); } /* Allocate a new GEM object and fill it with the supplied data */ @@ -561,6 +580,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, resource_size_t offset; int err; + GEM_WARN_ON(IS_DGFX(dev_priv)); obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index f4fb68e8955a..e382b7f2353b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -62,6 +62,7 @@ static void try_to_writeback(struct drm_i915_gem_object *obj, switch (obj->mm.madv) { case I915_MADV_DONTNEED: i915_gem_object_truncate(obj); + return; case __I915_MADV_PURGED: return; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 34070d0ea325..ddd37ccb1362 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -669,6 +669,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct drm_i915_private *i915 = mem->i915; @@ -707,7 +708,7 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.stolen_region, size, 0); + return i915_gem_object_create_region(i915->mm.stolen_region, size, 0, 0); } static int init_stolen_smem(struct intel_memory_region *mem) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c new file mode 100644 index 000000000000..771eb2963123 --- /dev/null +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> + +#include "i915_drv.h" +#include "intel_memory_region.h" +#include "intel_region_ttm.h" + +#include "gem/i915_gem_object.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_mman.h" + +#include "gt/intel_migrate.h" +#include "gt/intel_engine_pm.h" + +#define I915_PL_LMEM0 TTM_PL_PRIV +#define I915_PL_SYSTEM TTM_PL_SYSTEM +#define I915_PL_STOLEN TTM_PL_VRAM +#define I915_PL_GGTT TTM_PL_TT + +#define I915_TTM_PRIO_PURGE 0 +#define I915_TTM_PRIO_NO_PAGES 1 +#define I915_TTM_PRIO_HAS_PAGES 2 + +/* + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs + */ +#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN + +/** + * struct i915_ttm_tt - TTM page vector with additional private information + * @ttm: The base TTM page vector. + * @dev: The struct device used for dma mapping and unmapping. + * @cached_st: The cached scatter-gather table. + * + * Note that DMA may be going on right up to the point where the page- + * vector is unpopulated in delayed destroy. Hence keep the + * scatter-gather table mapped and cached up to that point. This is + * different from the cached gem object io scatter-gather table which + * doesn't have an associated dma mapping. + */ +struct i915_ttm_tt { + struct ttm_tt ttm; + struct device *dev; + struct sg_table *cached_st; +}; + +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, +}; + +static struct ttm_placement i915_sys_placement = { + .num_placement = 1, + .placement = &sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + +static int i915_ttm_err_to_gem(int err) +{ + /* Fastpath */ + if (likely(!err)) + return 0; + + switch (err) { + case -EBUSY: + /* + * TTM likes to convert -EDEADLK to -EBUSY, and wants us to + * restart the operation, since we don't record the contending + * lock. We use -EAGAIN to restart. + */ + return -EAGAIN; + case -ENOSPC: + /* + * Memory type / region is full, and we can't evict. + * Except possibly system, that returns -ENOMEM; + */ + return -ENXIO; + default: + break; + } + + return err; +} + +static bool gpu_binds_iomem(struct ttm_resource *mem) +{ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static bool cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && + ttm->caching == ttm_cached) ? I915_CACHE_LLC : + I915_CACHE_NONE; +} + +static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); + +static enum ttm_caching +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) +{ + /* + * Objects only allowed in system get cached cpu-mappings. + * Other objects get WC mapping for now. Even if in system. 
+ */ + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && + obj->mm.n_placements <= 1) + return ttm_cached; + + return ttm_write_combined; +} + +static void +i915_ttm_place_from_region(const struct intel_memory_region *mr, + struct ttm_place *place, + unsigned int flags) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = intel_region_to_ttm_type(mr); + + if (flags & I915_BO_ALLOC_CONTIGUOUS) + place->flags = TTM_PL_FLAG_CONTIGUOUS; +} + +static void +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, + struct ttm_place *requested, + struct ttm_place *busy, + struct ttm_placement *placement) +{ + unsigned int num_allowed = obj->mm.n_placements; + unsigned int flags = obj->flags; + unsigned int i; + + placement->num_placement = 1; + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : + obj->mm.region, requested, flags); + + /* Cache this on object? */ + placement->num_busy_placement = num_allowed; + for (i = 0; i < placement->num_busy_placement; ++i) + i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags); + + if (num_allowed == 0) { + *busy = *requested; + placement->num_busy_placement = 1; + } + + placement->placement = requested; + placement->busy_placement = busy; +} + +static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, + uint32_t page_flags) +{ + struct ttm_resource_manager *man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct i915_ttm_tt *i915_tt; + int ret; + + i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); + if (!i915_tt) + return NULL; + + if (obj->flags & I915_BO_ALLOC_CPU_CLEAR && + man->use_tt) + page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; + + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, + i915_ttm_select_tt_caching(obj)); + if (ret) { + kfree(i915_tt); + return NULL; + } + + i915_tt->dev = obj->base.dev->dev; + + return &i915_tt->ttm; +} + +static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + if (i915_tt->cached_st) { + dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st, + DMA_BIDIRECTIONAL, 0); + sg_free_table(i915_tt->cached_st); + kfree(i915_tt->cached_st); + i915_tt->cached_st = NULL; + } + ttm_pool_free(&bdev->pool, ttm); +} + +static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + ttm_tt_destroy_common(bdev, ttm); + ttm_tt_fini(ttm); + kfree(i915_tt); +} + +static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, + const struct ttm_place *place) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + /* Will do for now. 
Our pinned objects are still on TTM's LRU lists */ + return i915_gem_object_evictable(obj); +} + +static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, + struct ttm_placement *placement) +{ + *placement = i915_sys_placement; +} + +static int i915_ttm_move_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret; + + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (ret) + return ret; + + ret = __i915_gem_object_put_pages(obj); + if (ret) + return ret; + + return 0; +} + +static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + if (!obj->ttm.cached_io_st) + return; + + rcu_read_lock(); + radix_tree_for_each_slot(slot, &obj->ttm.get_io_page.radix, &iter, 0) + radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index); + rcu_read_unlock(); + + sg_free_table(obj->ttm.cached_io_st); + kfree(obj->ttm.cached_io_st); + obj->ttm.cached_io_st = NULL; +} + +static void +i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + +static void i915_ttm_purge(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement place = {}; + int ret; + + if (obj->mm.madv == __I915_MADV_PURGED) + return; + + /* TTM's purge interface. Note that we might be reentering. 
*/ + ret = ttm_bo_validate(bo, &place, &ctx); + if (!ret) { + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); + i915_ttm_free_cached_io_st(obj); + obj->mm.madv = __I915_MADV_PURGED; + } +} + +static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret = i915_ttm_move_notify(bo); + + GEM_WARN_ON(ret); + GEM_WARN_ON(obj->ttm.cached_io_st); + if (!ret && obj->mm.madv != I915_MADV_WILLNEED) + i915_ttm_purge(obj); +} + +static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + if (likely(obj)) { + /* This releases all gem object bindings to the backend. */ + i915_ttm_free_cached_io_st(obj); + __i915_gem_free_object(obj); + } +} + +static struct intel_memory_region * +i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + + /* There's some room for optimization here... */ + GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && + ttm_mem_type < I915_PL_LMEM0); + if (ttm_mem_type == I915_PL_SYSTEM) + return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, + 0); + + return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, + ttm_mem_type - I915_PL_LMEM0); +} + +static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + struct scatterlist *sg; + struct sg_table *st; + int ret; + + if (i915_tt->cached_st) + return i915_tt->cached_st; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + sg = __sg_alloc_table_from_pages + (st, ttm->pages, ttm->num_pages, 0, + (unsigned long)ttm->num_pages << PAGE_SHIFT, + i915_sg_segment_size(), NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { + kfree(st); + return ERR_CAST(sg); + } + + ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); + if (ret) { + sg_free_table(st); + kfree(st); + return ERR_PTR(ret); + } + + i915_tt->cached_st = st; + return st; +} + +static struct sg_table * +i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, + struct ttm_resource *res) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (!gpu_binds_iomem(res)) + return i915_ttm_tt_get_st(bo->ttm); + + /* + * If CPU mapping differs, we need to add the ttm_tt pages to + * the resulting st. Might make sense for GGTT. 
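For reference, a hypothetical sketch of the forward region-to-TTM-type mapping implied by the I915_PL_* defines at the top of this file and by i915_ttm_region() above; the real intel_region_to_ttm_type() helper lives in intel_region_ttm.c and may differ in detail:

static int region_to_ttm_type_sketch(const struct intel_memory_region *mem)
{
	if (mem->type == INTEL_MEMORY_SYSTEM)
		return I915_PL_SYSTEM;	/* TTM_PL_SYSTEM */

	/* Local-memory instances are numbered upwards from TTM_PL_PRIV (I915_PL_LMEM0) */
	return I915_PL_LMEM0 + mem->instance;
}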
+ */ + GEM_WARN_ON(!cpu_maps_iomem(res)); + return intel_region_ttm_resource_to_st(obj->mm.region, res); +} + +static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct ttm_resource_manager *src_man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct sg_table *src_st; + struct i915_request *rq; + struct ttm_tt *ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; + int ret; + + if (!i915->gt.migrate.context) + return -EINVAL; + + dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); + if (!ttm || !ttm_tt_is_populated(ttm)) { + if (bo->type == ttm_bo_type_kernel) + return -EINVAL; + + if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + return 0; + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), + 0, &rq); + + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } else { + src_st = src_man->use_tt ? i915_ttm_tt_get_st(ttm) : + obj->ttm.cached_io_st; + + src_level = i915_ttm_cache_level(i915, bo->resource, ttm); + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_copy(i915->gt.migrate.context, + NULL, src_st->sgl, src_level, + gpu_binds_iomem(bo->resource), + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), + &rq); + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } + + return ret; +} + +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct intel_memory_region *dst_reg, *src_reg; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + struct sg_table *dst_st; + int ret; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + /* Sync for now. We could do the actual copy async. */ + ret = ttm_bo_wait_ctx(bo, ctx); + if (ret) + return ret; + + ret = i915_ttm_move_notify(bo); + if (ret) + return ret; + + if (obj->mm.madv != I915_MADV_WILLNEED) { + i915_ttm_purge(obj); + ttm_resource_free(bo, &dst_mem); + return 0; + } + + /* Populate ttm with pages if needed. Typically system memory. */ + if (bo->ttm && (dst_man->use_tt || + (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ret) + return ret; + } + + dst_st = i915_ttm_resource_get_st(obj, dst_mem); + if (IS_ERR(dst_st)) + return PTR_ERR(dst_st); + + ret = i915_ttm_accel_move(bo, dst_mem, dst_st); + if (ret) { + /* If we start mapping GGTT, we can no longer use man::use_tt here. */ + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? 
+ ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } + /* Below dst_mem becomes bo->resource. */ + ttm_bo_move_sync_cleanup(bo, dst_mem); + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_free_cached_io_st(obj); + + if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { + obj->ttm.cached_io_st = dst_st; + obj->ttm.get_io_page.sg_pos = dst_st->sgl; + obj->ttm.get_io_page.sg_idx = 0; + } + + i915_ttm_adjust_gem_after_move(obj); + return 0; +} + +static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) +{ + if (!cpu_maps_iomem(mem)) + return 0; + + mem->bus.caching = ttm_write_combined; + mem->bus.is_iomem = true; + + return 0; +} + +static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, + unsigned long page_offset) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start; + struct scatterlist *sg; + unsigned int ofs; + + GEM_WARN_ON(bo->ttm); + + sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); + + return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; +} + +static struct ttm_device_funcs i915_ttm_bo_driver = { + .ttm_tt_create = i915_ttm_tt_create, + .ttm_tt_unpopulate = i915_ttm_tt_unpopulate, + .ttm_tt_destroy = i915_ttm_tt_destroy, + .eviction_valuable = i915_ttm_eviction_valuable, + .evict_flags = i915_ttm_evict_flags, + .move = i915_ttm_move, + .swap_notify = i915_ttm_swap_notify, + .delete_mem_notify = i915_ttm_delete_mem_notify, + .io_mem_reserve = i915_ttm_io_mem_reserve, + .io_mem_pfn = i915_ttm_io_mem_pfn, +}; + +/** + * i915_ttm_driver - Return a pointer to the TTM device funcs + * + * Return: Pointer to statically allocated TTM device funcs. + */ +struct ttm_device_funcs *i915_ttm_driver(void) +{ + return &i915_ttm_bo_driver; +} + +static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct sg_table *st; + int real_num_busy; + int ret; + + /* First try only the requested placement. No eviction. */ + real_num_busy = fetch_and_zero(&placement->num_busy_placement); + ret = ttm_bo_validate(bo, placement, &ctx); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + /* + * Anything that wants to restart the operation gets to + * do that. + */ + if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS || + ret == -EAGAIN) + return ret; + + /* + * If the initial attempt fails, allow all accepted placements, + * evicting if necessary. + */ + placement->num_busy_placement = real_num_busy; + ret = ttm_bo_validate(bo, placement, &ctx); + if (ret) + return i915_ttm_err_to_gem(ret); + } + + i915_ttm_adjust_lru(obj); + if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + } + + if (!i915_gem_object_has_pages(obj)) { + /* Object either has a page vector or is an iomem object */ + st = bo->ttm ? 
i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; + if (IS_ERR(st)) + return PTR_ERR(st); + + __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + } + + return ret; +} + +static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) +{ + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; + + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); + + /* Move to the requested placement. */ + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + + return __i915_ttm_get_pages(obj, &placement); +} + +/** + * DOC: Migration vs eviction + * + * GEM migration may not be the same as TTM migration / eviction. If + * the TTM core decides to evict an object it may be evicted to a + * TTM memory type that is not in the object's allowable GEM regions, or + * in fact theoretically to a TTM memory type that doesn't correspond to + * a GEM memory region. In that case the object's GEM region is not + * updated, and the data is migrated back to the GEM region at + * get_pages time. TTM may however set up CPU ptes to the object even + * when it is evicted. + * Gem forced migration using the i915_ttm_migrate() op, is allowed even + * to regions that are not in the object's list of allowable placements. + */ +static int i915_ttm_migrate(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr) +{ + struct ttm_place requested; + struct ttm_placement placement; + int ret; + + i915_ttm_place_from_region(mr, &requested, obj->flags); + placement.num_placement = 1; + placement.num_busy_placement = 1; + placement.placement = &requested; + placement.busy_placement = &requested; + + ret = __i915_ttm_get_pages(obj, &placement); + if (ret) + return ret; + + /* + * Reinitialize the region bindings. This is primarily + * required for objects where the new region is not in + * its allowable placements. + */ + if (obj->mm.region != mr) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + } + + return 0; +} + +static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *st) +{ + /* + * We're currently not called from a shrinker, so put_pages() + * typically means the object is about to destroyed, or called + * from move_notify(). So just avoid doing much for now. + * If the object is not destroyed next, The TTM eviction logic + * and shrinkers will move it out if needed. + */ + + i915_ttm_adjust_lru(obj); +} + +static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + /* + * Don't manipulate the TTM LRUs while in TTM bo destruction. + * We're called through i915_ttm_delete_mem_notify(). + */ + if (!kref_read(&bo->kref)) + return; + + /* + * Put on the correct LRU list depending on the MADV status + */ + spin_lock(&bo->bdev->lru_lock); + if (obj->mm.madv != I915_MADV_WILLNEED) { + bo->priority = I915_TTM_PRIO_PURGE; + } else if (!i915_gem_object_has_pages(obj)) { + if (bo->priority < I915_TTM_PRIO_HAS_PAGES) + bo->priority = I915_TTM_PRIO_HAS_PAGES; + } else { + if (bo->priority > I915_TTM_PRIO_NO_PAGES) + bo->priority = I915_TTM_PRIO_NO_PAGES; + } + + ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); + spin_unlock(&bo->bdev->lru_lock); +} + +/* + * TTM-backed gem object destruction requires some clarification. + * Basically we have two possibilities here. We can either rely on the + * i915 delayed destruction and put the TTM object when the object + * is idle. 
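To make the "Migration vs eviction" note above concrete, a caller that wants to force an object into system memory would go through the higher-level migration API rather than the ->migrate() hook directly. A hedged sketch: i915_gem_object_migrate() and i915_gem_object_wait_migration() are only referenced, not defined, in this hunk, so their exact signatures are assumed here.

static int force_migrate_to_smem_sketch(struct drm_i915_gem_object *obj)
{
	int err;

	i915_gem_object_lock(obj, NULL);
	/* Ask the backend to move the object, then wait for the async copy/clear */
	err = i915_gem_object_migrate(obj, NULL, INTEL_REGION_SMEM);
	if (!err)
		err = i915_gem_object_wait_migration(obj, I915_WAIT_INTERRUPTIBLE);
	i915_gem_object_unlock(obj);

	return err;
}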
This would be detected by TTM which would bypass the + * TTM delayed destroy handling. The other approach is to put the TTM + * object early and rely on the TTM destroyed handling, and then free + * the leftover parts of the GEM object once TTM's destroyed list handling is + * complete. For now, we rely on the latter for two reasons: + * a) TTM can evict an object even when it's on the delayed destroy list, + * which in theory allows for complete eviction. + * b) There is work going on in TTM to allow freeing an object even when + * it's not idle, and using the TTM destroyed list handling could help us + * benefit from that. + */ +static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +{ + if (obj->ttm.created) { + ttm_bo_put(i915_gem_to_ttm(obj)); + } else { + __i915_gem_free_object(obj); + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } +} + +static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) +{ + struct vm_area_struct *area = vmf->vma; + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(area->vm_private_data); + + /* Sanity check that we allow writing into this object */ + if (unlikely(i915_gem_object_is_readonly(obj) && + area->vm_flags & VM_WRITE)) + return VM_FAULT_SIGBUS; + + return ttm_bo_vm_fault(vmf); +} + +static int +vm_access_ttm(struct vm_area_struct *area, unsigned long addr, + void *buf, int len, int write) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(area->vm_private_data); + + if (i915_gem_object_is_readonly(obj) && write) + return -EACCES; + + return ttm_bo_vm_access(area, addr, buf, len, write); +} + +static void ttm_vm_open(struct vm_area_struct *vma) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(vma->vm_private_data); + + GEM_BUG_ON(!obj); + i915_gem_object_get(obj); +} + +static void ttm_vm_close(struct vm_area_struct *vma) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(vma->vm_private_data); + + GEM_BUG_ON(!obj); + i915_gem_object_put(obj); +} + +static const struct vm_operations_struct vm_ops_ttm = { + .fault = vm_fault_ttm, + .access = vm_access_ttm, + .open = ttm_vm_open, + .close = ttm_vm_close, +}; + +static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) +{ + /* The ttm_bo must be allocated with I915_BO_ALLOC_USER */ + GEM_BUG_ON(!drm_mm_node_allocated(&obj->base.vma_node.vm_node)); + + return drm_vma_node_offset_addr(&obj->base.vma_node); +} + +static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { + .name = "i915_gem_object_ttm", + + .get_pages = i915_ttm_get_pages, + .put_pages = i915_ttm_put_pages, + .truncate = i915_ttm_purge, + .adjust_lru = i915_ttm_adjust_lru, + .delayed_free = i915_ttm_delayed_free, + .migrate = i915_ttm_migrate, + .mmap_offset = i915_ttm_mmap_offset, + .mmap_ops = &vm_ops_ttm, +}; + +void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + i915_gem_object_release_memory_region(obj); + mutex_destroy(&obj->ttm.get_io_page.lock); + if (obj->ttm.created) + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); +} + +/** + * __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object + * @mem: The initial memory region for the object. + * @obj: The gem object. + * @size: Object size in bytes. + * @flags: gem object flags. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int __i915_gem_ttm_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) +{ + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + enum ttm_bo_type bo_type; + int ret; + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); + i915_gem_object_init_memory_region(obj, mem); + i915_gem_object_make_unshrinkable(obj); + INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->ttm.get_io_page.lock); + bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : + ttm_bo_type_kernel; + + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + + /* Forcing the page size is kernel internal only */ + GEM_BUG_ON(page_size && obj->mm.n_placements); + + /* + * If this function fails, it will call the destructor, but + * our caller still owns the object. So no freeing in the + * destructor until obj->ttm.created is true. + * Similarly, in delayed_destroy, we can't call ttm_bo_put() + * until successful initialization. + */ + ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + bo_type, &i915_sys_placement, + page_size >> PAGE_SHIFT, + &ctx, NULL, NULL, i915_ttm_bo_destroy); + if (ret) + return i915_ttm_err_to_gem(ret); + + obj->ttm.created = true; + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + i915_gem_object_unlock(obj); + + return 0; +} + +static const struct intel_memory_region_ops ttm_system_region_ops = { + .init_object = __i915_gem_ttm_object_init, +}; + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance) +{ + struct intel_memory_region *mr; + + mr = intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + type, instance, + &ttm_system_region_ops); + if (IS_ERR(mr)) + return mr; + + intel_memory_region_set_name(mr, "system-ttm"); + return mr; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h new file mode 100644 index 000000000000..40927f67b6d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_GEM_TTM_H_ +#define _I915_GEM_TTM_H_ + +#include "gem/i915_gem_object_types.h" + +/** + * i915_gem_to_ttm - Convert a struct drm_i915_gem_object to a + * struct ttm_buffer_object. + * @obj: Pointer to the gem object. + * + * Return: Pointer to the embedded struct ttm_buffer_object. + */ +static inline struct ttm_buffer_object * +i915_gem_to_ttm(struct drm_i915_gem_object *obj) +{ + return &obj->__do_not_access; +} + +/* + * i915 ttm gem object destructor. Internal use only. + */ +void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); + +/** + * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding + * struct drm_i915_gem_object. + * + * Return: Pointer to the embedding struct ttm_buffer_object, or NULL + * if the object was not an i915 ttm object. 
+ */ +static inline struct drm_i915_gem_object * +i915_ttm_to_gem(struct ttm_buffer_object *bo) +{ + if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy)) + return NULL; + + return container_of(bo, struct drm_i915_gem_object, __do_not_access); +} + +int __i915_gem_ttm_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 7487bab11f0b..468a7a617fbf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -67,11 +67,11 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, if (!mmu_notifier_range_blockable(range)) return false; - spin_lock(&i915->mm.notifier_lock); + write_lock(&i915->mm.notifier_lock); mmu_interval_set_seq(mni, cur_seq); - spin_unlock(&i915->mm.notifier_lock); + write_unlock(&i915->mm.notifier_lock); /* * We don't wait when the process is exiting. This is valid @@ -107,16 +107,15 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj) static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct page **pvec = NULL; - spin_lock(&i915->mm.notifier_lock); + assert_object_held_shared(obj); + if (!--obj->userptr.page_ref) { pvec = obj->userptr.pvec; obj->userptr.pvec = NULL; } GEM_BUG_ON(obj->userptr.page_ref < 0); - spin_unlock(&i915->mm.notifier_lock); if (pvec) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; @@ -128,7 +127,6 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; @@ -141,16 +139,13 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (!st) return -ENOMEM; - spin_lock(&i915->mm.notifier_lock); - if (GEM_WARN_ON(!obj->userptr.page_ref)) { - spin_unlock(&i915->mm.notifier_lock); - ret = -EFAULT; + if (!obj->userptr.page_ref) { + ret = -EAGAIN; goto err_free; } obj->userptr.page_ref++; pvec = obj->userptr.pvec; - spin_unlock(&i915->mm.notifier_lock); alloc_table: sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, @@ -241,7 +236,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, i915_gem_object_userptr_drop_ref(obj); } -static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool get_pages) +static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; @@ -259,15 +254,11 @@ static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool if (!IS_ERR_OR_NULL(pages)) i915_gem_userptr_put_pages(obj, pages); - if (get_pages) - err = ____i915_gem_object_get_pages(obj); - return err; } int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; struct page **pvec; unsigned int gup_flags = 0; @@ -277,39 +268,22 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (obj->userptr.notifier.mm != current->mm) return -EFAULT; + notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); + ret = 
i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; - /* optimistically try to preserve current pages while unlocked */ - if (i915_gem_object_has_pages(obj) && - !mmu_interval_check_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - spin_lock(&i915->mm.notifier_lock); - if (obj->userptr.pvec && - !mmu_interval_read_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - obj->userptr.page_ref++; - - /* We can keep using the current binding, this is the fastpath */ - ret = 1; - } - spin_unlock(&i915->mm.notifier_lock); + if (notifier_seq == obj->userptr.notifier_seq && obj->userptr.pvec) { + i915_gem_object_unlock(obj); + return 0; } - if (!ret) { - /* Make sure userptr is unbound for next attempt, so we don't use stale pages. */ - ret = i915_gem_object_userptr_unbind(obj, false); - } + ret = i915_gem_object_userptr_unbind(obj); i915_gem_object_unlock(obj); - if (ret < 0) + if (ret) return ret; - if (ret > 0) - return 0; - - notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); - pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); if (!pvec) return -ENOMEM; @@ -329,7 +303,9 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) } ret = 0; - spin_lock(&i915->mm.notifier_lock); + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + goto out; if (mmu_interval_read_retry(&obj->userptr.notifier, !obj->userptr.page_ref ? notifier_seq : @@ -341,12 +317,14 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (!obj->userptr.page_ref++) { obj->userptr.pvec = pvec; obj->userptr.notifier_seq = notifier_seq; - pvec = NULL; + ret = ____i915_gem_object_get_pages(obj); } + obj->userptr.page_ref--; + out_unlock: - spin_unlock(&i915->mm.notifier_lock); + i915_gem_object_unlock(obj); out: if (pvec) { @@ -369,11 +347,6 @@ int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) return 0; } -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) -{ - i915_gem_object_userptr_drop_ref(obj); -} - int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { int err; @@ -396,7 +369,6 @@ int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) i915_gem_object_unlock(obj); } - i915_gem_object_userptr_submit_fini(obj); return err; } @@ -450,6 +422,34 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { #endif +static int +probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) +{ + const unsigned long end = addr + len; + struct vm_area_struct *vma; + int ret = -EFAULT; + + mmap_read_lock(mm); + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + /* Check for holes, note that we also update the addr below */ + if (vma->vm_start > addr) + break; + + if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) + break; + + if (vma->vm_end >= end) { + ret = 0; + break; + } + + addr = vma->vm_end; + } + mmap_read_unlock(mm); + + return ret; +} + /* * Creates a new mm object that wraps some normal memory from the process * context - user memory. 
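From the userspace side, the probe_range() check above is reached through the new I915_USERPTR_PROBE flag wired into the ioctl hunk that follows. A hedged usage sketch; the flag and its uapi plumbing come from elsewhere in this series, so treat the names and error codes below as assumptions rather than a definitive example:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Returns 0 if the whole range is backed by ordinary struct-page memory */
static int probe_userptr(int drm_fd, void *ptr, uint64_t size)
{
	struct drm_i915_gem_userptr arg = {
		.user_ptr = (uintptr_t)ptr,
		.user_size = size,
		.flags = I915_USERPTR_PROBE,
	};

	/* A plain ioctl() for brevity; real code would restart on EINTR */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) < 0)
		return -errno;	/* EFAULT: hole or PFN/MIXED mapping in the range */

	/* On success the object is still created; its handle is in arg.handle */
	return 0;
}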
@@ -505,7 +505,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, } if (args->flags & ~(I915_USERPTR_READ_ONLY | - I915_USERPTR_UNSYNCHRONIZED)) + I915_USERPTR_UNSYNCHRONIZED | + I915_USERPTR_PROBE)) return -EINVAL; if (i915_gem_object_size_2big(args->user_size)) @@ -532,14 +533,24 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENODEV; } + if (args->flags & I915_USERPTR_PROBE) { + /* + * Check that the range pointed to represents real struct + * pages and not iomappings (at this moment in time!) + */ + ret = probe_range(current->mm, args->user_ptr, args->user_size); + if (ret) + return ret; + } + #ifdef CONFIG_MMU_NOTIFIER obj = i915_gem_object_alloc(); if (obj == NULL) return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0); + obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); @@ -572,7 +583,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { #ifdef CONFIG_MMU_NOTIFIER - spin_lock_init(&dev_priv->mm.notifier_lock); + rwlock_init(&dev_priv->mm.notifier_lock); #endif return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1e97520c62b2..f909aaa09d9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -104,8 +104,8 @@ static void fence_set_priority(struct dma_fence *fence, engine = rq->engine; rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) - engine->schedule(rq, attr); + if (engine->sched_engine->schedule) + engine->sched_engine->schedule(rq, attr); rcu_read_unlock(); } @@ -290,3 +290,22 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); return ret; } + +/** + * i915_gem_object_wait_migration - Sync an accelerated migration operation + * @obj: The migrating object. + * @flags: waiting flags. Currently supports only I915_WAIT_INTERRUPTIBLE. + * + * Wait for any pending async migration operation on the object, + * whether it's explicitly (i915_gem_object_migrate()) or implicitly + * (swapin, initial clearing) initiated. + * + * Return: 0 if successful, -ERESTARTSYS if a signal was hit during waiting. + */ +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + might_sleep(); + /* NOP for now. 
*/ + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 0c8ecfdf5405..f963b8e1e37b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -114,8 +114,8 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &huge_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dadd485bc52f..a094f3ce1a90 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -167,9 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; @@ -497,7 +496,8 @@ static int igt_mock_memory_region_huge_pages(void *arg) int i; for (i = 0; i < ARRAY_SIZE(flags); ++i) { - obj = i915_gem_object_create_region(mem, page_size, + obj = i915_gem_object_create_region(mem, + page_size, page_size, flags[i]); if (IS_ERR(obj)) { err = PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 176e6b22f87f..ecbcbb86ae1e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,6 +5,7 @@ #include "i915_selftest.h" +#include "gt/intel_context.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gpu_commands.h" @@ -16,118 +17,6 @@ #include "huge_gem_object.h" #include "mock_context.h" -static int __igt_client_fill(struct intel_engine_cs *engine) -{ - struct intel_context *ce = engine->kernel_context; - struct drm_i915_gem_object *obj; - I915_RND_STATE(prng); - IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; - - intel_engine_pm_get(engine); - do { - const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); - u32 i; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * XXX: The goal is move this to get_pages, so try to dirty the - * CPU cache first to check that we do the required clflush - * before scheduling the blt for !llc platforms. This matches - * some version of reality where at get_pages the pages - * themselves may not yet be coherent with the GPU(swap-in). 
If - * we are missing the flush then we should see the stale cache - * values after we do the set_to_cpu_domain and pick it up as a - * test failure. - */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages, - &obj->mm.page_sizes, - val); - if (err) - goto err_unpin; - - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_cpu_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - intel_engine_pm_put(engine); - - return err; -} - -static int igt_client_fill(void *arg) -{ - int inst = 0; - - do { - struct intel_engine_cs *engine; - int err; - - engine = intel_engine_lookup_user(arg, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - err = __igt_client_fill(engine); - if (err == -ENOMEM) - err = 0; - if (err) - return err; - } while (1); -} - #define WIDTH 512 #define HEIGHT 32 @@ -693,7 +582,6 @@ static int igt_client_tiled_blits(void *arg) int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(igt_client_fill), SUBTEST(igt_client_tiled_blits), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index dbcfa28a9d91..8eb5050f8cb3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -680,7 +680,7 @@ static int igt_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, NULL); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; @@ -813,16 +813,12 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, ctx_vm(parent)); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; } - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, ctx_vm(parent)); - mutex_unlock(&ctx->mutex); - ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); @@ -1875,125 +1871,6 @@ out_file: return err; } -static bool skip_unused_engines(struct intel_context *ce, void *data) -{ - return !ce->state; -} - -static void mock_barrier_task(void *data) -{ - unsigned int *counter = data; - - ++*counter; -} - -static int mock_context_barrier(void *arg) -{ -#undef pr_fmt -#define pr_fmt(x) "context_barrier_task():" # x - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct i915_request *rq; - unsigned int counter; - int err; - - /* - * The context barrier provides us with a callback after it emits - * a request; useful for retiring old state after loading new. 
- */ - - ctx = mock_context(i915, "mock"); - if (!ctx) - return -ENOMEM; - - counter = 0; - err = context_barrier_task(ctx, 0, NULL, NULL, NULL, - mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately with 0 engines\n"); - err = -EINVAL; - goto out; - } - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately for all unused engines\n"); - err = -EINVAL; - goto out; - } - - rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]); - if (IS_ERR(rq)) { - pr_err("Request allocation failed!\n"); - goto out; - } - i915_request_add(rq); - - counter = 0; - context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL, - mock_barrier_task, &counter); - context_barrier_inject_fault = 0; - if (err == -ENXIO) - err = 0; - else - pr_err("Did not hit fault injection!\n"); - if (counter != 0) { - pr_err("Invoked callback on error!\n"); - err = -EIO; - } - if (err) - goto out; - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - mock_device_flush(i915); - if (counter == 0) { - pr_err("Did not retire on each active engines\n"); - err = -EINVAL; - goto out; - } - -out: - mock_context_close(ctx); - return err; -#undef pr_fmt -#define pr_fmt(x) x -} - -int i915_gem_context_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_context_barrier), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - mock_destroy_device(i915); - return err; -} - int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index dd74bc09ec88..ffae7df5e4d7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -35,7 +35,7 @@ static int igt_dmabuf_export(void *arg) static int igt_dmabuf_import_self(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj, *import_obj; struct drm_gem_object *import; struct dma_buf *dmabuf; int err; @@ -65,10 +65,19 @@ static int igt_dmabuf_import_self(void *arg) err = -EINVAL; goto out_import; } + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + i915_gem_object_unlock(import_obj); + if (err) { + pr_err("Same object dma-buf get_pages failed!\n"); + goto out_import; + } err = 0; out_import: - i915_gem_object_put(to_intel_bo(import)); + i915_gem_object_put(import_obj); out_dmabuf: dma_buf_put(dmabuf); out: @@ -76,6 +85,180 @@ out: return err; } +static int igt_dmabuf_import_same_driver_lmem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM]; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + if (!lmem) + return 0; + + 
force_different_devices = true; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1); + if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + /* + * We expect an import of an LMEM-only object to fail with + * -EOPNOTSUPP because it can't be migrated to SMEM. + */ + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (!IS_ERR(import)) { + drm_gem_object_put(import); + pr_err("i915_gem_prime_import succeeded when it shouldn't have\n"); + err = -EINVAL; + } else if (PTR_ERR(import) != -EOPNOTSUPP) { + pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + } + + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); +out_ret: + force_different_devices = false; + return err; +} + +static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, + struct intel_memory_region **regions, + unsigned int num_regions) +{ + struct drm_i915_gem_object *obj, *import_obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + struct dma_buf_attachment *import_attach; + struct sg_table *st; + long timeout; + int err; + + force_different_devices = true; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, + regions, num_regions); + if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import == &obj->base) { + pr_err("i915_gem_prime_import reused gem object!\n"); + err = -EINVAL; + goto out_import; + } + + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + if (err) { + pr_err("Different objects dma-buf get_pages failed!\n"); + i915_gem_object_unlock(import_obj); + goto out_import; + } + + /* + * If the exported object is not in system memory, something + * weird is going on. TODO: When p2p is supported, this is no + * longer considered weird. + */ + if (obj->mm.region != i915->mm.regions[INTEL_REGION_SMEM]) { + pr_err("Exported dma-buf is not in system memory\n"); + err = -EINVAL; + } + + i915_gem_object_unlock(import_obj); + + /* Now try a fake an importer */ + import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev); + if (IS_ERR(import_attach)) { + err = PTR_ERR(import_attach); + goto out_import; + } + + st = dma_buf_map_attachment(import_attach, DMA_BIDIRECTIONAL); + if (IS_ERR(st)) { + err = PTR_ERR(st); + goto out_detach; + } + + timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ); + if (!timeout) { + pr_err("dmabuf wait for exclusive fence timed out.\n"); + timeout = -ETIME; + } + err = timeout > 0 ? 
0 : timeout; + dma_buf_unmap_attachment(import_attach, st, DMA_BIDIRECTIONAL); +out_detach: + dma_buf_detach(dmabuf, import_attach); +out_import: + i915_gem_object_put(import_obj); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); +out_ret: + force_different_devices = false; + return err; +} + +static int igt_dmabuf_import_same_driver_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *smem = i915->mm.regions[INTEL_REGION_SMEM]; + + return igt_dmabuf_import_same_driver(i915, &smem, 1); +} + +static int igt_dmabuf_import_same_driver_lmem_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *regions[2]; + + if (!i915->mm.regions[INTEL_REGION_LMEM]) + return 0; + + regions[0] = i915->mm.regions[INTEL_REGION_LMEM]; + regions[1] = i915->mm.regions[INTEL_REGION_SMEM]; + return igt_dmabuf_import_same_driver(i915, regions, 2); +} + static int igt_dmabuf_import(void *arg) { struct drm_i915_private *i915 = arg; @@ -286,6 +469,9 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_same_driver_lmem), + SUBTEST(igt_dmabuf_import_same_driver_smem), + SUBTEST(igt_dmabuf_import_same_driver_lmem_smem), }; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 4df505e4c53a..16162fc2782d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg) intel_gt_pm_get(&eb.i915->gt); for_each_uabi_engine(eb.engine, eb.i915) { + if (intel_engine_requires_cmd_parser(eb.engine) || + intel_engine_using_cmd_parser(eb.engine)) + continue; + reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c new file mode 100644 index 000000000000..28a700f08b49 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2021 Intel Corporation + */ + +#include "gt/intel_migrate.h" + +static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, + bool fill) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int i, count = obj->base.size / sizeof(u32); + enum i915_map_type map_type = + i915_coherent_map_type(i915, obj, false); + u32 *cur; + int err = 0; + + assert_object_held(obj); + cur = i915_gem_object_pin_map(obj, map_type); + if (IS_ERR(cur)) + return PTR_ERR(cur); + + if (fill) + for (i = 0; i < count; ++i) + *cur++ = i; + else + for (i = 0; i < count; ++i) + if (*cur++ != i) { + pr_err("Object content mismatch at location %d of %d\n", i, count); + err = -EINVAL; + break; + } + + i915_gem_object_unpin_map(obj); + + return err; +} + +static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, + enum intel_region_id dst) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_memory_region *src_mr = i915->mm.regions[src]; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + GEM_BUG_ON(!src_mr); + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) 
{ + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + + err = i915_gem_object_migrate(obj, &ww, dst); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, src)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + err = i915_gem_object_wait_migration(obj, true); + if (err) + continue; + + err = igt_fill_check_buffer(obj, false); + } + i915_gem_object_put(obj); + + return err; +} + +static int igt_smem_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM); +} + +static int igt_lmem_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM); +} + +static int igt_same_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM); +} + +static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_lock(obj, ww); + if (err) + return err; + + if (i915_gem_object_is_lmem(obj)) { + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); + if (err) { + pr_err("Object failed migration to smem\n"); + if (err) + return err; + } + + if (i915_gem_object_is_lmem(obj)) { + pr_err("object still backed by lmem\n"); + err = -EINVAL; + } + + if (!i915_gem_object_has_struct_page(obj)) { + pr_err("object not backed by struct page\n"); + err = -EINVAL; + } + + } else { + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM); + if (err) { + pr_err("Object failed migration to lmem\n"); + if (err) + return err; + } + + if (i915_gem_object_has_struct_page(obj)) { + pr_err("object still backed by struct page\n"); + err = -EINVAL; + } + + if (!i915_gem_object_is_lmem(obj)) { + pr_err("object not backed by lmem\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_lmem_pages_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + struct i915_request *rq; + int err; + int i; + + /* From LMEM to shmem and back again */ + + obj = i915_gem_object_create_lmem(i915, SZ_2M, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + /* Initial GPU fill, sync, CPU initialization. */ + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = ____i915_gem_object_get_pages(obj); + if (err) + continue; + + err = intel_migrate_clear(>->migrate, &ww, NULL, + obj->mm.pages->sgl, obj->cache_level, + i915_gem_object_is_lmem(obj), + 0xdeadbeaf, &rq); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_request_put(rq); + } + if (err) + continue; + + err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, + 5 * HZ); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + } + if (err) + goto out_put; + + /* + * Migrate to and from smem without explicitly syncing. + * Finalize with data in smem for fast readout. + */ + for (i = 1; i <= 5; ++i) { + for_i915_gem_ww(&ww, err, true) + err = lmem_pages_migrate_one(&ww, obj); + if (err) + goto out_put; + } + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; + + /* Finally sync migration and check content. 
*/ + err = i915_gem_object_wait_migration(obj, true); + if (err) + goto out_unlock; + + err = igt_fill_check_buffer(obj, false); + +out_unlock: + i915_gem_object_unlock(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_smem_create_migrate), + SUBTEST(igt_lmem_create_migrate), + SUBTEST(igt_same_create_migrate), + SUBTEST(igt_lmem_pages_migrate), + }; + + if (!HAS_LMEM(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5575172c66f5..b20f5621f62b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -573,21 +573,30 @@ err: return 0; } +static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) +{ + if (HAS_LMEM(i915)) + return I915_MMAP_TYPE_FIXED; + + return I915_MMAP_TYPE_GTT; +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) { struct drm_i915_gem_object *obj; - struct i915_mmap_offset *mmo; + u64 offset; + int ret; obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) - return false; + return expected && expected == PTR_ERR(obj); - mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); + ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); i915_gem_object_put(obj); - return PTR_ERR_OR_ZERO(mmo) == expected; + return ret == expected; } static void disable_retire_worker(struct drm_i915_private *i915) @@ -622,8 +631,8 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; struct drm_i915_gem_object *obj; struct drm_mm_node *hole, *next; - struct i915_mmap_offset *mmo; int loop, err = 0; + u64 offset; /* Disable background reaper */ disable_retire_worker(i915); @@ -684,13 +693,13 @@ static int igt_mmap_offset_exhaustion(void *arg) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); + pr_err("Unable to create object for reclaimed hole\n"); goto out; } - mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); - if (IS_ERR(mmo)) { + err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); + if (err) { pr_err("Unable to insert object into reclaimed hole\n"); - err = PTR_ERR(mmo); goto err_obj; } @@ -830,34 +839,25 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - if (type == I915_MMAP_TYPE_GTT && - !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) - return false; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + bool no_map; - if (type != I915_MMAP_TYPE_GTT && - !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + if (HAS_LMEM(i915)) + return type == I915_MMAP_TYPE_FIXED; + else if (type == I915_MMAP_TYPE_FIXED) return false; - return true; -} - -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) -{ - GEM_BUG_ON(!n_placements); + if (type == I915_MMAP_TYPE_GTT && + !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) + return false; - if (n_placements == 1) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mr = placements[0]; + 
i915_gem_object_lock(obj, NULL); + no_map = (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_has_iomem(obj)); + i915_gem_object_unlock(obj); - obj->mm.placements = &i915->mm.regions[mr->id]; - obj->mm.n_placements = 1; - } else { - obj->mm.placements = placements; - obj->mm.n_placements = n_placements; - } + return !no_map; } #define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) @@ -865,10 +865,10 @@ static int __igt_mmap(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; struct vm_area_struct *area; unsigned long addr; int err, i; + u64 offset; if (!can_mmap(obj, type)) return 0; @@ -879,11 +879,11 @@ static int __igt_mmap(struct drm_i915_private *i915, if (err) return err; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -897,13 +897,6 @@ static int __igt_mmap(struct drm_i915_private *i915, goto out_unmap; } - if (area->vm_private_data != mmo) { - pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n", - obj->mm.region->name); - err = -EINVAL; - goto out_unmap; - } - for (i = 0; i < obj->base.size / sizeof(u32); i++) { u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); u32 x; @@ -961,18 +954,18 @@ static int igt_mmap(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, sizes[i], 0); + obj = __i915_gem_object_create_user(i915, sizes[i], &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -990,26 +983,33 @@ static const char *repr_mmap_type(enum i915_mmap_type type) case I915_MMAP_TYPE_WB: return "wb"; case I915_MMAP_TYPE_WC: return "wc"; case I915_MMAP_TYPE_UC: return "uc"; + case I915_MMAP_TYPE_FIXED: return "fixed"; default: return "unknown"; } } -static bool can_access(const struct drm_i915_gem_object *obj) +static bool can_access(struct drm_i915_gem_object *obj) { - return i915_gem_object_has_struct_page(obj) || - i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); + bool access; + + i915_gem_object_lock(obj, NULL); + access = i915_gem_object_has_struct_page(obj) || + i915_gem_object_has_iomem(obj); + i915_gem_object_unlock(obj); + + return access; } static int __igt_mmap_access(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; unsigned long __user *ptr; unsigned long A, B; unsigned long x, y; unsigned long addr; int err; + u64 offset; memset(&A, 0xAA, sizeof(A)); memset(&B, 0xBB, sizeof(B)); @@ -1017,11 +1017,11 @@ static int __igt_mmap_access(struct drm_i915_private *i915, if (!can_mmap(obj, type) || !can_access(obj)) return 0; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, 
obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; ptr = (unsigned long __user *)addr; @@ -1081,15 +1081,13 @@ static int igt_mmap_access(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB); @@ -1097,6 +1095,8 @@ static int igt_mmap_access(void *arg) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1111,11 +1111,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, enum i915_mmap_type type) { struct intel_engine_cs *engine; - struct i915_mmap_offset *mmo; unsigned long addr; u32 __user *ux; u32 bbe; int err; + u64 offset; /* * Verify that the mmap access into the backing store aligns with @@ -1132,11 +1132,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, if (err) return err; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -1226,18 +1226,18 @@ static int igt_mmap_gpu(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1303,18 +1303,18 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; unsigned long addr; int err; + u64 offset; if (!can_mmap(obj, type)) return 0; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -1350,10 +1350,20 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, } } - err = check_absent(addr, obj->base.size); - if (err) { - pr_err("%s: was not absent\n", obj->mm.region->name); - goto out_unmap; + if (!obj->ops->mmap_ops) { + err = check_absent(addr, obj->base.size); + if (err) { + pr_err("%s: was not absent\n", obj->mm.region->name); + goto out_unmap; + } + } else { + /* ttm allows access to evicted regions by design */ + + err = check_present(addr, obj->base.size); + if (err) { + pr_err("%s: was not present\n", obj->mm.region->name); + goto out_unmap; + } } out_unmap: @@ -1371,18 +1381,18 @@ static int igt_mmap_revoke(void *arg) struct drm_i915_gem_object *obj; int err; - obj = 
i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c deleted file mode 100644 index 8c335d1a8406..000000000000 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ /dev/null @@ -1,597 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include <linux/sort.h> - -#include "gt/intel_gt.h" -#include "gt/intel_engine_user.h" - -#include "i915_selftest.h" - -#include "gem/i915_gem_context.h" -#include "selftests/igt_flush_test.h" -#include "selftests/i915_random.h" -#include "selftests/mock_drm.h" -#include "huge_gem_object.h" -#include "mock_context.h" - -static int wrap_ktime_compare(const void *A, const void *B) -{ - const ktime_t *a = A, *b = B; - - return ktime_compare(*a, *b); -} - -static int __perf_fill_blt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_fill_blt(obj, ce, 0); - if (err) - break; - - err = i915_gem_object_wait(obj, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", - engine->name, - obj->base.size >> 10, - div64_u64(mul_u32_u32(4 * obj->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_fill_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = __perf_fill_blt(obj); - i915_gem_object_put(obj); - if (err) - return err; - } - - return 0; -} - -static int __perf_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst) -{ - struct drm_i915_private *i915 = to_i915(src->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err = 0; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - break; - - err = i915_gem_object_wait(dst, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - 
t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", - engine->name, - src->base.size >> 10, - div64_u64(mul_u32_u32(4 * src->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_copy_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *src, *dst; - int err; - - src = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(src)) - return PTR_ERR(src); - - dst = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_src; - } - - err = __perf_copy_blt(src, dst); - - i915_gem_object_put(dst); -err_src: - i915_gem_object_put(src); - if (err) - return err; - } - - return 0; -} - -struct igt_thread_arg { - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct file *file; - struct rnd_state prng; - unsigned int n_cpus; -}; - -static int igt_fill_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! */ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * Make sure the potentially async clflush does its job, if - * required. 
- */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_object_fill_blt(obj, ce, val); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) { - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_copy_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *src, *dst; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! 
*/ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - src = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(src)) { - err = PTR_ERR(src); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_src; - } - - memset32(vaddr, val, - huge_gem_object_phys_size(src) / sizeof(u32)); - - i915_gem_object_unpin_map(src); - - if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - src->cache_dirty = true; - - dst = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_put_src; - } - - vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_dst; - } - - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(dst) / sizeof(u32)); - - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - dst->cache_dirty = true; - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) { - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(dst); - - i915_gem_object_put(src); - i915_gem_object_put(dst); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(dst); -err_put_dst: - i915_gem_object_put(dst); -err_put_src: - i915_gem_object_put(src); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_threaded_blt(struct intel_engine_cs *engine, - int (*blt_fn)(void *arg), - unsigned int flags) -#define SINGLE_CTX BIT(0) -{ - struct igt_thread_arg *thread; - struct task_struct **tsk; - unsigned int n_cpus, i; - I915_RND_STATE(prng); - int err = 0; - - n_cpus = num_online_cpus() + 1; - - tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); - if (!tsk) - return 0; - - thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); - if (!thread) - goto out_tsk; - - thread[0].file = mock_file(engine->i915); - if (IS_ERR(thread[0].file)) { - err = PTR_ERR(thread[0].file); - goto out_thread; - } - - if (flags & SINGLE_CTX) { - thread[0].ctx = live_context_for_engine(engine, thread[0].file); - if (IS_ERR(thread[0].ctx)) { - err = PTR_ERR(thread[0].ctx); - goto out_file; - } - } - - for (i = 0; i < n_cpus; ++i) { - thread[i].engine = engine; - thread[i].file = thread[0].file; - thread[i].ctx = thread[0].ctx; - thread[i].n_cpus = n_cpus; - thread[i].prng = - I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); - - tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); - if (IS_ERR(tsk[i])) { - err = PTR_ERR(tsk[i]); - break; - } - - get_task_struct(tsk[i]); - } - - yield(); /* start all threads before we kthread_stop() */ - - for (i = 0; i < n_cpus; ++i) { - int status; - - if 
(IS_ERR_OR_NULL(tsk[i])) - continue; - - status = kthread_stop(tsk[i]); - if (status && !err) - err = status; - - put_task_struct(tsk[i]); - } - -out_file: - fput(thread[0].file); -out_thread: - kfree(thread); -out_tsk: - kfree(tsk); - return err; -} - -static int test_copy_engines(struct drm_i915_private *i915, - int (*fn)(void *arg), - unsigned int flags) -{ - struct intel_engine_cs *engine; - int ret; - - for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) { - ret = igt_threaded_blt(engine, fn, flags); - if (ret) - return ret; - } - - return 0; -} - -static int igt_fill_blt(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, 0); -} - -static int igt_fill_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX); -} - -static int igt_copy_blt(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, 0); -} - -static int igt_copy_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX); -} - -int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_fill_blt), - SUBTEST(igt_fill_blt_ctx0), - SUBTEST(igt_copy_blt), - SUBTEST(igt_copy_blt_ctx0), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} - -int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(perf_fill_blt), - SUBTEST(perf_copy_blt), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 3a6ce87f8b52..d43d8dae0f69 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,13 +25,14 @@ static int mock_phys_object(void *arg) goto out; } + i915_gem_object_lock(obj, NULL); if (!i915_gem_object_has_struct_page(obj)) { + i915_gem_object_unlock(obj); err = -EINVAL; pr_err("shmem has no struct page\n"); goto out_obj; } - i915_gem_object_lock(obj, NULL); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); i915_gem_object_unlock(obj); if (err) { diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 51b5a3421b40..fee070df1c97 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -14,6 +14,7 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; + struct intel_sseu null_sseu = {}; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -30,15 +31,6 @@ mock_context(struct drm_i915_private *i915, i915_gem_context_set_persistence(ctx); - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) - goto err_free; - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - if (name) { struct i915_ppgtt *ppgtt; @@ -46,25 +38,29 @@ mock_context(struct drm_i915_private *i915, ppgtt = mock_ppgtt(i915, name); if (!ppgtt) - goto err_put; - - mutex_lock(&ctx->mutex); - __set_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); + goto err_free; + ctx->vm = i915_vm_open(&ppgtt->vm); i915_vm_put(&ppgtt->vm); } + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx, null_sseu); + if (IS_ERR(e)) + goto err_vm; + RCU_INIT_POINTER(ctx->engines, e); + + 
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); + return ctx; +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); err_free: kfree(ctx); return NULL; - -err_put: - i915_gem_context_set_closed(ctx); - i915_gem_context_put(ctx); - return NULL; } void mock_context_close(struct i915_gem_context *ctx) @@ -80,20 +76,29 @@ void mock_init_contexts(struct drm_i915_private *i915) struct i915_gem_context * live_context(struct drm_i915_private *i915, struct file *file) { + struct drm_i915_file_private *fpriv = to_drm_file(file)->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; u32 id; - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; i915_gem_context_set_no_error_capture(ctx); - err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); + err = xa_alloc(&fpriv->context_xa, &id, NULL, xa_limit_32b, GFP_KERNEL); if (err < 0) goto err_ctx; + gem_context_register(ctx, fpriv, id); + return ctx; err_ctx: @@ -106,6 +111,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) { struct i915_gem_engines *engines; struct i915_gem_context *ctx; + struct intel_sseu null_sseu = {}; struct intel_context *ce; engines = alloc_engines(1); @@ -124,7 +130,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) return ERR_CAST(ce); } - intel_context_set_gem(ce, ctx); + intel_context_set_gem(ce, ctx, null_sseu); engines->engines[0] = ce; engines->num_engines = 1; @@ -139,11 +145,24 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) } struct i915_gem_context * -kernel_context(struct drm_i915_private *i915) +kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm) { struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; + + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); + + if (vm) { + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = i915_vm_get(vm); + } - ctx = i915_gem_create_context(i915, 0); + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index 2a6121d33352..7a02fd9b5866 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -10,6 +10,7 @@ struct file; struct drm_i915_private; struct intel_engine_cs; +struct i915_address_space; void mock_init_contexts(struct drm_i915_private *i915); @@ -25,7 +26,8 @@ live_context(struct drm_i915_private *i915, struct file *file); struct i915_gem_context * live_context_for_engine(struct intel_engine_cs *engine, struct file *file); -struct i915_gem_context *kernel_context(struct drm_i915_private *i915); +struct i915_gem_context *kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm); void kernel_context_close(struct i915_gem_context *ctx); #endif /* !__MOCK_CONTEXT_H */ |
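
The new dma-buf selftests all follow the same export/import round trip: force_different_devices makes the export path treat the importer as a foreign device, so the real import code runs instead of simply handing back the original GEM object, and the exclusive fence on the dma-buf's reservation is what the test waits on before tearing everything down. For an LMEM-only object the import is instead expected to fail with -EOPNOTSUPP, since the object cannot be migrated to system memory. Roughly, with most of the error reporting stripped and the names taken from the functions added in this patch:

        struct intel_memory_region *smem = i915->mm.regions[INTEL_REGION_SMEM];
        struct drm_i915_gem_object *obj;
        struct dma_buf_attachment *attach;
        struct drm_gem_object *import;
        struct dma_buf *dmabuf;
        struct sg_table *st;
        long t;
        int err;

        force_different_devices = true;

        obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &smem, 1);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        dmabuf = i915_gem_prime_export(&obj->base, 0);          /* export */
        import = i915_gem_prime_import(&i915->drm, dmabuf);     /* import on the "other" device */
        /* import must be a fresh object, not &obj->base, when the devices differ */

        attach = dma_buf_attach(dmabuf, obj->base.dev->dev);    /* fake external importer */
        st = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);

        t = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ);
        err = t > 0 ? 0 : (t ? t : -ETIME);     /* >0: signalled, 0: timed out, <0: error */

        dma_buf_unmap_attachment(attach, st, DMA_BIDIRECTIONAL);
        dma_buf_detach(dmabuf, attach);
        dma_buf_put(dmabuf);
        i915_gem_object_put(to_intel_bo(import));
        i915_gem_object_put(obj);
        force_different_devices = false;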
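
The migrate selftests are written as ww transactions. for_i915_gem_ww() initialises an i915_gem_ww_ctx, runs the body, and re-runs it whenever a lock attempt returns -EDEADLK after backing off, which is why every error path inside the loop uses continue rather than goto; the final, non -EDEADLK result is left in err and the locks are released when the loop exits. A minimal sketch of the shape used by igt_create_migrate(), using only helpers that already appear in this series:

        struct i915_gem_ww_ctx ww;
        int err;

        for_i915_gem_ww(&ww, err, true) {       /* true: interruptible waits */
                err = i915_gem_object_lock(obj, &ww);
                if (err)
                        continue;               /* -EDEADLK backs off and retries the body */

                err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
                if (err)
                        continue;

                err = i915_gem_object_pin_pages(obj);
                if (err)
                        continue;

                /* ... inspect the object while it is locked and pinned ... */

                i915_gem_object_unpin_pages(obj);
                err = i915_gem_object_wait_migration(obj, true);
        }
        /* the ww context is finalised here; err holds the last non -EDEADLK value */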
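
igt_lmem_pages_migrate() also shows how an asynchronous GPU clear is consumed: intel_migrate_clear() hands back the request it emitted, and the test installs that request's fence as the exclusive fence on the object's reservation before dropping its reference, so the later i915_gem_object_wait() and CPU readback are ordered against the blitter. Approximately, with gt and ww taken from the surrounding test and 0xdeadbeaf/5 seconds being the test's own choices:

        struct i915_request *rq = NULL;
        int err;

        err = intel_migrate_clear(&gt->migrate, &ww, NULL,
                                  obj->mm.pages->sgl, obj->cache_level,
                                  i915_gem_object_is_lmem(obj),
                                  0xdeadbeaf, &rq);
        if (rq) {
                /* publish the async clear so later waiters see it */
                dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
                i915_request_put(rq);
        }
        if (err)
                return err;

        /* CPU access below must wait for the exclusive fence first */
        err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, 5 * HZ);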
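
The mman selftests no longer poke at struct i915_mmap_offset directly: they request an offset with __assign_mmap_offset() and map it through igt_mmap_offset(), and on platforms with LMEM only I915_MMAP_TYPE_FIXED is expected to work, which is the rule encoded by default_mapping() and the reworked can_mmap(). The resulting probe looks roughly like the fragment below (the vm_munmap() cleanup is assumed from the existing selftest unmap paths):

        enum i915_mmap_type type;
        unsigned long addr;
        u64 offset;
        int err;

        /* FIXED on discrete parts, legacy GTT mmap otherwise */
        type = HAS_LMEM(i915) ? I915_MMAP_TYPE_FIXED : I915_MMAP_TYPE_GTT;

        err = __assign_mmap_offset(obj, type, &offset, NULL);
        if (err)
                return err;

        addr = igt_mmap_offset(i915, offset, obj->base.size,
                               PROT_WRITE, MAP_SHARED);
        if (IS_ERR_VALUE(addr))
                return addr;

        /* ... read and write through addr ... */

        vm_munmap(addr, obj->base.size);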
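
Finally, with kernel_context() taking an optional i915_address_space, the selftests build contexts through a proto-context instead of registering a context and reassigning its ppgtt afterwards; the shared-VM case in igt_shared_ctx_exec() simply passes ctx_vm(parent). The construction boils down to:

        struct i915_gem_proto_context *pc;
        struct i915_gem_context *ctx;

        pc = proto_context_create(i915, 0);
        if (IS_ERR(pc))
                return ERR_CAST(pc);

        if (vm) {                               /* optionally pin the context to a given VM */
                if (pc->vm)
                        i915_vm_put(pc->vm);
                pc->vm = i915_vm_get(vm);
        }

        ctx = i915_gem_create_context(i915, pc);
        proto_context_close(pc);                /* the context now holds its own references */
        return ctx;                             /* may be an ERR_PTR */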