Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_client_blt.c | 355
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_client_blt.h | 21
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 2154
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 196
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_create.c | 188
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 58
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 11
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 705
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 160
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 12
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c | 107
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.c | 350
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h | 69
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 461
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.h | 39
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 255
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pages.c | 54
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_phys.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.c | 23
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.h | 1
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 30
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 965
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 49
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 117
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_wait.c | 23
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 8
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c | 114
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 127
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 190
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 243
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 164
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c | 597
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_context.c | 67
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_context.h | 4
42 files changed, 3829 insertions(+), 4119 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index daf9284ef1f5..f0435c6feb68 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -24,13 +24,11 @@ static void __do_clflush(struct drm_i915_gem_object *obj)
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
}
-static int clflush_work(struct dma_fence_work *base)
+static void clflush_work(struct dma_fence_work *base)
{
struct clflush *clflush = container_of(base, typeof(*clflush), base);
__do_clflush(clflush->obj);
-
- return 0;
}
static void clflush_release(struct dma_fence_work *base)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
deleted file mode 100644
index 44821d94544f..000000000000
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ /dev/null
@@ -1,355 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#include "i915_drv.h"
-#include "gt/intel_context.h"
-#include "gt/intel_engine_pm.h"
-#include "i915_gem_client_blt.h"
-#include "i915_gem_object_blt.h"
-
-struct i915_sleeve {
- struct i915_vma *vma;
- struct drm_i915_gem_object *obj;
- struct sg_table *pages;
- struct i915_page_sizes page_sizes;
-};
-
-static int vma_set_pages(struct i915_vma *vma)
-{
- struct i915_sleeve *sleeve = vma->private;
-
- vma->pages = sleeve->pages;
- vma->page_sizes = sleeve->page_sizes;
-
- return 0;
-}
-
-static void vma_clear_pages(struct i915_vma *vma)
-{
- GEM_BUG_ON(!vma->pages);
- vma->pages = NULL;
-}
-
-static void vma_bind(struct i915_address_space *vm,
- struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
-{
- vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags);
-}
-
-static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
-{
- vm->vma_ops.unbind_vma(vm, vma);
-}
-
-static const struct i915_vma_ops proxy_vma_ops = {
- .set_pages = vma_set_pages,
- .clear_pages = vma_clear_pages,
- .bind_vma = vma_bind,
- .unbind_vma = vma_unbind,
-};
-
-static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
- struct drm_i915_gem_object *obj,
- struct sg_table *pages,
- struct i915_page_sizes *page_sizes)
-{
- struct i915_sleeve *sleeve;
- struct i915_vma *vma;
- int err;
-
- sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL);
- if (!sleeve)
- return ERR_PTR(-ENOMEM);
-
- vma = i915_vma_instance(obj, vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_free;
- }
-
- vma->private = sleeve;
- vma->ops = &proxy_vma_ops;
-
- sleeve->vma = vma;
- sleeve->pages = pages;
- sleeve->page_sizes = *page_sizes;
-
- return sleeve;
-
-err_free:
- kfree(sleeve);
- return ERR_PTR(err);
-}
-
-static void destroy_sleeve(struct i915_sleeve *sleeve)
-{
- kfree(sleeve);
-}
-
-struct clear_pages_work {
- struct dma_fence dma;
- struct dma_fence_cb cb;
- struct i915_sw_fence wait;
- struct work_struct work;
- struct irq_work irq_work;
- struct i915_sleeve *sleeve;
- struct intel_context *ce;
- u32 value;
-};
-
-static const char *clear_pages_work_driver_name(struct dma_fence *fence)
-{
- return DRIVER_NAME;
-}
-
-static const char *clear_pages_work_timeline_name(struct dma_fence *fence)
-{
- return "clear";
-}
-
-static void clear_pages_work_release(struct dma_fence *fence)
-{
- struct clear_pages_work *w = container_of(fence, typeof(*w), dma);
-
- destroy_sleeve(w->sleeve);
-
- i915_sw_fence_fini(&w->wait);
-
- BUILD_BUG_ON(offsetof(typeof(*w), dma));
- dma_fence_free(&w->dma);
-}
-
-static const struct dma_fence_ops clear_pages_work_ops = {
- .get_driver_name = clear_pages_work_driver_name,
- .get_timeline_name = clear_pages_work_timeline_name,
- .release = clear_pages_work_release,
-};
-
-static void clear_pages_signal_irq_worker(struct irq_work *work)
-{
- struct clear_pages_work *w = container_of(work, typeof(*w), irq_work);
-
- dma_fence_signal(&w->dma);
- dma_fence_put(&w->dma);
-}
-
-static void clear_pages_dma_fence_cb(struct dma_fence *fence,
- struct dma_fence_cb *cb)
-{
- struct clear_pages_work *w = container_of(cb, typeof(*w), cb);
-
- if (fence->error)
- dma_fence_set_error(&w->dma, fence->error);
-
- /*
- * Push the signalling of the fence into yet another worker to avoid
- * the nightmare locking around the fence spinlock.
- */
- irq_work_queue(&w->irq_work);
-}
-
-static void clear_pages_worker(struct work_struct *work)
-{
- struct clear_pages_work *w = container_of(work, typeof(*w), work);
- struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
- struct i915_vma *vma = w->sleeve->vma;
- struct i915_gem_ww_ctx ww;
- struct i915_request *rq;
- struct i915_vma *batch;
- int err = w->dma.error;
-
- if (unlikely(err))
- goto out_signal;
-
- if (obj->cache_dirty) {
- if (i915_gem_object_has_struct_page(obj))
- drm_clflush_sg(w->sleeve->pages);
- obj->cache_dirty = false;
- }
- obj->read_domains = I915_GEM_GPU_DOMAINS;
- obj->write_domain = 0;
-
- i915_gem_ww_ctx_init(&ww, false);
- intel_engine_pm_get(w->ce->engine);
-retry:
- err = intel_context_pin_ww(w->ce, &ww);
- if (err)
- goto out_signal;
-
- batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_ctx;
- }
-
- rq = i915_request_create(w->ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_batch;
- }
-
- /* There's no way the fence has signalled */
- if (dma_fence_add_callback(&rq->fence, &w->cb,
- clear_pages_dma_fence_cb))
- GEM_BUG_ON(1);
-
- err = intel_emit_vma_mark_active(batch, rq);
- if (unlikely(err))
- goto out_request;
-
- /*
- * w->dma is already exported via (vma|obj)->resv we need only
- * keep track of the GPU activity within this vma/request, and
- * propagate the signal from the request to w->dma.
- */
- err = __i915_vma_move_to_active(vma, rq);
- if (err)
- goto out_request;
-
- if (rq->engine->emit_init_breadcrumb) {
- err = rq->engine->emit_init_breadcrumb(rq);
- if (unlikely(err))
- goto out_request;
- }
-
- err = rq->engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- 0);
-out_request:
- if (unlikely(err)) {
- i915_request_set_error_once(rq, err);
- err = 0;
- }
-
- i915_request_add(rq);
-out_batch:
- intel_emit_vma_release(w->ce, batch);
-out_ctx:
- intel_context_unpin(w->ce);
-out_signal:
- if (err == -EDEADLK) {
- err = i915_gem_ww_ctx_backoff(&ww);
- if (!err)
- goto retry;
- }
- i915_gem_ww_ctx_fini(&ww);
-
- i915_vma_unpin(w->sleeve->vma);
- intel_engine_pm_put(w->ce->engine);
-
- if (unlikely(err)) {
- dma_fence_set_error(&w->dma, err);
- dma_fence_signal(&w->dma);
- dma_fence_put(&w->dma);
- }
-}
-
-static int pin_wait_clear_pages_work(struct clear_pages_work *w,
- struct intel_context *ce)
-{
- struct i915_vma *vma = w->sleeve->vma;
- struct i915_gem_ww_ctx ww;
- int err;
-
- i915_gem_ww_ctx_init(&ww, false);
-retry:
- err = i915_gem_object_lock(vma->obj, &ww);
- if (err)
- goto out;
-
- err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out;
-
- err = i915_sw_fence_await_reservation(&w->wait,
- vma->obj->base.resv, NULL,
- true, 0, I915_FENCE_GFP);
- if (err)
- goto err_unpin_vma;
-
- dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
-
-err_unpin_vma:
- if (err)
- i915_vma_unpin(vma);
-out:
- if (err == -EDEADLK) {
- err = i915_gem_ww_ctx_backoff(&ww);
- if (!err)
- goto retry;
- }
- i915_gem_ww_ctx_fini(&ww);
- return err;
-}
-
-static int __i915_sw_fence_call
-clear_pages_work_notify(struct i915_sw_fence *fence,
- enum i915_sw_fence_notify state)
-{
- struct clear_pages_work *w = container_of(fence, typeof(*w), wait);
-
- switch (state) {
- case FENCE_COMPLETE:
- schedule_work(&w->work);
- break;
-
- case FENCE_FREE:
- dma_fence_put(&w->dma);
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static DEFINE_SPINLOCK(fence_lock);
-
-/* XXX: better name please */
-int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
- struct intel_context *ce,
- struct sg_table *pages,
- struct i915_page_sizes *page_sizes,
- u32 value)
-{
- struct clear_pages_work *work;
- struct i915_sleeve *sleeve;
- int err;
-
- sleeve = create_sleeve(ce->vm, obj, pages, page_sizes);
- if (IS_ERR(sleeve))
- return PTR_ERR(sleeve);
-
- work = kmalloc(sizeof(*work), GFP_KERNEL);
- if (!work) {
- destroy_sleeve(sleeve);
- return -ENOMEM;
- }
-
- work->value = value;
- work->sleeve = sleeve;
- work->ce = ce;
-
- INIT_WORK(&work->work, clear_pages_worker);
-
- init_irq_work(&work->irq_work, clear_pages_signal_irq_worker);
-
- dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
- i915_sw_fence_init(&work->wait, clear_pages_work_notify);
-
- err = pin_wait_clear_pages_work(work, ce);
- if (err < 0)
- dma_fence_set_error(&work->dma, err);
-
- dma_fence_get(&work->dma);
- i915_sw_fence_commit(&work->wait);
-
- return err;
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/i915_gem_client_blt.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
deleted file mode 100644
index 3dbd28c22ff5..000000000000
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-#ifndef __I915_GEM_CLIENT_BLT_H__
-#define __I915_GEM_CLIENT_BLT_H__
-
-#include <linux/types.h>
-
-struct drm_i915_gem_object;
-struct i915_page_sizes;
-struct intel_context;
-struct sg_table;
-
-int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
- struct intel_context *ce,
- struct sg_table *pages,
- struct i915_page_sizes *page_sizes,
- u32 value);
-
-#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7720b8c22c81..cff72679ad7c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -67,35 +67,32 @@
#include <linux/log2.h>
#include <linux/nospec.h>
+#include <drm/drm_syncobj.h>
+
#include "gt/gen6_ppgtt.h"
#include "gt/intel_context.h"
#include "gt/intel_context_param.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_user.h"
-#include "gt/intel_execlists_submission.h" /* virtual_engine */
#include "gt/intel_gpu_commands.h"
#include "gt/intel_ring.h"
#include "i915_gem_context.h"
-#include "i915_globals.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
-static struct i915_global_gem_context {
- struct i915_global base;
- struct kmem_cache *slab_luts;
-} global;
+static struct kmem_cache *slab_luts;
struct i915_lut_handle *i915_lut_handle_alloc(void)
{
- return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
+ return kmem_cache_alloc(slab_luts, GFP_KERNEL);
}
void i915_lut_handle_free(struct i915_lut_handle *lut)
{
- return kmem_cache_free(global.slab_luts, lut);
+ return kmem_cache_free(slab_luts, lut);
}
static void lut_close(struct i915_gem_context *ctx)
@@ -167,6 +164,577 @@ lookup_user_engine(struct i915_gem_context *ctx,
return i915_gem_context_get_engine(ctx, idx);
}
+static int validate_priority(struct drm_i915_private *i915,
+ const struct drm_i915_gem_context_param *args)
+{
+ s64 priority = args->value;
+
+ if (args->size)
+ return -EINVAL;
+
+ if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+ return -ENODEV;
+
+ if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+ priority < I915_CONTEXT_MIN_USER_PRIORITY)
+ return -EINVAL;
+
+ if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+ !capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return 0;
+}
+
+static void proto_context_close(struct i915_gem_proto_context *pc)
+{
+ int i;
+
+ if (pc->vm)
+ i915_vm_put(pc->vm);
+ if (pc->user_engines) {
+ for (i = 0; i < pc->num_user_engines; i++)
+ kfree(pc->user_engines[i].siblings);
+ kfree(pc->user_engines);
+ }
+ kfree(pc);
+}
+
+static int proto_context_set_persistence(struct drm_i915_private *i915,
+ struct i915_gem_proto_context *pc,
+ bool persist)
+{
+ if (persist) {
+ /*
+ * Only contexts that are short-lived [that will expire or be
+ * reset] are allowed to survive past termination. We require
+ * hangcheck to ensure that the persistent requests are healthy.
+ */
+ if (!i915->params.enable_hangcheck)
+ return -EINVAL;
+
+ pc->user_flags |= BIT(UCONTEXT_PERSISTENCE);
+ } else {
+ /* To cancel a context we use "preempt-to-idle" */
+ if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ return -ENODEV;
+
+ /*
+ * If the cancel fails, we then need to reset, cleanly!
+ *
+ * If the per-engine reset fails, all hope is lost! We resort
+ * to a full GPU reset in that unlikely case, but realistically
+ * if the engine could not reset, the full reset does not fare
+ * much better. The damage has been done.
+ *
+ * However, if we cannot reset an engine by itself, we cannot
+ * cleanup a hanging persistent context without causing
+ * collateral damage, and we should not pretend we can by
+ * exposing the interface.
+ */
+ if (!intel_has_reset_engine(&i915->gt))
+ return -ENODEV;
+
+ pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE);
+ }
+
+ return 0;
+}
+
+static struct i915_gem_proto_context *
+proto_context_create(struct drm_i915_private *i915, unsigned int flags)
+{
+ struct i915_gem_proto_context *pc, *err;
+
+ pc = kzalloc(sizeof(*pc), GFP_KERNEL);
+ if (!pc)
+ return ERR_PTR(-ENOMEM);
+
+ pc->num_user_engines = -1;
+ pc->user_engines = NULL;
+ pc->user_flags = BIT(UCONTEXT_BANNABLE) |
+ BIT(UCONTEXT_RECOVERABLE);
+ if (i915->params.enable_hangcheck)
+ pc->user_flags |= BIT(UCONTEXT_PERSISTENCE);
+ pc->sched.priority = I915_PRIORITY_NORMAL;
+
+ if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+ if (!HAS_EXECLISTS(i915)) {
+ err = ERR_PTR(-EINVAL);
+ goto proto_close;
+ }
+ pc->single_timeline = true;
+ }
+
+ return pc;
+
+proto_close:
+ proto_context_close(pc);
+ return err;
+}
+
+static int proto_context_register_locked(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ u32 *id)
+{
+ int ret;
+ void *old;
+
+ lockdep_assert_held(&fpriv->proto_context_lock);
+
+ ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL);
+ if (xa_is_err(old)) {
+ xa_erase(&fpriv->context_xa, *id);
+ return xa_err(old);
+ }
+ WARN_ON(old);
+
+ return 0;
+}
+
+static int proto_context_register(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ u32 *id)
+{
+ int ret;
+
+ mutex_lock(&fpriv->proto_context_lock);
+ ret = proto_context_register_locked(fpriv, pc, id);
+ mutex_unlock(&fpriv->proto_context_lock);
+
+ return ret;
+}
+
+static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ const struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = fpriv->dev_priv;
+ struct i915_address_space *vm;
+
+ if (args->size)
+ return -EINVAL;
+
+ if (!HAS_FULL_PPGTT(i915))
+ return -ENODEV;
+
+ if (upper_32_bits(args->value))
+ return -ENOENT;
+
+ vm = i915_gem_vm_lookup(fpriv, args->value);
+ if (!vm)
+ return -ENOENT;
+
+ if (pc->vm)
+ i915_vm_put(pc->vm);
+ pc->vm = vm;
+
+ return 0;
+}
+
+struct set_proto_ctx_engines {
+ struct drm_i915_private *i915;
+ unsigned num_engines;
+ struct i915_gem_proto_engine *engines;
+};
+
+static int
+set_proto_ctx_engines_balance(struct i915_user_extension __user *base,
+ void *data)
+{
+ struct i915_context_engines_load_balance __user *ext =
+ container_of_user(base, typeof(*ext), base);
+ const struct set_proto_ctx_engines *set = data;
+ struct drm_i915_private *i915 = set->i915;
+ struct intel_engine_cs **siblings;
+ u16 num_siblings, idx;
+ unsigned int n;
+ int err;
+
+ if (!HAS_EXECLISTS(i915))
+ return -ENODEV;
+
+ if (get_user(idx, &ext->engine_index))
+ return -EFAULT;
+
+ if (idx >= set->num_engines) {
+ drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
+ idx, set->num_engines);
+ return -EINVAL;
+ }
+
+ idx = array_index_nospec(idx, set->num_engines);
+ if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_INVALID) {
+ drm_dbg(&i915->drm,
+ "Invalid placement[%d], already occupied\n", idx);
+ return -EEXIST;
+ }
+
+ if (get_user(num_siblings, &ext->num_siblings))
+ return -EFAULT;
+
+ err = check_user_mbz(&ext->flags);
+ if (err)
+ return err;
+
+ err = check_user_mbz(&ext->mbz64);
+ if (err)
+ return err;
+
+ if (num_siblings == 0)
+ return 0;
+
+ siblings = kmalloc_array(num_siblings, sizeof(*siblings), GFP_KERNEL);
+ if (!siblings)
+ return -ENOMEM;
+
+ for (n = 0; n < num_siblings; n++) {
+ struct i915_engine_class_instance ci;
+
+ if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+ err = -EFAULT;
+ goto err_siblings;
+ }
+
+ siblings[n] = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!siblings[n]) {
+ drm_dbg(&i915->drm,
+ "Invalid sibling[%d]: { class:%d, inst:%d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ err = -EINVAL;
+ goto err_siblings;
+ }
+ }
+
+ if (num_siblings == 1) {
+ set->engines[idx].type = I915_GEM_ENGINE_TYPE_PHYSICAL;
+ set->engines[idx].engine = siblings[0];
+ kfree(siblings);
+ } else {
+ set->engines[idx].type = I915_GEM_ENGINE_TYPE_BALANCED;
+ set->engines[idx].num_siblings = num_siblings;
+ set->engines[idx].siblings = siblings;
+ }
+
+ return 0;
+
+err_siblings:
+ kfree(siblings);
+
+ return err;
+}
+
+static int
+set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
+{
+ struct i915_context_engines_bond __user *ext =
+ container_of_user(base, typeof(*ext), base);
+ const struct set_proto_ctx_engines *set = data;
+ struct drm_i915_private *i915 = set->i915;
+ struct i915_engine_class_instance ci;
+ struct intel_engine_cs *master;
+ u16 idx, num_bonds;
+ int err, n;
+
+ if (get_user(idx, &ext->virtual_index))
+ return -EFAULT;
+
+ if (idx >= set->num_engines) {
+ drm_dbg(&i915->drm,
+ "Invalid index for virtual engine: %d >= %d\n",
+ idx, set->num_engines);
+ return -EINVAL;
+ }
+
+ idx = array_index_nospec(idx, set->num_engines);
+ if (set->engines[idx].type == I915_GEM_ENGINE_TYPE_INVALID) {
+ drm_dbg(&i915->drm, "Invalid engine at %d\n", idx);
+ return -EINVAL;
+ }
+
+ if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_PHYSICAL) {
+ drm_dbg(&i915->drm,
+ "Bonding with virtual engines not allowed\n");
+ return -EINVAL;
+ }
+
+ err = check_user_mbz(&ext->flags);
+ if (err)
+ return err;
+
+ for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+ err = check_user_mbz(&ext->mbz64[n]);
+ if (err)
+ return err;
+ }
+
+ if (copy_from_user(&ci, &ext->master, sizeof(ci)))
+ return -EFAULT;
+
+ master = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!master) {
+ drm_dbg(&i915->drm,
+ "Unrecognised master engine: { class:%u, instance:%u }\n",
+ ci.engine_class, ci.engine_instance);
+ return -EINVAL;
+ }
+
+ if (intel_engine_uses_guc(master)) {
+ DRM_DEBUG("bonding extension not supported with GuC submission");
+ return -ENODEV;
+ }
+
+ if (get_user(num_bonds, &ext->num_bonds))
+ return -EFAULT;
+
+ for (n = 0; n < num_bonds; n++) {
+ struct intel_engine_cs *bond;
+
+ if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
+ return -EFAULT;
+
+ bond = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!bond) {
+ drm_dbg(&i915->drm,
+ "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = {
+ [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance,
+ [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond,
+};
+
+static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ const struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = fpriv->dev_priv;
+ struct set_proto_ctx_engines set = { .i915 = i915 };
+ struct i915_context_param_engines __user *user =
+ u64_to_user_ptr(args->value);
+ unsigned int n;
+ u64 extensions;
+ int err;
+
+ if (pc->num_user_engines >= 0) {
+ drm_dbg(&i915->drm, "Cannot set engines twice");
+ return -EINVAL;
+ }
+
+ if (args->size < sizeof(*user) ||
+ !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) {
+ drm_dbg(&i915->drm, "Invalid size for engine array: %d\n",
+ args->size);
+ return -EINVAL;
+ }
+
+ set.num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
+ /* RING_MASK has no shift so we can use it directly here */
+ if (set.num_engines > I915_EXEC_RING_MASK + 1)
+ return -EINVAL;
+
+ set.engines = kmalloc_array(set.num_engines, sizeof(*set.engines), GFP_KERNEL);
+ if (!set.engines)
+ return -ENOMEM;
+
+ for (n = 0; n < set.num_engines; n++) {
+ struct i915_engine_class_instance ci;
+ struct intel_engine_cs *engine;
+
+ if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
+ kfree(set.engines);
+ return -EFAULT;
+ }
+
+ memset(&set.engines[n], 0, sizeof(set.engines[n]));
+
+ if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
+ ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE)
+ continue;
+
+ engine = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!engine) {
+ drm_dbg(&i915->drm,
+ "Invalid engine[%d]: { class:%d, instance:%d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ kfree(set.engines);
+ return -ENOENT;
+ }
+
+ set.engines[n].type = I915_GEM_ENGINE_TYPE_PHYSICAL;
+ set.engines[n].engine = engine;
+ }
+
+ err = -EFAULT;
+ if (!get_user(extensions, &user->extensions))
+ err = i915_user_extensions(u64_to_user_ptr(extensions),
+ set_proto_ctx_engines_extensions,
+ ARRAY_SIZE(set_proto_ctx_engines_extensions),
+ &set);
+ if (err) {
+ kfree(set.engines);
+ return err;
+ }
+
+ pc->num_user_engines = set.num_engines;
+ pc->user_engines = set.engines;
+
+ return 0;
+}
+
+static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = fpriv->dev_priv;
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_sseu *sseu;
+ int ret;
+
+ if (args->size < sizeof(user_sseu))
+ return -EINVAL;
+
+ if (GRAPHICS_VER(i915) != 11)
+ return -ENODEV;
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+ if (user_sseu.rsvd)
+ return -EINVAL;
+
+ if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+ return -EINVAL;
+
+ if (!!(user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) != (pc->num_user_engines >= 0))
+ return -EINVAL;
+
+ if (pc->num_user_engines >= 0) {
+ int idx = user_sseu.engine.engine_instance;
+ struct i915_gem_proto_engine *pe;
+
+ if (idx >= pc->num_user_engines)
+ return -EINVAL;
+
+ pe = &pc->user_engines[idx];
+
+ /* Only render engine supports RPCS configuration. */
+ if (pe->engine->class != RENDER_CLASS)
+ return -EINVAL;
+
+ sseu = &pe->sseu;
+ } else {
+ /* Only render engine supports RPCS configuration. */
+ if (user_sseu.engine.engine_class != I915_ENGINE_CLASS_RENDER)
+ return -EINVAL;
+
+ /* There is only one render engine */
+ if (user_sseu.engine.engine_instance != 0)
+ return -EINVAL;
+
+ sseu = &pc->legacy_rcs_sseu;
+ }
+
+ ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu);
+ if (ret)
+ return ret;
+
+ args->size = sizeof(user_sseu);
+
+ return 0;
+}
+
+static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ struct drm_i915_gem_context_param *args)
+{
+ int ret = 0;
+
+ switch (args->param) {
+ case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (args->value)
+ pc->user_flags |= BIT(UCONTEXT_NO_ERROR_CAPTURE);
+ else
+ pc->user_flags &= ~BIT(UCONTEXT_NO_ERROR_CAPTURE);
+ break;
+
+ case I915_CONTEXT_PARAM_BANNABLE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!capable(CAP_SYS_ADMIN) && !args->value)
+ ret = -EPERM;
+ else if (args->value)
+ pc->user_flags |= BIT(UCONTEXT_BANNABLE);
+ else
+ pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
+ break;
+
+ case I915_CONTEXT_PARAM_RECOVERABLE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (args->value)
+ pc->user_flags |= BIT(UCONTEXT_RECOVERABLE);
+ else
+ pc->user_flags &= ~BIT(UCONTEXT_RECOVERABLE);
+ break;
+
+ case I915_CONTEXT_PARAM_PRIORITY:
+ ret = validate_priority(fpriv->dev_priv, args);
+ if (!ret)
+ pc->sched.priority = args->value;
+ break;
+
+ case I915_CONTEXT_PARAM_SSEU:
+ ret = set_proto_ctx_sseu(fpriv, pc, args);
+ break;
+
+ case I915_CONTEXT_PARAM_VM:
+ ret = set_proto_ctx_vm(fpriv, pc, args);
+ break;
+
+ case I915_CONTEXT_PARAM_ENGINES:
+ ret = set_proto_ctx_engines(fpriv, pc, args);
+ break;
+
+ case I915_CONTEXT_PARAM_PERSISTENCE:
+ if (args->size)
+ ret = -EINVAL;
+ ret = proto_context_set_persistence(fpriv->dev_priv, pc,
+ args->value);
+ break;
+
+ case I915_CONTEXT_PARAM_NO_ZEROMAP:
+ case I915_CONTEXT_PARAM_BAN_PERIOD:
+ case I915_CONTEXT_PARAM_RINGSIZE:
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
static struct i915_address_space *
context_get_vm_rcu(struct i915_gem_context *ctx)
{
@@ -205,14 +773,16 @@ context_get_vm_rcu(struct i915_gem_context *ctx)
} while (1);
}
-static void intel_context_set_gem(struct intel_context *ce,
- struct i915_gem_context *ctx)
+static int intel_context_set_gem(struct intel_context *ce,
+ struct i915_gem_context *ctx,
+ struct intel_sseu sseu)
{
+ int ret = 0;
+
GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
RCU_INIT_POINTER(ce->gem_context, ctx);
- if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
- ce->ring = __intel_context_ring_size(SZ_16K);
+ ce->ring_size = SZ_16K;
if (rcu_access_pointer(ctx->vm)) {
struct i915_address_space *vm;
@@ -225,15 +795,23 @@ static void intel_context_set_gem(struct intel_context *ce,
ce->vm = vm;
}
- GEM_BUG_ON(ce->timeline);
- if (ctx->timeline)
- ce->timeline = intel_timeline_get(ctx->timeline);
-
if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
- intel_engine_has_timeslices(ce->engine))
+ intel_engine_has_timeslices(ce->engine) &&
+ intel_engine_has_semaphores(ce->engine))
__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
- intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
+ if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
+ ctx->i915->params.request_timeout_ms) {
+ unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
+
+ intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
+ }
+
+ /* A valid SSEU has no zero fields */
+ if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
+ ret = intel_context_reconfigure_sseu(ce, sseu);
+
+ return ret;
}
static void __free_engines(struct i915_gem_engines *e, unsigned int count)
@@ -301,11 +879,12 @@ static struct i915_gem_engines *alloc_engines(unsigned int count)
return e;
}
-static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
+static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
+ struct intel_sseu rcs_sseu)
{
const struct intel_gt *gt = &ctx->i915->gt;
struct intel_engine_cs *engine;
- struct i915_gem_engines *e;
+ struct i915_gem_engines *e, *err;
enum intel_engine_id id;
e = alloc_engines(I915_NUM_ENGINES);
@@ -314,6 +893,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
for_each_engine(engine, gt, id) {
struct intel_context *ce;
+ struct intel_sseu sseu = {};
+ int ret;
if (engine->legacy_idx == INVALID_ENGINE)
continue;
@@ -323,18 +904,79 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
- __free_engines(e, e->num_engines + 1);
- return ERR_CAST(ce);
+ err = ERR_CAST(ce);
+ goto free_engines;
}
- intel_context_set_gem(ce, ctx);
-
e->engines[engine->legacy_idx] = ce;
- e->num_engines = max(e->num_engines, engine->legacy_idx);
+ e->num_engines = max(e->num_engines, engine->legacy_idx + 1);
+
+ if (engine->class == RENDER_CLASS)
+ sseu = rcs_sseu;
+
+ ret = intel_context_set_gem(ce, ctx, sseu);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
+
+ }
+
+ return e;
+
+free_engines:
+ free_engines(e);
+ return err;
+}
+
+static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
+ unsigned int num_engines,
+ struct i915_gem_proto_engine *pe)
+{
+ struct i915_gem_engines *e, *err;
+ unsigned int n;
+
+ e = alloc_engines(num_engines);
+ for (n = 0; n < num_engines; n++) {
+ struct intel_context *ce;
+ int ret;
+
+ switch (pe[n].type) {
+ case I915_GEM_ENGINE_TYPE_PHYSICAL:
+ ce = intel_context_create(pe[n].engine);
+ break;
+
+ case I915_GEM_ENGINE_TYPE_BALANCED:
+ ce = intel_engine_create_virtual(pe[n].siblings,
+ pe[n].num_siblings);
+ break;
+
+ case I915_GEM_ENGINE_TYPE_INVALID:
+ default:
+ GEM_WARN_ON(pe[n].type != I915_GEM_ENGINE_TYPE_INVALID);
+ continue;
+ }
+
+ if (IS_ERR(ce)) {
+ err = ERR_CAST(ce);
+ goto free_engines;
+ }
+
+ e->engines[n] = ce;
+
+ ret = intel_context_set_gem(ce, ctx, pe->sseu);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
}
- e->num_engines++;
+ e->num_engines = num_engines;
return e;
+
+free_engines:
+ free_engines(e);
+ return err;
}
void i915_gem_context_release(struct kref *ref)
@@ -347,9 +989,6 @@ void i915_gem_context_release(struct kref *ref)
mutex_destroy(&ctx->engines_mutex);
mutex_destroy(&ctx->lut_mutex);
- if (ctx->timeline)
- intel_timeline_put(ctx->timeline);
-
put_pid(ctx->pid);
mutex_destroy(&ctx->mutex);
@@ -441,7 +1080,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
- if (ban && intel_context_set_banned(ce))
+ if (ban && intel_context_ban(ce, NULL))
continue;
/*
@@ -566,6 +1205,9 @@ static void context_close(struct i915_gem_context *ctx)
if (vm)
i915_vm_close(vm);
+ if (ctx->syncobj)
+ drm_syncobj_put(ctx->syncobj);
+
ctx->file_priv = ERR_PTR(-EBADF);
/*
@@ -635,57 +1277,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
return 0;
}
-static struct i915_gem_context *
-__create_context(struct drm_i915_private *i915)
-{
- struct i915_gem_context *ctx;
- struct i915_gem_engines *e;
- int err;
- int i;
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&ctx->ref);
- ctx->i915 = i915;
- ctx->sched.priority = I915_PRIORITY_NORMAL;
- mutex_init(&ctx->mutex);
- INIT_LIST_HEAD(&ctx->link);
-
- spin_lock_init(&ctx->stale.lock);
- INIT_LIST_HEAD(&ctx->stale.engines);
-
- mutex_init(&ctx->engines_mutex);
- e = default_engines(ctx);
- if (IS_ERR(e)) {
- err = PTR_ERR(e);
- goto err_free;
- }
- RCU_INIT_POINTER(ctx->engines, e);
-
- INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
- mutex_init(&ctx->lut_mutex);
-
- /* NB: Mark all slices as needing a remap so that when the context first
- * loads it will restore whatever remap state already exists. If there
- * is no remap info, it will be a NOP. */
- ctx->remap_slice = ALL_L3_SLICES(i915);
-
- i915_gem_context_set_bannable(ctx);
- i915_gem_context_set_recoverable(ctx);
- __context_set_persistence(ctx, true /* cgroup hook? */);
-
- for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
- ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
-
- return ctx;
-
-err_free:
- kfree(ctx);
- return ERR_PTR(err);
-}
-
static inline struct i915_gem_engines *
__context_engines_await(const struct i915_gem_context *ctx,
bool *user_engines)
@@ -714,168 +1305,112 @@ __context_engines_await(const struct i915_gem_context *ctx,
return engines;
}
-static int
+static void
context_apply_all(struct i915_gem_context *ctx,
- int (*fn)(struct intel_context *ce, void *data),
+ void (*fn)(struct intel_context *ce, void *data),
void *data)
{
struct i915_gem_engines_iter it;
struct i915_gem_engines *e;
struct intel_context *ce;
- int err = 0;
e = __context_engines_await(ctx, NULL);
- for_each_gem_engine(ce, e, it) {
- err = fn(ce, data);
- if (err)
- break;
- }
+ for_each_gem_engine(ce, e, it)
+ fn(ce, data);
i915_sw_fence_complete(&e->fence);
-
- return err;
-}
-
-static int __apply_ppgtt(struct intel_context *ce, void *vm)
-{
- i915_vm_put(ce->vm);
- ce->vm = i915_vm_get(vm);
- return 0;
-}
-
-static struct i915_address_space *
-__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
-{
- struct i915_address_space *old;
-
- old = rcu_replace_pointer(ctx->vm,
- i915_vm_open(vm),
- lockdep_is_held(&ctx->mutex));
- GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
-
- context_apply_all(ctx, __apply_ppgtt, vm);
-
- return old;
-}
-
-static void __assign_ppgtt(struct i915_gem_context *ctx,
- struct i915_address_space *vm)
-{
- if (vm == rcu_access_pointer(ctx->vm))
- return;
-
- vm = __set_ppgtt(ctx, vm);
- if (vm)
- i915_vm_close(vm);
-}
-
-static void __set_timeline(struct intel_timeline **dst,
- struct intel_timeline *src)
-{
- struct intel_timeline *old = *dst;
-
- *dst = src ? intel_timeline_get(src) : NULL;
-
- if (old)
- intel_timeline_put(old);
-}
-
-static int __apply_timeline(struct intel_context *ce, void *timeline)
-{
- __set_timeline(&ce->timeline, timeline);
- return 0;
-}
-
-static void __assign_timeline(struct i915_gem_context *ctx,
- struct intel_timeline *timeline)
-{
- __set_timeline(&ctx->timeline, timeline);
- context_apply_all(ctx, __apply_timeline, timeline);
-}
-
-static int __apply_watchdog(struct intel_context *ce, void *timeout_us)
-{
- return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us);
-}
-
-static int
-__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us)
-{
- int ret;
-
- ret = context_apply_all(ctx, __apply_watchdog,
- (void *)(uintptr_t)timeout_us);
- if (!ret)
- ctx->watchdog.timeout_us = timeout_us;
-
- return ret;
-}
-
-static void __set_default_fence_expiry(struct i915_gem_context *ctx)
-{
- struct drm_i915_private *i915 = ctx->i915;
- int ret;
-
- if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) ||
- !i915->params.request_timeout_ms)
- return;
-
- /* Default expiry for user fences. */
- ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000);
- if (ret)
- drm_notice(&i915->drm,
- "Failed to configure default fence expiry! (%d)",
- ret);
}
static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
+i915_gem_create_context(struct drm_i915_private *i915,
+ const struct i915_gem_proto_context *pc)
{
struct i915_gem_context *ctx;
+ struct i915_address_space *vm = NULL;
+ struct i915_gem_engines *e;
+ int err;
+ int i;
- if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
- !HAS_EXECLISTS(i915))
- return ERR_PTR(-EINVAL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
- ctx = __create_context(i915);
- if (IS_ERR(ctx))
- return ctx;
+ kref_init(&ctx->ref);
+ ctx->i915 = i915;
+ ctx->sched = pc->sched;
+ mutex_init(&ctx->mutex);
+ INIT_LIST_HEAD(&ctx->link);
- if (HAS_FULL_PPGTT(i915)) {
+ spin_lock_init(&ctx->stale.lock);
+ INIT_LIST_HEAD(&ctx->stale.engines);
+
+ if (pc->vm) {
+ vm = i915_vm_get(pc->vm);
+ } else if (HAS_FULL_PPGTT(i915)) {
struct i915_ppgtt *ppgtt;
ppgtt = i915_ppgtt_create(&i915->gt);
if (IS_ERR(ppgtt)) {
drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
- context_close(ctx);
- return ERR_CAST(ppgtt);
+ err = PTR_ERR(ppgtt);
+ goto err_ctx;
}
+ vm = &ppgtt->vm;
+ }
+ if (vm) {
+ RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm));
- mutex_lock(&ctx->mutex);
- __assign_ppgtt(ctx, &ppgtt->vm);
- mutex_unlock(&ctx->mutex);
+ /* i915_vm_open() takes a reference */
+ i915_vm_put(vm);
+ }
- i915_vm_put(&ppgtt->vm);
+ mutex_init(&ctx->engines_mutex);
+ if (pc->num_user_engines >= 0) {
+ i915_gem_context_set_user_engines(ctx);
+ e = user_engines(ctx, pc->num_user_engines, pc->user_engines);
+ } else {
+ i915_gem_context_clear_user_engines(ctx);
+ e = default_engines(ctx, pc->legacy_rcs_sseu);
+ }
+ if (IS_ERR(e)) {
+ err = PTR_ERR(e);
+ goto err_vm;
}
+ RCU_INIT_POINTER(ctx->engines, e);
- if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
- struct intel_timeline *timeline;
+ INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ mutex_init(&ctx->lut_mutex);
- timeline = intel_timeline_create(&i915->gt);
- if (IS_ERR(timeline)) {
- context_close(ctx);
- return ERR_CAST(timeline);
- }
+ /* NB: Mark all slices as needing a remap so that when the context first
+ * loads it will restore whatever remap state already exists. If there
+ * is no remap info, it will be a NOP. */
+ ctx->remap_slice = ALL_L3_SLICES(i915);
- __assign_timeline(ctx, timeline);
- intel_timeline_put(timeline);
- }
+ ctx->user_flags = pc->user_flags;
- __set_default_fence_expiry(ctx);
+ for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
+ ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+
+ if (pc->single_timeline) {
+ err = drm_syncobj_create(&ctx->syncobj,
+ DRM_SYNCOBJ_CREATE_SIGNALED,
+ NULL);
+ if (err)
+ goto err_engines;
+ }
trace_i915_context_create(ctx);
return ctx;
+
+err_engines:
+ free_engines(e);
+err_vm:
+ if (ctx->vm)
+ i915_vm_close(ctx->vm);
+err_ctx:
+ kfree(ctx);
+ return ERR_PTR(err);
}
static void init_contexts(struct i915_gem_contexts *gc)
@@ -889,83 +1424,83 @@ void i915_gem_init__contexts(struct drm_i915_private *i915)
init_contexts(&i915->gem.contexts);
}
-static int gem_context_register(struct i915_gem_context *ctx,
- struct drm_i915_file_private *fpriv,
- u32 *id)
+static void gem_context_register(struct i915_gem_context *ctx,
+ struct drm_i915_file_private *fpriv,
+ u32 id)
{
struct drm_i915_private *i915 = ctx->i915;
- struct i915_address_space *vm;
- int ret;
+ void *old;
ctx->file_priv = fpriv;
- mutex_lock(&ctx->mutex);
- vm = i915_gem_context_vm(ctx);
- if (vm)
- WRITE_ONCE(vm->file, fpriv); /* XXX */
- mutex_unlock(&ctx->mutex);
-
ctx->pid = get_task_pid(current, PIDTYPE_PID);
snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
current->comm, pid_nr(ctx->pid));
/* And finally expose ourselves to userspace via the idr */
- ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
- if (ret)
- goto err_pid;
+ old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL);
+ WARN_ON(old);
spin_lock(&i915->gem.contexts.lock);
list_add_tail(&ctx->link, &i915->gem.contexts.list);
spin_unlock(&i915->gem.contexts.lock);
-
- return 0;
-
-err_pid:
- put_pid(fetch_and_zero(&ctx->pid));
- return ret;
}
int i915_gem_context_open(struct drm_i915_private *i915,
struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct i915_gem_proto_context *pc;
struct i915_gem_context *ctx;
int err;
- u32 id;
- xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC);
+ mutex_init(&file_priv->proto_context_lock);
+ xa_init_flags(&file_priv->proto_context_xa, XA_FLAGS_ALLOC);
+
+ /* 0 reserved for the default context */
+ xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC1);
/* 0 reserved for invalid/unassigned ppgtt */
xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1);
- ctx = i915_gem_create_context(i915, 0);
+ pc = proto_context_create(i915, 0);
+ if (IS_ERR(pc)) {
+ err = PTR_ERR(pc);
+ goto err;
+ }
+
+ ctx = i915_gem_create_context(i915, pc);
+ proto_context_close(pc);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto err;
}
- err = gem_context_register(ctx, file_priv, &id);
- if (err < 0)
- goto err_ctx;
+ gem_context_register(ctx, file_priv, 0);
- GEM_BUG_ON(id);
return 0;
-err_ctx:
- context_close(ctx);
err:
xa_destroy(&file_priv->vm_xa);
xa_destroy(&file_priv->context_xa);
+ xa_destroy(&file_priv->proto_context_xa);
+ mutex_destroy(&file_priv->proto_context_lock);
return err;
}
void i915_gem_context_close(struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct i915_gem_proto_context *pc;
struct i915_address_space *vm;
struct i915_gem_context *ctx;
unsigned long idx;
+ xa_for_each(&file_priv->proto_context_xa, idx, pc)
+ proto_context_close(pc);
+ xa_destroy(&file_priv->proto_context_xa);
+ mutex_destroy(&file_priv->proto_context_lock);
+
xa_for_each(&file_priv->context_xa, idx, ctx)
context_close(ctx);
xa_destroy(&file_priv->context_xa);
@@ -995,8 +1530,6 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
- ppgtt->vm.file = file_priv;
-
if (args->extensions) {
err = i915_user_extensions(u64_to_user_ptr(args->extensions),
NULL, 0,
@@ -1040,120 +1573,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
return 0;
}
-struct context_barrier_task {
- struct i915_active base;
- void (*task)(void *data);
- void *data;
-};
-
-static void cb_retire(struct i915_active *base)
-{
- struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
-
- if (cb->task)
- cb->task(cb->data);
-
- i915_active_fini(&cb->base);
- kfree(cb);
-}
-
-I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
-static int context_barrier_task(struct i915_gem_context *ctx,
- intel_engine_mask_t engines,
- bool (*skip)(struct intel_context *ce, void *data),
- int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),
- int (*emit)(struct i915_request *rq, void *data),
- void (*task)(void *data),
- void *data)
-{
- struct context_barrier_task *cb;
- struct i915_gem_engines_iter it;
- struct i915_gem_engines *e;
- struct i915_gem_ww_ctx ww;
- struct intel_context *ce;
- int err = 0;
-
- GEM_BUG_ON(!task);
-
- cb = kmalloc(sizeof(*cb), GFP_KERNEL);
- if (!cb)
- return -ENOMEM;
-
- i915_active_init(&cb->base, NULL, cb_retire, 0);
- err = i915_active_acquire(&cb->base);
- if (err) {
- kfree(cb);
- return err;
- }
-
- e = __context_engines_await(ctx, NULL);
- if (!e) {
- i915_active_release(&cb->base);
- return -ENOENT;
- }
-
- for_each_gem_engine(ce, e, it) {
- struct i915_request *rq;
-
- if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
- ce->engine->mask)) {
- err = -ENXIO;
- break;
- }
-
- if (!(ce->engine->mask & engines))
- continue;
-
- if (skip && skip(ce, data))
- continue;
-
- i915_gem_ww_ctx_init(&ww, true);
-retry:
- err = intel_context_pin_ww(ce, &ww);
- if (err)
- goto err;
-
- if (pin)
- err = pin(ce, &ww, data);
- if (err)
- goto err_unpin;
-
- rq = i915_request_create(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_unpin;
- }
-
- err = 0;
- if (emit)
- err = emit(rq, data);
- if (err == 0)
- err = i915_active_add_request(&cb->base, rq);
-
- i915_request_add(rq);
-err_unpin:
- intel_context_unpin(ce);
-err:
- if (err == -EDEADLK) {
- err = i915_gem_ww_ctx_backoff(&ww);
- if (!err)
- goto retry;
- }
- i915_gem_ww_ctx_fini(&ww);
-
- if (err)
- break;
- }
- i915_sw_fence_complete(&e->fence);
-
- cb->task = err ? NULL : task; /* caller needs to unwind instead */
- cb->data = data;
-
- i915_active_release(&cb->base);
-
- return err;
-}
-
static int get_ppgtt(struct drm_i915_file_private *file_priv,
struct i915_gem_context *ctx,
struct drm_i915_gem_context_param *args)
@@ -1186,211 +1605,6 @@ err_put:
return err;
}
-static void set_ppgtt_barrier(void *data)
-{
- struct i915_address_space *old = data;
-
- if (GRAPHICS_VER(old->i915) < 8)
- gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
-
- i915_vm_close(old);
-}
-
-static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data)
-{
- struct i915_address_space *vm = ce->vm;
-
- if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
- /* ppGTT is not part of the legacy context image */
- return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
-
- return 0;
-}
-
-static int emit_ppgtt_update(struct i915_request *rq, void *data)
-{
- struct i915_address_space *vm = rq->context->vm;
- struct intel_engine_cs *engine = rq->engine;
- u32 base = engine->mmio_base;
- u32 *cs;
- int i;
-
- if (i915_vm_is_4lvl(vm)) {
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- const dma_addr_t pd_daddr = px_dma(ppgtt->pd);
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(2);
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
- *cs++ = lower_32_bits(pd_daddr);
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
- } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- int err;
-
- /* Magic required to prevent forcewake errors! */
- err = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (err)
- return err;
-
- cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
- for (i = GEN8_3LVL_PDPES; i--; ) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
- }
-
- return 0;
-}
-
-static bool skip_ppgtt_update(struct intel_context *ce, void *data)
-{
- if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
- return !ce->state;
- else
- return !atomic_read(&ce->pin_count);
-}
-
-static int set_ppgtt(struct drm_i915_file_private *file_priv,
- struct i915_gem_context *ctx,
- struct drm_i915_gem_context_param *args)
-{
- struct i915_address_space *vm, *old;
- int err;
-
- if (args->size)
- return -EINVAL;
-
- if (!rcu_access_pointer(ctx->vm))
- return -ENODEV;
-
- if (upper_32_bits(args->value))
- return -ENOENT;
-
- rcu_read_lock();
- vm = xa_load(&file_priv->vm_xa, args->value);
- if (vm && !kref_get_unless_zero(&vm->ref))
- vm = NULL;
- rcu_read_unlock();
- if (!vm)
- return -ENOENT;
-
- err = mutex_lock_interruptible(&ctx->mutex);
- if (err)
- goto out;
-
- if (i915_gem_context_is_closed(ctx)) {
- err = -ENOENT;
- goto unlock;
- }
-
- if (vm == rcu_access_pointer(ctx->vm))
- goto unlock;
-
- old = __set_ppgtt(ctx, vm);
-
- /* Teardown the existing obj:vma cache, it will have to be rebuilt. */
- lut_close(ctx);
-
- /*
- * We need to flush any requests using the current ppgtt before
- * we release it as the requests do not hold a reference themselves,
- * only indirectly through the context.
- */
- err = context_barrier_task(ctx, ALL_ENGINES,
- skip_ppgtt_update,
- pin_ppgtt_update,
- emit_ppgtt_update,
- set_ppgtt_barrier,
- old);
- if (err) {
- i915_vm_close(__set_ppgtt(ctx, old));
- i915_vm_close(old);
- lut_close(ctx); /* force a rebuild of the old obj:vma cache */
- }
-
-unlock:
- mutex_unlock(&ctx->mutex);
-out:
- i915_vm_put(vm);
- return err;
-}
-
-static int __apply_ringsize(struct intel_context *ce, void *sz)
-{
- return intel_context_set_ring_size(ce, (unsigned long)sz);
-}
-
-static int set_ringsize(struct i915_gem_context *ctx,
- struct drm_i915_gem_context_param *args)
-{
- if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
- return -ENODEV;
-
- if (args->size)
- return -EINVAL;
-
- if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
- return -EINVAL;
-
- if (args->value < I915_GTT_PAGE_SIZE)
- return -EINVAL;
-
- if (args->value > 128 * I915_GTT_PAGE_SIZE)
- return -EINVAL;
-
- return context_apply_all(ctx,
- __apply_ringsize,
- __intel_context_ring_size(args->value));
-}
-
-static int __get_ringsize(struct intel_context *ce, void *arg)
-{
- long sz;
-
- sz = intel_context_get_ring_size(ce);
- GEM_BUG_ON(sz > INT_MAX);
-
- return sz; /* stop on first engine */
-}
-
-static int get_ringsize(struct i915_gem_context *ctx,
- struct drm_i915_gem_context_param *args)
-{
- int sz;
-
- if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
- return -ENODEV;
-
- if (args->size)
- return -EINVAL;
-
- sz = context_apply_all(ctx, __get_ringsize, NULL);
- if (sz < 0)
- return sz;
-
- args->value = sz;
- return 0;
-}
-
int
i915_gem_user_to_context_sseu(struct intel_gt *gt,
const struct drm_i915_gem_context_param_sseu *user,
@@ -1545,382 +1759,6 @@ out_ce:
return ret;
}
-struct set_engines {
- struct i915_gem_context *ctx;
- struct i915_gem_engines *engines;
-};
-
-static int
-set_engines__load_balance(struct i915_user_extension __user *base, void *data)
-{
- struct i915_context_engines_load_balance __user *ext =
- container_of_user(base, typeof(*ext), base);
- const struct set_engines *set = data;
- struct drm_i915_private *i915 = set->ctx->i915;
- struct intel_engine_cs *stack[16];
- struct intel_engine_cs **siblings;
- struct intel_context *ce;
- u16 num_siblings, idx;
- unsigned int n;
- int err;
-
- if (!HAS_EXECLISTS(i915))
- return -ENODEV;
-
- if (intel_uc_uses_guc_submission(&i915->gt.uc))
- return -ENODEV; /* not implement yet */
-
- if (get_user(idx, &ext->engine_index))
- return -EFAULT;
-
- if (idx >= set->engines->num_engines) {
- drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
- idx, set->engines->num_engines);
- return -EINVAL;
- }
-
- idx = array_index_nospec(idx, set->engines->num_engines);
- if (set->engines->engines[idx]) {
- drm_dbg(&i915->drm,
- "Invalid placement[%d], already occupied\n", idx);
- return -EEXIST;
- }
-
- if (get_user(num_siblings, &ext->num_siblings))
- return -EFAULT;
-
- err = check_user_mbz(&ext->flags);
- if (err)
- return err;
-
- err = check_user_mbz(&ext->mbz64);
- if (err)
- return err;
-
- siblings = stack;
- if (num_siblings > ARRAY_SIZE(stack)) {
- siblings = kmalloc_array(num_siblings,
- sizeof(*siblings),
- GFP_KERNEL);
- if (!siblings)
- return -ENOMEM;
- }
-
- for (n = 0; n < num_siblings; n++) {
- struct i915_engine_class_instance ci;
-
- if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
- err = -EFAULT;
- goto out_siblings;
- }
-
- siblings[n] = intel_engine_lookup_user(i915,
- ci.engine_class,
- ci.engine_instance);
- if (!siblings[n]) {
- drm_dbg(&i915->drm,
- "Invalid sibling[%d]: { class:%d, inst:%d }\n",
- n, ci.engine_class, ci.engine_instance);
- err = -EINVAL;
- goto out_siblings;
- }
- }
-
- ce = intel_execlists_create_virtual(siblings, n);
- if (IS_ERR(ce)) {
- err = PTR_ERR(ce);
- goto out_siblings;
- }
-
- intel_context_set_gem(ce, set->ctx);
-
- if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
- intel_context_put(ce);
- err = -EEXIST;
- goto out_siblings;
- }
-
-out_siblings:
- if (siblings != stack)
- kfree(siblings);
-
- return err;
-}
-
-static int
-set_engines__bond(struct i915_user_extension __user *base, void *data)
-{
- struct i915_context_engines_bond __user *ext =
- container_of_user(base, typeof(*ext), base);
- const struct set_engines *set = data;
- struct drm_i915_private *i915 = set->ctx->i915;
- struct i915_engine_class_instance ci;
- struct intel_engine_cs *virtual;
- struct intel_engine_cs *master;
- u16 idx, num_bonds;
- int err, n;
-
- if (get_user(idx, &ext->virtual_index))
- return -EFAULT;
-
- if (idx >= set->engines->num_engines) {
- drm_dbg(&i915->drm,
- "Invalid index for virtual engine: %d >= %d\n",
- idx, set->engines->num_engines);
- return -EINVAL;
- }
-
- idx = array_index_nospec(idx, set->engines->num_engines);
- if (!set->engines->engines[idx]) {
- drm_dbg(&i915->drm, "Invalid engine at %d\n", idx);
- return -EINVAL;
- }
- virtual = set->engines->engines[idx]->engine;
-
- err = check_user_mbz(&ext->flags);
- if (err)
- return err;
-
- for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
- err = check_user_mbz(&ext->mbz64[n]);
- if (err)
- return err;
- }
-
- if (copy_from_user(&ci, &ext->master, sizeof(ci)))
- return -EFAULT;
-
- master = intel_engine_lookup_user(i915,
- ci.engine_class, ci.engine_instance);
- if (!master) {
- drm_dbg(&i915->drm,
- "Unrecognised master engine: { class:%u, instance:%u }\n",
- ci.engine_class, ci.engine_instance);
- return -EINVAL;
- }
-
- if (get_user(num_bonds, &ext->num_bonds))
- return -EFAULT;
-
- for (n = 0; n < num_bonds; n++) {
- struct intel_engine_cs *bond;
-
- if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
- return -EFAULT;
-
- bond = intel_engine_lookup_user(i915,
- ci.engine_class,
- ci.engine_instance);
- if (!bond) {
- drm_dbg(&i915->drm,
- "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
- n, ci.engine_class, ci.engine_instance);
- return -EINVAL;
- }
-
- /*
- * A non-virtual engine has no siblings to choose between; and
- * a submit fence will always be directed to the one engine.
- */
- if (intel_engine_is_virtual(virtual)) {
- err = intel_virtual_engine_attach_bond(virtual,
- master,
- bond);
- if (err)
- return err;
- }
- }
-
- return 0;
-}
-
-static const i915_user_extension_fn set_engines__extensions[] = {
- [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
- [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
-};
-
-static int
-set_engines(struct i915_gem_context *ctx,
- const struct drm_i915_gem_context_param *args)
-{
- struct drm_i915_private *i915 = ctx->i915;
- struct i915_context_param_engines __user *user =
- u64_to_user_ptr(args->value);
- struct set_engines set = { .ctx = ctx };
- unsigned int num_engines, n;
- u64 extensions;
- int err;
-
- if (!args->size) { /* switch back to legacy user_ring_map */
- if (!i915_gem_context_user_engines(ctx))
- return 0;
-
- set.engines = default_engines(ctx);
- if (IS_ERR(set.engines))
- return PTR_ERR(set.engines);
-
- goto replace;
- }
-
- BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines)));
- if (args->size < sizeof(*user) ||
- !IS_ALIGNED(args->size, sizeof(*user->engines))) {
- drm_dbg(&i915->drm, "Invalid size for engine array: %d\n",
- args->size);
- return -EINVAL;
- }
-
- /*
- * Note that I915_EXEC_RING_MASK limits execbuf to only using the
- * first 64 engines defined here.
- */
- num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
- set.engines = alloc_engines(num_engines);
- if (!set.engines)
- return -ENOMEM;
-
- for (n = 0; n < num_engines; n++) {
- struct i915_engine_class_instance ci;
- struct intel_engine_cs *engine;
- struct intel_context *ce;
-
- if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
- __free_engines(set.engines, n);
- return -EFAULT;
- }
-
- if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
- ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
- set.engines->engines[n] = NULL;
- continue;
- }
-
- engine = intel_engine_lookup_user(ctx->i915,
- ci.engine_class,
- ci.engine_instance);
- if (!engine) {
- drm_dbg(&i915->drm,
- "Invalid engine[%d]: { class:%d, instance:%d }\n",
- n, ci.engine_class, ci.engine_instance);
- __free_engines(set.engines, n);
- return -ENOENT;
- }
-
- ce = intel_context_create(engine);
- if (IS_ERR(ce)) {
- __free_engines(set.engines, n);
- return PTR_ERR(ce);
- }
-
- intel_context_set_gem(ce, ctx);
-
- set.engines->engines[n] = ce;
- }
- set.engines->num_engines = num_engines;
-
- err = -EFAULT;
- if (!get_user(extensions, &user->extensions))
- err = i915_user_extensions(u64_to_user_ptr(extensions),
- set_engines__extensions,
- ARRAY_SIZE(set_engines__extensions),
- &set);
- if (err) {
- free_engines(set.engines);
- return err;
- }
-
-replace:
- mutex_lock(&ctx->engines_mutex);
- if (i915_gem_context_is_closed(ctx)) {
- mutex_unlock(&ctx->engines_mutex);
- free_engines(set.engines);
- return -ENOENT;
- }
- if (args->size)
- i915_gem_context_set_user_engines(ctx);
- else
- i915_gem_context_clear_user_engines(ctx);
- set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1);
- mutex_unlock(&ctx->engines_mutex);
-
- /* Keep track of old engine sets for kill_context() */
- engines_idle_release(ctx, set.engines);
-
- return 0;
-}
-
-static int
-get_engines(struct i915_gem_context *ctx,
- struct drm_i915_gem_context_param *args)
-{
- struct i915_context_param_engines __user *user;
- struct i915_gem_engines *e;
- size_t n, count, size;
- bool user_engines;
- int err = 0;
-
- e = __context_engines_await(ctx, &user_engines);
- if (!e)
- return -ENOENT;
-
- if (!user_engines) {
- i915_sw_fence_complete(&e->fence);
- args->size = 0;
- return 0;
- }
-
- count = e->num_engines;
-
- /* Be paranoid in case we have an impedance mismatch */
- if (!check_struct_size(user, engines, count, &size)) {
- err = -EINVAL;
- goto err_free;
- }
- if (overflows_type(size, args->size)) {
- err = -EINVAL;
- goto err_free;
- }
-
- if (!args->size) {
- args->size = size;
- goto err_free;
- }
-
- if (args->size < size) {
- err = -EINVAL;
- goto err_free;
- }
-
- user = u64_to_user_ptr(args->value);
- if (put_user(0, &user->extensions)) {
- err = -EFAULT;
- goto err_free;
- }
-
- for (n = 0; n < count; n++) {
- struct i915_engine_class_instance ci = {
- .engine_class = I915_ENGINE_CLASS_INVALID,
- .engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
- };
-
- if (e->engines[n]) {
- ci.engine_class = e->engines[n]->engine->uabi_class;
- ci.engine_instance = e->engines[n]->engine->uabi_instance;
- }
-
- if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
- err = -EFAULT;
- goto err_free;
- }
- }
-
- args->size = size;
-
-err_free:
- i915_sw_fence_complete(&e->fence);
- return err;
-}
-
static int
set_persistence(struct i915_gem_context *ctx,
const struct drm_i915_gem_context_param *args)
@@ -1931,41 +1769,30 @@ set_persistence(struct i915_gem_context *ctx,
return __context_set_persistence(ctx, args->value);
}
-static int __apply_priority(struct intel_context *ce, void *arg)
+static void __apply_priority(struct intel_context *ce, void *arg)
{
struct i915_gem_context *ctx = arg;
if (!intel_engine_has_timeslices(ce->engine))
- return 0;
+ return;
- if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
+ if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
+ intel_engine_has_semaphores(ce->engine))
intel_context_set_use_semaphores(ce);
else
intel_context_clear_use_semaphores(ce);
-
- return 0;
}
static int set_priority(struct i915_gem_context *ctx,
const struct drm_i915_gem_context_param *args)
{
- s64 priority = args->value;
-
- if (args->size)
- return -EINVAL;
-
- if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
- return -ENODEV;
-
- if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
- priority < I915_CONTEXT_MIN_USER_PRIORITY)
- return -EINVAL;
+ int err;
- if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
- !capable(CAP_SYS_NICE))
- return -EPERM;
+ err = validate_priority(ctx->i915, args);
+ if (err)
+ return err;
- ctx->sched.priority = priority;
+ ctx->sched.priority = args->value;
context_apply_all(ctx, __apply_priority, ctx);
return 0;
@@ -1978,15 +1805,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
int ret = 0;
switch (args->param) {
- case I915_CONTEXT_PARAM_NO_ZEROMAP:
- if (args->size)
- ret = -EINVAL;
- else if (args->value)
- set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
- else
- clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
- break;
-
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
if (args->size)
ret = -EINVAL;
@@ -2024,23 +1842,15 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
ret = set_sseu(ctx, args);
break;
- case I915_CONTEXT_PARAM_VM:
- ret = set_ppgtt(fpriv, ctx, args);
- break;
-
- case I915_CONTEXT_PARAM_ENGINES:
- ret = set_engines(ctx, args);
- break;
-
case I915_CONTEXT_PARAM_PERSISTENCE:
ret = set_persistence(ctx, args);
break;
- case I915_CONTEXT_PARAM_RINGSIZE:
- ret = set_ringsize(ctx, args);
- break;
-
+ case I915_CONTEXT_PARAM_NO_ZEROMAP:
case I915_CONTEXT_PARAM_BAN_PERIOD:
+ case I915_CONTEXT_PARAM_RINGSIZE:
+ case I915_CONTEXT_PARAM_VM:
+ case I915_CONTEXT_PARAM_ENGINES:
default:
ret = -EINVAL;
break;
@@ -2050,7 +1860,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
}
struct create_ext {
- struct i915_gem_context *ctx;
+ struct i915_gem_proto_context *pc;
struct drm_i915_file_private *fpriv;
};
@@ -2065,233 +1875,84 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
if (local.param.ctx_id)
return -EINVAL;
- return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
+ return set_proto_ctx_param(arg->fpriv, arg->pc, &local.param);
}
-static int copy_ring_size(struct intel_context *dst,
- struct intel_context *src)
+static int invalid_ext(struct i915_user_extension __user *ext, void *data)
{
- long sz;
-
- sz = intel_context_get_ring_size(src);
- if (sz < 0)
- return sz;
-
- return intel_context_set_ring_size(dst, sz);
+ return -EINVAL;
}
-static int clone_engines(struct i915_gem_context *dst,
- struct i915_gem_context *src)
-{
- struct i915_gem_engines *clone, *e;
- bool user_engines;
- unsigned long n;
-
- e = __context_engines_await(src, &user_engines);
- if (!e)
- return -ENOENT;
-
- clone = alloc_engines(e->num_engines);
- if (!clone)
- goto err_unlock;
-
- for (n = 0; n < e->num_engines; n++) {
- struct intel_engine_cs *engine;
-
- if (!e->engines[n]) {
- clone->engines[n] = NULL;
- continue;
- }
- engine = e->engines[n]->engine;
-
- /*
- * Virtual engines are singletons; they can only exist
- * inside a single context, because they embed their
- * HW context... As each virtual context implies a single
- * timeline (each engine can only dequeue a single request
- * at any time), it would be surprising for two contexts
- * to use the same engine. So let's create a copy of
- * the virtual engine instead.
- */
- if (intel_engine_is_virtual(engine))
- clone->engines[n] =
- intel_execlists_clone_virtual(engine);
- else
- clone->engines[n] = intel_context_create(engine);
- if (IS_ERR_OR_NULL(clone->engines[n])) {
- __free_engines(clone, n);
- goto err_unlock;
- }
-
- intel_context_set_gem(clone->engines[n], dst);
-
- /* Copy across the preferred ringsize */
- if (copy_ring_size(clone->engines[n], e->engines[n])) {
- __free_engines(clone, n + 1);
- goto err_unlock;
- }
- }
- clone->num_engines = n;
- i915_sw_fence_complete(&e->fence);
-
- /* Serialised by constructor */
- engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
- if (user_engines)
- i915_gem_context_set_user_engines(dst);
- else
- i915_gem_context_clear_user_engines(dst);
- return 0;
-
-err_unlock:
- i915_sw_fence_complete(&e->fence);
- return -ENOMEM;
-}
-
-static int clone_flags(struct i915_gem_context *dst,
- struct i915_gem_context *src)
-{
- dst->user_flags = src->user_flags;
- return 0;
-}
-
-static int clone_schedattr(struct i915_gem_context *dst,
- struct i915_gem_context *src)
-{
- dst->sched = src->sched;
- return 0;
-}
-
-static int clone_sseu(struct i915_gem_context *dst,
- struct i915_gem_context *src)
-{
- struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
- struct i915_gem_engines *clone;
- unsigned long n;
- int err;
-
- /* no locking required; sole access under constructor*/
- clone = __context_engines_static(dst);
- if (e->num_engines != clone->num_engines) {
- err = -EINVAL;
- goto unlock;
- }
-
- for (n = 0; n < e->num_engines; n++) {
- struct intel_context *ce = e->engines[n];
-
- if (clone->engines[n]->engine->class != ce->engine->class) {
- /* Must have compatible engine maps! */
- err = -EINVAL;
- goto unlock;
- }
-
- /* serialises with set_sseu */
- err = intel_context_lock_pinned(ce);
- if (err)
- goto unlock;
-
- clone->engines[n]->sseu = ce->sseu;
- intel_context_unlock_pinned(ce);
- }
-
- err = 0;
-unlock:
- i915_gem_context_unlock_engines(src);
- return err;
-}
+static const i915_user_extension_fn create_extensions[] = {
+ [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+ [I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext,
+};
-static int clone_timeline(struct i915_gem_context *dst,
- struct i915_gem_context *src)
+static bool client_is_banned(struct drm_i915_file_private *file_priv)
{
- if (src->timeline)
- __assign_timeline(dst, src->timeline);
-
- return 0;
+ return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
}
-static int clone_vm(struct i915_gem_context *dst,
- struct i915_gem_context *src)
+static inline struct i915_gem_context *
+__context_lookup(struct drm_i915_file_private *file_priv, u32 id)
{
- struct i915_address_space *vm;
- int err = 0;
-
- if (!rcu_access_pointer(src->vm))
- return 0;
+ struct i915_gem_context *ctx;
rcu_read_lock();
- vm = context_get_vm_rcu(src);
+ ctx = xa_load(&file_priv->context_xa, id);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
rcu_read_unlock();
- if (!mutex_lock_interruptible(&dst->mutex)) {
- __assign_ppgtt(dst, vm);
- mutex_unlock(&dst->mutex);
- } else {
- err = -EINTR;
- }
-
- i915_vm_put(vm);
- return err;
+ return ctx;
}
-static int create_clone(struct i915_user_extension __user *ext, void *data)
-{
- static int (* const fn[])(struct i915_gem_context *dst,
- struct i915_gem_context *src) = {
-#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
- MAP(ENGINES, clone_engines),
- MAP(FLAGS, clone_flags),
- MAP(SCHEDATTR, clone_schedattr),
- MAP(SSEU, clone_sseu),
- MAP(TIMELINE, clone_timeline),
- MAP(VM, clone_vm),
-#undef MAP
- };
- struct drm_i915_gem_context_create_ext_clone local;
- const struct create_ext *arg = data;
- struct i915_gem_context *dst = arg->ctx;
- struct i915_gem_context *src;
- int err, bit;
+static struct i915_gem_context *
+finalize_create_context_locked(struct drm_i915_file_private *file_priv,
+ struct i915_gem_proto_context *pc, u32 id)
+{
+ struct i915_gem_context *ctx;
+ void *old;
- if (copy_from_user(&local, ext, sizeof(local)))
- return -EFAULT;
+ lockdep_assert_held(&file_priv->proto_context_lock);
- BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
- I915_CONTEXT_CLONE_UNKNOWN);
+ ctx = i915_gem_create_context(file_priv->dev_priv, pc);
+ if (IS_ERR(ctx))
+ return ctx;
- if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
- return -EINVAL;
+ gem_context_register(ctx, file_priv, id);
- if (local.rsvd)
- return -EINVAL;
+ old = xa_erase(&file_priv->proto_context_xa, id);
+ GEM_BUG_ON(old != pc);
+ proto_context_close(pc);
- rcu_read_lock();
- src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
- rcu_read_unlock();
- if (!src)
- return -ENOENT;
+ /* One for the xarray and one for the caller */
+ return i915_gem_context_get(ctx);
+}
- GEM_BUG_ON(src == dst);
+struct i915_gem_context *
+i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
+{
+ struct i915_gem_proto_context *pc;
+ struct i915_gem_context *ctx;
- for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
- if (!(local.flags & BIT(bit)))
- continue;
+ ctx = __context_lookup(file_priv, id);
+ if (ctx)
+ return ctx;
- err = fn[bit](dst, src);
- if (err)
- return err;
+ mutex_lock(&file_priv->proto_context_lock);
+ /* Try one more time under the lock */
+ ctx = __context_lookup(file_priv, id);
+ if (!ctx) {
+ pc = xa_load(&file_priv->proto_context_xa, id);
+ if (!pc)
+ ctx = ERR_PTR(-ENOENT);
+ else
+ ctx = finalize_create_context_locked(file_priv, pc, id);
}
+ mutex_unlock(&file_priv->proto_context_lock);
- return 0;
-}
-
-static const i915_user_extension_fn create_extensions[] = {
- [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
- [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
-};
-
-static bool client_is_banned(struct drm_i915_file_private *file_priv)
-{
- return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
+ return ctx;
}
int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@ -2321,9 +1982,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
return -EIO;
}
- ext_data.ctx = i915_gem_create_context(i915, args->flags);
- if (IS_ERR(ext_data.ctx))
- return PTR_ERR(ext_data.ctx);
+ ext_data.pc = proto_context_create(i915, args->flags);
+ if (IS_ERR(ext_data.pc))
+ return PTR_ERR(ext_data.pc);
if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) {
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
@@ -2331,20 +1992,39 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
ARRAY_SIZE(create_extensions),
&ext_data);
if (ret)
- goto err_ctx;
+ goto err_pc;
}
- ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id);
- if (ret < 0)
- goto err_ctx;
+ if (GRAPHICS_VER(i915) > 12) {
+ struct i915_gem_context *ctx;
+
+ /* Get ourselves a context ID */
+ ret = xa_alloc(&ext_data.fpriv->context_xa, &id, NULL,
+ xa_limit_32b, GFP_KERNEL);
+ if (ret)
+ goto err_pc;
+
+ ctx = i915_gem_create_context(i915, ext_data.pc);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ goto err_pc;
+ }
+
+ proto_context_close(ext_data.pc);
+ gem_context_register(ctx, ext_data.fpriv, id);
+ } else {
+ ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id);
+ if (ret < 0)
+ goto err_pc;
+ }
args->ctx_id = id;
drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id);
return 0;
-err_ctx:
- context_close(ext_data.ctx);
+err_pc:
+ proto_context_close(ext_data.pc);
return ret;
}
@@ -2353,6 +2033,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_context_destroy *args = data;
struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct i915_gem_proto_context *pc;
struct i915_gem_context *ctx;
if (args->pad != 0)
@@ -2361,11 +2042,24 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
if (!args->ctx_id)
return -ENOENT;
+ /* We need to hold the proto-context lock here to prevent races
+ * with finalize_create_context_locked().
+ */
+ mutex_lock(&file_priv->proto_context_lock);
ctx = xa_erase(&file_priv->context_xa, args->ctx_id);
- if (!ctx)
+ pc = xa_erase(&file_priv->proto_context_xa, args->ctx_id);
+ mutex_unlock(&file_priv->proto_context_lock);
+
+ if (!ctx && !pc)
return -ENOENT;
+ GEM_WARN_ON(ctx && pc);
+
+ if (pc)
+ proto_context_close(pc);
+
+ if (ctx)
+ context_close(ctx);
- context_close(ctx);
return 0;
}
@@ -2433,15 +2127,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
int ret = 0;
ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
- if (!ctx)
- return -ENOENT;
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
switch (args->param) {
- case I915_CONTEXT_PARAM_NO_ZEROMAP:
- args->size = 0;
- args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
- break;
-
case I915_CONTEXT_PARAM_GTT_SIZE:
args->size = 0;
rcu_read_lock();
@@ -2480,20 +2169,15 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
ret = get_ppgtt(file_priv, ctx, args);
break;
- case I915_CONTEXT_PARAM_ENGINES:
- ret = get_engines(ctx, args);
- break;
-
case I915_CONTEXT_PARAM_PERSISTENCE:
args->size = 0;
args->value = i915_gem_context_is_persistent(ctx);
break;
- case I915_CONTEXT_PARAM_RINGSIZE:
- ret = get_ringsize(ctx, args);
- break;
-
+ case I915_CONTEXT_PARAM_NO_ZEROMAP:
case I915_CONTEXT_PARAM_BAN_PERIOD:
+ case I915_CONTEXT_PARAM_ENGINES:
+ case I915_CONTEXT_PARAM_RINGSIZE:
default:
ret = -EINVAL;
break;
@@ -2508,16 +2192,32 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
+ struct i915_gem_proto_context *pc;
struct i915_gem_context *ctx;
- int ret;
+ int ret = 0;
- ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
- if (!ctx)
- return -ENOENT;
+ mutex_lock(&file_priv->proto_context_lock);
+ ctx = __context_lookup(file_priv, args->ctx_id);
+ if (!ctx) {
+ pc = xa_load(&file_priv->proto_context_xa, args->ctx_id);
+ if (pc) {
+ /* Contexts should be finalized inside
+ * GEM_CONTEXT_CREATE starting with graphics
+ * version 13.
+ */
+ WARN_ON(GRAPHICS_VER(file_priv->dev_priv) > 12);
+ ret = set_proto_ctx_param(file_priv, pc, args);
+ } else {
+ ret = -ENOENT;
+ }
+ }
+ mutex_unlock(&file_priv->proto_context_lock);
- ret = ctx_setparam(file_priv, ctx, args);
+ if (ctx) {
+ ret = ctx_setparam(file_priv, ctx, args);
+ i915_gem_context_put(ctx);
+ }
- i915_gem_context_put(ctx);
return ret;
}
@@ -2527,16 +2227,13 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_reset_stats *args = data;
struct i915_gem_context *ctx;
- int ret;
if (args->flags || args->pad)
return -EINVAL;
- ret = -ENOENT;
- rcu_read_lock();
- ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id);
- if (!ctx)
- goto out;
+ ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
/*
* We opt for unserialised reads here. This may result in tearing
@@ -2553,10 +2250,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
args->batch_active = atomic_read(&ctx->guilty_count);
args->batch_pending = atomic_read(&ctx->active_count);
- ret = 0;
-out:
- rcu_read_unlock();
- return ret;
+ i915_gem_context_put(ctx);
+ return 0;
}
/* GEM context-engines iterator: for_each_gem_engine() */
@@ -2584,27 +2279,16 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
#include "selftests/i915_gem_context.c"
#endif
-static void i915_global_gem_context_shrink(void)
+void i915_gem_context_module_exit(void)
{
- kmem_cache_shrink(global.slab_luts);
+ kmem_cache_destroy(slab_luts);
}
-static void i915_global_gem_context_exit(void)
-{
- kmem_cache_destroy(global.slab_luts);
-}
-
-static struct i915_global_gem_context global = { {
- .shrink = i915_global_gem_context_shrink,
- .exit = i915_global_gem_context_exit,
-} };
-
-int __init i915_global_gem_context_init(void)
+int __init i915_gem_context_module_init(void)
{
- global.slab_luts = KMEM_CACHE(i915_lut_handle, 0);
- if (!global.slab_luts)
+ slab_luts = KMEM_CACHE(i915_lut_handle, 0);
+ if (!slab_luts)
return -ENOMEM;
- i915_global_register(&global.base);
return 0;
}
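
For reviewers, the CONTEXT_CREATE_EXT_SETPARAM path exercised by create_setparam() above is the
creation-time route now served by set_proto_ctx_param(). A minimal userspace-side sketch of that
flow follows; it is not part of this patch, the helper name is made up, error handling is trimmed,
and it assumes libdrm plus the i915 uAPI headers.

#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int create_ctx_with_priority(int fd, int64_t prio, uint32_t *ctx_id)
{
	struct drm_i915_gem_context_create_ext_setparam setparam = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		.param = {
			/* ctx_id and size must be zero for create-time setparam */
			.param = I915_CONTEXT_PARAM_PRIORITY,
			.value = prio,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uint64_t)(uintptr_t)&setparam,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return -errno;

	*ctx_id = create.ctx_id;
	return 0;
}

Because the extension is processed before the proto-context is exposed, none of the parameters set
this way ever race against a live context.
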
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index b5c908f3f4f2..18060536b0c2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -133,6 +133,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+struct i915_gem_context *
+i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id);
+
static inline struct i915_gem_context *
i915_gem_context_get(struct i915_gem_context *ctx)
{
@@ -221,6 +224,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
for (i915_gem_engines_iter_init(&(it), (engines)); \
((ce) = i915_gem_engines_iter_next(&(it)));)
+void i915_gem_context_module_exit(void);
+int i915_gem_context_module_init(void);
+
struct i915_lut_handle *i915_lut_handle_alloc(void);
void i915_lut_handle_free(struct i915_lut_handle *lut);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 340473aa70de..94c03a97cb77 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -30,33 +30,187 @@ struct i915_address_space;
struct intel_timeline;
struct intel_ring;
+/**
+ * struct i915_gem_engines - A set of engines
+ */
struct i915_gem_engines {
union {
+ /** @link: Link in i915_gem_context::stale::engines */
struct list_head link;
+
+ /** @rcu: RCU to use when freeing */
struct rcu_head rcu;
};
+
+ /** @fence: Fence used for delayed destruction of engines */
struct i915_sw_fence fence;
+
+ /** @ctx: i915_gem_context backpointer */
struct i915_gem_context *ctx;
+
+ /** @num_engines: Number of engines in this set */
unsigned int num_engines;
+
+ /** @engines: Array of engines */
struct intel_context *engines[];
};
+/**
+ * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set
+ */
struct i915_gem_engines_iter {
+ /** @idx: Index into i915_gem_engines::engines */
unsigned int idx;
+
+ /** @engines: Engine set being iterated */
const struct i915_gem_engines *engines;
};
/**
+ * enum i915_gem_engine_type - Describes the type of an i915_gem_proto_engine
+ */
+enum i915_gem_engine_type {
+ /** @I915_GEM_ENGINE_TYPE_INVALID: An invalid engine */
+ I915_GEM_ENGINE_TYPE_INVALID = 0,
+
+ /** @I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine */
+ I915_GEM_ENGINE_TYPE_PHYSICAL,
+
+ /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
+ I915_GEM_ENGINE_TYPE_BALANCED,
+};
+
+/**
+ * struct i915_gem_proto_engine - prototype engine
+ *
+ * This struct describes an engine that a context may contain. Engines
+ * have three types:
+ *
+ * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they
+ *   show up as a NULL in i915_gem_engines::engines[i] and any attempt by
+ *   the user to use them results in -EINVAL.  They are also useful during
+ * proto-context construction because the client may create invalid
+ * engines and then set them up later as virtual engines.
+ *
+ * - I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine, described by
+ * i915_gem_proto_engine::engine.
+ *
+ * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described by
+ * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings.
+ */
+struct i915_gem_proto_engine {
+ /** @type: Type of this engine */
+ enum i915_gem_engine_type type;
+
+ /** @engine: Engine, for physical */
+ struct intel_engine_cs *engine;
+
+ /** @num_siblings: Number of balanced siblings */
+ unsigned int num_siblings;
+
+ /** @siblings: Balanced siblings */
+ struct intel_engine_cs **siblings;
+
+ /** @sseu: Client-set SSEU parameters */
+ struct intel_sseu sseu;
+};
+
+/**
+ * struct i915_gem_proto_context - prototype context
+ *
+ * The struct i915_gem_proto_context represents the creation parameters for
+ * a struct i915_gem_context. This is used to gather parameters provided
+ * either through creation flags or via SET_CONTEXT_PARAM so that, when we
+ * create the final i915_gem_context, those parameters can be immutable.
+ *
+ * The context uAPI allows for two methods of setting context parameters:
+ * SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is
+ * allowed to be called at any time while the latter happens as part of
+ * GEM_CONTEXT_CREATE.  When these were initially added, everything
+ * settable via one was also settable via the other.  While some params
+ * are fairly simple and setting them on a live context is harmless, such
+ * as the context priority, others are far trickier, such as the VM or the
+ * set of engines. To avoid some truly nasty race conditions, we don't
+ * allow setting the VM or the set of engines on live contexts.
+ *
+ * The way we dealt with this without breaking older userspace that sets
+ * the VM or engine set via SET_CONTEXT_PARAM is to delay the creation of
+ * the actual context until after the client is done configuring it with
+ * SET_CONTEXT_PARAM. From the perspective of the client, it has the same
+ * u32 context ID the whole time. From the perspective of i915, however,
+ * it's an i915_gem_proto_context right up until the point where we attempt
+ * to do something which the proto-context can't handle at which point the
+ * real context gets created.
+ *
+ * This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE
+ * is called, we create a proto-context, reserve a slot in context_xa but
+ * leave it NULL, and store the proto-context in the corresponding slot in
+ * proto_context_xa. Then, whenever we go to look up a context, we first
+ * check context_xa. If it's there, we return the i915_gem_context and
+ * we're done. If it's not, we look in proto_context_xa and, if we find it
+ * there, we create the actual context and kill the proto-context.
+ *
+ * At the time we made this change (April, 2021), we did a fairly complete
+ * audit of existing userspace to ensure this wouldn't break anything:
+ *
+ * - Mesa/i965 didn't use the engines or VM APIs at all
+ *
+ * - Mesa/ANV used the engines API but via CONTEXT_CREATE_EXT_SETPARAM and
+ * didn't use the VM API.
+ *
+ * - Mesa/iris didn't use the engines or VM APIs at all
+ *
+ * - The open-source compute-runtime didn't yet use the engines API but
+ * did use the VM API via SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM
+ * was always the second ioctl on that context, immediately following
+ * GEM_CONTEXT_CREATE.
+ *
+ * - The media driver sets engines and bonding/balancing via
+ * SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM to set the VM was
+ * always the second ioctl on that context, immediately following
+ * GEM_CONTEXT_CREATE and setting engines immediately followed that.
+ *
+ * In order for this dance to work properly, any modification to an
+ * i915_gem_proto_context that is exposed to the client via
+ * drm_i915_file_private::proto_context_xa must be guarded by
+ * drm_i915_file_private::proto_context_lock. The exception is when a
+ * proto-context has not yet been exposed, such as when handling
+ * CONTEXT_CREATE_EXT_SETPARAM during GEM_CONTEXT_CREATE.
+ */
+struct i915_gem_proto_context {
+ /** @vm: See &i915_gem_context.vm */
+ struct i915_address_space *vm;
+
+ /** @user_flags: See &i915_gem_context.user_flags */
+ unsigned long user_flags;
+
+ /** @sched: See &i915_gem_context.sched */
+ struct i915_sched_attr sched;
+
+ /** @num_user_engines: Number of user-specified engines or -1 */
+ int num_user_engines;
+
+ /** @user_engines: User-specified engines */
+ struct i915_gem_proto_engine *user_engines;
+
+ /** @legacy_rcs_sseu: Client-set SSEU parameters for the legacy RCS */
+ struct intel_sseu legacy_rcs_sseu;
+
+	/** @single_timeline: See &i915_gem_context.syncobj */
+ bool single_timeline;
+};
+
+/**
* struct i915_gem_context - client state
*
* The struct i915_gem_context represents the combined view of the driver and
* logical hardware state for a particular client.
*/
struct i915_gem_context {
- /** i915: i915 device backpointer */
+ /** @i915: i915 device backpointer */
struct drm_i915_private *i915;
- /** file_priv: owning file descriptor */
+ /** @file_priv: owning file descriptor */
struct drm_i915_file_private *file_priv;
/**
@@ -81,9 +235,23 @@ struct i915_gem_context {
* CONTEXT_USER_ENGINES flag is set).
*/
struct i915_gem_engines __rcu *engines;
- struct mutex engines_mutex; /* guards writes to engines */
- struct intel_timeline *timeline;
+ /** @engines_mutex: guards writes to engines */
+ struct mutex engines_mutex;
+
+ /**
+ * @syncobj: Shared timeline syncobj
+ *
+ * When the SHARED_TIMELINE flag is set on context creation, we
+ * emulate a single timeline across all engines using this syncobj.
+ * For every execbuffer2 call, this syncobj is used as both an in-
+ * and out-fence. Unlike the real intel_timeline, this doesn't
+ * provide perfect atomic in-order guarantees if the client races
+ * with itself by calling execbuffer2 twice concurrently. However,
+ * if userspace races with itself, that's not likely to yield well-
+ * defined results anyway so we choose to not care.
+ */
+ struct drm_syncobj *syncobj;
/**
* @vm: unique address space (GTT)
@@ -106,7 +274,7 @@ struct i915_gem_context {
*/
struct pid *pid;
- /** link: place with &drm_i915_private.context_list */
+ /** @link: place with &drm_i915_private.context_list */
struct list_head link;
/**
@@ -129,7 +297,6 @@ struct i915_gem_context {
* @user_flags: small set of booleans controlled by the user
*/
unsigned long user_flags;
-#define UCONTEXT_NO_ZEROMAP 0
#define UCONTEXT_NO_ERROR_CAPTURE 1
#define UCONTEXT_BANNABLE 2
#define UCONTEXT_RECOVERABLE 3
@@ -142,11 +309,13 @@ struct i915_gem_context {
#define CONTEXT_CLOSED 0
#define CONTEXT_USER_ENGINES 1
+ /** @mutex: guards everything that isn't engines or handles_vma */
struct mutex mutex;
+ /** @sched: scheduler parameters */
struct i915_sched_attr sched;
- /** guilty_count: How many times this context has caused a GPU hang. */
+ /** @guilty_count: How many times this context has caused a GPU hang. */
atomic_t guilty_count;
/**
* @active_count: How many times this context was active during a GPU
@@ -154,25 +323,23 @@ struct i915_gem_context {
*/
atomic_t active_count;
- struct {
- u64 timeout_us;
- } watchdog;
-
/**
* @hang_timestamp: The last time(s) this context caused a GPU hang
*/
unsigned long hang_timestamp[2];
#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
- /** remap_slice: Bitmask of cache lines that need remapping */
+ /** @remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
/**
- * handles_vma: rbtree to look up our context specific obj/vma for
+ * @handles_vma: rbtree to look up our context specific obj/vma for
* the user handle. (user handles are per fd, but the binding is
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
+
+ /** @lut_mutex: Locks handles_vma */
struct mutex lut_mutex;
/**
@@ -184,8 +351,11 @@ struct i915_gem_context {
*/
char name[TASK_COMM_LEN + 8];
+ /** @stale: tracks stale engines to be destroyed */
struct {
+ /** @lock: guards engines */
spinlock_t lock;
+ /** @engines: list of stale engines */
struct list_head engines;
} stale;
};
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 548ddf39d853..23fee13a3384 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -11,13 +11,14 @@
#include "i915_trace.h"
#include "i915_user_extensions.h"
-static u32 object_max_page_size(struct drm_i915_gem_object *obj)
+static u32 object_max_page_size(struct intel_memory_region **placements,
+ unsigned int n_placements)
{
u32 max_page_size = 0;
int i;
- for (i = 0; i < obj->mm.n_placements; i++) {
- struct intel_memory_region *mr = obj->mm.placements[i];
+ for (i = 0; i < n_placements; i++) {
+ struct intel_memory_region *mr = placements[i];
GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
max_page_size = max_t(u32, max_page_size, mr->min_page_size);
@@ -27,10 +28,13 @@ static u32 object_max_page_size(struct drm_i915_gem_object *obj)
return max_page_size;
}
-static void object_set_placements(struct drm_i915_gem_object *obj,
- struct intel_memory_region **placements,
- unsigned int n_placements)
+static int object_set_placements(struct drm_i915_gem_object *obj,
+ struct intel_memory_region **placements,
+ unsigned int n_placements)
{
+ struct intel_memory_region **arr;
+ unsigned int i;
+
GEM_BUG_ON(!n_placements);
/*
@@ -44,9 +48,20 @@ static void object_set_placements(struct drm_i915_gem_object *obj,
obj->mm.placements = &i915->mm.regions[mr->id];
obj->mm.n_placements = 1;
} else {
- obj->mm.placements = placements;
+ arr = kmalloc_array(n_placements,
+ sizeof(struct intel_memory_region *),
+ GFP_KERNEL);
+ if (!arr)
+ return -ENOMEM;
+
+ for (i = 0; i < n_placements; i++)
+ arr[i] = placements[i];
+
+ obj->mm.placements = arr;
obj->mm.n_placements = n_placements;
}
+
+ return 0;
}
static int i915_gem_publish(struct drm_i915_gem_object *obj,
@@ -67,40 +82,67 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj,
return 0;
}
-static int
-i915_gem_setup(struct drm_i915_gem_object *obj, u64 size)
+/**
+ * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT
+ * @i915: i915 private
+ * @size: size of the buffer, in bytes
+ * @placements: possible placement regions, in priority order
+ * @n_placements: number of possible placement regions
+ *
+ * This function is exposed primarily for selftests and does very little
+ * error checking. It is assumed that the set of placement regions has
+ * already been verified to be valid.
+ */
+struct drm_i915_gem_object *
+__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size,
+ struct intel_memory_region **placements,
+ unsigned int n_placements)
{
- struct intel_memory_region *mr = obj->mm.placements[0];
+ struct intel_memory_region *mr = placements[0];
+ struct drm_i915_gem_object *obj;
unsigned int flags;
int ret;
- size = round_up(size, object_max_page_size(obj));
+ i915_gem_flush_free_objects(i915);
+
+ size = round_up(size, object_max_page_size(placements, n_placements));
if (size == 0)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
/* For most of the ABI (e.g. mmap) we think in system pages */
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
if (i915_gem_object_size_2big(size))
- return -E2BIG;
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ ret = object_set_placements(obj, placements, n_placements);
+ if (ret)
+ goto object_free;
/*
- * For now resort to CPU based clearing for device local-memory, in the
- * near future this will use the blitter engine for accelerated, GPU
- * based clearing.
+ * I915_BO_ALLOC_USER will make sure the object is cleared before
+ * any user access.
*/
- flags = 0;
- if (mr->type == INTEL_MEMORY_LOCAL)
- flags = I915_BO_ALLOC_CPU_CLEAR;
+ flags = I915_BO_ALLOC_USER;
- ret = mr->ops->init_object(mr, obj, size, flags);
+ ret = mr->ops->init_object(mr, obj, size, 0, flags);
if (ret)
- return ret;
+ goto object_free;
GEM_BUG_ON(size != obj->base.size);
trace_i915_gem_object_create(obj);
- return 0;
+ return obj;
+
+object_free:
+ if (obj->mm.n_placements > 1)
+ kfree(obj->mm.placements);
+ i915_gem_object_free(obj);
+ return ERR_PTR(ret);
}
int
@@ -113,7 +155,6 @@ i915_gem_dumb_create(struct drm_file *file,
enum intel_memory_type mem_type;
int cpp = DIV_ROUND_UP(args->bpp, 8);
u32 format;
- int ret;
switch (cpp) {
case 1:
@@ -146,22 +187,13 @@ i915_gem_dumb_create(struct drm_file *file,
if (HAS_LMEM(to_i915(dev)))
mem_type = INTEL_MEMORY_LOCAL;
- obj = i915_gem_object_alloc();
- if (!obj)
- return -ENOMEM;
-
mr = intel_memory_region_by_type(to_i915(dev), mem_type);
- object_set_placements(obj, &mr, 1);
- ret = i915_gem_setup(obj, args->size);
- if (ret)
- goto object_free;
+ obj = __i915_gem_object_create_user(to_i915(dev), args->size, &mr, 1);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
return i915_gem_publish(obj, file, &args->size, &args->handle);
-
-object_free:
- i915_gem_object_free(obj);
- return ret;
}
/**
@@ -178,31 +210,20 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_create *args = data;
struct drm_i915_gem_object *obj;
struct intel_memory_region *mr;
- int ret;
-
- i915_gem_flush_free_objects(i915);
-
- obj = i915_gem_object_alloc();
- if (!obj)
- return -ENOMEM;
mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
- object_set_placements(obj, &mr, 1);
- ret = i915_gem_setup(obj, args->size);
- if (ret)
- goto object_free;
+ obj = __i915_gem_object_create_user(i915, args->size, &mr, 1);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
return i915_gem_publish(obj, file, &args->size, &args->handle);
-
-object_free:
- i915_gem_object_free(obj);
- return ret;
}
struct create_ext {
struct drm_i915_private *i915;
- struct drm_i915_gem_object *vanilla_object;
+ struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
+ unsigned int n_placements;
};
static void repr_placements(char *buf, size_t size,
@@ -233,8 +254,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
struct drm_i915_private *i915 = ext_data->i915;
struct drm_i915_gem_memory_class_instance __user *uregions =
u64_to_user_ptr(args->regions);
- struct drm_i915_gem_object *obj = ext_data->vanilla_object;
- struct intel_memory_region **placements;
+ struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
u32 mask;
int i, ret = 0;
@@ -248,6 +268,8 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
ret = -EINVAL;
}
+ BUILD_BUG_ON(ARRAY_SIZE(i915->mm.regions) != ARRAY_SIZE(placements));
+ BUILD_BUG_ON(ARRAY_SIZE(ext_data->placements) != ARRAY_SIZE(placements));
if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) {
drm_dbg(&i915->drm, "num_regions is too large\n");
ret = -EINVAL;
@@ -256,21 +278,13 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
if (ret)
return ret;
- placements = kmalloc_array(args->num_regions,
- sizeof(struct intel_memory_region *),
- GFP_KERNEL);
- if (!placements)
- return -ENOMEM;
-
mask = 0;
for (i = 0; i < args->num_regions; i++) {
struct drm_i915_gem_memory_class_instance region;
struct intel_memory_region *mr;
- if (copy_from_user(&region, uregions, sizeof(region))) {
- ret = -EFAULT;
- goto out_free;
- }
+ if (copy_from_user(&region, uregions, sizeof(region)))
+ return -EFAULT;
mr = intel_memory_region_lookup(i915,
region.memory_class,
@@ -296,14 +310,14 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
++uregions;
}
- if (obj->mm.placements) {
+ if (ext_data->n_placements) {
ret = -EINVAL;
goto out_dump;
}
- object_set_placements(obj, placements, args->num_regions);
- if (args->num_regions == 1)
- kfree(placements);
+ ext_data->n_placements = args->num_regions;
+ for (i = 0; i < args->num_regions; i++)
+ ext_data->placements[i] = placements[i];
return 0;
@@ -311,11 +325,11 @@ out_dump:
if (1) {
char buf[256];
- if (obj->mm.placements) {
+ if (ext_data->n_placements) {
repr_placements(buf,
sizeof(buf),
- obj->mm.placements,
- obj->mm.n_placements);
+ ext_data->placements,
+ ext_data->n_placements);
drm_dbg(&i915->drm,
"Placements were already set in previous EXT. Existing placements: %s\n",
buf);
@@ -325,8 +339,6 @@ out_dump:
drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf);
}
-out_free:
- kfree(placements);
return ret;
}
@@ -361,44 +373,30 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_create_ext *args = data;
struct create_ext ext_data = { .i915 = i915 };
- struct intel_memory_region **placements_ext;
struct drm_i915_gem_object *obj;
int ret;
if (args->flags)
return -EINVAL;
- i915_gem_flush_free_objects(i915);
-
- obj = i915_gem_object_alloc();
- if (!obj)
- return -ENOMEM;
-
- ext_data.vanilla_object = obj;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
create_extensions,
ARRAY_SIZE(create_extensions),
&ext_data);
- placements_ext = obj->mm.placements;
if (ret)
- goto object_free;
+ return ret;
- if (!placements_ext) {
- struct intel_memory_region *mr =
+ if (!ext_data.n_placements) {
+ ext_data.placements[0] =
intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
-
- object_set_placements(obj, &mr, 1);
+ ext_data.n_placements = 1;
}
- ret = i915_gem_setup(obj, args->size);
- if (ret)
- goto object_free;
+ obj = __i915_gem_object_create_user(i915, args->size,
+ ext_data.placements,
+ ext_data.n_placements);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
return i915_gem_publish(obj, file, &args->size, &args->handle);
-
-object_free:
- if (obj->mm.n_placements > 1)
- kfree(placements_ext);
- i915_gem_object_free(obj);
- return ret;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 616c3a2f1baf..afa34111de02 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -12,6 +12,8 @@
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
+I915_SELFTEST_DECLARE(static bool force_different_devices;)
+
static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
{
return to_intel_bo(buf->priv);
@@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
struct scatterlist *src, *dst;
int ret, i;
- ret = i915_gem_object_pin_pages_unlocked(obj);
- if (ret)
- goto err;
-
/* Copy sg so that we make an independent mapping */
st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
if (st == NULL) {
ret = -ENOMEM;
- goto err_unpin_pages;
+ goto err;
}
ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
@@ -58,8 +56,6 @@ err_free_sg:
sg_free_table(st);
err_free:
kfree(st);
-err_unpin_pages:
- i915_gem_object_unpin_pages(obj);
err:
return ERR_PTR(ret);
}
@@ -68,13 +64,9 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
struct sg_table *sg,
enum dma_data_direction dir)
{
- struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
-
dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC);
sg_free_table(sg);
kfree(sg);
-
- i915_gem_object_unpin_pages(obj);
}
static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map)
@@ -168,7 +160,46 @@ retry:
return err;
}
+static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM))
+ return -EOPNOTSUPP;
+
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
+ if (err)
+ continue;
+
+ err = i915_gem_object_wait_migration(obj, 0);
+ if (err)
+ continue;
+
+ err = i915_gem_object_pin_pages(obj);
+ }
+
+ return err;
+}
+
+static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+
+ i915_gem_object_unpin_pages(obj);
+}
+
static const struct dma_buf_ops i915_dmabuf_ops = {
+ .attach = i915_gem_dmabuf_attach,
+ .detach = i915_gem_dmabuf_detach,
.map_dma_buf = i915_gem_map_dma_buf,
.unmap_dma_buf = i915_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
@@ -204,6 +235,8 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
struct sg_table *pages;
unsigned int sg_page_sizes;
+ assert_object_held(obj);
+
pages = dma_buf_map_attachment(obj->base.import_attach,
DMA_BIDIRECTIONAL);
if (IS_ERR(pages))
@@ -241,7 +274,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
if (dma_buf->ops == &i915_dmabuf_ops) {
obj = dma_buf_to_obj(dma_buf);
/* is it from our device? */
- if (obj->base.dev == dev) {
+ if (obj->base.dev == dev &&
+ !I915_SELFTEST_ONLY(force_different_devices)) {
/*
			 * Importing dmabuf exported from our own gem increases
* refcount on gem itself instead of f_count of dmabuf.
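
A hedged userspace-side view of the new attach behaviour (not part of this patch): exporting works
as before, but when another device attaches to the dma-buf the object must be able to live in
system memory. If it cannot, the attach hook above fails with -EOPNOTSUPP; if it can, the object
is migrated to SMEM and its pages stay pinned until detach. The helper name is illustrative only.

#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>

static int export_bo(int fd, uint32_t handle)
{
	int dmabuf_fd;

	if (drmPrimeHandleToFD(fd, handle, DRM_CLOEXEC | DRM_RDWR, &dmabuf_fd))
		return -errno;

	return dmabuf_fd;	/* pass to the importing device/driver */
}
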
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 073822100da7..b684a62bf3b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_object *obj;
int err = 0;
+ if (IS_DGFX(to_i915(dev)))
+ return -ENODEV;
+
rcu_read_lock();
obj = i915_gem_object_lookup_rcu(file, args->handle);
if (!obj) {
@@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
enum i915_cache_level level;
int ret = 0;
+ if (IS_DGFX(i915))
+ return -ENODEV;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
@@ -375,7 +381,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret;
- /* Frame buffer must be in LMEM (no migration yet) */
+ /* Frame buffer must be in LMEM */
if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
return ERR_PTR(-EINVAL);
@@ -484,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
u32 write_domain = args->write_domain;
int err;
+ if (IS_DGFX(to_i915(dev)))
+ return -ENODEV;
+
/* Only handle setting domains to types used by the CPU. */
if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
return -EINVAL;
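
Userspace-facing consequence of the IS_DGFX() checks above, shown as a hedged sketch (not part of
this patch): the caching and set-domain ioctls now report -ENODEV on discrete parts, so callers
should treat that as "not applicable" rather than as a hard failure. The helper name is
illustrative only.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Returns true if caching was set, false if the ioctl does not apply. */
static bool try_set_caching(int fd, uint32_t handle, uint32_t caching)
{
	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = caching,	/* e.g. I915_CACHING_CACHED */
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0)
		return true;

	/* -ENODEV here means a discrete GPU: caching is not user-controllable. */
	return false;
}
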
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index a8abc9af5ff4..1aa249908b64 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,10 +25,8 @@
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
-#include "i915_memcpy.h"
struct eb_vma {
struct i915_vma *vma;
@@ -279,18 +277,9 @@ struct i915_execbuffer {
bool has_llc : 1;
bool has_fence : 1;
bool needs_unfenced : 1;
-
- struct i915_request *rq;
- u32 *rq_cmd;
- unsigned int rq_size;
- struct intel_gt_buffer_pool_node *pool;
} reloc_cache;
- struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
- struct intel_context *reloc_context;
-
u64 invalid_flags; /** Set of execobj.flags that are invalid */
- u32 context_flags; /** Set of execobj.flags to insert from the ctx */
u64 batch_len; /** Length of batch within object */
u32 batch_start_offset; /** Location within object of batch */
@@ -541,9 +530,6 @@ eb_validate_vma(struct i915_execbuffer *eb,
entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
}
- if (!(entry->flags & EXEC_OBJECT_PINNED))
- entry->flags |= eb->context_flags;
-
return 0;
}
@@ -743,17 +729,13 @@ static int eb_select_context(struct i915_execbuffer *eb)
struct i915_gem_context *ctx;
ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
- if (unlikely(!ctx))
- return -ENOENT;
+ if (unlikely(IS_ERR(ctx)))
+ return PTR_ERR(ctx);
eb->gem_context = ctx;
if (rcu_access_pointer(ctx->vm))
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
- eb->context_flags = 0;
- if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
- eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
return 0;
}
@@ -922,21 +904,38 @@ err:
return err;
}
-static int eb_validate_vmas(struct i915_execbuffer *eb)
+static int eb_lock_vmas(struct i915_execbuffer *eb)
{
unsigned int i;
int err;
- INIT_LIST_HEAD(&eb->unbound);
-
for (i = 0; i < eb->buffer_count; i++) {
- struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
struct eb_vma *ev = &eb->vma[i];
struct i915_vma *vma = ev->vma;
err = i915_gem_object_lock(vma->obj, &eb->ww);
if (err)
return err;
+ }
+
+ return 0;
+}
+
+static int eb_validate_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ INIT_LIST_HEAD(&eb->unbound);
+
+ err = eb_lock_vmas(eb);
+ if (err)
+ return err;
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
err = eb_pin_vma(eb, entry, ev);
if (err == -EDEADLK)
@@ -994,7 +993,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
-static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
@@ -1008,11 +1007,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release
eb_unreserve_vma(ev);
- if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
- ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
- i915_gem_object_userptr_submit_fini(vma->obj);
- }
-
if (final)
i915_vma_put(vma);
}
@@ -1022,8 +1016,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release
static void eb_destroy(const struct i915_execbuffer *eb)
{
- GEM_BUG_ON(eb->reloc_cache.rq);
-
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -1035,14 +1027,6 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
return gen8_canonical_addr((int)reloc->delta + target->node.start);
}
-static void reloc_cache_clear(struct reloc_cache *cache)
-{
- cache->rq = NULL;
- cache->rq_cmd = NULL;
- cache->pool = NULL;
- cache->rq_size = 0;
-}
-
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
@@ -1055,7 +1039,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->graphics_ver < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
- reloc_cache_clear(cache);
}
static inline void *unmask_page(unsigned long p)
@@ -1077,48 +1060,10 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
-static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
-{
- if (!cache->pool)
- return;
-
- /*
- * This is a bit nasty, normally we keep objects locked until the end
- * of execbuffer, but we already submit this, and have to unlock before
- * dropping the reference. Fortunately we can only hold 1 pool node at
- * a time, so this should be harmless.
- */
- i915_gem_ww_unlock_single(cache->pool->obj);
- intel_gt_buffer_pool_put(cache->pool);
- cache->pool = NULL;
-}
-
-static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
-{
- struct drm_i915_gem_object *obj = cache->rq->batch->obj;
-
- GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
- cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
-
- i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
-
- intel_gt_chipset_flush(cache->rq->engine->gt);
-
- i915_request_add(cache->rq);
- reloc_cache_put_pool(eb, cache);
- reloc_cache_clear(cache);
-
- eb->reloc_pool = NULL;
-}
-
static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
- if (cache->rq)
- reloc_gpu_flush(eb, cache);
-
if (!cache->vaddr)
return;
@@ -1300,291 +1245,6 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
*addr = value;
}
-static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- int err;
-
- assert_vma_held(vma);
-
- if (obj->cache_dirty & ~obj->cache_coherent)
- i915_gem_clflush_object(obj, 0);
- obj->write_domain = 0;
-
- err = i915_request_await_object(rq, vma->obj, true);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
- return err;
-}
-
-static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct intel_engine_cs *engine,
- struct i915_vma *vma,
- unsigned int len)
-{
- struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
- struct i915_request *rq;
- struct i915_vma *batch;
- u32 *cmd;
- int err;
-
- if (!pool) {
- pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE,
- cache->has_llc ?
- I915_MAP_WB :
- I915_MAP_WC);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
- }
- eb->reloc_pool = NULL;
-
- err = i915_gem_object_lock(pool->obj, &eb->ww);
- if (err)
- goto err_pool;
-
- cmd = i915_gem_object_pin_map(pool->obj, pool->type);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto err_pool;
- }
- intel_gt_buffer_pool_mark_used(pool);
-
- memset32(cmd, 0, pool->obj->base.size / sizeof(u32));
-
- batch = i915_vma_instance(pool->obj, vma->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto err_unmap;
- }
-
- err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
- if (err)
- goto err_unmap;
-
- if (engine == eb->context->engine) {
- rq = i915_request_create(eb->context);
- } else {
- struct intel_context *ce = eb->reloc_context;
-
- if (!ce) {
- ce = intel_context_create(engine);
- if (IS_ERR(ce)) {
- err = PTR_ERR(ce);
- goto err_unpin;
- }
-
- i915_vm_put(ce->vm);
- ce->vm = i915_vm_get(eb->context->vm);
- eb->reloc_context = ce;
- }
-
- err = intel_context_pin_ww(ce, &eb->ww);
- if (err)
- goto err_unpin;
-
- rq = i915_request_create(ce);
- intel_context_unpin(ce);
- }
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_unpin;
- }
-
- err = intel_gt_buffer_pool_mark_active(pool, rq);
- if (err)
- goto err_request;
-
- err = reloc_move_to_gpu(rq, vma);
- if (err)
- goto err_request;
-
- err = eb->engine->emit_bb_start(rq,
- batch->node.start, PAGE_SIZE,
- cache->graphics_ver > 5 ? 0 : I915_DISPATCH_SECURE);
- if (err)
- goto skip_request;
-
- assert_vma_held(batch);
- err = i915_request_await_object(rq, batch->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(batch, rq, 0);
- if (err)
- goto skip_request;
-
- rq->batch = batch;
- i915_vma_unpin(batch);
-
- cache->rq = rq;
- cache->rq_cmd = cmd;
- cache->rq_size = 0;
- cache->pool = pool;
-
- /* Return with batch mapping (cmd) still pinned */
- return 0;
-
-skip_request:
- i915_request_set_error_once(rq, err);
-err_request:
- i915_request_add(rq);
-err_unpin:
- i915_vma_unpin(batch);
-err_unmap:
- i915_gem_object_unpin_map(pool->obj);
-err_pool:
- eb->reloc_pool = pool;
- return err;
-}
-
-static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
-{
- return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6;
-}
-
-static u32 *reloc_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- unsigned int len)
-{
- struct reloc_cache *cache = &eb->reloc_cache;
- u32 *cmd;
-
- if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
- reloc_gpu_flush(eb, cache);
-
- if (unlikely(!cache->rq)) {
- int err;
- struct intel_engine_cs *engine = eb->engine;
-
- if (!reloc_can_use_engine(engine)) {
- engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
- if (!engine)
- return ERR_PTR(-ENODEV);
- }
-
- err = __reloc_gpu_alloc(eb, engine, vma, len);
- if (unlikely(err))
- return ERR_PTR(err);
- }
-
- cmd = cache->rq_cmd + cache->rq_size;
- cache->rq_size += len;
-
- return cmd;
-}
-
-static inline bool use_reloc_gpu(struct i915_vma *vma)
-{
- if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
- return true;
-
- if (DBG_FORCE_RELOC)
- return false;
-
- return !dma_resv_test_signaled(vma->resv, true);
-}
-
-static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
-{
- struct page *page;
- unsigned long addr;
-
- GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
-
- page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
- addr = PFN_PHYS(page_to_pfn(page));
- GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
-
- return addr + offset_in_page(offset);
-}
-
-static int __reloc_entry_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- u64 offset,
- u64 target_addr)
-{
- const unsigned int ver = eb->reloc_cache.graphics_ver;
- unsigned int len;
- u32 *batch;
- u64 addr;
-
- if (ver >= 8)
- len = offset & 7 ? 8 : 5;
- else if (ver >= 4)
- len = 4;
- else
- len = 3;
-
- batch = reloc_gpu(eb, vma, len);
- if (batch == ERR_PTR(-EDEADLK))
- return -EDEADLK;
- else if (IS_ERR(batch))
- return false;
-
- addr = gen8_canonical_addr(vma->node.start + offset);
- if (ver >= 8) {
- if (offset & 7) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_addr);
-
- addr = gen8_canonical_addr(addr + 4);
-
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = upper_32_bits(target_addr);
- } else {
- *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_addr);
- *batch++ = upper_32_bits(target_addr);
- }
- } else if (ver >= 6) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_addr;
- } else if (IS_I965G(eb->i915)) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = vma_phys_addr(vma, offset);
- *batch++ = target_addr;
- } else if (ver >= 4) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_addr;
- } else if (ver >= 3 &&
- !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
- *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *batch++ = addr;
- *batch++ = target_addr;
- } else {
- *batch++ = MI_STORE_DWORD_IMM;
- *batch++ = vma_phys_addr(vma, offset);
- *batch++ = target_addr;
- }
-
- return true;
-}
-
-static int reloc_entry_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- u64 offset,
- u64 target_addr)
-{
- if (eb->reloc_cache.vaddr)
- return false;
-
- if (!use_reloc_gpu(vma))
- return false;
-
- return __reloc_entry_gpu(eb, vma, offset, target_addr);
-}
-
static u64
relocate_entry(struct i915_vma *vma,
const struct drm_i915_gem_relocation_entry *reloc,
@@ -1593,32 +1253,25 @@ relocate_entry(struct i915_vma *vma,
{
u64 target_addr = relocation_target(reloc, target);
u64 offset = reloc->offset;
- int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
-
- if (reloc_gpu < 0)
- return reloc_gpu;
-
- if (!reloc_gpu) {
- bool wide = eb->reloc_cache.use_64bit_reloc;
- void *vaddr;
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj, eb,
- offset >> PAGE_SHIFT);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
-
- GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
- clflush_write32(vaddr + offset_in_page(offset),
- lower_32_bits(target_addr),
- eb->reloc_cache.vaddr);
-
- if (wide) {
- offset += sizeof(u32);
- target_addr >>= 32;
- wide = false;
- goto repeat;
- }
+ vaddr = reloc_vaddr(vma->obj, eb,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
+
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
}
return target->node.start | UPDATE;
@@ -1990,7 +1643,7 @@ repeat:
}
/* We may process another execbuffer during the unlock... */
- eb_release_vmas(eb, false, true);
+ eb_release_vmas(eb, false);
i915_gem_ww_ctx_fini(&eb->ww);
if (rq) {
@@ -2059,9 +1712,7 @@ repeat_validate:
list_for_each_entry(ev, &eb->relocs, reloc_link) {
if (!have_copy) {
- pagefault_disable();
err = eb_relocate_vma(eb, ev);
- pagefault_enable();
if (err)
break;
} else {
@@ -2094,7 +1745,7 @@ repeat_validate:
err:
if (err == -EDEADLK) {
- eb_release_vmas(eb, false, false);
+ eb_release_vmas(eb, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto repeat_validate;
@@ -2191,7 +1842,7 @@ retry:
err:
if (err == -EDEADLK) {
- eb_release_vmas(eb, false, false);
+ eb_release_vmas(eb, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto retry;
@@ -2268,7 +1919,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
#ifdef CONFIG_MMU_NOTIFIER
if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
- spin_lock(&eb->i915->mm.notifier_lock);
+ read_lock(&eb->i915->mm.notifier_lock);
/*
* count is always at least 1, otherwise __EXEC_USERPTR_USED
@@ -2286,7 +1937,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
break;
}
- spin_unlock(&eb->i915->mm.notifier_lock);
+ read_unlock(&eb->i915->mm.notifier_lock);
}
#endif
@@ -2372,217 +2023,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
return vma;
}
-struct eb_parse_work {
- struct dma_fence_work base;
- struct intel_engine_cs *engine;
- struct i915_vma *batch;
- struct i915_vma *shadow;
- struct i915_vma *trampoline;
- unsigned long batch_offset;
- unsigned long batch_length;
- unsigned long *jump_whitelist;
- const void *batch_map;
- void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
- struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
- int ret;
- bool cookie;
-
- cookie = dma_fence_begin_signalling();
- ret = intel_engine_cmd_parser(pw->engine,
- pw->batch,
- pw->batch_offset,
- pw->batch_length,
- pw->shadow,
- pw->jump_whitelist,
- pw->shadow_map,
- pw->batch_map);
- dma_fence_end_signalling(cookie);
-
- return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
- struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
- if (!IS_ERR_OR_NULL(pw->jump_whitelist))
- kfree(pw->jump_whitelist);
-
- if (pw->batch_map)
- i915_gem_object_unpin_map(pw->batch->obj);
- else
- i915_gem_object_unpin_pages(pw->batch->obj);
-
- i915_gem_object_unpin_map(pw->shadow->obj);
-
- if (pw->trampoline)
- i915_active_release(&pw->trampoline->active);
- i915_active_release(&pw->shadow->active);
- i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
- .name = "eb_parse",
- .work = __eb_parse,
- .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
- struct intel_timeline *tl,
- struct dma_fence *fence)
-{
- struct intel_gt_buffer_pool_node *node = vma->private;
-
- return i915_active_ref(&node->active, tl->fence_context, fence);
-}
-
-static int
-parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
-{
- int err;
-
- mutex_lock(&tl->mutex);
-
- err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
- if (err)
- goto unlock;
-
- if (pw->trampoline) {
- err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
- if (err)
- goto unlock;
- }
-
-unlock:
- mutex_unlock(&tl->mutex);
- return err;
-}
-
-static int eb_parse_pipeline(struct i915_execbuffer *eb,
- struct i915_vma *shadow,
- struct i915_vma *trampoline)
-{
- struct eb_parse_work *pw;
- struct drm_i915_gem_object *batch = eb->batch->vma->obj;
- bool needs_clflush;
- int err;
-
- GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
- GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
-
- pw = kzalloc(sizeof(*pw), GFP_KERNEL);
- if (!pw)
- return -ENOMEM;
-
- err = i915_active_acquire(&eb->batch->vma->active);
- if (err)
- goto err_free;
-
- err = i915_active_acquire(&shadow->active);
- if (err)
- goto err_batch;
-
- if (trampoline) {
- err = i915_active_acquire(&trampoline->active);
- if (err)
- goto err_shadow;
- }
-
- pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB);
- if (IS_ERR(pw->shadow_map)) {
- err = PTR_ERR(pw->shadow_map);
- goto err_trampoline;
- }
-
- needs_clflush =
- !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
- pw->batch_map = ERR_PTR(-ENODEV);
- if (needs_clflush && i915_has_memcpy_from_wc())
- pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
-
- if (IS_ERR(pw->batch_map)) {
- err = i915_gem_object_pin_pages(batch);
- if (err)
- goto err_unmap_shadow;
- pw->batch_map = NULL;
- }
-
- pw->jump_whitelist =
- intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len,
- trampoline);
- if (IS_ERR(pw->jump_whitelist)) {
- err = PTR_ERR(pw->jump_whitelist);
- goto err_unmap_batch;
- }
-
- dma_fence_work_init(&pw->base, &eb_parse_ops);
-
- pw->engine = eb->engine;
- pw->batch = eb->batch->vma;
- pw->batch_offset = eb->batch_start_offset;
- pw->batch_length = eb->batch_len;
- pw->shadow = shadow;
- pw->trampoline = trampoline;
-
- /* Mark active refs early for this worker, in case we get interrupted */
- err = parser_mark_active(pw, eb->context->timeline);
- if (err)
- goto err_commit;
-
- err = dma_resv_reserve_shared(pw->batch->resv, 1);
- if (err)
- goto err_commit;
-
- err = dma_resv_reserve_shared(shadow->resv, 1);
- if (err)
- goto err_commit;
-
- /* Wait for all writes (and relocs) into the batch to complete */
- err = i915_sw_fence_await_reservation(&pw->base.chain,
- pw->batch->resv, NULL, false,
- 0, I915_FENCE_GFP);
- if (err < 0)
- goto err_commit;
-
- /* Keep the batch alive and unwritten as we parse */
- dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
-
- /* Force execution to wait for completion of the parser */
- dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
-
- dma_fence_work_commit_imm(&pw->base);
- return 0;
-
-err_commit:
- i915_sw_fence_set_error_once(&pw->base.chain, err);
- dma_fence_work_commit_imm(&pw->base);
- return err;
-
-err_unmap_batch:
- if (pw->batch_map)
- i915_gem_object_unpin_map(batch);
- else
- i915_gem_object_unpin_pages(batch);
-err_unmap_shadow:
- i915_gem_object_unpin_map(shadow->obj);
-err_trampoline:
- if (trampoline)
- i915_active_release(&trampoline->active);
-err_shadow:
- i915_active_release(&shadow->active);
-err_batch:
- i915_active_release(&eb->batch->vma->active);
-err_free:
- kfree(pw);
- return err;
-}
-
static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
{
/*
@@ -2672,7 +2112,15 @@ static int eb_parse(struct i915_execbuffer *eb)
goto err_trampoline;
}
- err = eb_parse_pipeline(eb, shadow, trampoline);
+ err = dma_resv_reserve_shared(shadow->resv, 1);
+ if (err)
+ goto err_trampoline;
+
+ err = intel_engine_cmd_parser(eb->engine,
+ eb->batch->vma,
+ eb->batch_start_offset,
+ eb->batch_len,
+ shadow, trampoline);
if (err)
goto err_unpin_batch;
@@ -2983,7 +2431,7 @@ __free_fence_array(struct eb_fence *fences, unsigned int n)
while (n--) {
drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
dma_fence_put(fences[n].dma_fence);
- kfree(fences[n].chain_fence);
+ dma_fence_chain_free(fences[n].chain_fence);
}
kvfree(fences);
}
@@ -3097,9 +2545,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
return -EINVAL;
}
- f->chain_fence =
- kmalloc(sizeof(*f->chain_fence),
- GFP_KERNEL);
+ f->chain_fence = dma_fence_chain_alloc();
if (!f->chain_fence) {
drm_syncobj_put(syncobj);
dma_fence_put(fence);
@@ -3359,8 +2805,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.exec = exec;
eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
eb.vma[0].vma = NULL;
- eb.reloc_pool = eb.batch_pool = NULL;
- eb.reloc_context = NULL;
+ eb.batch_pool = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -3435,7 +2880,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err = eb_lookup_vmas(&eb);
if (err) {
- eb_release_vmas(&eb, true, true);
+ eb_release_vmas(&eb, true);
goto err_engine;
}
@@ -3458,9 +2903,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
batch = eb.batch->vma;
- /* All GPU relocation batches must be submitted prior to the user rq */
- GEM_BUG_ON(eb.reloc_cache.rq);
-
/* Allocate a request for this batch buffer nice and early. */
eb.request = i915_request_create(eb.context);
if (IS_ERR(eb.request)) {
@@ -3468,11 +2910,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
+ if (unlikely(eb.gem_context->syncobj)) {
+ struct dma_fence *fence;
+
+ fence = drm_syncobj_fence_get(eb.gem_context->syncobj);
+ err = i915_request_await_dma_fence(eb.request, fence);
+ dma_fence_put(fence);
+ if (err)
+ goto err_ext;
+ }
+
if (in_fence) {
if (args->flags & I915_EXEC_FENCE_SUBMIT)
err = i915_request_await_execution(eb.request,
- in_fence,
- eb.engine->bond_execute);
+ in_fence);
else
err = i915_request_await_dma_fence(eb.request,
in_fence);
@@ -3525,10 +2976,16 @@ err_request:
fput(out_fence->file);
}
}
+
+ if (unlikely(eb.gem_context->syncobj)) {
+ drm_syncobj_replace_fence(eb.gem_context->syncobj,
+ &eb.request->fence);
+ }
+
i915_request_put(eb.request);
err_vma:
- eb_release_vmas(&eb, true, true);
+ eb_release_vmas(&eb, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
@@ -3536,10 +2993,6 @@ err_vma:
if (eb.batch_pool)
intel_gt_buffer_pool_put(eb.batch_pool);
- if (eb.reloc_pool)
- intel_gt_buffer_pool_put(eb.reloc_pool);
- if (eb.reloc_context)
- intel_context_put(eb.reloc_context);
err_engine:
eb_put_engine(&eb);
err_context:
@@ -3653,7 +3106,3 @@ end:;
kvfree(exec2_list);
return err;
}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/i915_gem_execbuffer.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index ce6b664b10aa..13b217f75055 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -177,8 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size);
- i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class,
- I915_BO_ALLOC_STRUCT_PAGE);
+ i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
/*
* Mark the object as volatile, such that the pages are marked as
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
index 3b4aa28a076d..eb345305dc52 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -4,74 +4,10 @@
*/
#include "intel_memory_region.h"
-#include "intel_region_ttm.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_lmem.h"
#include "i915_drv.h"
-static void lmem_put_pages(struct drm_i915_gem_object *obj,
- struct sg_table *pages)
-{
- intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node);
- obj->mm.dirty = false;
- sg_free_table(pages);
- kfree(pages);
-}
-
-static int lmem_get_pages(struct drm_i915_gem_object *obj)
-{
- unsigned int flags;
- struct sg_table *pages;
-
- flags = I915_ALLOC_MIN_PAGE_SIZE;
- if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
- flags |= I915_ALLOC_CONTIGUOUS;
-
- obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region,
- obj->base.size,
- flags);
- if (IS_ERR(obj->mm.st_mm_node))
- return PTR_ERR(obj->mm.st_mm_node);
-
- /* Range manager is always contigous */
- if (obj->mm.region->is_range_manager)
- obj->flags |= I915_BO_ALLOC_CONTIGUOUS;
- pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node);
- if (IS_ERR(pages)) {
- intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node);
- return PTR_ERR(pages);
- }
-
- __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl));
-
- if (obj->flags & I915_BO_ALLOC_CPU_CLEAR) {
- void __iomem *vaddr =
- i915_gem_object_lmem_io_map(obj, 0, obj->base.size);
-
- if (!vaddr) {
- struct sg_table *pages =
- __i915_gem_object_unset_pages(obj);
-
- if (!IS_ERR_OR_NULL(pages))
- lmem_put_pages(obj, pages);
- }
-
- memset_io(vaddr, 0, obj->base.size);
- io_mapping_unmap(vaddr);
- }
-
- return 0;
-}
-
-const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = {
- .name = "i915_gem_object_lmem",
- .flags = I915_GEM_OBJECT_HAS_IOMEM,
-
- .get_pages = lmem_get_pages,
- .put_pages = lmem_put_pages,
- .release = i915_gem_object_release_memory_region,
-};
-
void __iomem *
i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
unsigned long n,
@@ -87,39 +23,91 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
return io_mapping_map_wc(&obj->mm.region->iomap, offset, size);
}
+/**
+ * i915_gem_object_is_lmem - Whether the object is resident in
+ * lmem
+ * @obj: The object to check.
+ *
+ * Even if an object is allowed to migrate and change memory region,
+ * this function checks whether it will always be present in lmem when
+ * valid *or* if that's not the case, whether it's currently resident in lmem.
+ * For migratable and evictable objects, the latter only makes sense when
+ * the object is locked.
+ *
+ * Return: Whether the object is migratable but currently resident in lmem,
+ * or is not migratable and will be present in lmem when valid.
+ */
bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
{
- struct intel_memory_region *mr = obj->mm.region;
+ struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+#ifdef CONFIG_LOCKDEP
+ if (i915_gem_object_migratable(obj) &&
+ i915_gem_object_evictable(obj))
+ assert_object_held(obj);
+#endif
return mr && (mr->type == INTEL_MEMORY_LOCAL ||
mr->type == INTEL_MEMORY_STOLEN_LOCAL);
}
+/**
+ * __i915_gem_object_is_lmem - Whether the object is resident in
+ * lmem while in the fence signaling critical path.
+ * @obj: The object to check.
+ *
+ * This function is intended to be called from within the fence signaling
+ * path where the fence keeps the object from being migrated. For example
+ * during gpu reset or similar.
+ *
+ * Return: Whether the object is resident in lmem.
+ */
+bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+
+#ifdef CONFIG_LOCKDEP
+ GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true));
+#endif
+ return mr && (mr->type == INTEL_MEMORY_LOCAL ||
+ mr->type == INTEL_MEMORY_STOLEN_LOCAL);
+}
+
+/**
+ * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the
+ * minimum page size for the backing pages.
+ * @i915: The i915 instance.
+ * @size: The size in bytes for the object. Note that we need to round the size
+ * up depending on the @page_size. The final object size can be fished out from
+ * the drm GEM object.
+ * @page_size: The requested minimum page size in bytes for this object. This is
+ * useful if we need something bigger than the region's min_page_size due to some
+ * hw restriction, or in some very specialised cases where it needs to be
+ * smaller, where the internal fragmentation cost is too great when rounding up
+ * the object size.
+ * @flags: The optional BO allocation flags.
+ *
+ * Note that this interface assumes you know what you are doing when forcing the
+ * @page_size. If this is smaller than the region's min_page_size then it can
+ * never be inserted into any GTT, otherwise it might lead to undefined
+ * behaviour.
+ *
+ * Return: The object pointer, which might be an ERR_PTR in the case of failure.
+ */
struct drm_i915_gem_object *
-i915_gem_object_create_lmem(struct drm_i915_private *i915,
- resource_size_t size,
- unsigned int flags)
+__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
{
return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
- size, flags);
+ size, page_size, flags);
}
-int __i915_gem_lmem_object_init(struct intel_memory_region *mem,
- struct drm_i915_gem_object *obj,
- resource_size_t size,
- unsigned int flags)
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem(struct drm_i915_private *i915,
+ resource_size_t size,
+ unsigned int flags)
{
- static struct lock_class_key lock_class;
- struct drm_i915_private *i915 = mem->i915;
-
- drm_gem_private_object_init(&i915->drm, &obj->base, size);
- i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class, flags);
-
- obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT;
-
- i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
-
- i915_gem_object_init_memory_region(obj, mem);
-
- return 0;
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
+ size, 0, flags);
}
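
For reference, a hedged usage sketch of the new allocation helper follows; the 16 MiB object size, the 2 MiB minimum page size, the contiguous flag and the error handling are illustrative assumptions, not part of this patch:

/* Illustrative sketch only: allocate an lmem-only object that must be
 * backed by at least 2 MiB pages, then drop the reference again. */
static int example_alloc_lmem(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;

	obj = __i915_gem_object_create_lmem_with_ps(i915, SZ_16M, SZ_2M,
						    I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... use the object ... */

	i915_gem_object_put(obj);
	return 0;
}
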
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
index fac6bc5a5ebb..4ee81fc66302 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
@@ -21,14 +21,16 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags);
struct drm_i915_gem_object *
i915_gem_object_create_lmem(struct drm_i915_private *i915,
resource_size_t size,
unsigned int flags);
-int __i915_gem_lmem_object_init(struct intel_memory_region *mem,
- struct drm_i915_gem_object *obj,
- resource_size_t size,
- unsigned int flags);
-
#endif /* !__I915_GEM_LMEM_H */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 215326764606..5130e8ed9564 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -19,6 +19,7 @@
#include "i915_gem_mman.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
+#include "i915_gem_ttm.h"
#include "i915_vma.h"
static inline bool
@@ -624,6 +625,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
struct i915_mmap_offset *mmo;
int err;
+ GEM_BUG_ON(obj->ops->mmap_offset || obj->ops->mmap_ops);
+
mmo = lookup_mmo(obj, mmap_type);
if (mmo)
goto out;
@@ -642,7 +645,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
goto insert;
/* Attempt to reap some mmap space from dead objects */
- err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT);
+ err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (err)
goto err;
@@ -666,40 +670,58 @@ err:
}
static int
-__assign_mmap_offset(struct drm_file *file,
- u32 handle,
+__assign_mmap_offset(struct drm_i915_gem_object *obj,
enum i915_mmap_type mmap_type,
- u64 *offset)
+ u64 *offset, struct drm_file *file)
{
- struct drm_i915_gem_object *obj;
struct i915_mmap_offset *mmo;
- int err;
- obj = i915_gem_object_lookup(file, handle);
- if (!obj)
- return -ENOENT;
+ if (i915_gem_object_never_mmap(obj))
+ return -ENODEV;
- if (i915_gem_object_never_mmap(obj)) {
- err = -ENODEV;
- goto out;
+ if (obj->ops->mmap_offset) {
+ if (mmap_type != I915_MMAP_TYPE_FIXED)
+ return -ENODEV;
+
+ *offset = obj->ops->mmap_offset(obj);
+ return 0;
}
+ if (mmap_type == I915_MMAP_TYPE_FIXED)
+ return -ENODEV;
+
if (mmap_type != I915_MMAP_TYPE_GTT &&
!i915_gem_object_has_struct_page(obj) &&
- !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) {
- err = -ENODEV;
- goto out;
- }
+ !i915_gem_object_has_iomem(obj))
+ return -ENODEV;
mmo = mmap_offset_attach(obj, mmap_type, file);
- if (IS_ERR(mmo)) {
- err = PTR_ERR(mmo);
- goto out;
- }
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
*offset = drm_vma_node_offset_addr(&mmo->vma_node);
- err = 0;
-out:
+ return 0;
+}
+
+static int
+__assign_mmap_offset_handle(struct drm_file *file,
+ u32 handle,
+ enum i915_mmap_type mmap_type,
+ u64 *offset)
+{
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_lookup(file, handle);
+ if (!obj)
+ return -ENOENT;
+
+ err = i915_gem_object_lock_interruptible(obj, NULL);
+ if (err)
+ goto out_put;
+ err = __assign_mmap_offset(obj, mmap_type, offset, file);
+ i915_gem_object_unlock(obj);
+out_put:
i915_gem_object_put(obj);
return err;
}
@@ -712,14 +734,16 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
{
enum i915_mmap_type mmap_type;
- if (boot_cpu_has(X86_FEATURE_PAT))
+ if (HAS_LMEM(to_i915(dev)))
+ mmap_type = I915_MMAP_TYPE_FIXED;
+ else if (boot_cpu_has(X86_FEATURE_PAT))
mmap_type = I915_MMAP_TYPE_WC;
else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
return -ENODEV;
else
mmap_type = I915_MMAP_TYPE_GTT;
- return __assign_mmap_offset(file, handle, mmap_type, offset);
+ return __assign_mmap_offset_handle(file, handle, mmap_type, offset);
}
/**
@@ -783,11 +807,15 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
type = I915_MMAP_TYPE_UC;
break;
+ case I915_MMAP_OFFSET_FIXED:
+ type = I915_MMAP_TYPE_FIXED;
+ break;
+
default:
return -EINVAL;
}
- return __assign_mmap_offset(file, args->handle, type, &args->offset);
+ return __assign_mmap_offset_handle(file, args->handle, type, &args->offset);
}
static void vm_open(struct vm_area_struct *vma)
@@ -891,8 +919,18 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
* destroyed and will be invalid when the vma manager lock
* is released.
*/
- mmo = container_of(node, struct i915_mmap_offset, vma_node);
- obj = i915_gem_object_get_rcu(mmo->obj);
+ if (!node->driver_private) {
+ mmo = container_of(node, struct i915_mmap_offset, vma_node);
+ obj = i915_gem_object_get_rcu(mmo->obj);
+
+ GEM_BUG_ON(obj && obj->ops->mmap_ops);
+ } else {
+ obj = i915_gem_object_get_rcu
+ (container_of(node, struct drm_i915_gem_object,
+ base.vma_node));
+
+ GEM_BUG_ON(obj && !obj->ops->mmap_ops);
+ }
}
drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
rcu_read_unlock();
@@ -913,8 +951,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
return PTR_ERR(anon);
}
- vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
- vma->vm_private_data = mmo;
+ vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
/*
* We keep the ref on mmo->obj, not vm_file, but we require
@@ -928,6 +965,15 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
/* Drop the initial creation reference, the vma is now holding one. */
fput(anon);
+ if (obj->ops->mmap_ops) {
+ vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = obj->ops->mmap_ops;
+ vma->vm_private_data = node->driver_private;
+ return 0;
+ }
+
+ vma->vm_private_data = mmo;
+
switch (mmo->mmap_type) {
case I915_MMAP_TYPE_WC:
vma->vm_page_prot =
@@ -935,6 +981,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
vma->vm_ops = &vm_ops_cpu;
break;
+ case I915_MMAP_TYPE_FIXED:
+ GEM_WARN_ON(1);
+ fallthrough;
case I915_MMAP_TYPE_WB:
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
vma->vm_ops = &vm_ops_cpu;
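
As a hedged illustration of the new I915_MMAP_TYPE_FIXED path from userspace: the helper name, return convention and error handling below are assumptions; the ioctl, struct and I915_MMAP_OFFSET_FIXED flag come from the i915 uapi this series extends.

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

/* Illustrative sketch only: request the FIXED mmap type, where the kernel
 * picks the caching mode (e.g. via TTM on discrete parts), then map it. */
static void *example_map_fixed(int fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_offset arg = {
		.handle = handle,
		.flags = I915_MMAP_OFFSET_FIXED,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
		return MAP_FAILED;

	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd, arg.offset);
}
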
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 5706d471692d..6fb9afb65034 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -30,14 +30,10 @@
#include "i915_gem_context.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
-#include "i915_globals.h"
#include "i915_memcpy.h"
#include "i915_trace.h"
-static struct i915_global_object {
- struct i915_global base;
- struct kmem_cache *slab_objects;
-} global;
+static struct kmem_cache *slab_objects;
static const struct drm_gem_object_funcs i915_gem_object_funcs;
@@ -45,7 +41,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void)
{
struct drm_i915_gem_object *obj;
- obj = kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
+ obj = kmem_cache_zalloc(slab_objects, GFP_KERNEL);
if (!obj)
return NULL;
obj->base.funcs = &i915_gem_object_funcs;
@@ -55,7 +51,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void)
void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
- return kmem_cache_free(global.slab_objects, obj);
+ return kmem_cache_free(slab_objects, obj);
}
void i915_gem_object_init(struct drm_i915_gem_object *obj,
@@ -172,7 +168,7 @@ static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *f
}
}
-static void __i915_gem_free_object_rcu(struct rcu_head *head)
+void __i915_gem_free_object_rcu(struct rcu_head *head)
{
struct drm_i915_gem_object *obj =
container_of(head, typeof(*obj), rcu);
@@ -208,59 +204,69 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
}
}
-static void __i915_gem_free_objects(struct drm_i915_private *i915,
- struct llist_node *freed)
+void __i915_gem_free_object(struct drm_i915_gem_object *obj)
{
- struct drm_i915_gem_object *obj, *on;
+ trace_i915_gem_object_destroy(obj);
- llist_for_each_entry_safe(obj, on, freed, freed) {
- trace_i915_gem_object_destroy(obj);
+ if (!list_empty(&obj->vma.list)) {
+ struct i915_vma *vma;
+
+ /*
+ * Note that the vma keeps an object reference while
+ * it is active, so it *should* not sleep while we
+ * destroy it. Our debug code errs, insisting it *might*.
+ * For the moment, play along.
+ */
+ spin_lock(&obj->vma.lock);
+ while ((vma = list_first_entry_or_null(&obj->vma.list,
+ struct i915_vma,
+ obj_link))) {
+ GEM_BUG_ON(vma->obj != obj);
+ spin_unlock(&obj->vma.lock);
- if (!list_empty(&obj->vma.list)) {
- struct i915_vma *vma;
+ __i915_vma_put(vma);
- /*
- * Note that the vma keeps an object reference while
- * it is active, so it *should* not sleep while we
- * destroy it. Our debug code errs insits it *might*.
- * For the moment, play along.
- */
spin_lock(&obj->vma.lock);
- while ((vma = list_first_entry_or_null(&obj->vma.list,
- struct i915_vma,
- obj_link))) {
- GEM_BUG_ON(vma->obj != obj);
- spin_unlock(&obj->vma.lock);
+ }
+ spin_unlock(&obj->vma.lock);
+ }
- __i915_vma_put(vma);
+ __i915_gem_object_free_mmaps(obj);
- spin_lock(&obj->vma.lock);
- }
- spin_unlock(&obj->vma.lock);
- }
+ GEM_BUG_ON(!list_empty(&obj->lut_list));
- __i915_gem_object_free_mmaps(obj);
+ atomic_set(&obj->mm.pages_pin_count, 0);
+ __i915_gem_object_put_pages(obj);
+ GEM_BUG_ON(i915_gem_object_has_pages(obj));
+ bitmap_free(obj->bit_17);
- GEM_BUG_ON(!list_empty(&obj->lut_list));
+ if (obj->base.import_attach)
+ drm_prime_gem_destroy(&obj->base, NULL);
- atomic_set(&obj->mm.pages_pin_count, 0);
- __i915_gem_object_put_pages(obj);
- GEM_BUG_ON(i915_gem_object_has_pages(obj));
- bitmap_free(obj->bit_17);
+ drm_gem_free_mmap_offset(&obj->base);
- if (obj->base.import_attach)
- drm_prime_gem_destroy(&obj->base, NULL);
+ if (obj->ops->release)
+ obj->ops->release(obj);
- drm_gem_free_mmap_offset(&obj->base);
+ if (obj->mm.n_placements > 1)
+ kfree(obj->mm.placements);
- if (obj->ops->release)
- obj->ops->release(obj);
+ if (obj->shares_resv_from)
+ i915_vm_resv_put(obj->shares_resv_from);
+}
- if (obj->mm.n_placements > 1)
- kfree(obj->mm.placements);
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+ struct llist_node *freed)
+{
+ struct drm_i915_gem_object *obj, *on;
- if (obj->shares_resv_from)
- i915_vm_resv_put(obj->shares_resv_from);
+ llist_for_each_entry_safe(obj, on, freed, freed) {
+ might_sleep();
+ if (obj->ops->delayed_free) {
+ obj->ops->delayed_free(obj);
+ continue;
+ }
+ __i915_gem_free_object(obj);
/* But keep the pointer alive for RCU-protected lookups */
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
@@ -318,6 +324,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
* worker and performing frees directly from subsequent allocations for
* crude but effective memory throttling.
*/
+
if (llist_add(&obj->freed, &i915->mm.free_list))
queue_work(i915->wq, &i915->mm.free_work);
}
@@ -410,34 +417,254 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
return 0;
}
-void i915_gem_init__objects(struct drm_i915_private *i915)
+/**
+ * i915_gem_object_evictable - Whether object is likely evictable after unbind.
+ * @obj: The object to check
+ *
+ * This function checks whether the object is likely evictable after unbind.
+ * If the object is not locked when checking, the result is only advisory.
+ * If the object is locked when checking, and the function returns true,
+ * then an eviction should indeed be possible. But since unlocked vma
+ * unpinning and unbinding is currently possible, the object can actually
+ * become evictable even if this function returns false.
+ *
+ * Return: true if the object may be evictable. False otherwise.
+ */
+bool i915_gem_object_evictable(struct drm_i915_gem_object *obj)
{
- INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+ struct i915_vma *vma;
+ int pin_count = atomic_read(&obj->mm.pages_pin_count);
+
+ if (!pin_count)
+ return true;
+
+ spin_lock(&obj->vma.lock);
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
+ if (i915_vma_is_pinned(vma)) {
+ spin_unlock(&obj->vma.lock);
+ return false;
+ }
+ if (atomic_read(&vma->pages_count))
+ pin_count--;
+ }
+ spin_unlock(&obj->vma.lock);
+ GEM_WARN_ON(pin_count < 0);
+
+ return pin_count == 0;
+}
+
+/**
+ * i915_gem_object_migratable - Whether the object is migratable out of the
+ * current region.
+ * @obj: Pointer to the object.
+ *
+ * Return: Whether the object is allowed to be resident in other
+ * regions than the current while pages are present.
+ */
+bool i915_gem_object_migratable(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+
+ if (!mr)
+ return false;
+
+ return obj->mm.n_placements > 1;
}
-static void i915_global_objects_shrink(void)
+/**
+ * i915_gem_object_has_struct_page - Whether the object is page-backed
+ * @obj: The object to query.
+ *
+ * This function should only be called while the object is locked or pinned,
+ * otherwise the page backing may change under the caller.
+ *
+ * Return: True if page-backed, false otherwise.
+ */
+bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
{
- kmem_cache_shrink(global.slab_objects);
+#ifdef CONFIG_LOCKDEP
+ if (IS_DGFX(to_i915(obj->base.dev)) &&
+ i915_gem_object_evictable((void __force *)obj))
+ assert_object_held_shared(obj);
+#endif
+ return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE;
}
-static void i915_global_objects_exit(void)
+/**
+ * i915_gem_object_has_iomem - Whether the object is iomem-backed
+ * @obj: The object to query.
+ *
+ * This function should only be called while the object is locked or pinned,
+ * otherwise the iomem backing may change under the caller.
+ *
+ * Return: True if iomem-backed, false otherwise.
+ */
+bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj)
{
- kmem_cache_destroy(global.slab_objects);
+#ifdef CONFIG_LOCKDEP
+ if (IS_DGFX(to_i915(obj->base.dev)) &&
+ i915_gem_object_evictable((void __force *)obj))
+ assert_object_held_shared(obj);
+#endif
+ return obj->mem_flags & I915_BO_FLAG_IOMEM;
}
-static struct i915_global_object global = { {
- .shrink = i915_global_objects_shrink,
- .exit = i915_global_objects_exit,
-} };
+/**
+ * i915_gem_object_can_migrate - Whether an object likely can be migrated
+ *
+ * @obj: The object to migrate
+ * @id: The region intended to migrate to
+ *
+ * Check whether the object backend supports migration to the
+ * given region. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate objects and might be slightly less permissive
+ * than i915_gem_object_migrate() when it comes to objects with the
+ * I915_BO_ALLOC_USER flag set.
+ *
+ * Return: true if migration is possible, false otherwise.
+ */
+bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
+ enum intel_region_id id)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ unsigned int num_allowed = obj->mm.n_placements;
+ struct intel_memory_region *mr;
+ unsigned int i;
+
+ GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN);
+ GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED);
+
+ mr = i915->mm.regions[id];
+ if (!mr)
+ return false;
+
+ if (obj->mm.region == mr)
+ return true;
+
+ if (!i915_gem_object_evictable(obj))
+ return false;
+
+ if (!obj->ops->migrate)
+ return false;
+
+ if (!(obj->flags & I915_BO_ALLOC_USER))
+ return true;
+
+ if (num_allowed == 0)
+ return false;
+
+ for (i = 0; i < num_allowed; ++i) {
+ if (mr == obj->mm.placements[i])
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * i915_gem_object_migrate - Migrate an object to the desired region id
+ * @obj: The object to migrate.
+ * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may
+ * not be successful in evicting other objects to make room for this object.
+ * @id: The region id to migrate to.
+ *
+ * Attempt to migrate the object to the desired memory region. The
+ * object backend must support migration and the object may not be
+ * pinned, (explicitly pinned pages or pinned vmas). The object must
+ * be locked.
+ * On successful completion, the object will have pages pointing to
+ * memory in the new region, but an async migration task may not have
+ * completed yet, and to accomplish that, i915_gem_object_wait_migration()
+ * must be called.
+ *
+ * Note: the @ww parameter is not used yet, but included to make sure
+ * callers put some effort into obtaining a valid ww ctx if one is
+ * available.
+ *
+ * Return: 0 on success. Negative error code on failure. In particular may
+ * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance
+ * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and
+ * -EBUSY if the object is pinned.
+ */
+int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ enum intel_region_id id)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_memory_region *mr;
+
+ GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN);
+ GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED);
+ assert_object_held(obj);
+
+ mr = i915->mm.regions[id];
+ GEM_BUG_ON(!mr);
+
+ if (!i915_gem_object_can_migrate(obj, id))
+ return -EINVAL;
+
+ if (!obj->ops->migrate) {
+ if (GEM_WARN_ON(obj->mm.region != mr))
+ return -EINVAL;
+ return 0;
+ }
+
+ return obj->ops->migrate(obj, mr);
+}
+
+/**
+ * i915_gem_object_placement_possible - Check whether the object can be
+ * placed at certain memory type
+ * @obj: Pointer to the object
+ * @type: The memory type to check
+ *
+ * Return: True if the object can be placed in @type. False otherwise.
+ */
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+ enum intel_memory_type type)
+{
+ unsigned int i;
+
+ if (!obj->mm.n_placements) {
+ switch (type) {
+ case INTEL_MEMORY_LOCAL:
+ return i915_gem_object_has_iomem(obj);
+ case INTEL_MEMORY_SYSTEM:
+ return i915_gem_object_has_pages(obj);
+ default:
+ /* Ignore stolen for now */
+ GEM_BUG_ON(1);
+ return false;
+ }
+ }
+
+ for (i = 0; i < obj->mm.n_placements; i++) {
+ if (obj->mm.placements[i]->type == type)
+ return true;
+ }
+
+ return false;
+}
+
+void i915_gem_init__objects(struct drm_i915_private *i915)
+{
+ INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+}
+
+void i915_objects_module_exit(void)
+{
+ kmem_cache_destroy(slab_objects);
+}
-int __init i915_global_objects_init(void)
+int __init i915_objects_module_init(void)
{
- global.slab_objects =
- KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
- if (!global.slab_objects)
+ slab_objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
+ if (!slab_objects)
return -ENOMEM;
- i915_global_register(&global.base);
return 0;
}
@@ -450,6 +677,7 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
+#include "selftests/i915_gem_migrate.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif
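
A hedged sketch of how the new migration helpers compose with the ww locking dance documented above; the function name, the choice of INTEL_REGION_SMEM and the trimmed error handling are illustrative assumptions:

/* Illustrative sketch only: migrate a locked object to system memory and
 * wait for the asynchronous copy, backing off on ww deadlock. */
static int example_migrate_to_smem(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
	if (!err)
		/* Pages now point at the new region; wait out the async task. */
		err = i915_gem_object_wait_migration(obj, 0);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
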
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 7c0eb425cb3b..48112b9d76df 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -12,10 +12,14 @@
#include <drm/drm_device.h>
#include "display/intel_frontbuffer.h"
+#include "intel_memory_region.h"
#include "i915_gem_object_types.h"
#include "i915_gem_gtt.h"
+#include "i915_gem_ww.h"
#include "i915_vma_types.h"
+enum intel_region_id;
+
/*
* XXX: There is a prevalence of the assumption that we fit the
* object's page count inside a 32bit _signed_ variable. Let's document
@@ -44,6 +48,9 @@ static inline bool i915_gem_object_size_2big(u64 size)
void i915_gem_init__objects(struct drm_i915_private *i915);
+void i915_objects_module_exit(void);
+int i915_objects_module_init(void);
+
struct drm_i915_gem_object *i915_gem_object_alloc(void);
void i915_gem_object_free(struct drm_i915_gem_object *obj);
@@ -57,6 +64,10 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915,
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
const void *data, resource_size_t size);
+struct drm_i915_gem_object *
+__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size,
+ struct intel_memory_region **placements,
+ unsigned int n_placements);
extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
@@ -147,7 +158,7 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
/*
* If more than one potential simultaneous locker, assert held.
*/
-static inline void assert_object_held_shared(struct drm_i915_gem_object *obj)
+static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj)
{
/*
* Note mm list lookup is protected by
@@ -169,13 +180,17 @@ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
else
ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
- if (!ret && ww)
+ if (!ret && ww) {
+ i915_gem_object_get(obj);
list_add_tail(&obj->obj_link, &ww->obj_list);
+ }
if (ret == -EALREADY)
ret = 0;
- if (ret == -EDEADLK)
+ if (ret == -EDEADLK) {
+ i915_gem_object_get(obj);
ww->contended = obj;
+ }
return ret;
}
@@ -200,6 +215,9 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
{
+ if (obj->ops->adjust_lru)
+ obj->ops->adjust_lru(obj);
+
dma_resv_unlock(obj->base.resv);
}
@@ -258,17 +276,9 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj,
return obj->ops->flags & flags;
}
-static inline bool
-i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
-{
- return obj->flags & I915_BO_ALLOC_STRUCT_PAGE;
-}
+bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj);
-static inline bool
-i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj)
-{
- return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM);
-}
+bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj);
static inline bool
i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
@@ -339,22 +349,22 @@ struct scatterlist *
__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
struct i915_gem_object_page_iter *iter,
unsigned int n,
- unsigned int *offset, bool allow_alloc);
+ unsigned int *offset, bool dma);
static inline struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
unsigned int n,
- unsigned int *offset, bool allow_alloc)
+ unsigned int *offset)
{
- return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc);
+ return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, false);
}
static inline struct scatterlist *
i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
unsigned int n,
- unsigned int *offset, bool allow_alloc)
+ unsigned int *offset)
{
- return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc);
+ return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, true);
}
struct page *
@@ -587,6 +597,27 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj);
+void __i915_gem_free_object_rcu(struct rcu_head *head);
+
+void __i915_gem_free_object(struct drm_i915_gem_object *obj);
+
+bool i915_gem_object_evictable(struct drm_i915_gem_object *obj);
+
+bool i915_gem_object_migratable(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ enum intel_region_id id);
+
+bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
+ enum intel_region_id id);
+
+int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
+ unsigned int flags);
+
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+ enum intel_memory_type type);
+
#ifdef CONFIG_MMU_NOTIFIER
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
@@ -596,14 +627,12 @@ i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
-void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj);
#else
static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; }
static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
-static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); }
static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
deleted file mode 100644
index 3e28c68fda3e..000000000000
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ /dev/null
@@ -1,461 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#include "i915_drv.h"
-#include "gt/intel_context.h"
-#include "gt/intel_engine_pm.h"
-#include "gt/intel_gpu_commands.h"
-#include "gt/intel_gt.h"
-#include "gt/intel_gt_buffer_pool.h"
-#include "gt/intel_ring.h"
-#include "i915_gem_clflush.h"
-#include "i915_gem_object_blt.h"
-
-struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
- struct i915_vma *vma,
- struct i915_gem_ww_ctx *ww,
- u32 value)
-{
- struct drm_i915_private *i915 = ce->vm->i915;
- const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
- struct intel_gt_buffer_pool_node *pool;
- struct i915_vma *batch;
- u64 offset;
- u64 count;
- u64 rem;
- u32 size;
- u32 *cmd;
- int err;
-
- GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
- intel_engine_pm_get(ce->engine);
-
- count = div_u64(round_up(vma->size, block_size), block_size);
- size = (1 + 8 * count) * sizeof(u32);
- size = round_up(size, PAGE_SIZE);
- pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC);
- if (IS_ERR(pool)) {
- err = PTR_ERR(pool);
- goto out_pm;
- }
-
- err = i915_gem_object_lock(pool->obj, ww);
- if (err)
- goto out_put;
-
- batch = i915_vma_instance(pool->obj, ce->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put;
- }
-
- err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_put;
-
- /* we pinned the pool, mark it as such */
- intel_gt_buffer_pool_mark_used(pool);
-
- cmd = i915_gem_object_pin_map(pool->obj, pool->type);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto out_unpin;
- }
-
- rem = vma->size;
- offset = vma->node.start;
-
- do {
- u32 size = min_t(u64, rem, block_size);
-
- GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
-
- if (GRAPHICS_VER(i915) >= 8) {
- *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
- *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
- *cmd++ = 0;
- *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cmd++ = lower_32_bits(offset);
- *cmd++ = upper_32_bits(offset);
- *cmd++ = value;
- } else {
- *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
- *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
- *cmd++ = 0;
- *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cmd++ = offset;
- *cmd++ = value;
- }
-
- /* Allow ourselves to be preempted in between blocks. */
- *cmd++ = MI_ARB_CHECK;
-
- offset += size;
- rem -= size;
- } while (rem);
-
- *cmd = MI_BATCH_BUFFER_END;
-
- i915_gem_object_flush_map(pool->obj);
- i915_gem_object_unpin_map(pool->obj);
-
- intel_gt_chipset_flush(ce->vm->gt);
-
- batch->private = pool;
- return batch;
-
-out_unpin:
- i915_vma_unpin(batch);
-out_put:
- intel_gt_buffer_pool_put(pool);
-out_pm:
- intel_engine_pm_put(ce->engine);
- return ERR_PTR(err);
-}
-
-int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
-{
- int err;
-
- err = i915_request_await_object(rq, vma->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, 0);
- if (unlikely(err))
- return err;
-
- return intel_gt_buffer_pool_mark_active(vma->private, rq);
-}
-
-void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
-{
- i915_vma_unpin(vma);
- intel_gt_buffer_pool_put(vma->private);
- intel_engine_pm_put(ce->engine);
-}
-
-static int
-move_obj_to_gpu(struct drm_i915_gem_object *obj,
- struct i915_request *rq,
- bool write)
-{
- if (obj->cache_dirty & ~obj->cache_coherent)
- i915_gem_clflush_object(obj, 0);
-
- return i915_request_await_object(rq, obj, write);
-}
-
-int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
- struct intel_context *ce,
- u32 value)
-{
- struct i915_gem_ww_ctx ww;
- struct i915_request *rq;
- struct i915_vma *batch;
- struct i915_vma *vma;
- int err;
-
- vma = i915_vma_instance(obj, ce->vm, NULL);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- i915_gem_ww_ctx_init(&ww, true);
- intel_engine_pm_get(ce->engine);
-retry:
- err = i915_gem_object_lock(obj, &ww);
- if (err)
- goto out;
-
- err = intel_context_pin_ww(ce, &ww);
- if (err)
- goto out;
-
- err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
- if (err)
- goto out_ctx;
-
- batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_vma;
- }
-
- rq = i915_request_create(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_batch;
- }
-
- err = intel_emit_vma_mark_active(batch, rq);
- if (unlikely(err))
- goto out_request;
-
- err = move_obj_to_gpu(vma->obj, rq, true);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- if (unlikely(err))
- goto out_request;
-
- if (ce->engine->emit_init_breadcrumb)
- err = ce->engine->emit_init_breadcrumb(rq);
-
- if (likely(!err))
- err = ce->engine->emit_bb_start(rq,
- batch->node.start,
- batch->node.size,
- 0);
-out_request:
- if (unlikely(err))
- i915_request_set_error_once(rq, err);
-
- i915_request_add(rq);
-out_batch:
- intel_emit_vma_release(ce, batch);
-out_vma:
- i915_vma_unpin(vma);
-out_ctx:
- intel_context_unpin(ce);
-out:
- if (err == -EDEADLK) {
- err = i915_gem_ww_ctx_backoff(&ww);
- if (!err)
- goto retry;
- }
- i915_gem_ww_ctx_fini(&ww);
- intel_engine_pm_put(ce->engine);
- return err;
-}
-
-/* Wa_1209644611:icl,ehl */
-static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
-{
- u32 height = size >> PAGE_SHIFT;
-
- if (GRAPHICS_VER(i915) != 11)
- return false;
-
- return height % 4 == 3 && height <= 8;
-}
-
-struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
- struct i915_gem_ww_ctx *ww,
- struct i915_vma *src,
- struct i915_vma *dst)
-{
- struct drm_i915_private *i915 = ce->vm->i915;
- const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
- struct intel_gt_buffer_pool_node *pool;
- struct i915_vma *batch;
- u64 src_offset, dst_offset;
- u64 count, rem;
- u32 size, *cmd;
- int err;
-
- GEM_BUG_ON(src->size != dst->size);
-
- GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
- intel_engine_pm_get(ce->engine);
-
- count = div_u64(round_up(dst->size, block_size), block_size);
- size = (1 + 11 * count) * sizeof(u32);
- size = round_up(size, PAGE_SIZE);
- pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC);
- if (IS_ERR(pool)) {
- err = PTR_ERR(pool);
- goto out_pm;
- }
-
- err = i915_gem_object_lock(pool->obj, ww);
- if (err)
- goto out_put;
-
- batch = i915_vma_instance(pool->obj, ce->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put;
- }
-
- err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_put;
-
- /* we pinned the pool, mark it as such */
- intel_gt_buffer_pool_mark_used(pool);
-
- cmd = i915_gem_object_pin_map(pool->obj, pool->type);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto out_unpin;
- }
-
- rem = src->size;
- src_offset = src->node.start;
- dst_offset = dst->node.start;
-
- do {
- size = min_t(u64, rem, block_size);
- GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
-
- if (GRAPHICS_VER(i915) >= 9 &&
- !wa_1209644611_applies(i915, size)) {
- *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
- *cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
- *cmd++ = 0;
- *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cmd++ = lower_32_bits(dst_offset);
- *cmd++ = upper_32_bits(dst_offset);
- *cmd++ = 0;
- *cmd++ = PAGE_SIZE;
- *cmd++ = lower_32_bits(src_offset);
- *cmd++ = upper_32_bits(src_offset);
- } else if (GRAPHICS_VER(i915) >= 8) {
- *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
- *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
- *cmd++ = 0;
- *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cmd++ = lower_32_bits(dst_offset);
- *cmd++ = upper_32_bits(dst_offset);
- *cmd++ = 0;
- *cmd++ = PAGE_SIZE;
- *cmd++ = lower_32_bits(src_offset);
- *cmd++ = upper_32_bits(src_offset);
- } else {
- *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
- *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
- *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
- *cmd++ = dst_offset;
- *cmd++ = PAGE_SIZE;
- *cmd++ = src_offset;
- }
-
- /* Allow ourselves to be preempted in between blocks. */
- *cmd++ = MI_ARB_CHECK;
-
- src_offset += size;
- dst_offset += size;
- rem -= size;
- } while (rem);
-
- *cmd = MI_BATCH_BUFFER_END;
-
- i915_gem_object_flush_map(pool->obj);
- i915_gem_object_unpin_map(pool->obj);
-
- intel_gt_chipset_flush(ce->vm->gt);
- batch->private = pool;
- return batch;
-
-out_unpin:
- i915_vma_unpin(batch);
-out_put:
- intel_gt_buffer_pool_put(pool);
-out_pm:
- intel_engine_pm_put(ce->engine);
- return ERR_PTR(err);
-}
-
-int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
- struct drm_i915_gem_object *dst,
- struct intel_context *ce)
-{
- struct i915_address_space *vm = ce->vm;
- struct i915_vma *vma[2], *batch;
- struct i915_gem_ww_ctx ww;
- struct i915_request *rq;
- int err, i;
-
- vma[0] = i915_vma_instance(src, vm, NULL);
- if (IS_ERR(vma[0]))
- return PTR_ERR(vma[0]);
-
- vma[1] = i915_vma_instance(dst, vm, NULL);
- if (IS_ERR(vma[1]))
- return PTR_ERR(vma[1]);
-
- i915_gem_ww_ctx_init(&ww, true);
- intel_engine_pm_get(ce->engine);
-retry:
- err = i915_gem_object_lock(src, &ww);
- if (!err)
- err = i915_gem_object_lock(dst, &ww);
- if (!err)
- err = intel_context_pin_ww(ce, &ww);
- if (err)
- goto out;
-
- err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
- if (err)
- goto out_ctx;
-
- err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_unpin_src;
-
- batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_unpin_dst;
- }
-
- rq = i915_request_create(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_batch;
- }
-
- err = intel_emit_vma_mark_active(batch, rq);
- if (unlikely(err))
- goto out_request;
-
- for (i = 0; i < ARRAY_SIZE(vma); i++) {
- err = move_obj_to_gpu(vma[i]->obj, rq, i);
- if (unlikely(err))
- goto out_request;
- }
-
- for (i = 0; i < ARRAY_SIZE(vma); i++) {
- unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;
-
- err = i915_vma_move_to_active(vma[i], rq, flags);
- if (unlikely(err))
- goto out_request;
- }
-
- if (rq->engine->emit_init_breadcrumb) {
- err = rq->engine->emit_init_breadcrumb(rq);
- if (unlikely(err))
- goto out_request;
- }
-
- err = rq->engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- 0);
-
-out_request:
- if (unlikely(err))
- i915_request_set_error_once(rq, err);
-
- i915_request_add(rq);
-out_batch:
- intel_emit_vma_release(ce, batch);
-out_unpin_dst:
- i915_vma_unpin(vma[1]);
-out_unpin_src:
- i915_vma_unpin(vma[0]);
-out_ctx:
- intel_context_unpin(ce);
-out:
- if (err == -EDEADLK) {
- err = i915_gem_ww_ctx_backoff(&ww);
- if (!err)
- goto retry;
- }
- i915_gem_ww_ctx_fini(&ww);
- intel_engine_pm_put(ce->engine);
- return err;
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/i915_gem_object_blt.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
deleted file mode 100644
index 2409fdcccf0e..000000000000
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __I915_GEM_OBJECT_BLT_H__
-#define __I915_GEM_OBJECT_BLT_H__
-
-#include <linux/types.h>
-
-#include "gt/intel_context.h"
-#include "gt/intel_engine_pm.h"
-#include "i915_vma.h"
-
-struct drm_i915_gem_object;
-struct i915_gem_ww_ctx;
-
-struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
- struct i915_vma *vma,
- struct i915_gem_ww_ctx *ww,
- u32 value);
-
-struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
- struct i915_gem_ww_ctx *ww,
- struct i915_vma *src,
- struct i915_vma *dst);
-
-int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq);
-void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma);
-
-int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
- struct intel_context *ce,
- u32 value);
-
-int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
- struct drm_i915_gem_object *dst,
- struct intel_context *ce);
-
-#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index d047ea126029..2471f36aaff3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -18,6 +18,7 @@
struct drm_i915_gem_object;
struct intel_fronbuffer;
+struct intel_memory_region;
/*
* struct i915_lut_handle tracks the fast lookups from handle to vma used
@@ -33,10 +34,9 @@ struct i915_lut_handle {
struct drm_i915_gem_object_ops {
unsigned int flags;
-#define I915_GEM_OBJECT_HAS_IOMEM BIT(1)
-#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2)
-#define I915_GEM_OBJECT_IS_PROXY BIT(3)
-#define I915_GEM_OBJECT_NO_MMAP BIT(4)
+#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
+#define I915_GEM_OBJECT_IS_PROXY BIT(2)
+#define I915_GEM_OBJECT_NO_MMAP BIT(3)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
@@ -61,13 +61,117 @@ struct drm_i915_gem_object_ops {
const struct drm_i915_gem_pread *arg);
int (*pwrite)(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *arg);
+ u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+
+ /**
+ * adjust_lru - notify that the madvise value was updated
+ * @obj: The gem object
+ *
+	 * The madvise value may have been updated, or the object was recently
+	 * referenced, so act accordingly (perhaps changing an LRU list etc).
+ */
+ void (*adjust_lru)(struct drm_i915_gem_object *obj);
+
+ /**
+ * delayed_free - Override the default delayed free implementation
+ */
+ void (*delayed_free)(struct drm_i915_gem_object *obj);
+
+ /**
+ * migrate - Migrate the object to a different region, either for
+ * pinning or for as long as the object lock is held.
+ */
+ int (*migrate)(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr);
+
void (*release)(struct drm_i915_gem_object *obj);
+ const struct vm_operations_struct *mmap_ops;
const char *name; /* friendly name for debug, e.g. lockdep classes */
};
+/**
+ * enum i915_cache_level - The supported GTT caching values for system memory
+ * pages.
+ *
+ * These translate to some special GTT PTE bits when binding pages into some
+ * address space. It also determines whether an object, or rather its
+ * pages, is coherent with the GPU when reading or writing through the
+ * CPU cache with those pages.
+ *
+ * Userspace can also control this through struct drm_i915_gem_caching.
+ */
+enum i915_cache_level {
+ /**
+ * @I915_CACHE_NONE:
+ *
+ * GPU access is not coherent with the CPU cache. If the cache is dirty
+ * and we need the underlying pages to be coherent with some later GPU
+ * access then we need to manually flush the pages.
+ *
+ * On shared LLC platforms reads and writes through the CPU cache are
+ * still coherent even with this setting. See also
+ * &drm_i915_gem_object.cache_coherent for more details. Due to this we
+ * should only ever use uncached for scanout surfaces, otherwise we end
+ * up over-flushing in some places.
+ *
+ * This is the default on non-LLC platforms.
+ */
+ I915_CACHE_NONE = 0,
+ /**
+ * @I915_CACHE_LLC:
+ *
+ * GPU access is coherent with the CPU cache. If the cache is dirty,
+ * then the GPU will ensure that access remains coherent, when both
+ * reading and writing through the CPU cache. GPU writes can dirty the
+ * CPU cache.
+ *
+ * Not used for scanout surfaces.
+ *
+ * Applies to both platforms with shared LLC(HAS_LLC), and snooping
+ * based platforms(HAS_SNOOP).
+ *
+ * This is the default on shared LLC platforms. The only exception is
+ * scanout objects, where the display engine is not coherent with the
+ * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
+ * automatically applied by the kernel in pin_for_display, if userspace
+ * has not done so already.
+ */
+ I915_CACHE_LLC,
+ /**
+ * @I915_CACHE_L3_LLC:
+ *
+ * Explicitly enable the Gfx L3 cache, with coherent LLC.
+ *
+ * The Gfx L3 sits between the domain-specific caches, e.g.
+ * sampler/render caches, and the larger LLC. LLC is coherent with the
+ * GPU, but L3 is only visible to the GPU, so likely needs to be flushed
+ * when the workload completes.
+ *
+ * Not used for scanout surfaces.
+ *
+ * Only exposed on some gen7 + GGTT. More recent hardware has dropped
+ * this explicit setting, where it should now be enabled by default.
+ */
+ I915_CACHE_L3_LLC,
+ /**
+ * @I915_CACHE_WT:
+ *
+ * Write-through. Used for scanout surfaces.
+ *
+ * The GPU can utilise the caches, while still having the display engine
+ * be coherent with GPU writes; as a result we don't need to flush the
+ * CPU caches when moving out of the render domain. This is the default
+ * setting chosen by the kernel, if supported by the HW, otherwise we
+ * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU
+ * cache still need to be flushed, to remain coherent with the display
+ * engine.
+ */
+ I915_CACHE_WT,
+};
+
enum i915_map_type {
I915_MAP_WB = 0,
I915_MAP_WC,
@@ -81,6 +185,7 @@ enum i915_mmap_type {
I915_MMAP_TYPE_WC,
I915_MMAP_TYPE_WB,
I915_MMAP_TYPE_UC,
+ I915_MMAP_TYPE_FIXED,
};
struct i915_mmap_offset {
@@ -185,23 +290,138 @@ struct drm_i915_gem_object {
unsigned long flags;
#define I915_BO_ALLOC_CONTIGUOUS BIT(0)
#define I915_BO_ALLOC_VOLATILE BIT(1)
-#define I915_BO_ALLOC_STRUCT_PAGE BIT(2)
-#define I915_BO_ALLOC_CPU_CLEAR BIT(3)
+#define I915_BO_ALLOC_CPU_CLEAR BIT(2)
+#define I915_BO_ALLOC_USER BIT(3)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
I915_BO_ALLOC_VOLATILE | \
- I915_BO_ALLOC_STRUCT_PAGE | \
- I915_BO_ALLOC_CPU_CLEAR)
+ I915_BO_ALLOC_CPU_CLEAR | \
+ I915_BO_ALLOC_USER)
#define I915_BO_READONLY BIT(4)
#define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */
- /*
- * Is the object to be mapped as read-only to the GPU
- * Only honoured if hardware has relevant pte bit
+ /**
+ * @mem_flags - Mutable placement-related flags
+ *
+ * These are flags that indicate specifics of the memory region
+ * the object is currently in. As such they are only stable
+ * either under the object lock or if the object is pinned.
+ */
+ unsigned int mem_flags;
+#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
+#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */
+ /**
+ * @cache_level: The desired GTT caching level.
+ *
+ * See enum i915_cache_level for possible values, along with what
+ * each does.
*/
unsigned int cache_level:3;
- unsigned int cache_coherent:2;
+ /**
+ * @cache_coherent:
+ *
+ * Track whether the pages are coherent with the GPU when reading or
+ * writing through the CPU caches. This largely depends on the
+ * @cache_level setting.
+ *
+ * On platforms which don't have the shared LLC(HAS_SNOOP), like on Atom
+ * platforms, coherency must be explicitly requested with some special
+ * GTT caching bits(see enum i915_cache_level). When enabling coherency
+ * it does come at a performance and power cost on such platforms. On
+ * the flip side, with coherency enabled the kernel does not need to
+ * manually flush any buffers which need to be coherent with the GPU;
+ * such flushing is only required when the object is not coherent, i.e.
+ * @cache_coherent is zero.
+ *
+ * On platforms that share the LLC with the CPU(HAS_LLC), all GT memory
+ * access will automatically snoop the CPU caches(even with CACHE_NONE).
+ * The one exception is when dealing with the display engine, like with
+ * scanout surfaces. To handle this the kernel will always flush the
+ * surface out of the CPU caches when preparing it for scanout. Also
+ * note that since scanout surfaces are only ever read by the display
+ * engine we only need to care about flushing any writes through the CPU
+ * cache, reads on the other hand will always be coherent.
+ *
+ * Something strange here is why @cache_coherent is not a simple
+ * boolean, i.e. coherent vs non-coherent. The reasoning for this comes
+ * back to the display engine not being fully coherent. As a result scanout
+ * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT.
+ * In the case of seeing I915_CACHE_NONE the kernel makes the assumption
+ * that this is likely a scanout surface, and will set @cache_coherent
+ * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared
+ * LLC. The kernel uses this to always flush writes through the CPU
+ * cache as early as possible, where it can, in effect keeping
+ * @cache_dirty clean, so we can potentially avoid stalling when
+ * flushing the surface just before doing the scanout. This does mean
+ * we might unnecessarily flush non-scanout objects in some places, but
+ * the default assumption is that all normal objects should be using
+ * I915_CACHE_LLC, at least on platforms with the shared LLC.
+ *
+ * Supported values:
+ *
+ * I915_BO_CACHE_COHERENT_FOR_READ:
+ *
+ * On shared LLC platforms, we use this for special scanout surfaces,
+ * where the display engine is not coherent with the CPU cache. As such
+ * we need to ensure we flush any writes before doing the scanout. As an
+ * optimisation we try to flush any writes as early as possible to avoid
+ * stalling later.
+ *
+ * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC
+ * platforms, we use:
+ *
+ * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ
+ *
+ * While for normal objects that are fully coherent, including special
+ * scanout surfaces marked as I915_CACHE_WT, we use:
+ *
+ * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
+ * I915_BO_CACHE_COHERENT_FOR_WRITE
+ *
+ * And then for objects that are not coherent at all we use:
+ *
+ * cache_coherent = 0
+ *
+ * I915_BO_CACHE_COHERENT_FOR_WRITE:
+ *
+ * When writing through the CPU cache, the GPU is still coherent. Note
+ * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
+ */
#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+ unsigned int cache_coherent:2;
+
+ /**
+ * @cache_dirty:
+ *
+ * Track if we are dirty with writes through the CPU cache for this
+ * object. As a result reading directly from main memory might yield
+ * stale data.
+ *
+ * This also ties into whether the kernel is tracking the object as
+ * coherent with the GPU, as per @cache_coherent, as it determines if
+ * flushing might be needed at various points.
+ *
+ * Another part of @cache_dirty is managing flushing when first
+ * acquiring the pages for system memory, at this point the pages are
+ * considered foreign, so the default assumption is that the cache is
+ * dirty, for example the page zeroing done by the kernel might leave
+ * writes through the CPU cache, or swapping-in, while the actual data in
+ * main memory is potentially stale. Note that this is a potential
+ * security issue when dealing with userspace objects and zeroing. Now,
+ * whether we actually need to apply the big sledgehammer of flushing all
+ * the pages on acquire depends on whether @cache_coherent is marked as
+ * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be coherent
+ * for both reads and writes through the CPU cache.
+ *
+ * Note that on shared LLC platforms we still apply the heavy flush for
+ * I915_CACHE_NONE objects, under the assumption that this is going to
+ * be used for scanout.
+ *
+ * Update: On some hardware there is now also the 'Bypass LLC' MOCS
+ * entry, which defeats our @cache_coherent tracking, since userspace
+ * can freely bypass the CPU cache when touching the pages with the GPU,
+ * where the kernel is completely unaware. On such platform we need
+ * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
+ */
unsigned int cache_dirty:1;
/**
@@ -247,9 +467,10 @@ struct drm_i915_gem_object {
struct intel_memory_region *region;
/**
- * Memory manager node allocated for this object.
+ * Memory manager resource allocated for this object. Only
+ * needed for the mock region.
*/
- void *st_mm_node;
+ struct ttm_resource *res;
/**
* Element within memory_region->objects or region->purgeable
@@ -310,6 +531,12 @@ struct drm_i915_gem_object {
bool dirty:1;
} mm;
+ struct {
+ struct sg_table *cached_io_st;
+ struct i915_gem_object_page_iter get_io_page;
+ bool created:1;
+ } ttm;
+
/** Record of address bit 17 of each page at last unbind. */
unsigned long *bit_17;
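
A compact way to read the @cache_level to @cache_coherent policy documented above is as a small helper. This is only an illustrative sketch of the documented rules (the driver's real logic lives in i915_gem_object_set_cache_coherency()), with has_llc standing in for HAS_LLC(i915):

        /* Sketch of the documented policy, not the driver implementation. */
        static unsigned int sketch_cache_coherency(enum i915_cache_level level,
                                                   bool has_llc)
        {
                switch (level) {
                case I915_CACHE_LLC:
                case I915_CACHE_L3_LLC:
                case I915_CACHE_WT:
                        /* Fully coherent for CPU reads and writes. */
                        return I915_BO_CACHE_COHERENT_FOR_READ |
                               I915_BO_CACHE_COHERENT_FOR_WRITE;
                case I915_CACHE_NONE:
                        /* Likely scanout; reads still snoop on shared-LLC parts. */
                        return has_llc ? I915_BO_CACHE_COHERENT_FOR_READ : 0;
                }

                return 0;
        }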
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 6444e097016d..8eb1c3a6fc9c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
dma_addr_t addr;
void *vaddr;
- if (type != I915_MAP_WC)
- return ERR_PTR(-ENODEV);
+ GEM_BUG_ON(type != I915_MAP_WC);
if (n_pfn > ARRAY_SIZE(stack)) {
/* Too big for stack -- allocate temporary array instead */
@@ -351,7 +350,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
int err;
if (!i915_gem_object_has_struct_page(obj) &&
- !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM))
+ !i915_gem_object_has_iomem(obj))
return ERR_PTR(-ENXIO);
assert_object_held(obj);
@@ -374,6 +373,34 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
}
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ /*
+ * For discrete, our CPU mappings need to be consistent in order to
+ * function correctly on !x86. When mapping things through TTM, we use
+ * the same rules to determine the caching type.
+ *
+ * The caching rules, starting from DG1:
+ *
+ * - If the object can be placed in device local-memory, then the
+ * pages should be allocated and mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back,
+ * with the guarantee that everything is also coherent with the
+ * GPU.
+ *
+ * Internal users of lmem are already expected to get this right, so no
+ * fudging needed there.
+ */
+ if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) {
+ if (type != I915_MAP_WC && !obj->mm.n_placements) {
+ ptr = ERR_PTR(-ENODEV);
+ goto err_unpin;
+ }
+
+ type = I915_MAP_WC;
+ } else if (IS_DGFX(to_i915(obj->base.dev))) {
+ type = I915_MAP_WB;
+ }
+
ptr = page_unpack_bits(obj->mm.mapping, &has_type);
if (ptr && has_type != type) {
if (pinned) {
@@ -467,9 +494,8 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
struct i915_gem_object_page_iter *iter,
unsigned int n,
unsigned int *offset,
- bool allow_alloc)
+ bool dma)
{
- const bool dma = iter == &obj->mm.get_dma_page;
struct scatterlist *sg;
unsigned int idx, count;
@@ -490,9 +516,6 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
if (n < READ_ONCE(iter->sg_idx))
goto lookup;
- if (!allow_alloc)
- goto manual_lookup;
-
mutex_lock(&iter->lock);
/* We prefer to reuse the last sg so that repeated lookup of this
@@ -542,16 +565,7 @@ scan:
if (unlikely(n < idx)) /* insertion completed by another thread */
goto lookup;
- goto manual_walk;
-
-manual_lookup:
- idx = 0;
- sg = obj->mm.pages->sgl;
- count = __sg_page_count(sg);
-
-manual_walk:
- /*
- * In case we failed to insert the entry into the radixtree, we need
+ /* In case we failed to insert the entry into the radixtree, we need
* to look beyond the current sg.
*/
while (idx + count <= n) {
@@ -598,7 +612,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
- sg = i915_gem_object_get_sg(obj, n, &offset, true);
+ sg = i915_gem_object_get_sg(obj, n, &offset);
return nth_page(sg_page(sg), offset);
}
@@ -624,7 +638,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
struct scatterlist *sg;
unsigned int offset;
- sg = i915_gem_object_get_sg_dma(obj, n, &offset, true);
+ sg = i915_gem_object_get_sg_dma(obj, n, &offset);
if (len)
*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
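
A consequence of the caching rules documented in i915_gem_object_pin_map() above: a kernel-internal object that can be placed in local memory must ask for a WC mapping (anything else returns -ENODEV), user objects with placements are silently coerced to WC, and everything else on discrete becomes WB. A minimal caller sketch, assuming the object may be placed in lmem and that the caller already holds the object lock (see the assert_object_held() in pin_map):

        void *vaddr;

        /* WC is the only mapping type allowed for lmem-capable objects. */
        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);

        memset(vaddr, 0, obj->base.size);
        __i915_gem_object_flush_map(obj, 0, obj->base.size);
        i915_gem_object_unpin_map(obj);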
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index be72ad0634ba..7986612f48fa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -76,7 +76,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
/* We're no longer struct page backed */
- obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE;
+ obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE;
__i915_gem_object_set_pages(obj, st, sg->length);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index f25e6646c5b7..1f557b2178ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -13,16 +13,8 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
{
obj->mm.region = intel_memory_region_get(mem);
- if (obj->base.size <= mem->min_page_size)
- obj->flags |= I915_BO_ALLOC_CONTIGUOUS;
-
mutex_lock(&mem->objects.lock);
-
- if (obj->flags & I915_BO_ALLOC_VOLATILE)
- list_add(&obj->mm.region_link, &mem->objects.purgeable);
- else
- list_add(&obj->mm.region_link, &mem->objects.list);
-
+ list_add(&obj->mm.region_link, &mem->objects.list);
mutex_unlock(&mem->objects.lock);
}
@@ -40,9 +32,11 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
struct drm_i915_gem_object *
i915_gem_object_create_region(struct intel_memory_region *mem,
resource_size_t size,
+ resource_size_t page_size,
unsigned int flags)
{
struct drm_i915_gem_object *obj;
+ resource_size_t default_page_size;
int err;
/*
@@ -56,7 +50,14 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
if (!mem)
return ERR_PTR(-ENODEV);
- size = round_up(size, mem->min_page_size);
+ default_page_size = mem->min_page_size;
+ if (page_size)
+ default_page_size = page_size;
+
+ GEM_BUG_ON(!is_power_of_2_u64(default_page_size));
+ GEM_BUG_ON(default_page_size < PAGE_SIZE);
+
+ size = round_up(size, default_page_size);
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT));
@@ -68,7 +69,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
if (!obj)
return ERR_PTR(-ENOMEM);
- err = mem->ops->init_object(mem, obj, size, flags);
+ err = mem->ops->init_object(mem, obj, size, page_size, flags);
if (err)
goto err_object_free;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h
index 84fcb3297400..1008e580a89a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h
@@ -19,6 +19,7 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj);
struct drm_i915_gem_object *
i915_gem_object_create_region(struct intel_memory_region *mem,
resource_size_t size,
+ resource_size_t page_size,
unsigned int flags);
#endif
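
The new page_size parameter overrides the region's min_page_size when non-zero; all callers shown here pass 0, and forcing a page size is kernel-internal only. A hypothetical internal caller would look like this sketch, where the requested size is rounded up to the forced page size:

        /* Hypothetical: force 64K pages, so a 4K request becomes a 64K object. */
        obj = i915_gem_object_create_region(mem, SZ_4K, SZ_64K, 0);
        if (IS_ERR(obj))
                return PTR_ERR(obj);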
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 5d16c4462fda..11f072193f3b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -182,6 +182,24 @@ rebuild_st:
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj, st);
+ /*
+ * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
+ * possible for userspace to bypass the GTT caching bits set by the
+ * kernel, as per the given object cache_level. This is troublesome
+ * since the heavy flush we apply when first gathering the pages is
+ * skipped if the kernel thinks the object is coherent with the GPU. As
+ * a result it might be possible to bypass the cache and read the
+ * contents of the page directly, which could be stale data. If it's
+ * just a case of userspace shooting themselves in the foot then so be
+ * it, but since i915 takes the stance of always zeroing memory before
+ * handing it to userspace, we need to prevent this.
+ *
+ * By setting cache_dirty here we make the clflush in set_pages
+ * unconditional on such platforms.
+ */
+ if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
+ obj->cache_dirty = true;
+
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0;
@@ -302,6 +320,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
struct pagevec pvec;
struct page *page;
+ GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev)));
__i915_gem_object_release_shmem(obj, pages, true);
i915_gem_gtt_finish_pages(obj, pages);
@@ -444,7 +463,7 @@ shmem_pread(struct drm_i915_gem_object *obj,
static void shmem_release(struct drm_i915_gem_object *obj)
{
- if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE)
+ if (i915_gem_object_has_struct_page(obj))
i915_gem_object_release_memory_region(obj);
fput(obj->base.filp);
@@ -489,6 +508,7 @@ static int __create_shmem(struct drm_i915_private *i915,
static int shmem_object_init(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
resource_size_t size,
+ resource_size_t page_size,
unsigned int flags)
{
static struct lock_class_key lock_class;
@@ -513,9 +533,8 @@ static int shmem_object_init(struct intel_memory_region *mem,
mapping_set_gfp_mask(mapping, mask);
GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
- i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class,
- I915_BO_ALLOC_STRUCT_PAGE);
-
+ i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -548,7 +567,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915,
resource_size_t size)
{
return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
- size, 0);
+ size, 0, 0);
}
/* Allocate a new GEM object and fill it with the supplied data */
@@ -561,6 +580,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
resource_size_t offset;
int err;
+ GEM_WARN_ON(IS_DGFX(dev_priv));
obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
if (IS_ERR(obj))
return obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index f4fb68e8955a..e382b7f2353b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -62,6 +62,7 @@ static void try_to_writeback(struct drm_i915_gem_object *obj,
switch (obj->mm.madv) {
case I915_MADV_DONTNEED:
i915_gem_object_truncate(obj);
+ return;
case __I915_MADV_PURGED:
return;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 34070d0ea325..ddd37ccb1362 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -669,6 +669,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
static int _i915_gem_object_stolen_init(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
resource_size_t size,
+ resource_size_t page_size,
unsigned int flags)
{
struct drm_i915_private *i915 = mem->i915;
@@ -707,7 +708,7 @@ struct drm_i915_gem_object *
i915_gem_object_create_stolen(struct drm_i915_private *i915,
resource_size_t size)
{
- return i915_gem_object_create_region(i915->mm.stolen_region, size, 0);
+ return i915_gem_object_create_region(i915->mm.stolen_region, size, 0, 0);
}
static int init_stolen_smem(struct intel_memory_region *mem)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
new file mode 100644
index 000000000000..771eb2963123
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+#include "intel_region_ttm.h"
+
+#include "gem/i915_gem_object.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_mman.h"
+
+#include "gt/intel_migrate.h"
+#include "gt/intel_engine_pm.h"
+
+#define I915_PL_LMEM0 TTM_PL_PRIV
+#define I915_PL_SYSTEM TTM_PL_SYSTEM
+#define I915_PL_STOLEN TTM_PL_VRAM
+#define I915_PL_GGTT TTM_PL_TT
+
+#define I915_TTM_PRIO_PURGE 0
+#define I915_TTM_PRIO_NO_PAGES 1
+#define I915_TTM_PRIO_HAS_PAGES 2
+
+/*
+ * Size of struct ttm_place vector in on-stack struct ttm_placement allocs
+ */
+#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN
+
+/**
+ * struct i915_ttm_tt - TTM page vector with additional private information
+ * @ttm: The base TTM page vector.
+ * @dev: The struct device used for dma mapping and unmapping.
+ * @cached_st: The cached scatter-gather table.
+ *
+ * Note that DMA may be going on right up to the point where the page-
+ * vector is unpopulated in delayed destroy. Hence keep the
+ * scatter-gather table mapped and cached up to that point. This is
+ * different from the cached gem object io scatter-gather table which
+ * doesn't have an associated dma mapping.
+ */
+struct i915_ttm_tt {
+ struct ttm_tt ttm;
+ struct device *dev;
+ struct sg_table *cached_st;
+};
+
+static const struct ttm_place sys_placement_flags = {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = I915_PL_SYSTEM,
+ .flags = 0,
+};
+
+static struct ttm_placement i915_sys_placement = {
+ .num_placement = 1,
+ .placement = &sys_placement_flags,
+ .num_busy_placement = 1,
+ .busy_placement = &sys_placement_flags,
+};
+
+static int i915_ttm_err_to_gem(int err)
+{
+ /* Fastpath */
+ if (likely(!err))
+ return 0;
+
+ switch (err) {
+ case -EBUSY:
+ /*
+ * TTM likes to convert -EDEADLK to -EBUSY, and wants us to
+ * restart the operation, since we don't record the contending
+ * lock. We use -EAGAIN to restart.
+ */
+ return -EAGAIN;
+ case -ENOSPC:
+ /*
+ * Memory type / region is full, and we can't evict.
+ * Except possibly system, which returns -ENOMEM.
+ */
+ return -ENXIO;
+ default:
+ break;
+ }
+
+ return err;
+}
+
+static bool gpu_binds_iomem(struct ttm_resource *mem)
+{
+ return mem->mem_type != TTM_PL_SYSTEM;
+}
+
+static bool cpu_maps_iomem(struct ttm_resource *mem)
+{
+ /* Once / if we support GGTT, this is also false for cached ttm_tts */
+ return mem->mem_type != TTM_PL_SYSTEM;
+}
+
+static enum i915_cache_level
+i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
+ struct ttm_tt *ttm)
+{
+ return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) &&
+ ttm->caching == ttm_cached) ? I915_CACHE_LLC :
+ I915_CACHE_NONE;
+}
+
+static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
+
+static enum ttm_caching
+i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
+{
+ /*
+ * Objects only allowed in system memory get cached CPU mappings.
+ * Other objects get a WC mapping for now, even while in system memory.
+ */
+ if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
+ obj->mm.n_placements <= 1)
+ return ttm_cached;
+
+ return ttm_write_combined;
+}
+
+static void
+i915_ttm_place_from_region(const struct intel_memory_region *mr,
+ struct ttm_place *place,
+ unsigned int flags)
+{
+ memset(place, 0, sizeof(*place));
+ place->mem_type = intel_region_to_ttm_type(mr);
+
+ if (flags & I915_BO_ALLOC_CONTIGUOUS)
+ place->flags = TTM_PL_FLAG_CONTIGUOUS;
+}
+
+static void
+i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
+ struct ttm_place *requested,
+ struct ttm_place *busy,
+ struct ttm_placement *placement)
+{
+ unsigned int num_allowed = obj->mm.n_placements;
+ unsigned int flags = obj->flags;
+ unsigned int i;
+
+ placement->num_placement = 1;
+ i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
+ obj->mm.region, requested, flags);
+
+ /* Cache this on object? */
+ placement->num_busy_placement = num_allowed;
+ for (i = 0; i < placement->num_busy_placement; ++i)
+ i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags);
+
+ if (num_allowed == 0) {
+ *busy = *requested;
+ placement->num_busy_placement = 1;
+ }
+
+ placement->placement = requested;
+ placement->busy_placement = busy;
+}
+
+static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
+ uint32_t page_flags)
+{
+ struct ttm_resource_manager *man =
+ ttm_manager_type(bo->bdev, bo->resource->mem_type);
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct i915_ttm_tt *i915_tt;
+ int ret;
+
+ i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
+ if (!i915_tt)
+ return NULL;
+
+ if (obj->flags & I915_BO_ALLOC_CPU_CLEAR &&
+ man->use_tt)
+ page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC;
+
+ ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
+ i915_ttm_select_tt_caching(obj));
+ if (ret) {
+ kfree(i915_tt);
+ return NULL;
+ }
+
+ i915_tt->dev = obj->base.dev->dev;
+
+ return &i915_tt->ttm;
+}
+
+static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+ if (i915_tt->cached_st) {
+ dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(i915_tt->cached_st);
+ kfree(i915_tt->cached_st);
+ i915_tt->cached_st = NULL;
+ }
+ ttm_pool_free(&bdev->pool, ttm);
+}
+
+static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+ ttm_tt_destroy_common(bdev, ttm);
+ ttm_tt_fini(ttm);
+ kfree(i915_tt);
+}
+
+static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
+ const struct ttm_place *place)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+
+ /* Will do for now. Our pinned objects are still on TTM's LRU lists */
+ return i915_gem_object_evictable(obj);
+}
+
+static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
+ struct ttm_placement *placement)
+{
+ *placement = i915_sys_placement;
+}
+
+static int i915_ttm_move_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ int ret;
+
+ ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ if (ret)
+ return ret;
+
+ ret = __i915_gem_object_put_pages(obj);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ if (!obj->ttm.cached_io_st)
+ return;
+
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &obj->ttm.get_io_page.radix, &iter, 0)
+ radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
+ rcu_read_unlock();
+
+ sg_free_table(obj->ttm.cached_io_st);
+ kfree(obj->ttm.cached_io_st);
+ obj->ttm.cached_io_st = NULL;
+}
+
+static void
+i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+
+ if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
+ obj->write_domain = I915_GEM_DOMAIN_WC;
+ obj->read_domains = I915_GEM_DOMAIN_WC;
+ } else {
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ }
+}
+
+static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ unsigned int cache_level;
+ unsigned int i;
+
+ /*
+ * If object was moved to an allowable region, update the object
+ * region to consider it migrated. Note that if it's currently not
+ * in an allowable region, it's evicted and we don't update the
+ * object region.
+ */
+ if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
+ for (i = 0; i < obj->mm.n_placements; ++i) {
+ struct intel_memory_region *mr = obj->mm.placements[i];
+
+ if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
+ mr != obj->mm.region) {
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_init_memory_region(obj, mr);
+ break;
+ }
+ }
+ }
+
+ obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
+
+ obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
+ I915_BO_FLAG_STRUCT_PAGE;
+
+ cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
+ bo->ttm);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+}
+
+static void i915_ttm_purge(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement place = {};
+ int ret;
+
+ if (obj->mm.madv == __I915_MADV_PURGED)
+ return;
+
+ /* TTM's purge interface. Note that we might be reentering. */
+ ret = ttm_bo_validate(bo, &place, &ctx);
+ if (!ret) {
+ obj->write_domain = 0;
+ obj->read_domains = 0;
+ i915_ttm_adjust_gem_after_move(obj);
+ i915_ttm_free_cached_io_st(obj);
+ obj->mm.madv = __I915_MADV_PURGED;
+ }
+}
+
+static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ int ret = i915_ttm_move_notify(bo);
+
+ GEM_WARN_ON(ret);
+ GEM_WARN_ON(obj->ttm.cached_io_st);
+ if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
+ i915_ttm_purge(obj);
+}
+
+static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+
+ if (likely(obj)) {
+ /* This releases all gem object bindings to the backend. */
+ i915_ttm_free_cached_io_st(obj);
+ __i915_gem_free_object(obj);
+ }
+}
+
+static struct intel_memory_region *
+i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+
+ /* There's some room for optimization here... */
+ GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
+ ttm_mem_type < I915_PL_LMEM0);
+ if (ttm_mem_type == I915_PL_SYSTEM)
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
+ 0);
+
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
+ ttm_mem_type - I915_PL_LMEM0);
+}
+
+static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ struct scatterlist *sg;
+ struct sg_table *st;
+ int ret;
+
+ if (i915_tt->cached_st)
+ return i915_tt->cached_st;
+
+ st = kzalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return ERR_PTR(-ENOMEM);
+
+ sg = __sg_alloc_table_from_pages
+ (st, ttm->pages, ttm->num_pages, 0,
+ (unsigned long)ttm->num_pages << PAGE_SHIFT,
+ i915_sg_segment_size(), NULL, 0, GFP_KERNEL);
+ if (IS_ERR(sg)) {
+ kfree(st);
+ return ERR_CAST(sg);
+ }
+
+ ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
+ if (ret) {
+ sg_free_table(st);
+ kfree(st);
+ return ERR_PTR(ret);
+ }
+
+ i915_tt->cached_st = st;
+ return st;
+}
+
+static struct sg_table *
+i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
+ struct ttm_resource *res)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+
+ if (!gpu_binds_iomem(res))
+ return i915_ttm_tt_get_st(bo->ttm);
+
+ /*
+ * If CPU mapping differs, we need to add the ttm_tt pages to
+ * the resulting st. Might make sense for GGTT.
+ */
+ GEM_WARN_ON(!cpu_maps_iomem(res));
+ return intel_region_ttm_resource_to_st(obj->mm.region, res);
+}
+
+static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
+ struct ttm_resource *dst_mem,
+ struct sg_table *dst_st)
+{
+ struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+ bdev);
+ struct ttm_resource_manager *src_man =
+ ttm_manager_type(bo->bdev, bo->resource->mem_type);
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct sg_table *src_st;
+ struct i915_request *rq;
+ struct ttm_tt *ttm = bo->ttm;
+ enum i915_cache_level src_level, dst_level;
+ int ret;
+
+ if (!i915->gt.migrate.context)
+ return -EINVAL;
+
+ dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
+ if (!ttm || !ttm_tt_is_populated(ttm)) {
+ if (bo->type == ttm_bo_type_kernel)
+ return -EINVAL;
+
+ if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))
+ return 0;
+
+ intel_engine_pm_get(i915->gt.migrate.context->engine);
+ ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
+ dst_st->sgl, dst_level,
+ gpu_binds_iomem(dst_mem),
+ 0, &rq);
+
+ if (!ret && rq) {
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ }
+ intel_engine_pm_put(i915->gt.migrate.context->engine);
+ } else {
+ src_st = src_man->use_tt ? i915_ttm_tt_get_st(ttm) :
+ obj->ttm.cached_io_st;
+
+ src_level = i915_ttm_cache_level(i915, bo->resource, ttm);
+ intel_engine_pm_get(i915->gt.migrate.context->engine);
+ ret = intel_context_migrate_copy(i915->gt.migrate.context,
+ NULL, src_st->sgl, src_level,
+ gpu_binds_iomem(bo->resource),
+ dst_st->sgl, dst_level,
+ gpu_binds_iomem(dst_mem),
+ &rq);
+ if (!ret && rq) {
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ }
+ intel_engine_pm_put(i915->gt.migrate.context->engine);
+ }
+
+ return ret;
+}
+
+static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *dst_mem,
+ struct ttm_place *hop)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct ttm_resource_manager *dst_man =
+ ttm_manager_type(bo->bdev, dst_mem->mem_type);
+ struct intel_memory_region *dst_reg, *src_reg;
+ union {
+ struct ttm_kmap_iter_tt tt;
+ struct ttm_kmap_iter_iomap io;
+ } _dst_iter, _src_iter;
+ struct ttm_kmap_iter *dst_iter, *src_iter;
+ struct sg_table *dst_st;
+ int ret;
+
+ dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+ src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+ GEM_BUG_ON(!dst_reg || !src_reg);
+
+ /* Sync for now. We could do the actual copy async. */
+ ret = ttm_bo_wait_ctx(bo, ctx);
+ if (ret)
+ return ret;
+
+ ret = i915_ttm_move_notify(bo);
+ if (ret)
+ return ret;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ i915_ttm_purge(obj);
+ ttm_resource_free(bo, &dst_mem);
+ return 0;
+ }
+
+ /* Populate ttm with pages if needed. Typically system memory. */
+ if (bo->ttm && (dst_man->use_tt ||
+ (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) {
+ ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
+ if (ret)
+ return ret;
+ }
+
+ dst_st = i915_ttm_resource_get_st(obj, dst_mem);
+ if (IS_ERR(dst_st))
+ return PTR_ERR(dst_st);
+
+ ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
+ if (ret) {
+ /* If we start mapping GGTT, we can no longer use man::use_tt here. */
+ dst_iter = !cpu_maps_iomem(dst_mem) ?
+ ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
+ dst_st, dst_reg->region.start);
+
+ src_iter = !cpu_maps_iomem(bo->resource) ?
+ ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
+ obj->ttm.cached_io_st,
+ src_reg->region.start);
+
+ ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
+ }
+ /* Below dst_mem becomes bo->resource. */
+ ttm_bo_move_sync_cleanup(bo, dst_mem);
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_free_cached_io_st(obj);
+
+ if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
+ obj->ttm.cached_io_st = dst_st;
+ obj->ttm.get_io_page.sg_pos = dst_st->sgl;
+ obj->ttm.get_io_page.sg_idx = 0;
+ }
+
+ i915_ttm_adjust_gem_after_move(obj);
+ return 0;
+}
+
+static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
+{
+ if (!cpu_maps_iomem(mem))
+ return 0;
+
+ mem->bus.caching = ttm_write_combined;
+ mem->bus.is_iomem = true;
+
+ return 0;
+}
+
+static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+ unsigned long page_offset)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start;
+ struct scatterlist *sg;
+ unsigned int ofs;
+
+ GEM_WARN_ON(bo->ttm);
+
+ sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true);
+
+ return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
+}
+
+static struct ttm_device_funcs i915_ttm_bo_driver = {
+ .ttm_tt_create = i915_ttm_tt_create,
+ .ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
+ .ttm_tt_destroy = i915_ttm_tt_destroy,
+ .eviction_valuable = i915_ttm_eviction_valuable,
+ .evict_flags = i915_ttm_evict_flags,
+ .move = i915_ttm_move,
+ .swap_notify = i915_ttm_swap_notify,
+ .delete_mem_notify = i915_ttm_delete_mem_notify,
+ .io_mem_reserve = i915_ttm_io_mem_reserve,
+ .io_mem_pfn = i915_ttm_io_mem_pfn,
+};
+
+/**
+ * i915_ttm_driver - Return a pointer to the TTM device funcs
+ *
+ * Return: Pointer to statically allocated TTM device funcs.
+ */
+struct ttm_device_funcs *i915_ttm_driver(void)
+{
+ return &i915_ttm_bo_driver;
+}
+
+static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
+ struct ttm_placement *placement)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct sg_table *st;
+ int real_num_busy;
+ int ret;
+
+ /* First try only the requested placement. No eviction. */
+ real_num_busy = fetch_and_zero(&placement->num_busy_placement);
+ ret = ttm_bo_validate(bo, placement, &ctx);
+ if (ret) {
+ ret = i915_ttm_err_to_gem(ret);
+ /*
+ * Anything that wants to restart the operation gets to
+ * do that.
+ */
+ if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS ||
+ ret == -EAGAIN)
+ return ret;
+
+ /*
+ * If the initial attempt fails, allow all accepted placements,
+ * evicting if necessary.
+ */
+ placement->num_busy_placement = real_num_busy;
+ ret = ttm_bo_validate(bo, placement, &ctx);
+ if (ret)
+ return i915_ttm_err_to_gem(ret);
+ }
+
+ i915_ttm_adjust_lru(obj);
+ if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
+ ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
+ if (ret)
+ return ret;
+
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_adjust_gem_after_move(obj);
+ }
+
+ if (!i915_gem_object_has_pages(obj)) {
+ /* Object either has a page vector or is an iomem object */
+ st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+ }
+
+ return ret;
+}
+
+static int i915_ttm_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS];
+ struct ttm_placement placement;
+
+ GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS);
+
+ /* Move to the requested placement. */
+ i915_ttm_placement_from_obj(obj, &requested, busy, &placement);
+
+ return __i915_ttm_get_pages(obj, &placement);
+}
+
+/**
+ * DOC: Migration vs eviction
+ *
+ * GEM migration may not be the same as TTM migration / eviction. If
+ * the TTM core decides to evict an object it may be evicted to a
+ * TTM memory type that is not in the object's allowable GEM regions, or
+ * in fact theoretically to a TTM memory type that doesn't correspond to
+ * a GEM memory region. In that case the object's GEM region is not
+ * updated, and the data is migrated back to the GEM region at
+ * get_pages time. TTM may however set up CPU ptes to the object even
+ * when it is evicted.
+ * GEM forced migration, using the i915_ttm_migrate() op, is allowed even
+ * to regions that are not in the object's list of allowable placements.
+ */
+static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr)
+{
+ struct ttm_place requested;
+ struct ttm_placement placement;
+ int ret;
+
+ i915_ttm_place_from_region(mr, &requested, obj->flags);
+ placement.num_placement = 1;
+ placement.num_busy_placement = 1;
+ placement.placement = &requested;
+ placement.busy_placement = &requested;
+
+ ret = __i915_ttm_get_pages(obj, &placement);
+ if (ret)
+ return ret;
+
+ /*
+ * Reinitialize the region bindings. This is primarily
+ * required for objects where the new region is not in
+ * its allowable placements.
+ */
+ if (obj->mm.region != mr) {
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_init_memory_region(obj, mr);
+ }
+
+ return 0;
+}
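
To make the distinction above concrete, here is a caller-side sketch of a forced GEM migration; the i915_gem_object_migrate() wrapper that resolves the region and invokes the migrate op is added elsewhere in this series, so treat its exact signature here as an assumption:

        int err;

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                return err;

        /* Force the object to system memory, even if SMEM is not in its placement list. */
        err = i915_gem_object_migrate(obj, NULL, INTEL_REGION_SMEM);
        if (!err)
                err = i915_gem_object_wait_migration(obj, I915_WAIT_INTERRUPTIBLE);

        i915_gem_object_unlock(obj);
        return err;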
+
+static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *st)
+{
+ /*
+ * We're currently not called from a shrinker, so put_pages()
+ * typically means the object is about to be destroyed, or that we were
+ * called from move_notify(). So just avoid doing much for now.
+ * If the object is not destroyed next, the TTM eviction logic
+ * and shrinkers will move it out if needed.
+ */
+
+ i915_ttm_adjust_lru(obj);
+}
+
+static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+
+ /*
+ * Don't manipulate the TTM LRUs while in TTM bo destruction.
+ * We're called through i915_ttm_delete_mem_notify().
+ */
+ if (!kref_read(&bo->kref))
+ return;
+
+ /*
+ * Put on the correct LRU list depending on the MADV status
+ */
+ spin_lock(&bo->bdev->lru_lock);
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ bo->priority = I915_TTM_PRIO_PURGE;
+ } else if (!i915_gem_object_has_pages(obj)) {
+ if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
+ bo->priority = I915_TTM_PRIO_HAS_PAGES;
+ } else {
+ if (bo->priority > I915_TTM_PRIO_NO_PAGES)
+ bo->priority = I915_TTM_PRIO_NO_PAGES;
+ }
+
+ ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
+ spin_unlock(&bo->bdev->lru_lock);
+}
+
+/*
+ * TTM-backed gem object destruction requires some clarification.
+ * Basically we have two possibilities here. We can either rely on the
+ * i915 delayed destruction and put the TTM object when the object
+ * is idle. This would be detected by TTM which would bypass the
+ * TTM delayed destroy handling. The other approach is to put the TTM
+ * object early and rely on the TTM destroyed handling, and then free
+ * the leftover parts of the GEM object once TTM's destroyed list handling is
+ * complete. For now, we rely on the latter for two reasons:
+ * a) TTM can evict an object even when it's on the delayed destroy list,
+ * which in theory allows for complete eviction.
+ * b) There is work going on in TTM to allow freeing an object even when
+ * it's not idle, and using the TTM destroyed list handling could help us
+ * benefit from that.
+ */
+static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj)
+{
+ if (obj->ttm.created) {
+ ttm_bo_put(i915_gem_to_ttm(obj));
+ } else {
+ __i915_gem_free_object(obj);
+ call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+ }
+}
+
+static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
+{
+ struct vm_area_struct *area = vmf->vma;
+ struct drm_i915_gem_object *obj =
+ i915_ttm_to_gem(area->vm_private_data);
+
+ /* Sanity check that we allow writing into this object */
+ if (unlikely(i915_gem_object_is_readonly(obj) &&
+ area->vm_flags & VM_WRITE))
+ return VM_FAULT_SIGBUS;
+
+ return ttm_bo_vm_fault(vmf);
+}
+
+static int
+vm_access_ttm(struct vm_area_struct *area, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct drm_i915_gem_object *obj =
+ i915_ttm_to_gem(area->vm_private_data);
+
+ if (i915_gem_object_is_readonly(obj) && write)
+ return -EACCES;
+
+ return ttm_bo_vm_access(area, addr, buf, len, write);
+}
+
+static void ttm_vm_open(struct vm_area_struct *vma)
+{
+ struct drm_i915_gem_object *obj =
+ i915_ttm_to_gem(vma->vm_private_data);
+
+ GEM_BUG_ON(!obj);
+ i915_gem_object_get(obj);
+}
+
+static void ttm_vm_close(struct vm_area_struct *vma)
+{
+ struct drm_i915_gem_object *obj =
+ i915_ttm_to_gem(vma->vm_private_data);
+
+ GEM_BUG_ON(!obj);
+ i915_gem_object_put(obj);
+}
+
+static const struct vm_operations_struct vm_ops_ttm = {
+ .fault = vm_fault_ttm,
+ .access = vm_access_ttm,
+ .open = ttm_vm_open,
+ .close = ttm_vm_close,
+};
+
+static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
+{
+ /* The ttm_bo must be allocated with I915_BO_ALLOC_USER */
+ GEM_BUG_ON(!drm_mm_node_allocated(&obj->base.vma_node.vm_node));
+
+ return drm_vma_node_offset_addr(&obj->base.vma_node);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
+ .name = "i915_gem_object_ttm",
+
+ .get_pages = i915_ttm_get_pages,
+ .put_pages = i915_ttm_put_pages,
+ .truncate = i915_ttm_purge,
+ .adjust_lru = i915_ttm_adjust_lru,
+ .delayed_free = i915_ttm_delayed_free,
+ .migrate = i915_ttm_migrate,
+ .mmap_offset = i915_ttm_mmap_offset,
+ .mmap_ops = &vm_ops_ttm,
+};
+
+void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+
+ i915_gem_object_release_memory_region(obj);
+ mutex_destroy(&obj->ttm.get_io_page.lock);
+ if (obj->ttm.created)
+ call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+}
+
+/**
+ * __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object
+ * @mem: The initial memory region for the object.
+ * @obj: The gem object.
+ * @size: Object size in bytes.
+ * @page_size: Forced page size in bytes, or 0 to use the region default.
+ * @flags: gem object flags.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
+ struct drm_i915_gem_object *obj,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_private *i915 = mem->i915;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ enum ttm_bo_type bo_type;
+ int ret;
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+ i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
+ i915_gem_object_init_memory_region(obj, mem);
+ i915_gem_object_make_unshrinkable(obj);
+ INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
+ mutex_init(&obj->ttm.get_io_page.lock);
+ bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
+ ttm_bo_type_kernel;
+
+ obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);
+
+ /* Forcing the page size is kernel internal only */
+ GEM_BUG_ON(page_size && obj->mm.n_placements);
+
+ /*
+ * If this function fails, it will call the destructor, but
+ * our caller still owns the object. So no freeing in the
+ * destructor until obj->ttm.created is true.
+ * Similarly, in delayed_destroy, we can't call ttm_bo_put()
+ * until successful initialization.
+ */
+ ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
+ bo_type, &i915_sys_placement,
+ page_size >> PAGE_SHIFT,
+ &ctx, NULL, NULL, i915_ttm_bo_destroy);
+ if (ret)
+ return i915_ttm_err_to_gem(ret);
+
+ obj->ttm.created = true;
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_adjust_gem_after_move(obj);
+ i915_gem_object_unlock(obj);
+
+ return 0;
+}
+
+static const struct intel_memory_region_ops ttm_system_region_ops = {
+ .init_object = __i915_gem_ttm_object_init,
+};
+
+struct intel_memory_region *
+i915_gem_ttm_system_setup(struct drm_i915_private *i915,
+ u16 type, u16 instance)
+{
+ struct intel_memory_region *mr;
+
+ mr = intel_memory_region_create(i915, 0,
+ totalram_pages() << PAGE_SHIFT,
+ PAGE_SIZE, 0,
+ type, instance,
+ &ttm_system_region_ops);
+ if (IS_ERR(mr))
+ return mr;
+
+ intel_memory_region_set_name(mr, "system-ttm");
+ return mr;
+}
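
Assuming the driver registers its system region through i915_gem_ttm_system_setup() (likely only on discrete parts), an ordinary region object create then lands in __i915_gem_ttm_object_init() via ttm_system_region_ops.init_object. A sketch:

        struct drm_i915_gem_object *obj;

        /* I915_BO_ALLOC_USER gives the object an mmap offset via i915_ttm_mmap_offset(). */
        obj = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
                                            SZ_2M, 0, I915_BO_ALLOC_USER);
        if (IS_ERR(obj))
                return PTR_ERR(obj);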
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
new file mode 100644
index 000000000000..40927f67b6d9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _I915_GEM_TTM_H_
+#define _I915_GEM_TTM_H_
+
+#include "gem/i915_gem_object_types.h"
+
+/**
+ * i915_gem_to_ttm - Convert a struct drm_i915_gem_object to a
+ * struct ttm_buffer_object.
+ * @obj: Pointer to the gem object.
+ *
+ * Return: Pointer to the embedded struct ttm_buffer_object.
+ */
+static inline struct ttm_buffer_object *
+i915_gem_to_ttm(struct drm_i915_gem_object *obj)
+{
+ return &obj->__do_not_access;
+}
+
+/*
+ * i915 ttm gem object destructor. Internal use only.
+ */
+void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
+
+/**
+ * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
+ * struct drm_i915_gem_object.
+ * @bo: Pointer to the ttm buffer object.
+ *
+ * Return: Pointer to the embedding struct drm_i915_gem_object, or NULL
+ * if the object was not an i915 ttm object.
+ */
+static inline struct drm_i915_gem_object *
+i915_ttm_to_gem(struct ttm_buffer_object *bo)
+{
+ if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy))
+ return NULL;
+
+ return container_of(bo, struct drm_i915_gem_object, __do_not_access);
+}
+
+int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
+ struct drm_i915_gem_object *obj,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags);
+#endif
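
The two helpers are inverses for objects created through __i915_gem_ttm_object_init(): i915_ttm_to_gem() returns NULL (with a warning) for any TTM BO whose destroy callback is not i915_ttm_bo_destroy(). A trivial sketch:

        struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);

        /* Round trip back to the GEM object; NULL would indicate a foreign BO. */
        GEM_BUG_ON(i915_ttm_to_gem(bo) != obj);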
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 7487bab11f0b..468a7a617fbf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -67,11 +67,11 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
if (!mmu_notifier_range_blockable(range))
return false;
- spin_lock(&i915->mm.notifier_lock);
+ write_lock(&i915->mm.notifier_lock);
mmu_interval_set_seq(mni, cur_seq);
- spin_unlock(&i915->mm.notifier_lock);
+ write_unlock(&i915->mm.notifier_lock);
/*
* We don't wait when the process is exiting. This is valid
@@ -107,16 +107,15 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj)
static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct page **pvec = NULL;
- spin_lock(&i915->mm.notifier_lock);
+ assert_object_held_shared(obj);
+
if (!--obj->userptr.page_ref) {
pvec = obj->userptr.pvec;
obj->userptr.pvec = NULL;
}
GEM_BUG_ON(obj->userptr.page_ref < 0);
- spin_unlock(&i915->mm.notifier_lock);
if (pvec) {
const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
@@ -128,7 +127,6 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
unsigned int max_segment = i915_sg_segment_size();
struct sg_table *st;
@@ -141,16 +139,13 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
if (!st)
return -ENOMEM;
- spin_lock(&i915->mm.notifier_lock);
- if (GEM_WARN_ON(!obj->userptr.page_ref)) {
- spin_unlock(&i915->mm.notifier_lock);
- ret = -EFAULT;
+ if (!obj->userptr.page_ref) {
+ ret = -EAGAIN;
goto err_free;
}
obj->userptr.page_ref++;
pvec = obj->userptr.pvec;
- spin_unlock(&i915->mm.notifier_lock);
alloc_table:
sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
@@ -241,7 +236,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
i915_gem_object_userptr_drop_ref(obj);
}
-static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool get_pages)
+static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj)
{
struct sg_table *pages;
int err;
@@ -259,15 +254,11 @@ static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool
if (!IS_ERR_OR_NULL(pages))
i915_gem_userptr_put_pages(obj, pages);
- if (get_pages)
- err = ____i915_gem_object_get_pages(obj);
-
return err;
}
int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
struct page **pvec;
unsigned int gup_flags = 0;
@@ -277,39 +268,22 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
if (obj->userptr.notifier.mm != current->mm)
return -EFAULT;
+ notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);
+
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;
- /* optimistically try to preserve current pages while unlocked */
- if (i915_gem_object_has_pages(obj) &&
- !mmu_interval_check_retry(&obj->userptr.notifier,
- obj->userptr.notifier_seq)) {
- spin_lock(&i915->mm.notifier_lock);
- if (obj->userptr.pvec &&
- !mmu_interval_read_retry(&obj->userptr.notifier,
- obj->userptr.notifier_seq)) {
- obj->userptr.page_ref++;
-
- /* We can keep using the current binding, this is the fastpath */
- ret = 1;
- }
- spin_unlock(&i915->mm.notifier_lock);
+ if (notifier_seq == obj->userptr.notifier_seq && obj->userptr.pvec) {
+ i915_gem_object_unlock(obj);
+ return 0;
}
- if (!ret) {
- /* Make sure userptr is unbound for next attempt, so we don't use stale pages. */
- ret = i915_gem_object_userptr_unbind(obj, false);
- }
+ ret = i915_gem_object_userptr_unbind(obj);
i915_gem_object_unlock(obj);
- if (ret < 0)
+ if (ret)
return ret;
- if (ret > 0)
- return 0;
-
- notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);
-
pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
if (!pvec)
return -ENOMEM;
@@ -329,7 +303,9 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
}
ret = 0;
- spin_lock(&i915->mm.notifier_lock);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
+ if (ret)
+ goto out;
if (mmu_interval_read_retry(&obj->userptr.notifier,
!obj->userptr.page_ref ? notifier_seq :
@@ -341,12 +317,14 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
if (!obj->userptr.page_ref++) {
obj->userptr.pvec = pvec;
obj->userptr.notifier_seq = notifier_seq;
-
pvec = NULL;
+ ret = ____i915_gem_object_get_pages(obj);
}
+ obj->userptr.page_ref--;
+
out_unlock:
- spin_unlock(&i915->mm.notifier_lock);
+ i915_gem_object_unlock(obj);
out:
if (pvec) {
@@ -369,11 +347,6 @@ int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj)
return 0;
}
-void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj)
-{
- i915_gem_object_userptr_drop_ref(obj);
-}
-
int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj)
{
int err;
@@ -396,7 +369,6 @@ int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj)
i915_gem_object_unlock(obj);
}
- i915_gem_object_userptr_submit_fini(obj);
return err;
}
@@ -450,6 +422,34 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
#endif
+static int
+probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
+{
+ const unsigned long end = addr + len;
+ struct vm_area_struct *vma;
+ int ret = -EFAULT;
+
+ mmap_read_lock(mm);
+ for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+ /* Check for holes, note that we also update the addr below */
+ if (vma->vm_start > addr)
+ break;
+
+ if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+ break;
+
+ if (vma->vm_end >= end) {
+ ret = 0;
+ break;
+ }
+
+ addr = vma->vm_end;
+ }
+ mmap_read_unlock(mm);
+
+ return ret;
+}
+
/*
* Creates a new mm object that wraps some normal memory from the process
* context - user memory.
@@ -505,7 +505,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
}
if (args->flags & ~(I915_USERPTR_READ_ONLY |
- I915_USERPTR_UNSYNCHRONIZED))
+ I915_USERPTR_UNSYNCHRONIZED |
+ I915_USERPTR_PROBE))
return -EINVAL;
if (i915_gem_object_size_2big(args->user_size))
@@ -532,14 +533,24 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -ENODEV;
}
+ if (args->flags & I915_USERPTR_PROBE) {
+ /*
+ * Check that the range pointed to represents real struct
+ * pages and not iomappings (at this moment in time!)
+ */
+ ret = probe_range(current->mm, args->user_ptr, args->user_size);
+ if (ret)
+ return ret;
+ }
+
#ifdef CONFIG_MMU_NOTIFIER
obj = i915_gem_object_alloc();
if (obj == NULL)
return -ENOMEM;
drm_gem_private_object_init(dev, &obj->base, args->user_size);
- i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class,
- I915_BO_ALLOC_STRUCT_PAGE);
+ i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0);
+ obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
@@ -572,7 +583,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
#ifdef CONFIG_MMU_NOTIFIER
- spin_lock_init(&dev_priv->mm.notifier_lock);
+ rwlock_init(&dev_priv->mm.notifier_lock);
#endif
return 0;
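From the userspace side, the new I915_USERPTR_PROBE flag lets callers find out at creation time whether the range is usable. An illustrative (not authoritative) snippet, assuming the standard uapi definitions in <drm/i915_drm.h>; the helper name and error convention are invented for this sketch:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: create a userptr object, asking the kernel to verify
 * up front that the range is backed by struct pages (no PFN/MIXEDMAP vmas,
 * no holes) rather than failing later at execbuf time. */
static int create_probed_userptr(int drm_fd, void *ptr, uint64_t size,
				 uint32_t *handle)
{
	struct drm_i915_gem_userptr arg = {
		.user_ptr = (uintptr_t)ptr,
		.user_size = size,
		.flags = I915_USERPTR_PROBE,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;

	*handle = arg.handle;
	return 0;
}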
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 1e97520c62b2..f909aaa09d9c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -104,8 +104,8 @@ static void fence_set_priority(struct dma_fence *fence,
engine = rq->engine;
rcu_read_lock(); /* RCU serialisation for set-wedged protection */
- if (engine->schedule)
- engine->schedule(rq, attr);
+ if (engine->sched_engine->schedule)
+ engine->sched_engine->schedule(rq, attr);
rcu_read_unlock();
}
@@ -290,3 +290,22 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
i915_gem_object_put(obj);
return ret;
}
+
+/**
+ * i915_gem_object_wait_migration - Sync an accelerated migration operation
+ * @obj: The migrating object.
+ * @flags: waiting flags. Currently supports only I915_WAIT_INTERRUPTIBLE.
+ *
+ * Wait for any pending async migration operation on the object,
+ * whether it was initiated explicitly (i915_gem_object_migrate()) or
+ * implicitly (swapin, initial clearing).
+ *
+ * Return: 0 if successful, -ERESTARTSYS if a signal was hit during waiting.
+ */
+int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
+ unsigned int flags)
+{
+ might_sleep();
+ /* NOP for now. */
+ return 0;
+}
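i915_gem_object_wait_migration() is only a stub here, but the intended call pattern already appears in the migration selftest added later in this diff. A minimal sketch, assuming the caller holds the object lock through a ww context; example_migrate_and_wait() is not a real driver function:

/* Sketch only: migrate an object to a new region and wait for the copy.
 * With the stub above the wait returns immediately; once migration is
 * asynchronous it is expected to block until the blitter copy completes. */
static int example_migrate_and_wait(struct drm_i915_gem_object *obj,
				    struct i915_gem_ww_ctx *ww,
				    enum intel_region_id dst)
{
	int err;

	err = i915_gem_object_migrate(obj, ww, dst);
	if (err)
		return err;

	return i915_gem_object_wait_migration(obj, I915_WAIT_INTERRUPTIBLE);
}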
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 0c8ecfdf5405..f963b8e1e37b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -114,8 +114,8 @@ huge_gem_object(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, dma_size);
- i915_gem_object_init(obj, &huge_ops, &lock_class,
- I915_BO_ALLOC_STRUCT_PAGE);
+ i915_gem_object_init(obj, &huge_ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index dadd485bc52f..a094f3ce1a90 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -167,9 +167,8 @@ huge_pages_object(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size);
- i915_gem_object_init(obj, &huge_page_ops, &lock_class,
- I915_BO_ALLOC_STRUCT_PAGE);
-
+ i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
i915_gem_object_set_volatile(obj);
obj->write_domain = I915_GEM_DOMAIN_CPU;
@@ -497,7 +496,8 @@ static int igt_mock_memory_region_huge_pages(void *arg)
int i;
for (i = 0; i < ARRAY_SIZE(flags); ++i) {
- obj = i915_gem_object_create_region(mem, page_size,
+ obj = i915_gem_object_create_region(mem,
+ page_size, page_size,
flags[i]);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 176e6b22f87f..ecbcbb86ae1e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -5,6 +5,7 @@
#include "i915_selftest.h"
+#include "gt/intel_context.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gpu_commands.h"
@@ -16,118 +17,6 @@
#include "huge_gem_object.h"
#include "mock_context.h"
-static int __igt_client_fill(struct intel_engine_cs *engine)
-{
- struct intel_context *ce = engine->kernel_context;
- struct drm_i915_gem_object *obj;
- I915_RND_STATE(prng);
- IGT_TIMEOUT(end);
- u32 *vaddr;
- int err = 0;
-
- intel_engine_pm_get(engine);
- do {
- const u32 max_block_size = S16_MAX * PAGE_SIZE;
- u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
- u32 phys_sz = sz % (max_block_size + 1);
- u32 val = prandom_u32_state(&prng);
- u32 i;
-
- sz = round_up(sz, PAGE_SIZE);
- phys_sz = round_up(phys_sz, PAGE_SIZE);
-
- pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
- phys_sz, sz, val);
-
- obj = huge_gem_object(engine->i915, phys_sz, sz);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto err_flush;
- }
-
- vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto err_put;
- }
-
- /*
- * XXX: The goal is move this to get_pages, so try to dirty the
- * CPU cache first to check that we do the required clflush
- * before scheduling the blt for !llc platforms. This matches
- * some version of reality where at get_pages the pages
- * themselves may not yet be coherent with the GPU(swap-in). If
- * we are missing the flush then we should see the stale cache
- * values after we do the set_to_cpu_domain and pick it up as a
- * test failure.
- */
- memset32(vaddr, val ^ 0xdeadbeaf,
- huge_gem_object_phys_size(obj) / sizeof(u32));
-
- if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
- obj->cache_dirty = true;
-
- err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
- &obj->mm.page_sizes,
- val);
- if (err)
- goto err_unpin;
-
- i915_gem_object_lock(obj, NULL);
- err = i915_gem_object_set_to_cpu_domain(obj, false);
- i915_gem_object_unlock(obj);
- if (err)
- goto err_unpin;
-
- for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
- if (vaddr[i] != val) {
- pr_err("vaddr[%u]=%x, expected=%x\n", i,
- vaddr[i], val);
- err = -EINVAL;
- goto err_unpin;
- }
- }
-
- i915_gem_object_unpin_map(obj);
- i915_gem_object_put(obj);
- } while (!time_after(jiffies, end));
-
- goto err_flush;
-
-err_unpin:
- i915_gem_object_unpin_map(obj);
-err_put:
- i915_gem_object_put(obj);
-err_flush:
- if (err == -ENOMEM)
- err = 0;
- intel_engine_pm_put(engine);
-
- return err;
-}
-
-static int igt_client_fill(void *arg)
-{
- int inst = 0;
-
- do {
- struct intel_engine_cs *engine;
- int err;
-
- engine = intel_engine_lookup_user(arg,
- I915_ENGINE_CLASS_COPY,
- inst++);
- if (!engine)
- return 0;
-
- err = __igt_client_fill(engine);
- if (err == -ENOMEM)
- err = 0;
- if (err)
- return err;
- } while (1);
-}
-
#define WIDTH 512
#define HEIGHT 32
@@ -693,7 +582,6 @@ static int igt_client_tiled_blits(void *arg)
int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
- SUBTEST(igt_client_fill),
SUBTEST(igt_client_tiled_blits),
};
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index dbcfa28a9d91..8eb5050f8cb3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -680,7 +680,7 @@ static int igt_ctx_exec(void *arg)
struct i915_gem_context *ctx;
struct intel_context *ce;
- ctx = kernel_context(i915);
+ ctx = kernel_context(i915, NULL);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out_file;
@@ -813,16 +813,12 @@ static int igt_shared_ctx_exec(void *arg)
struct i915_gem_context *ctx;
struct intel_context *ce;
- ctx = kernel_context(i915);
+ ctx = kernel_context(i915, ctx_vm(parent));
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out_test;
}
- mutex_lock(&ctx->mutex);
- __assign_ppgtt(ctx, ctx_vm(parent));
- mutex_unlock(&ctx->mutex);
-
ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
GEM_BUG_ON(IS_ERR(ce));
@@ -1875,125 +1871,6 @@ out_file:
return err;
}
-static bool skip_unused_engines(struct intel_context *ce, void *data)
-{
- return !ce->state;
-}
-
-static void mock_barrier_task(void *data)
-{
- unsigned int *counter = data;
-
- ++*counter;
-}
-
-static int mock_context_barrier(void *arg)
-{
-#undef pr_fmt
-#define pr_fmt(x) "context_barrier_task():" # x
- struct drm_i915_private *i915 = arg;
- struct i915_gem_context *ctx;
- struct i915_request *rq;
- unsigned int counter;
- int err;
-
- /*
- * The context barrier provides us with a callback after it emits
- * a request; useful for retiring old state after loading new.
- */
-
- ctx = mock_context(i915, "mock");
- if (!ctx)
- return -ENOMEM;
-
- counter = 0;
- err = context_barrier_task(ctx, 0, NULL, NULL, NULL,
- mock_barrier_task, &counter);
- if (err) {
- pr_err("Failed at line %d, err=%d\n", __LINE__, err);
- goto out;
- }
- if (counter == 0) {
- pr_err("Did not retire immediately with 0 engines\n");
- err = -EINVAL;
- goto out;
- }
-
- counter = 0;
- err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
- NULL, NULL, mock_barrier_task, &counter);
- if (err) {
- pr_err("Failed at line %d, err=%d\n", __LINE__, err);
- goto out;
- }
- if (counter == 0) {
- pr_err("Did not retire immediately for all unused engines\n");
- err = -EINVAL;
- goto out;
- }
-
- rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]);
- if (IS_ERR(rq)) {
- pr_err("Request allocation failed!\n");
- goto out;
- }
- i915_request_add(rq);
-
- counter = 0;
- context_barrier_inject_fault = BIT(RCS0);
- err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL,
- mock_barrier_task, &counter);
- context_barrier_inject_fault = 0;
- if (err == -ENXIO)
- err = 0;
- else
- pr_err("Did not hit fault injection!\n");
- if (counter != 0) {
- pr_err("Invoked callback on error!\n");
- err = -EIO;
- }
- if (err)
- goto out;
-
- counter = 0;
- err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
- NULL, NULL, mock_barrier_task, &counter);
- if (err) {
- pr_err("Failed at line %d, err=%d\n", __LINE__, err);
- goto out;
- }
- mock_device_flush(i915);
- if (counter == 0) {
- pr_err("Did not retire on each active engines\n");
- err = -EINVAL;
- goto out;
- }
-
-out:
- mock_context_close(ctx);
- return err;
-#undef pr_fmt
-#define pr_fmt(x) x
-}
-
-int i915_gem_context_mock_selftests(void)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(mock_context_barrier),
- };
- struct drm_i915_private *i915;
- int err;
-
- i915 = mock_gem_device();
- if (!i915)
- return -ENOMEM;
-
- err = i915_subtests(tests, i915);
-
- mock_destroy_device(i915);
- return err;
-}
-
int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index dd74bc09ec88..ffae7df5e4d7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -35,7 +35,7 @@ static int igt_dmabuf_export(void *arg)
static int igt_dmabuf_import_self(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct drm_i915_gem_object *obj;
+ struct drm_i915_gem_object *obj, *import_obj;
struct drm_gem_object *import;
struct dma_buf *dmabuf;
int err;
@@ -65,10 +65,19 @@ static int igt_dmabuf_import_self(void *arg)
err = -EINVAL;
goto out_import;
}
+ import_obj = to_intel_bo(import);
+
+ i915_gem_object_lock(import_obj, NULL);
+ err = __i915_gem_object_get_pages(import_obj);
+ i915_gem_object_unlock(import_obj);
+ if (err) {
+ pr_err("Same object dma-buf get_pages failed!\n");
+ goto out_import;
+ }
err = 0;
out_import:
- i915_gem_object_put(to_intel_bo(import));
+ i915_gem_object_put(import_obj);
out_dmabuf:
dma_buf_put(dmabuf);
out:
@@ -76,6 +85,180 @@ out:
return err;
}
+static int igt_dmabuf_import_same_driver_lmem(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM];
+ struct drm_i915_gem_object *obj;
+ struct drm_gem_object *import;
+ struct dma_buf *dmabuf;
+ int err = 0;
+
+ if (!lmem)
+ return 0;
+
+ force_different_devices = true;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1);
+ if (IS_ERR(obj)) {
+ pr_err("__i915_gem_object_create_user failed with err=%ld\n",
+ PTR_ERR(dmabuf));
+ err = PTR_ERR(obj);
+ goto out_ret;
+ }
+
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
+ if (IS_ERR(dmabuf)) {
+ pr_err("i915_gem_prime_export failed with err=%ld\n",
+ PTR_ERR(dmabuf));
+ err = PTR_ERR(dmabuf);
+ goto out;
+ }
+
+ /*
+ * We expect an import of an LMEM-only object to fail with
+ * -EOPNOTSUPP because it can't be migrated to SMEM.
+ */
+ import = i915_gem_prime_import(&i915->drm, dmabuf);
+ if (!IS_ERR(import)) {
+ drm_gem_object_put(import);
+ pr_err("i915_gem_prime_import succeeded when it shouldn't have\n");
+ err = -EINVAL;
+ } else if (PTR_ERR(import) != -EOPNOTSUPP) {
+ pr_err("i915_gem_prime_import failed with the wrong err=%ld\n",
+ PTR_ERR(import));
+ err = PTR_ERR(import);
+ }
+
+ dma_buf_put(dmabuf);
+out:
+ i915_gem_object_put(obj);
+out_ret:
+ force_different_devices = false;
+ return err;
+}
+
+static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
+ struct intel_memory_region **regions,
+ unsigned int num_regions)
+{
+ struct drm_i915_gem_object *obj, *import_obj;
+ struct drm_gem_object *import;
+ struct dma_buf *dmabuf;
+ struct dma_buf_attachment *import_attach;
+ struct sg_table *st;
+ long timeout;
+ int err;
+
+ force_different_devices = true;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
+ regions, num_regions);
+ if (IS_ERR(obj)) {
+ pr_err("__i915_gem_object_create_user failed with err=%ld\n",
+ PTR_ERR(dmabuf));
+ err = PTR_ERR(obj);
+ goto out_ret;
+ }
+
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
+ if (IS_ERR(dmabuf)) {
+ pr_err("i915_gem_prime_export failed with err=%ld\n",
+ PTR_ERR(dmabuf));
+ err = PTR_ERR(dmabuf);
+ goto out;
+ }
+
+ import = i915_gem_prime_import(&i915->drm, dmabuf);
+ if (IS_ERR(import)) {
+ pr_err("i915_gem_prime_import failed with err=%ld\n",
+ PTR_ERR(import));
+ err = PTR_ERR(import);
+ goto out_dmabuf;
+ }
+
+ if (import == &obj->base) {
+ pr_err("i915_gem_prime_import reused gem object!\n");
+ err = -EINVAL;
+ goto out_import;
+ }
+
+ import_obj = to_intel_bo(import);
+
+ i915_gem_object_lock(import_obj, NULL);
+ err = __i915_gem_object_get_pages(import_obj);
+ if (err) {
+ pr_err("Different objects dma-buf get_pages failed!\n");
+ i915_gem_object_unlock(import_obj);
+ goto out_import;
+ }
+
+ /*
+ * If the exported object is not in system memory, something
+ * weird is going on. TODO: When p2p is supported, this is no
+ * longer considered weird.
+ */
+ if (obj->mm.region != i915->mm.regions[INTEL_REGION_SMEM]) {
+ pr_err("Exported dma-buf is not in system memory\n");
+ err = -EINVAL;
+ }
+
+ i915_gem_object_unlock(import_obj);
+
+ /* Now try to fake an importer */
+ import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev);
+ if (IS_ERR(import_attach)) {
+ err = PTR_ERR(import_attach);
+ goto out_import;
+ }
+
+ st = dma_buf_map_attachment(import_attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(st)) {
+ err = PTR_ERR(st);
+ goto out_detach;
+ }
+
+ timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ);
+ if (!timeout) {
+ pr_err("dmabuf wait for exclusive fence timed out.\n");
+ timeout = -ETIME;
+ }
+ err = timeout > 0 ? 0 : timeout;
+ dma_buf_unmap_attachment(import_attach, st, DMA_BIDIRECTIONAL);
+out_detach:
+ dma_buf_detach(dmabuf, import_attach);
+out_import:
+ i915_gem_object_put(import_obj);
+out_dmabuf:
+ dma_buf_put(dmabuf);
+out:
+ i915_gem_object_put(obj);
+out_ret:
+ force_different_devices = false;
+ return err;
+}
+
+static int igt_dmabuf_import_same_driver_smem(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *smem = i915->mm.regions[INTEL_REGION_SMEM];
+
+ return igt_dmabuf_import_same_driver(i915, &smem, 1);
+}
+
+static int igt_dmabuf_import_same_driver_lmem_smem(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *regions[2];
+
+ if (!i915->mm.regions[INTEL_REGION_LMEM])
+ return 0;
+
+ regions[0] = i915->mm.regions[INTEL_REGION_LMEM];
+ regions[1] = i915->mm.regions[INTEL_REGION_SMEM];
+ return igt_dmabuf_import_same_driver(i915, regions, 2);
+}
+
static int igt_dmabuf_import(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -286,6 +469,9 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_dmabuf_export),
+ SUBTEST(igt_dmabuf_import_same_driver_lmem),
+ SUBTEST(igt_dmabuf_import_same_driver_smem),
+ SUBTEST(igt_dmabuf_import_same_driver_lmem_smem),
};
return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 4df505e4c53a..16162fc2782d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg)
intel_gt_pm_get(&eb.i915->gt);
for_each_uabi_engine(eb.engine, eb.i915) {
+ if (intel_engine_requires_cmd_parser(eb.engine) ||
+ intel_engine_using_cmd_parser(eb.engine))
+ continue;
+
reloc_cache_init(&eb.reloc_cache, eb.i915);
memset(map, POISON_INUSE, 4096);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
new file mode 100644
index 000000000000..28a700f08b49
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020-2021 Intel Corporation
+ */
+
+#include "gt/intel_migrate.h"
+
+static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
+ bool fill)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ unsigned int i, count = obj->base.size / sizeof(u32);
+ enum i915_map_type map_type =
+ i915_coherent_map_type(i915, obj, false);
+ u32 *cur;
+ int err = 0;
+
+ assert_object_held(obj);
+ cur = i915_gem_object_pin_map(obj, map_type);
+ if (IS_ERR(cur))
+ return PTR_ERR(cur);
+
+ if (fill)
+ for (i = 0; i < count; ++i)
+ *cur++ = i;
+ else
+ for (i = 0; i < count; ++i)
+ if (*cur++ != i) {
+ pr_err("Object content mismatch at location %d of %d\n", i, count);
+ err = -EINVAL;
+ break;
+ }
+
+ i915_gem_object_unpin_map(obj);
+
+ return err;
+}
+
+static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src,
+ enum intel_region_id dst)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_memory_region *src_mr = i915->mm.regions[src];
+ struct drm_i915_gem_object *obj;
+ struct i915_gem_ww_ctx ww;
+ int err = 0;
+
+ GEM_BUG_ON(!src_mr);
+
+ /* Switch object backing-store on create */
+ obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ err = igt_fill_check_buffer(obj, true);
+ if (err)
+ continue;
+
+ err = i915_gem_object_migrate(obj, &ww, dst);
+ if (err)
+ continue;
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ continue;
+
+ if (i915_gem_object_can_migrate(obj, src))
+ err = -EINVAL;
+
+ i915_gem_object_unpin_pages(obj);
+ err = i915_gem_object_wait_migration(obj, true);
+ if (err)
+ continue;
+
+ err = igt_fill_check_buffer(obj, false);
+ }
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static int igt_smem_create_migrate(void *arg)
+{
+ return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM);
+}
+
+static int igt_lmem_create_migrate(void *arg)
+{
+ return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM);
+}
+
+static int igt_same_create_migrate(void *arg)
+{
+ return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM);
+}
+
+static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
+ struct drm_i915_gem_object *obj)
+{
+ int err;
+
+ err = i915_gem_object_lock(obj, ww);
+ if (err)
+ return err;
+
+ if (i915_gem_object_is_lmem(obj)) {
+ err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM);
+ if (err) {
+ pr_err("Object failed migration to smem\n");
+ if (err)
+ return err;
+ }
+
+ if (i915_gem_object_is_lmem(obj)) {
+ pr_err("object still backed by lmem\n");
+ err = -EINVAL;
+ }
+
+ if (!i915_gem_object_has_struct_page(obj)) {
+ pr_err("object not backed by struct page\n");
+ err = -EINVAL;
+ }
+
+ } else {
+ err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM);
+ if (err) {
+ pr_err("Object failed migration to lmem\n");
+ if (err)
+ return err;
+ }
+
+ if (i915_gem_object_has_struct_page(obj)) {
+ pr_err("object still backed by struct page\n");
+ err = -EINVAL;
+ }
+
+ if (!i915_gem_object_is_lmem(obj)) {
+ pr_err("object not backed by lmem\n");
+ err = -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int igt_lmem_pages_migrate(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct drm_i915_private *i915 = gt->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_gem_ww_ctx ww;
+ struct i915_request *rq;
+ int err;
+ int i;
+
+ /* From LMEM to shmem and back again */
+
+ obj = i915_gem_object_create_lmem(i915, SZ_2M, 0);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ /* Initial GPU fill, sync, CPU initialization. */
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ err = ____i915_gem_object_get_pages(obj);
+ if (err)
+ continue;
+
+ err = intel_migrate_clear(&gt->migrate, &ww, NULL,
+ obj->mm.pages->sgl, obj->cache_level,
+ i915_gem_object_is_lmem(obj),
+ 0xdeadbeaf, &rq);
+ if (rq) {
+ dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
+ i915_request_put(rq);
+ }
+ if (err)
+ continue;
+
+ err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE,
+ 5 * HZ);
+ if (err)
+ continue;
+
+ err = igt_fill_check_buffer(obj, true);
+ if (err)
+ continue;
+ }
+ if (err)
+ goto out_put;
+
+ /*
+ * Migrate to and from smem without explicitly syncing.
+ * Finalize with data in smem for fast readout.
+ */
+ for (i = 1; i <= 5; ++i) {
+ for_i915_gem_ww(&ww, err, true)
+ err = lmem_pages_migrate_one(&ww, obj);
+ if (err)
+ goto out_put;
+ }
+
+ err = i915_gem_object_lock_interruptible(obj, NULL);
+ if (err)
+ goto out_put;
+
+ /* Finally sync migration and check content. */
+ err = i915_gem_object_wait_migration(obj, true);
+ if (err)
+ goto out_unlock;
+
+ err = igt_fill_check_buffer(obj, false);
+
+out_unlock:
+ i915_gem_object_unlock(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_smem_create_migrate),
+ SUBTEST(igt_lmem_create_migrate),
+ SUBTEST(igt_same_create_migrate),
+ SUBTEST(igt_lmem_pages_migrate),
+ };
+
+ if (!HAS_LMEM(i915))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
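The selftest above funnels all locked work through for_i915_gem_ww(). For readers unfamiliar with that helper, the open-coded loop below is a conceptual sketch of the semantics it is expected to provide (not the macro's literal expansion); example_body() is a hypothetical stand-in for the loop body:

/* Assumed stand-in for the loop body (lock, pin, migrate, ...). */
int example_body(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww);

static int example_ww_section(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);	/* true: interruptible waits */
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = example_body(obj, &ww);
	if (err == -EDEADLK) {
		/* A lock inside the body was contended: unwind and retry. */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}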
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5575172c66f5..b20f5621f62b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -573,21 +573,30 @@ err:
return 0;
}
+static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
+{
+ if (HAS_LMEM(i915))
+ return I915_MMAP_TYPE_FIXED;
+
+ return I915_MMAP_TYPE_GTT;
+}
+
static bool assert_mmap_offset(struct drm_i915_private *i915,
unsigned long size,
int expected)
{
struct drm_i915_gem_object *obj;
- struct i915_mmap_offset *mmo;
+ u64 offset;
+ int ret;
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj))
- return false;
+ return expected && expected == PTR_ERR(obj);
- mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
+ ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
i915_gem_object_put(obj);
- return PTR_ERR_OR_ZERO(mmo) == expected;
+ return ret == expected;
}
static void disable_retire_worker(struct drm_i915_private *i915)
@@ -622,8 +631,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
struct drm_i915_gem_object *obj;
struct drm_mm_node *hole, *next;
- struct i915_mmap_offset *mmo;
int loop, err = 0;
+ u64 offset;
/* Disable background reaper */
disable_retire_worker(i915);
@@ -684,13 +693,13 @@ static int igt_mmap_offset_exhaustion(void *arg)
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
+ pr_err("Unable to create object for reclaimed hole\n");
goto out;
}
- mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
- if (IS_ERR(mmo)) {
+ err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
+ if (err) {
pr_err("Unable to insert object into reclaimed hole\n");
- err = PTR_ERR(mmo);
goto err_obj;
}
@@ -830,34 +839,25 @@ static int wc_check(struct drm_i915_gem_object *obj)
static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
{
- if (type == I915_MMAP_TYPE_GTT &&
- !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt))
- return false;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ bool no_map;
- if (type != I915_MMAP_TYPE_GTT &&
- !i915_gem_object_has_struct_page(obj) &&
- !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM))
+ if (HAS_LMEM(i915))
+ return type == I915_MMAP_TYPE_FIXED;
+ else if (type == I915_MMAP_TYPE_FIXED)
return false;
- return true;
-}
-
-static void object_set_placements(struct drm_i915_gem_object *obj,
- struct intel_memory_region **placements,
- unsigned int n_placements)
-{
- GEM_BUG_ON(!n_placements);
+ if (type == I915_MMAP_TYPE_GTT &&
+ !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt))
+ return false;
- if (n_placements == 1) {
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct intel_memory_region *mr = placements[0];
+ i915_gem_object_lock(obj, NULL);
+ no_map = (type != I915_MMAP_TYPE_GTT &&
+ !i915_gem_object_has_struct_page(obj) &&
+ !i915_gem_object_has_iomem(obj));
+ i915_gem_object_unlock(obj);
- obj->mm.placements = &i915->mm.regions[mr->id];
- obj->mm.n_placements = 1;
- } else {
- obj->mm.placements = placements;
- obj->mm.n_placements = n_placements;
- }
+ return !no_map;
}
#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
@@ -865,10 +865,10 @@ static int __igt_mmap(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
enum i915_mmap_type type)
{
- struct i915_mmap_offset *mmo;
struct vm_area_struct *area;
unsigned long addr;
int err, i;
+ u64 offset;
if (!can_mmap(obj, type))
return 0;
@@ -879,11 +879,11 @@ static int __igt_mmap(struct drm_i915_private *i915,
if (err)
return err;
- mmo = mmap_offset_attach(obj, type, NULL);
- if (IS_ERR(mmo))
- return PTR_ERR(mmo);
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
- addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
if (IS_ERR_VALUE(addr))
return addr;
@@ -897,13 +897,6 @@ static int __igt_mmap(struct drm_i915_private *i915,
goto out_unmap;
}
- if (area->vm_private_data != mmo) {
- pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n",
- obj->mm.region->name);
- err = -EINVAL;
- goto out_unmap;
- }
-
for (i = 0; i < obj->base.size / sizeof(u32); i++) {
u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
u32 x;
@@ -961,18 +954,18 @@ static int igt_mmap(void *arg)
struct drm_i915_gem_object *obj;
int err;
- obj = i915_gem_object_create_region(mr, sizes[i], 0);
+ obj = __i915_gem_object_create_user(i915, sizes[i], &mr, 1);
if (obj == ERR_PTR(-ENODEV))
continue;
if (IS_ERR(obj))
return PTR_ERR(obj);
- object_set_placements(obj, &mr, 1);
-
err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT);
if (err == 0)
err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap(i915, obj, I915_MMAP_TYPE_FIXED);
i915_gem_object_put(obj);
if (err)
@@ -990,26 +983,33 @@ static const char *repr_mmap_type(enum i915_mmap_type type)
case I915_MMAP_TYPE_WB: return "wb";
case I915_MMAP_TYPE_WC: return "wc";
case I915_MMAP_TYPE_UC: return "uc";
+ case I915_MMAP_TYPE_FIXED: return "fixed";
default: return "unknown";
}
}
-static bool can_access(const struct drm_i915_gem_object *obj)
+static bool can_access(struct drm_i915_gem_object *obj)
{
- return i915_gem_object_has_struct_page(obj) ||
- i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM);
+ bool access;
+
+ i915_gem_object_lock(obj, NULL);
+ access = i915_gem_object_has_struct_page(obj) ||
+ i915_gem_object_has_iomem(obj);
+ i915_gem_object_unlock(obj);
+
+ return access;
}
static int __igt_mmap_access(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
enum i915_mmap_type type)
{
- struct i915_mmap_offset *mmo;
unsigned long __user *ptr;
unsigned long A, B;
unsigned long x, y;
unsigned long addr;
int err;
+ u64 offset;
memset(&A, 0xAA, sizeof(A));
memset(&B, 0xBB, sizeof(B));
@@ -1017,11 +1017,11 @@ static int __igt_mmap_access(struct drm_i915_private *i915,
if (!can_mmap(obj, type) || !can_access(obj))
return 0;
- mmo = mmap_offset_attach(obj, type, NULL);
- if (IS_ERR(mmo))
- return PTR_ERR(mmo);
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
- addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
if (IS_ERR_VALUE(addr))
return addr;
ptr = (unsigned long __user *)addr;
@@ -1081,15 +1081,13 @@ static int igt_mmap_access(void *arg)
struct drm_i915_gem_object *obj;
int err;
- obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
if (obj == ERR_PTR(-ENODEV))
continue;
if (IS_ERR(obj))
return PTR_ERR(obj);
- object_set_placements(obj, &mr, 1);
-
err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT);
if (err == 0)
err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB);
@@ -1097,6 +1095,8 @@ static int igt_mmap_access(void *arg)
err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC);
if (err == 0)
err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_FIXED);
i915_gem_object_put(obj);
if (err)
@@ -1111,11 +1111,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
enum i915_mmap_type type)
{
struct intel_engine_cs *engine;
- struct i915_mmap_offset *mmo;
unsigned long addr;
u32 __user *ux;
u32 bbe;
int err;
+ u64 offset;
/*
* Verify that the mmap access into the backing store aligns with
@@ -1132,11 +1132,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
if (err)
return err;
- mmo = mmap_offset_attach(obj, type, NULL);
- if (IS_ERR(mmo))
- return PTR_ERR(mmo);
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
- addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
if (IS_ERR_VALUE(addr))
return addr;
@@ -1226,18 +1226,18 @@ static int igt_mmap_gpu(void *arg)
struct drm_i915_gem_object *obj;
int err;
- obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
if (obj == ERR_PTR(-ENODEV))
continue;
if (IS_ERR(obj))
return PTR_ERR(obj);
- object_set_placements(obj, &mr, 1);
-
err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT);
if (err == 0)
err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_FIXED);
i915_gem_object_put(obj);
if (err)
@@ -1303,18 +1303,18 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
enum i915_mmap_type type)
{
- struct i915_mmap_offset *mmo;
unsigned long addr;
int err;
+ u64 offset;
if (!can_mmap(obj, type))
return 0;
- mmo = mmap_offset_attach(obj, type, NULL);
- if (IS_ERR(mmo))
- return PTR_ERR(mmo);
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
- addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
if (IS_ERR_VALUE(addr))
return addr;
@@ -1350,10 +1350,20 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
}
}
- err = check_absent(addr, obj->base.size);
- if (err) {
- pr_err("%s: was not absent\n", obj->mm.region->name);
- goto out_unmap;
+ if (!obj->ops->mmap_ops) {
+ err = check_absent(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not absent\n", obj->mm.region->name);
+ goto out_unmap;
+ }
+ } else {
+ /* ttm allows access to evicted regions by design */
+
+ err = check_present(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not present\n", obj->mm.region->name);
+ goto out_unmap;
+ }
}
out_unmap:
@@ -1371,18 +1381,18 @@ static int igt_mmap_revoke(void *arg)
struct drm_i915_gem_object *obj;
int err;
- obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
if (obj == ERR_PTR(-ENODEV))
continue;
if (IS_ERR(obj))
return PTR_ERR(obj);
- object_set_placements(obj, &mr, 1);
-
err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT);
if (err == 0)
err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_FIXED);
i915_gem_object_put(obj);
if (err)
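The mman selftest now prefers the FIXED mapping mode, where the kernel rather than userspace chooses the caching mode from the object's current placement. Assuming the matching uapi flag I915_MMAP_OFFSET_FIXED from the same series (it is not part of the hunks shown here), a userspace mapping would be obtained roughly as in this illustrative snippet:

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: request a fake mmap offset in FIXED mode and map it
 * through the drm fd; the kernel picks WB or WC depending on whether the
 * object currently lives in system or local memory. */
static void *example_map_fixed(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_offset arg = {
		.handle = handle,
		.flags = I915_MMAP_OFFSET_FIXED,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
		return MAP_FAILED;

	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    drm_fd, arg.offset);
}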
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
deleted file mode 100644
index 8c335d1a8406..000000000000
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ /dev/null
@@ -1,597 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#include <linux/sort.h>
-
-#include "gt/intel_gt.h"
-#include "gt/intel_engine_user.h"
-
-#include "i915_selftest.h"
-
-#include "gem/i915_gem_context.h"
-#include "selftests/igt_flush_test.h"
-#include "selftests/i915_random.h"
-#include "selftests/mock_drm.h"
-#include "huge_gem_object.h"
-#include "mock_context.h"
-
-static int wrap_ktime_compare(const void *A, const void *B)
-{
- const ktime_t *a = A, *b = B;
-
- return ktime_compare(*a, *b);
-}
-
-static int __perf_fill_blt(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- int inst = 0;
-
- do {
- struct intel_engine_cs *engine;
- ktime_t t[5];
- int pass;
- int err;
-
- engine = intel_engine_lookup_user(i915,
- I915_ENGINE_CLASS_COPY,
- inst++);
- if (!engine)
- return 0;
-
- intel_engine_pm_get(engine);
- for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
- struct intel_context *ce = engine->kernel_context;
- ktime_t t0, t1;
-
- t0 = ktime_get();
-
- err = i915_gem_object_fill_blt(obj, ce, 0);
- if (err)
- break;
-
- err = i915_gem_object_wait(obj,
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- break;
-
- t1 = ktime_get();
- t[pass] = ktime_sub(t1, t0);
- }
- intel_engine_pm_put(engine);
- if (err)
- return err;
-
- sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
- pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
- engine->name,
- obj->base.size >> 10,
- div64_u64(mul_u32_u32(4 * obj->base.size,
- 1000 * 1000 * 1000),
- t[1] + 2 * t[2] + t[3]) >> 20);
- } while (1);
-}
-
-static int perf_fill_blt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- static const unsigned long sizes[] = {
- SZ_4K,
- SZ_64K,
- SZ_2M,
- SZ_64M
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(sizes); i++) {
- struct drm_i915_gem_object *obj;
- int err;
-
- obj = i915_gem_object_create_internal(i915, sizes[i]);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- err = __perf_fill_blt(obj);
- i915_gem_object_put(obj);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int __perf_copy_blt(struct drm_i915_gem_object *src,
- struct drm_i915_gem_object *dst)
-{
- struct drm_i915_private *i915 = to_i915(src->base.dev);
- int inst = 0;
-
- do {
- struct intel_engine_cs *engine;
- ktime_t t[5];
- int pass;
- int err = 0;
-
- engine = intel_engine_lookup_user(i915,
- I915_ENGINE_CLASS_COPY,
- inst++);
- if (!engine)
- return 0;
-
- intel_engine_pm_get(engine);
- for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
- struct intel_context *ce = engine->kernel_context;
- ktime_t t0, t1;
-
- t0 = ktime_get();
-
- err = i915_gem_object_copy_blt(src, dst, ce);
- if (err)
- break;
-
- err = i915_gem_object_wait(dst,
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- break;
-
- t1 = ktime_get();
- t[pass] = ktime_sub(t1, t0);
- }
- intel_engine_pm_put(engine);
- if (err)
- return err;
-
- sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
- pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
- engine->name,
- src->base.size >> 10,
- div64_u64(mul_u32_u32(4 * src->base.size,
- 1000 * 1000 * 1000),
- t[1] + 2 * t[2] + t[3]) >> 20);
- } while (1);
-}
-
-static int perf_copy_blt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- static const unsigned long sizes[] = {
- SZ_4K,
- SZ_64K,
- SZ_2M,
- SZ_64M
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(sizes); i++) {
- struct drm_i915_gem_object *src, *dst;
- int err;
-
- src = i915_gem_object_create_internal(i915, sizes[i]);
- if (IS_ERR(src))
- return PTR_ERR(src);
-
- dst = i915_gem_object_create_internal(i915, sizes[i]);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- goto err_src;
- }
-
- err = __perf_copy_blt(src, dst);
-
- i915_gem_object_put(dst);
-err_src:
- i915_gem_object_put(src);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-struct igt_thread_arg {
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- struct file *file;
- struct rnd_state prng;
- unsigned int n_cpus;
-};
-
-static int igt_fill_blt_thread(void *arg)
-{
- struct igt_thread_arg *thread = arg;
- struct intel_engine_cs *engine = thread->engine;
- struct rnd_state *prng = &thread->prng;
- struct drm_i915_gem_object *obj;
- struct i915_gem_context *ctx;
- struct intel_context *ce;
- unsigned int prio;
- IGT_TIMEOUT(end);
- u64 total, max;
- int err;
-
- ctx = thread->ctx;
- if (!ctx) {
- ctx = live_context_for_engine(engine, thread->file);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
- ctx->sched.priority = prio;
- }
-
- ce = i915_gem_context_get_engine(ctx, 0);
- GEM_BUG_ON(IS_ERR(ce));
-
- /*
- * If we have a tiny shared address space, like for the GGTT
- * then we can't be too greedy.
- */
- max = ce->vm->total;
- if (i915_is_ggtt(ce->vm) || thread->ctx)
- max = div_u64(max, thread->n_cpus);
- max >>= 4;
-
- total = PAGE_SIZE;
- do {
- /* Aim to keep the runtime under reasonable bounds! */
- const u32 max_phys_size = SZ_64K;
- u32 val = prandom_u32_state(prng);
- u32 phys_sz;
- u32 sz;
- u32 *vaddr;
- u32 i;
-
- total = min(total, max);
- sz = i915_prandom_u32_max_state(total, prng) + 1;
- phys_sz = sz % max_phys_size + 1;
-
- sz = round_up(sz, PAGE_SIZE);
- phys_sz = round_up(phys_sz, PAGE_SIZE);
- phys_sz = min(phys_sz, sz);
-
- pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
- phys_sz, sz, val);
-
- obj = huge_gem_object(engine->i915, phys_sz, sz);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto err_flush;
- }
-
- vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto err_put;
- }
-
- /*
- * Make sure the potentially async clflush does its job, if
- * required.
- */
- memset32(vaddr, val ^ 0xdeadbeaf,
- huge_gem_object_phys_size(obj) / sizeof(u32));
-
- if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
- obj->cache_dirty = true;
-
- err = i915_gem_object_fill_blt(obj, ce, val);
- if (err)
- goto err_unpin;
-
- err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto err_unpin;
-
- for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
- if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
- drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));
-
- if (vaddr[i] != val) {
- pr_err("vaddr[%u]=%x, expected=%x\n", i,
- vaddr[i], val);
- err = -EINVAL;
- goto err_unpin;
- }
- }
-
- i915_gem_object_unpin_map(obj);
- i915_gem_object_put(obj);
-
- total <<= 1;
- } while (!time_after(jiffies, end));
-
- goto err_flush;
-
-err_unpin:
- i915_gem_object_unpin_map(obj);
-err_put:
- i915_gem_object_put(obj);
-err_flush:
- if (err == -ENOMEM)
- err = 0;
-
- intel_context_put(ce);
- return err;
-}
-
-static int igt_copy_blt_thread(void *arg)
-{
- struct igt_thread_arg *thread = arg;
- struct intel_engine_cs *engine = thread->engine;
- struct rnd_state *prng = &thread->prng;
- struct drm_i915_gem_object *src, *dst;
- struct i915_gem_context *ctx;
- struct intel_context *ce;
- unsigned int prio;
- IGT_TIMEOUT(end);
- u64 total, max;
- int err;
-
- ctx = thread->ctx;
- if (!ctx) {
- ctx = live_context_for_engine(engine, thread->file);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
- ctx->sched.priority = prio;
- }
-
- ce = i915_gem_context_get_engine(ctx, 0);
- GEM_BUG_ON(IS_ERR(ce));
-
- /*
- * If we have a tiny shared address space, like for the GGTT
- * then we can't be too greedy.
- */
- max = ce->vm->total;
- if (i915_is_ggtt(ce->vm) || thread->ctx)
- max = div_u64(max, thread->n_cpus);
- max >>= 4;
-
- total = PAGE_SIZE;
- do {
- /* Aim to keep the runtime under reasonable bounds! */
- const u32 max_phys_size = SZ_64K;
- u32 val = prandom_u32_state(prng);
- u32 phys_sz;
- u32 sz;
- u32 *vaddr;
- u32 i;
-
- total = min(total, max);
- sz = i915_prandom_u32_max_state(total, prng) + 1;
- phys_sz = sz % max_phys_size + 1;
-
- sz = round_up(sz, PAGE_SIZE);
- phys_sz = round_up(phys_sz, PAGE_SIZE);
- phys_sz = min(phys_sz, sz);
-
- pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
- phys_sz, sz, val);
-
- src = huge_gem_object(engine->i915, phys_sz, sz);
- if (IS_ERR(src)) {
- err = PTR_ERR(src);
- goto err_flush;
- }
-
- vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto err_put_src;
- }
-
- memset32(vaddr, val,
- huge_gem_object_phys_size(src) / sizeof(u32));
-
- i915_gem_object_unpin_map(src);
-
- if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
- src->cache_dirty = true;
-
- dst = huge_gem_object(engine->i915, phys_sz, sz);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- goto err_put_src;
- }
-
- vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto err_put_dst;
- }
-
- memset32(vaddr, val ^ 0xdeadbeaf,
- huge_gem_object_phys_size(dst) / sizeof(u32));
-
- if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
- dst->cache_dirty = true;
-
- err = i915_gem_object_copy_blt(src, dst, ce);
- if (err)
- goto err_unpin;
-
- err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto err_unpin;
-
- for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
- if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
- drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));
-
- if (vaddr[i] != val) {
- pr_err("vaddr[%u]=%x, expected=%x\n", i,
- vaddr[i], val);
- err = -EINVAL;
- goto err_unpin;
- }
- }
-
- i915_gem_object_unpin_map(dst);
-
- i915_gem_object_put(src);
- i915_gem_object_put(dst);
-
- total <<= 1;
- } while (!time_after(jiffies, end));
-
- goto err_flush;
-
-err_unpin:
- i915_gem_object_unpin_map(dst);
-err_put_dst:
- i915_gem_object_put(dst);
-err_put_src:
- i915_gem_object_put(src);
-err_flush:
- if (err == -ENOMEM)
- err = 0;
-
- intel_context_put(ce);
- return err;
-}
-
-static int igt_threaded_blt(struct intel_engine_cs *engine,
- int (*blt_fn)(void *arg),
- unsigned int flags)
-#define SINGLE_CTX BIT(0)
-{
- struct igt_thread_arg *thread;
- struct task_struct **tsk;
- unsigned int n_cpus, i;
- I915_RND_STATE(prng);
- int err = 0;
-
- n_cpus = num_online_cpus() + 1;
-
- tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
- if (!tsk)
- return 0;
-
- thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
- if (!thread)
- goto out_tsk;
-
- thread[0].file = mock_file(engine->i915);
- if (IS_ERR(thread[0].file)) {
- err = PTR_ERR(thread[0].file);
- goto out_thread;
- }
-
- if (flags & SINGLE_CTX) {
- thread[0].ctx = live_context_for_engine(engine, thread[0].file);
- if (IS_ERR(thread[0].ctx)) {
- err = PTR_ERR(thread[0].ctx);
- goto out_file;
- }
- }
-
- for (i = 0; i < n_cpus; ++i) {
- thread[i].engine = engine;
- thread[i].file = thread[0].file;
- thread[i].ctx = thread[0].ctx;
- thread[i].n_cpus = n_cpus;
- thread[i].prng =
- I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
-
- tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
- if (IS_ERR(tsk[i])) {
- err = PTR_ERR(tsk[i]);
- break;
- }
-
- get_task_struct(tsk[i]);
- }
-
- yield(); /* start all threads before we kthread_stop() */
-
- for (i = 0; i < n_cpus; ++i) {
- int status;
-
- if (IS_ERR_OR_NULL(tsk[i]))
- continue;
-
- status = kthread_stop(tsk[i]);
- if (status && !err)
- err = status;
-
- put_task_struct(tsk[i]);
- }
-
-out_file:
- fput(thread[0].file);
-out_thread:
- kfree(thread);
-out_tsk:
- kfree(tsk);
- return err;
-}
-
-static int test_copy_engines(struct drm_i915_private *i915,
- int (*fn)(void *arg),
- unsigned int flags)
-{
- struct intel_engine_cs *engine;
- int ret;
-
- for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
- ret = igt_threaded_blt(engine, fn, flags);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-static int igt_fill_blt(void *arg)
-{
- return test_copy_engines(arg, igt_fill_blt_thread, 0);
-}
-
-static int igt_fill_blt_ctx0(void *arg)
-{
- return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
-}
-
-static int igt_copy_blt(void *arg)
-{
- return test_copy_engines(arg, igt_copy_blt_thread, 0);
-}
-
-static int igt_copy_blt_ctx0(void *arg)
-{
- return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
-}
-
-int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(igt_fill_blt),
- SUBTEST(igt_fill_blt_ctx0),
- SUBTEST(igt_copy_blt),
- SUBTEST(igt_copy_blt_ctx0),
- };
-
- if (intel_gt_is_wedged(&i915->gt))
- return 0;
-
- return i915_live_subtests(tests, i915);
-}
-
-int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(perf_fill_blt),
- SUBTEST(perf_copy_blt),
- };
-
- if (intel_gt_is_wedged(&i915->gt))
- return 0;
-
- return i915_live_subtests(tests, i915);
-}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 3a6ce87f8b52..d43d8dae0f69 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -25,13 +25,14 @@ static int mock_phys_object(void *arg)
goto out;
}
+ i915_gem_object_lock(obj, NULL);
if (!i915_gem_object_has_struct_page(obj)) {
+ i915_gem_object_unlock(obj);
err = -EINVAL;
pr_err("shmem has no struct page\n");
goto out_obj;
}
- i915_gem_object_lock(obj, NULL);
err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
i915_gem_object_unlock(obj);
if (err) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 51b5a3421b40..fee070df1c97 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -14,6 +14,7 @@ mock_context(struct drm_i915_private *i915,
{
struct i915_gem_context *ctx;
struct i915_gem_engines *e;
+ struct intel_sseu null_sseu = {};
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -30,15 +31,6 @@ mock_context(struct drm_i915_private *i915,
i915_gem_context_set_persistence(ctx);
- mutex_init(&ctx->engines_mutex);
- e = default_engines(ctx);
- if (IS_ERR(e))
- goto err_free;
- RCU_INIT_POINTER(ctx->engines, e);
-
- INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
- mutex_init(&ctx->lut_mutex);
-
if (name) {
struct i915_ppgtt *ppgtt;
@@ -46,25 +38,29 @@ mock_context(struct drm_i915_private *i915,
ppgtt = mock_ppgtt(i915, name);
if (!ppgtt)
- goto err_put;
-
- mutex_lock(&ctx->mutex);
- __set_ppgtt(ctx, &ppgtt->vm);
- mutex_unlock(&ctx->mutex);
+ goto err_free;
+ ctx->vm = i915_vm_open(&ppgtt->vm);
i915_vm_put(&ppgtt->vm);
}
+ mutex_init(&ctx->engines_mutex);
+ e = default_engines(ctx, null_sseu);
+ if (IS_ERR(e))
+ goto err_vm;
+ RCU_INIT_POINTER(ctx->engines, e);
+
+ INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ mutex_init(&ctx->lut_mutex);
+
return ctx;
+err_vm:
+ if (ctx->vm)
+ i915_vm_close(ctx->vm);
err_free:
kfree(ctx);
return NULL;
-
-err_put:
- i915_gem_context_set_closed(ctx);
- i915_gem_context_put(ctx);
- return NULL;
}
void mock_context_close(struct i915_gem_context *ctx)
@@ -80,20 +76,29 @@ void mock_init_contexts(struct drm_i915_private *i915)
struct i915_gem_context *
live_context(struct drm_i915_private *i915, struct file *file)
{
+ struct drm_i915_file_private *fpriv = to_drm_file(file)->driver_priv;
+ struct i915_gem_proto_context *pc;
struct i915_gem_context *ctx;
int err;
u32 id;
- ctx = i915_gem_create_context(i915, 0);
+ pc = proto_context_create(i915, 0);
+ if (IS_ERR(pc))
+ return ERR_CAST(pc);
+
+ ctx = i915_gem_create_context(i915, pc);
+ proto_context_close(pc);
if (IS_ERR(ctx))
return ctx;
i915_gem_context_set_no_error_capture(ctx);
- err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id);
+ err = xa_alloc(&fpriv->context_xa, &id, NULL, xa_limit_32b, GFP_KERNEL);
if (err < 0)
goto err_ctx;
+ gem_context_register(ctx, fpriv, id);
+
return ctx;
err_ctx:
@@ -106,6 +111,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file)
{
struct i915_gem_engines *engines;
struct i915_gem_context *ctx;
+ struct intel_sseu null_sseu = {};
struct intel_context *ce;
engines = alloc_engines(1);
@@ -124,7 +130,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file)
return ERR_CAST(ce);
}
- intel_context_set_gem(ce, ctx);
+ intel_context_set_gem(ce, ctx, null_sseu);
engines->engines[0] = ce;
engines->num_engines = 1;
@@ -139,11 +145,24 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file)
}
struct i915_gem_context *
-kernel_context(struct drm_i915_private *i915)
+kernel_context(struct drm_i915_private *i915,
+ struct i915_address_space *vm)
{
struct i915_gem_context *ctx;
+ struct i915_gem_proto_context *pc;
+
+ pc = proto_context_create(i915, 0);
+ if (IS_ERR(pc))
+ return ERR_CAST(pc);
+
+ if (vm) {
+ if (pc->vm)
+ i915_vm_put(pc->vm);
+ pc->vm = i915_vm_get(vm);
+ }
- ctx = i915_gem_create_context(i915, 0);
+ ctx = i915_gem_create_context(i915, pc);
+ proto_context_close(pc);
if (IS_ERR(ctx))
return ctx;
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
index 2a6121d33352..7a02fd9b5866 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -10,6 +10,7 @@
struct file;
struct drm_i915_private;
struct intel_engine_cs;
+struct i915_address_space;
void mock_init_contexts(struct drm_i915_private *i915);
@@ -25,7 +26,8 @@ live_context(struct drm_i915_private *i915, struct file *file);
struct i915_gem_context *
live_context_for_engine(struct intel_engine_cs *engine, struct file *file);
-struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915,
+ struct i915_address_space *vm);
void kernel_context_close(struct i915_gem_context *ctx);
#endif /* !__MOCK_CONTEXT_H */