diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2019-03-29 16:40:24 +0300 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2019-04-05 13:00:28 +0300 |
commit | bac24f59f45419a3853af2f58130cb82b7bdca64 (patch) | |
tree | 75086637a30f103be6110014a734dc539a82a851 /drivers/gpu/drm/i915/selftests/intel_lrc.c | |
parent | b66ea2c2cf59b80c38a14127fafb49fdf0df9180 (diff) | |
download | linux-bac24f59f45419a3853af2f58130cb82b7bdca64.tar.xz |
drm/i915/execlists: Enable coarse preemption boundaries for gen8
When we introduced preemption, we chose to keep it disabled for gen8 as
supporting preemption inside GPGPU user batches required various w/a in
userspace. Since then, the desire to preempt long queues of requests
between batches (e.g. within busywaiting semaphores) has grown. So allow
arbitration within the busywaits and between requests, but disable
arbitration within user batches so that we can preempt between requests
and not risk breaking GPGPU.
However, since this preemption is much coarser and doesn't interfere
with userspace, we decline to include it amongst the scheduler
capabilities. (This is also required for us to skip over the preemption
selftests that expect to be able to preempt user batches.)
Michal suggested that we could perhaps allow preemption inside gen8
userspace batches if we can satisfy ourselves that the default
preemption settings are viable with existing userspace (principally
OpenCL which already should carry any known workaround). We could then
merge the two code paths back into one, even dropping the artifical
has-preemption device feature flag.
Testcase: igt/gem_exec_scheduler/semaphore-user
References: beecec901790 ("drm/i915/execlists: Preemption!")
Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Michal Winiarski <michal.winiarski@intel.com> #irc
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190329134024.5254-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/selftests/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/selftests/intel_lrc.c | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 0d3cae564db8..8bcd4e1d58ee 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -76,6 +76,185 @@ err_unlock: return err; } +static int live_busywait_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct intel_engine_cs *engine; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + u32 *map; + + /* + * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can + * preempt the busywaits used to synchronise between rings. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_unlock; + ctx_hi->sched.priority = INT_MAX; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = INT_MIN; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_ctx_lo; + } + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto err_obj; + } + + vma = i915_vma_instance(obj, &i915->ggtt.vm, 0); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_map; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_engine(engine, i915, id) { + struct i915_request *lo, *hi; + struct igt_live_test t; + u32 *cs; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + err = -EIO; + goto err_vma; + } + + /* + * We create two requests. The low priority request + * busywaits on a semaphore (inside the ringbuffer where + * is should be preemptible) and the high priority requests + * uses a MI_STORE_DWORD_IMM to update the semaphore value + * allowing the first request to complete. If preemption + * fails, we hang instead. + */ + + lo = i915_request_alloc(engine, ctx_lo); + if (IS_ERR(lo)) { + err = PTR_ERR(lo); + goto err_vma; + } + + cs = intel_ring_begin(lo, 8); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(lo); + goto err_vma; + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + *cs++ = 1; + + /* XXX Do we need a flush + invalidate here? */ + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + + intel_ring_advance(lo, cs); + i915_request_add(lo); + + if (wait_for(READ_ONCE(*map), 10)) { + err = -ETIMEDOUT; + goto err_vma; + } + + /* Low priority request should be busywaiting now */ + if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) { + pr_err("%s: Busywaiting request did not!\n", + engine->name); + err = -EIO; + goto err_vma; + } + + hi = i915_request_alloc(engine, ctx_hi); + if (IS_ERR(hi)) { + err = PTR_ERR(hi); + goto err_vma; + } + + cs = intel_ring_begin(hi, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(hi); + goto err_vma; + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + *cs++ = 0; + + intel_ring_advance(hi, cs); + i915_request_add(hi); + + if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to preempt semaphore busywait!\n", + engine->name); + + intel_engine_dump(engine, &p, "%s\n", engine->name); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(i915); + err = -EIO; + goto err_vma; + } + GEM_BUG_ON(READ_ONCE(*map)); + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_vma; + } + } + + err = 0; +err_vma: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + static int live_preempt(void *arg) { struct drm_i915_private *i915 = arg; @@ -1127,6 +1306,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), + SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), SUBTEST(live_suppress_self_preempt), |