From b47161858ba13c9c7e03333132230d66e008dd55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Apr 2015 13:41:17 +0100 Subject: drm/i915: Implement inter-engine read-read optimisations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin [v8] [danvet: s/\/req/g] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_debug.c | 92 +++++++++-------------------------- 1 file changed, 22 insertions(+), 70 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_debug.c') diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c index f462d1b51d97..17299d04189f 100644 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ b/drivers/gpu/drm/i915/i915_gem_debug.c @@ -34,82 +34,34 @@ int i915_verify_lists(struct drm_device *dev) { static int warned; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; + struct intel_engine_cs *ring; int err = 0; + int i; if (warned) return 0; - list_for_each_entry(obj, &dev_priv->render_ring.active_list, list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed render active %p\n", obj); - err++; - break; - } else if (!obj->active || - (obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) { - DRM_ERROR("invalid render active %p (a %d r %x)\n", - obj, - obj->active, - obj->base.read_domains); - err++; - } else if (obj->base.write_domain && list_empty(&obj->gpu_write_list)) { - DRM_ERROR("invalid render active %p (w %x, gwl %d)\n", - obj, - obj->base.write_domain, - !list_empty(&obj->gpu_write_list)); - err++; - } - } - - list_for_each_entry(obj, &dev_priv->mm.flushing_list, list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed flushing %p\n", obj); - err++; - break; - } else if (!obj->active || - (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0 || - list_empty(&obj->gpu_write_list)) { - DRM_ERROR("invalid flushing %p (a %d w %x gwl %d)\n", - obj, - obj->active, - obj->base.write_domain, - !list_empty(&obj->gpu_write_list)); - err++; - } - } - - list_for_each_entry(obj, &dev_priv->mm.gpu_write_list, gpu_write_list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed gpu write %p\n", obj); - err++; - break; - } else if (!obj->active || - (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) { - DRM_ERROR("invalid gpu write %p (a %d w %x)\n", - obj, - obj->active, - obj->base.write_domain); - err++; - } - } - - list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed inactive %p\n", obj); - err++; - break; - } else if (obj->pin_count || obj->active || - (obj->base.write_domain & I915_GEM_GPU_DOMAINS)) { - DRM_ERROR("invalid inactive %p (p %d a %d w %x)\n", - obj, - obj->pin_count, obj->active, - obj->base.write_domain); - err++; + for_each_ring(ring, dev_priv, i) { + list_for_each_entry(obj, &ring->active_list, ring_list[ring->id]) { + if (obj->base.dev != dev || + !atomic_read(&obj->base.refcount.refcount)) { + DRM_ERROR("%s: freed active obj %p\n", + ring->name, obj); + err++; + break; + } else if (!obj->active || + obj->last_read_req[ring->id] == NULL) { + DRM_ERROR("%s: invalid active obj %p\n", + ring->name, obj); + err++; + } else if (obj->base.write_domain) { + DRM_ERROR("%s: invalid write obj %p (w %x)\n", + ring->name, + obj, obj->base.write_domain); + err++; + } } } -- cgit v1.2.3