diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-08-16 11:52:08 +0300 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2017-08-18 13:59:02 +0300 |
commit | d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc (patch) | |
tree | cbb1cb9f87c16ba6c17cbd47279979c823555d70 /drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
parent | 170fa29b14fadf2deb361589cefe6a78b21b1b22 (diff) | |
download | linux-d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc.tar.xz |
drm/i915: Replace execbuf vma ht with an idr
This was the competing idea long ago, but it was only the rewrite
of the idr as a radixtree and using the radixtree directly ourselves,
along with the realisation that we can store the vma directly in the
radixtree and only need a list for the reverse mapping, that made the
patch performant enough to displace using a hashtable. Though the vma ht
is fast and doesn't require any extra allocation (as we can embed the node
inside the vma), it does require a thread for resizing and serialization
and will have the occasional slow lookup. That is hairy enough to warrant
investigating alternatives and favouring them if equivalent in peak performance.
One advantage of allocating an indirection entry is that we can support a
single shared bo between many clients, something that was done on a
first-come, first-served basis for shared GGTT vma previously. To offset
the extra allocations, we create yet another kmem_cache for them.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170816085210.4199-5-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 79 |
1 files changed, 24 insertions, 55 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 95e461259d24..9f1057c4cf1c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -450,9 +450,7 @@ eb_validate_vma(struct i915_execbuffer *eb, } static int -eb_add_vma(struct i915_execbuffer *eb, - unsigned int i, struct i915_vma *vma, - unsigned int flags) +eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; int err; @@ -482,7 +480,7 @@ eb_add_vma(struct i915_execbuffer *eb, * to find the right target VMA when doing relocations. */ eb->vma[i] = vma; - eb->flags[i] = entry->flags | flags; + eb->flags[i] = entry->flags; vma->exec_flags = &eb->flags[i]; err = 0; @@ -647,19 +645,6 @@ static int eb_reserve(struct i915_execbuffer *eb) } while (1); } -static inline struct hlist_head * -ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle) -{ - return &lut->ht[hash_32(handle, lut->ht_bits)]; -} - -static inline bool -ht_needs_resize(const struct i915_gem_context_vma_lut *lut) -{ - return (4*lut->ht_count > 3*lut->ht_size || - 4*lut->ht_count + 1 < lut->ht_size); -} - static unsigned int eb_batch_index(const struct i915_execbuffer *eb) { if (eb->args->flags & I915_EXEC_BATCH_FIRST) @@ -688,7 +673,7 @@ static int eb_select_context(struct i915_execbuffer *eb) static int eb_lookup_vmas(struct i915_execbuffer *eb) { - struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut; + struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; struct drm_i915_gem_object *uninitialized_var(obj); unsigned int i; int err; @@ -702,24 +687,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); - if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)) - flush_work(&lut->resize); - GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS); - for (i = 0; i < eb->buffer_count; i++) 
{ u32 handle = eb->exec[i].handle; - struct hlist_head *hl = ht_head(lut, handle); - unsigned int flags = 0; + struct i915_lut_handle *lut; struct i915_vma *vma; - hlist_for_each_entry(vma, hl, ctx_node) { - GEM_BUG_ON(vma->ctx != eb->ctx); - - if (vma->ctx_handle != handle) - continue; - + vma = radix_tree_lookup(handles_vma, handle); + if (likely(vma)) goto add_vma; - } obj = i915_gem_object_lookup(eb->file, handle); if (unlikely(!obj)) { @@ -733,26 +708,28 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) goto err_obj; } - /* First come, first served */ - if (!vma->ctx) { - vma->ctx = eb->ctx; - vma->ctx_handle = handle; - hlist_add_head(&vma->ctx_node, hl); - lut->ht_count++; - lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS; - if (i915_vma_is_ggtt(vma)) { - GEM_BUG_ON(obj->vma_hashed); - obj->vma_hashed = vma; - } + lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL); + if (unlikely(!lut)) { + err = -ENOMEM; + goto err_obj; + } - /* transfer ref to ctx */ - obj = NULL; - } else { - flags = __EXEC_OBJECT_HAS_REF; + err = radix_tree_insert(handles_vma, handle, vma); + if (unlikely(err)) { + kfree(lut); + goto err_obj; } + list_add(&lut->obj_link, &obj->lut_list); + list_add(&lut->ctx_link, &eb->ctx->handles_list); + lut->ctx = eb->ctx; + lut->handle = handle; + + /* transfer ref to ctx */ + obj = NULL; + add_vma: - err = eb_add_vma(eb, i, vma, flags); + err = eb_add_vma(eb, i, vma); if (unlikely(err)) goto err_obj; @@ -760,13 +737,6 @@ add_vma: GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); } - if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) { - if (ht_needs_resize(lut)) - queue_work(system_highpri_wq, &lut->resize); - else - lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; - } - /* take note of the batch buffer before we might reorder the lists */ i = eb_batch_index(eb); eb->batch = eb->vma[i]; @@ -794,7 +764,6 @@ err_obj: i915_gem_object_put(obj); err_vma: eb->vma[i] = NULL; - lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; return err; } |