diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
38 files changed, 1062 insertions, 1196 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index bc0223716906..daf9284ef1f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -27,15 +27,8 @@ static void __do_clflush(struct drm_i915_gem_object *obj) static int clflush_work(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); - struct drm_i915_gem_object *obj = clflush->obj; - int err; - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - __do_clflush(obj); - i915_gem_object_unpin_pages(obj); + __do_clflush(clflush->obj); return 0; } @@ -44,6 +37,7 @@ static void clflush_release(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); + i915_gem_object_unpin_pages(clflush->obj); i915_gem_object_put(clflush->obj); } @@ -63,6 +57,11 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) if (!clflush) return NULL; + if (__i915_gem_object_get_pages(obj) < 0) { + kfree(clflush); + return NULL; + } + dma_fence_work_init(&clflush->base, &clflush_ops); clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4d2f40cf237b..fd8ee52e17a4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -232,6 +232,8 @@ static void intel_context_set_gem(struct intel_context *ce, if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); + + intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us); } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -386,38 +388,6 @@ static bool __cancel_engine(struct intel_engine_cs *engine) return intel_engine_pulse(engine) == 0; } -static bool -__active_engine(struct i915_request *rq, struct intel_engine_cs **active) -{ - struct intel_engine_cs *engine, *locked; - bool ret = false; - - /* - * Serialise with __i915_request_submit() so that it sees - * is-banned?, or we know the request is already inflight. - * - * Note that rq->engine is unstable, and so we double - * check that we have acquired the lock on the final engine. - */ - locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); - while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); - locked = engine; - spin_lock(&locked->active.lock); - } - - if (i915_request_is_active(rq)) { - if (!__i915_request_is_complete(rq)) - *active = locked; - ret = true; - } - - spin_unlock_irq(&locked->active.lock); - - return ret; -} - static struct intel_engine_cs *active_engine(struct intel_context *ce) { struct intel_engine_cs *engine = NULL; @@ -445,7 +415,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) /* Check with the backend if the request is inflight */ found = true; if (likely(rcu_access_pointer(rq->timeline) == ce->timeline)) - found = __active_engine(rq, &engine); + found = i915_request_active_engine(rq, &engine); i915_request_put(rq); if (found) @@ -679,7 +649,7 @@ __create_context(struct drm_i915_private *i915) kref_init(&ctx->ref); ctx->i915 = i915; - ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); + ctx->sched.priority = I915_PRIORITY_NORMAL; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); @@ -822,6 +792,41 @@ static void __assign_timeline(struct i915_gem_context *ctx, context_apply_all(ctx, __apply_timeline, timeline); } +static int __apply_watchdog(struct intel_context *ce, void *timeout_us) +{ + return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us); +} + +static int +__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us) +{ + int ret; + + ret = context_apply_all(ctx, __apply_watchdog, + (void *)(uintptr_t)timeout_us); + if (!ret) + ctx->watchdog.timeout_us = timeout_us; + + return ret; +} + +static void __set_default_fence_expiry(struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = ctx->i915; + int ret; + + if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) || + !i915->params.request_timeout_ms) + return; + + /* Default expiry for user fences. */ + ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000); + if (ret) + drm_notice(&i915->drm, + "Failed to configure default fence expiry! (%d)", + ret); +} + static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { @@ -866,6 +871,8 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) intel_timeline_put(timeline); } + __set_default_fence_expiry(ctx); + trace_i915_context_create(ctx); return ctx; @@ -1959,7 +1966,7 @@ static int set_priority(struct i915_gem_context *ctx, !capable(CAP_SYS_NICE)) return -EPERM; - ctx->sched.priority = I915_USER_PRIORITY(priority); + ctx->sched.priority = priority; context_apply_all(ctx, __apply_priority, ctx); return 0; @@ -2463,7 +2470,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_PRIORITY: args->size = 0; - args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; + args->value = ctx->sched.priority; break; case I915_CONTEXT_PARAM_SSEU: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 1449f54924e0..340473aa70de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -154,6 +154,10 @@ struct i915_gem_context { */ atomic_t active_count; + struct { + u64 timeout_us; + } watchdog; + /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 04e9c04545ad..ccede73c6465 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -25,7 +25,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages_unlocked(obj); if (ret) goto err; @@ -82,7 +82,7 @@ static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); void *vaddr; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -123,42 +123,48 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); + struct i915_gem_ww_ctx ww; int err; - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_gem_object_lock_interruptible(obj, NULL); - if (err) - goto out; - - err = i915_gem_object_set_to_cpu_domain(obj, write); - i915_gem_object_unlock(obj); - -out: - i915_gem_object_unpin_pages(obj); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_pin_pages(obj); + if (!err) { + err = i915_gem_object_set_to_cpu_domain(obj, write); + i915_gem_object_unpin_pages(obj); + } + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct i915_gem_ww_ctx ww; int err; - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_gem_object_lock_interruptible(obj, NULL); - if (err) - goto out; - - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); - -out: - i915_gem_object_unpin_pages(obj); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_pin_pages(obj); + if (!err) { + err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unpin_pages(obj); + } + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } @@ -244,6 +250,9 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } } + if (i915_gem_object_size_2big(dma_buf->size)) + return ERR_PTR(-E2BIG); + /* need to attach */ attach = dma_buf_attach(dma_buf, dev->dev); if (IS_ERR(attach)) @@ -258,7 +267,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, 0); obj->base.import_attach = attach; obj->base.resv = dma_buf->resv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 36f54cedaaeb..073822100da7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -335,7 +335,14 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, * not allowed to be changed by userspace. */ if (i915_gem_object_is_proxy(obj)) { - ret = -ENXIO; + /* + * Silently allow cached for userptr; the vulkan driver + * sets all objects to cached + */ + if (!i915_gem_object_is_userptr(obj) || + args->caching != I915_CACHING_CACHED) + ret = -ENXIO; + goto out; } @@ -359,12 +366,12 @@ out: */ struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, u32 alignment, const struct i915_ggtt_view *view, unsigned int flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_ww_ctx ww; struct i915_vma *vma; int ret; @@ -372,11 +379,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); - i915_gem_ww_ctx_init(&ww, true); -retry: - ret = i915_gem_object_lock(obj, &ww); - if (ret) - goto err; /* * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is @@ -391,7 +393,7 @@ retry: HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE); if (ret) - goto err; + return ERR_PTR(ret); /* * As the user may map the buffer once pinned in the display plane @@ -404,33 +406,20 @@ retry: vma = ERR_PTR(-ENOSPC); if ((flags & PIN_MAPPABLE) == 0 && (!view || view->type == I915_GGTT_VIEW_NORMAL)) - vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment, + vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment, flags | PIN_MAPPABLE | PIN_NONBLOCK); if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) - vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, + vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment, flags); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err; - } + if (IS_ERR(vma)) + return vma; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); i915_vma_mark_scanout(vma); i915_gem_object_flush_if_display_locked(obj); -err: - if (ret == -EDEADLK) { - ret = i915_gem_ww_ctx_backoff(&ww); - if (!ret) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - if (ret) - return ERR_PTR(ret); - return vma; } @@ -526,6 +515,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; + if (i915_gem_object_is_userptr(obj)) { + /* + * Try to grab userptr pages, iris uses set_domain to check + * userptr validity + */ + err = i915_gem_object_userptr_validate(obj); + if (!err) + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | + (write_domain ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT); + goto out; + } + /* * Proxy objects do not control access to the backing storage, ergo * they cannot be used as a means to manipulate the cache domain @@ -537,6 +541,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, goto out; } + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out; + /* * Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through @@ -548,7 +556,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, */ err = i915_gem_object_pin_pages(obj); if (err) - goto out; + goto out_unlock; /* * Already in the desired write domain? Nothing for us to do! @@ -563,10 +571,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (READ_ONCE(obj->write_domain) == read_domains) goto out_unpin; - err = i915_gem_object_lock_interruptible(obj, NULL); - if (err) - goto out_unpin; - if (read_domains & I915_GEM_DOMAIN_WC) err = i915_gem_object_set_to_wc_domain(obj, write_domain); else if (read_domains & I915_GEM_DOMAIN_GTT) @@ -574,13 +578,15 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, else err = i915_gem_object_set_to_cpu_domain(obj, write_domain); +out_unpin: + i915_gem_object_unpin_pages(obj); + +out_unlock: i915_gem_object_unlock(obj); - if (write_domain) + if (!err && write_domain) i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); -out_unpin: - i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d70ca36f74f6..5964e67c7d36 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -28,6 +28,7 @@ #include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -49,16 +50,19 @@ enum { #define DBG_FORCE_RELOC 0 /* choose one of the above! */ }; -#define __EXEC_OBJECT_HAS_PIN BIT(31) -#define __EXEC_OBJECT_HAS_FENCE BIT(30) -#define __EXEC_OBJECT_NEEDS_MAP BIT(29) -#define __EXEC_OBJECT_NEEDS_BIAS BIT(28) -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ +/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */ +#define __EXEC_OBJECT_HAS_PIN BIT(30) +#define __EXEC_OBJECT_HAS_FENCE BIT(29) +#define __EXEC_OBJECT_USERPTR_INIT BIT(28) +#define __EXEC_OBJECT_NEEDS_MAP BIT(27) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(26) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */ #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) #define __EXEC_ENGINE_PINNED BIT(30) -#define __EXEC_INTERNAL_FLAGS (~0u << 30) +#define __EXEC_USERPTR_USED BIT(29) +#define __EXEC_INTERNAL_FLAGS (~0u << 29) #define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) @@ -419,13 +423,14 @@ static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry, return pin_flags; } -static inline bool +static inline int eb_pin_vma(struct i915_execbuffer *eb, const struct drm_i915_gem_exec_object2 *entry, struct eb_vma *ev) { struct i915_vma *vma = ev->vma; u64 pin_flags; + int err; if (vma->node.size) pin_flags = vma->node.start; @@ -437,24 +442,29 @@ eb_pin_vma(struct i915_execbuffer *eb, pin_flags |= PIN_GLOBAL; /* Attempt to reuse the current location if available */ - /* TODO: Add -EDEADLK handling here */ - if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) { + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags); + if (err == -EDEADLK) + return err; + + if (unlikely(err)) { if (entry->flags & EXEC_OBJECT_PINNED) - return false; + return err; /* Failing that pick any _free_ space if suitable */ - if (unlikely(i915_vma_pin_ww(vma, &eb->ww, + err = i915_vma_pin_ww(vma, &eb->ww, entry->pad_to_size, entry->alignment, eb_pin_flags(entry, ev->flags) | - PIN_USER | PIN_NOEVICT))) - return false; + PIN_USER | PIN_NOEVICT); + if (unlikely(err)) + return err; } if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { - if (unlikely(i915_vma_pin_fence(vma))) { + err = i915_vma_pin_fence(vma); + if (unlikely(err)) { i915_vma_unpin(vma); - return false; + return err; } if (vma->fence) @@ -462,7 +472,10 @@ eb_pin_vma(struct i915_execbuffer *eb, } ev->flags |= __EXEC_OBJECT_HAS_PIN; - return !eb_vma_misplaced(entry, vma, ev->flags); + if (eb_vma_misplaced(entry, vma, ev->flags)) + return -EBADSLT; + + return 0; } static inline void @@ -483,6 +496,13 @@ eb_validate_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry, struct i915_vma *vma) { + /* Relocations are disallowed for all platforms after TGL-LP. This + * also covers all platforms with local memory. + */ + if (entry->relocation_count && + INTEL_GEN(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) + return -EINVAL; + if (unlikely(entry->flags & eb->invalid_flags)) return -EINVAL; @@ -853,6 +873,26 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) } eb_add_vma(eb, i, batch, vma); + + if (i915_gem_object_is_userptr(vma->obj)) { + err = i915_gem_object_userptr_submit_init(vma->obj); + if (err) { + if (i + 1 < eb->buffer_count) { + /* + * Execbuffer code expects last vma entry to be NULL, + * since we already initialized this entry, + * set the next value to NULL or we mess up + * cleanup handling. + */ + eb->vma[i + 1].vma = NULL; + } + + return err; + } + + eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT; + eb->args->flags |= __EXEC_USERPTR_USED; + } } if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) { @@ -898,7 +938,11 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) if (err) return err; - if (eb_pin_vma(eb, entry, ev)) { + err = eb_pin_vma(eb, entry, ev); + if (err == -EDEADLK) + return err; + + if (!err) { if (entry->offset != vma->node.start) { entry->offset = vma->node.start | UPDATE; eb->args->flags |= __EXEC_HAS_RELOC; @@ -914,6 +958,12 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } } + if (!(ev->flags & EXEC_OBJECT_WRITE)) { + err = dma_resv_reserve_shared(vma->resv, 1); + if (err) + return err; + } + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); } @@ -944,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(struct i915_execbuffer *eb, bool final) +static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr) { const unsigned int count = eb->buffer_count; unsigned int i; @@ -958,6 +1008,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final) eb_unreserve_vma(ev); + if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) { + ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT; + i915_gem_object_userptr_submit_fini(vma->obj); + } + if (final) i915_vma_put(vma); } @@ -1294,6 +1349,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, err = PTR_ERR(cmd); goto err_pool; } + intel_gt_buffer_pool_mark_used(pool); memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); @@ -1895,6 +1951,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb) return 0; } +static int eb_reinit_userptr(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + int ret; + + if (likely(!(eb->args->flags & __EXEC_USERPTR_USED))) + return 0; + + for (i = 0; i < count; i++) { + struct eb_vma *ev = &eb->vma[i]; + + if (!i915_gem_object_is_userptr(ev->vma->obj)) + continue; + + ret = i915_gem_object_userptr_submit_init(ev->vma->obj); + if (ret) + return ret; + + ev->flags |= __EXEC_OBJECT_USERPTR_INIT; + } + + return 0; +} + static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb, struct i915_request *rq) { @@ -1909,7 +1990,7 @@ repeat: } /* We may process another execbuffer during the unlock... */ - eb_release_vmas(eb, false); + eb_release_vmas(eb, false, true); i915_gem_ww_ctx_fini(&eb->ww); if (rq) { @@ -1951,7 +2032,7 @@ repeat: } if (!err) - flush_workqueue(eb->i915->mm.userptr_wq); + err = eb_reinit_userptr(eb); err_relock: i915_gem_ww_ctx_init(&eb->ww, true); @@ -2013,7 +2094,7 @@ repeat_validate: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false); + eb_release_vmas(eb, false, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto repeat_validate; @@ -2110,7 +2191,7 @@ retry: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false); + eb_release_vmas(eb, false, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto retry; @@ -2181,8 +2262,33 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) } if (err == 0) - err = i915_vma_move_to_active(vma, eb->request, flags); + err = i915_vma_move_to_active(vma, eb->request, + flags | __EXEC_OBJECT_NO_RESERVE); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { + spin_lock(&eb->i915->mm.notifier_lock); + + /* + * count is always at least 1, otherwise __EXEC_USERPTR_USED + * could not have been set + */ + for (i = 0; i < count; i++) { + struct eb_vma *ev = &eb->vma[i]; + struct drm_i915_gem_object *obj = ev->vma->obj; + + if (!i915_gem_object_is_userptr(obj)) + continue; + + err = i915_gem_object_userptr_submit_done(obj); + if (err) + break; + } + + spin_unlock(&eb->i915->mm.notifier_lock); } +#endif if (unlikely(err)) goto err_skip; @@ -2274,24 +2380,45 @@ struct eb_parse_work { struct i915_vma *trampoline; unsigned long batch_offset; unsigned long batch_length; + unsigned long *jump_whitelist; + const void *batch_map; + void *shadow_map; }; static int __eb_parse(struct dma_fence_work *work) { struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + int ret; + bool cookie; - return intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->trampoline); + cookie = dma_fence_begin_signalling(); + ret = intel_engine_cmd_parser(pw->engine, + pw->batch, + pw->batch_offset, + pw->batch_length, + pw->shadow, + pw->jump_whitelist, + pw->shadow_map, + pw->batch_map); + dma_fence_end_signalling(cookie); + + return ret; } static void __eb_parse_release(struct dma_fence_work *work) { struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + if (!IS_ERR_OR_NULL(pw->jump_whitelist)) + kfree(pw->jump_whitelist); + + if (pw->batch_map) + i915_gem_object_unpin_map(pw->batch->obj); + else + i915_gem_object_unpin_pages(pw->batch->obj); + + i915_gem_object_unpin_map(pw->shadow->obj); + if (pw->trampoline) i915_active_release(&pw->trampoline->active); i915_active_release(&pw->shadow->active); @@ -2341,6 +2468,8 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, struct i915_vma *trampoline) { struct eb_parse_work *pw; + struct drm_i915_gem_object *batch = eb->batch->vma->obj; + bool needs_clflush; int err; GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); @@ -2364,6 +2493,34 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, goto err_shadow; } + pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); + if (IS_ERR(pw->shadow_map)) { + err = PTR_ERR(pw->shadow_map); + goto err_trampoline; + } + + needs_clflush = + !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); + + pw->batch_map = ERR_PTR(-ENODEV); + if (needs_clflush && i915_has_memcpy_from_wc()) + pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); + + if (IS_ERR(pw->batch_map)) { + err = i915_gem_object_pin_pages(batch); + if (err) + goto err_unmap_shadow; + pw->batch_map = NULL; + } + + pw->jump_whitelist = + intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, + trampoline); + if (IS_ERR(pw->jump_whitelist)) { + err = PTR_ERR(pw->jump_whitelist); + goto err_unmap_batch; + } + dma_fence_work_init(&pw->base, &eb_parse_ops); pw->engine = eb->engine; @@ -2382,6 +2539,10 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, if (err) goto err_commit; + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_commit; + /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, @@ -2403,6 +2564,16 @@ err_commit: dma_fence_work_commit_imm(&pw->base); return err; +err_unmap_batch: + if (pw->batch_map) + i915_gem_object_unpin_map(batch); + else + i915_gem_object_unpin_pages(batch); +err_unmap_shadow: + i915_gem_object_unpin_map(shadow->obj); +err_trampoline: + if (trampoline) + i915_active_release(&trampoline->active); err_shadow: i915_active_release(&shadow->active); err_batch: @@ -2474,6 +2645,7 @@ static int eb_parse(struct i915_execbuffer *eb) err = PTR_ERR(shadow); goto err; } + intel_gt_buffer_pool_mark_used(pool); i915_gem_object_set_readonly(shadow->obj); shadow->private = pool; @@ -3263,7 +3435,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_lookup_vmas(&eb); if (err) { - eb_release_vmas(&eb, true); + eb_release_vmas(&eb, true, true); goto err_engine; } @@ -3335,6 +3507,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); + err_request: i915_request_get(eb.request); err = eb_request_add(&eb, err); @@ -3355,7 +3528,7 @@ err_request: i915_request_put(eb.request); err_vma: - eb_release_vmas(&eb, true); + eb_release_vmas(&eb, true, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); WARN_ON(err == -EDEADLK); @@ -3401,106 +3574,6 @@ static bool check_buffer_count(size_t count) return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); } -/* - * Legacy execbuffer just creates an exec2 list from the original exec object - * list array and passes it to the real function. - */ -int -i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *i915 = to_i915(dev); - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_execbuffer2 exec2; - struct drm_i915_gem_exec_object *exec_list = NULL; - struct drm_i915_gem_exec_object2 *exec2_list = NULL; - const size_t count = args->buffer_count; - unsigned int i; - int err; - - if (!check_buffer_count(count)) { - drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); - return -EINVAL; - } - - exec2.buffers_ptr = args->buffers_ptr; - exec2.buffer_count = args->buffer_count; - exec2.batch_start_offset = args->batch_start_offset; - exec2.batch_len = args->batch_len; - exec2.DR1 = args->DR1; - exec2.DR4 = args->DR4; - exec2.num_cliprects = args->num_cliprects; - exec2.cliprects_ptr = args->cliprects_ptr; - exec2.flags = I915_EXEC_RENDER; - i915_execbuffer2_set_context_id(exec2, 0); - - err = i915_gem_check_execbuffer(&exec2); - if (err) - return err; - - /* Copy in the exec list from userland */ - exec_list = kvmalloc_array(count, sizeof(*exec_list), - __GFP_NOWARN | GFP_KERNEL); - - /* Allocate extra slots for use by the command parser */ - exec2_list = kvmalloc_array(count + 2, eb_element_size(), - __GFP_NOWARN | GFP_KERNEL); - if (exec_list == NULL || exec2_list == NULL) { - drm_dbg(&i915->drm, - "Failed to allocate exec list for %d buffers\n", - args->buffer_count); - kvfree(exec_list); - kvfree(exec2_list); - return -ENOMEM; - } - err = copy_from_user(exec_list, - u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec_list) * count); - if (err) { - drm_dbg(&i915->drm, "copy %d exec entries failed %d\n", - args->buffer_count, err); - kvfree(exec_list); - kvfree(exec2_list); - return -EFAULT; - } - - for (i = 0; i < args->buffer_count; i++) { - exec2_list[i].handle = exec_list[i].handle; - exec2_list[i].relocation_count = exec_list[i].relocation_count; - exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; - exec2_list[i].alignment = exec_list[i].alignment; - exec2_list[i].offset = exec_list[i].offset; - if (INTEL_GEN(to_i915(dev)) < 4) - exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; - else - exec2_list[i].flags = 0; - } - - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); - if (exec2.flags & __EXEC_HAS_RELOC) { - struct drm_i915_gem_exec_object __user *user_exec_list = - u64_to_user_ptr(args->buffers_ptr); - - /* Copy the new buffer offsets back to the user's exec list. */ - for (i = 0; i < args->buffer_count; i++) { - if (!(exec2_list[i].offset & UPDATE)) - continue; - - exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); - exec2_list[i].offset &= PIN_OFFSET_MASK; - if (__copy_to_user(&user_exec_list[i].offset, - &exec2_list[i].offset, - sizeof(user_exec_list[i].offset))) - break; - } - } - - kvfree(exec_list); - kvfree(exec2_list); - return err; -} - int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c deleted file mode 100644 index 8ab842c80f99..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "i915_gem_object.h" - -struct stub_fence { - struct dma_fence dma; - struct i915_sw_fence chain; -}; - -static int __i915_sw_fence_call -stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - struct stub_fence *stub = container_of(fence, typeof(*stub), chain); - - switch (state) { - case FENCE_COMPLETE: - dma_fence_signal(&stub->dma); - break; - - case FENCE_FREE: - dma_fence_put(&stub->dma); - break; - } - - return NOTIFY_DONE; -} - -static const char *stub_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *stub_timeline_name(struct dma_fence *fence) -{ - return "object"; -} - -static void stub_release(struct dma_fence *fence) -{ - struct stub_fence *stub = container_of(fence, typeof(*stub), dma); - - i915_sw_fence_fini(&stub->chain); - - BUILD_BUG_ON(offsetof(typeof(*stub), dma)); - dma_fence_free(&stub->dma); -} - -static const struct dma_fence_ops stub_fence_ops = { - .get_driver_name = stub_driver_name, - .get_timeline_name = stub_timeline_name, - .release = stub_release, -}; - -struct dma_fence * -i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) -{ - struct stub_fence *stub; - - assert_object_held(obj); - - stub = kmalloc(sizeof(*stub), GFP_KERNEL); - if (!stub) - return NULL; - - i915_sw_fence_init(&stub->chain, stub_notify); - dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock, - 0, 0); - - if (i915_sw_fence_await_reservation(&stub->chain, - obj->base.resv, NULL, true, - i915_fence_timeout(to_i915(obj->base.dev)), - I915_FENCE_GFP) < 0) - goto err; - - dma_resv_add_excl_fence(obj->base.resv, &stub->dma); - - return &stub->dma; - -err: - stub_release(&stub->dma); - return NULL; -} - -void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, - struct dma_fence *fence) -{ - struct stub_fence *stub = container_of(fence, typeof(*stub), dma); - - i915_sw_fence_commit(&stub->chain); -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ad22f42541bd..21cc40897ca8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -138,8 +138,7 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { .name = "i915_gem_object_internal", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = i915_gem_object_get_pages_internal, .put_pages = i915_gem_object_put_pages_internal, }; @@ -178,7 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index 87d8b27f426d..7fd22f3efbef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -14,8 +14,6 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 194f35342710..ce1c83c13d05 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -40,13 +40,13 @@ int __i915_gem_lmem_object_init(struct intel_memory_region *mem, struct drm_i915_private *i915 = mem->i915; drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class, flags); obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - i915_gem_object_init_memory_region(obj, mem, flags); + i915_gem_object_init_memory_region(obj, mem); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index ec28a6cde49b..2561a2f1e54f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -246,12 +246,15 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) area->vm_flags & VM_WRITE)) return VM_FAULT_SIGBUS; + if (i915_gem_object_lock_interruptible(obj, NULL)) + return VM_FAULT_NOPAGE; + err = i915_gem_object_pin_pages(obj); if (err) goto out; iomap = -1; - if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) { + if (!i915_gem_object_has_struct_page(obj)) { iomap = obj->mm.region->iomap.base; iomap -= obj->mm.region->region.start; } @@ -269,6 +272,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) i915_gem_object_unpin_pages(obj); out: + i915_gem_object_unlock(obj); return i915_error_to_vmf_fault(err); } @@ -417,7 +421,9 @@ vm_access(struct vm_area_struct *area, unsigned long addr, { struct i915_mmap_offset *mmo = area->vm_private_data; struct drm_i915_gem_object *obj = mmo->obj; + struct i915_gem_ww_ctx ww; void *vaddr; + int err = 0; if (i915_gem_object_is_readonly(obj) && write) return -EACCES; @@ -426,10 +432,18 @@ vm_access(struct vm_area_struct *area, unsigned long addr, if (addr >= obj->base.size) return -EINVAL; + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (err) + goto out; + /* As this is primarily for debugging, let's focus on simplicity */ vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out; + } if (write) { memcpy(vaddr + addr, buf, len); @@ -439,6 +453,16 @@ vm_access(struct vm_area_struct *area, unsigned long addr, } i915_gem_object_unpin_map(obj); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (err) + return err; return len; } @@ -653,9 +677,8 @@ __assign_mmap_offset(struct drm_file *file, } if (mmap_type != I915_MMAP_TYPE_GTT && - !i915_gem_object_type_has(obj, - I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_HAS_IOMEM)) { + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) { err = -ENODEV; goto out; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 70f798405f7f..ea74cbca95be 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -60,10 +60,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops, - struct lock_class_key *key) + struct lock_class_key *key, unsigned flags) { - __mutex_init(&obj->mm.lock, ops->name ?: "obj->mm.lock", key); - spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); @@ -78,16 +76,14 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, init_rcu_head(&obj->rcu); obj->ops = ops; + GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + obj->flags = flags; obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_page.lock); INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_dma_page.lock); - - if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj)) - i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev), - &obj->mm.lock); } /** diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index d0ae834d787a..2ebd79537aea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -16,6 +16,32 @@ #include "i915_gem_gtt.h" #include "i915_vma_types.h" +/* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! + * + * Aside from our own locals (for which we have no excuse!): + * - sg_table embeds unsigned int for num_pages + * - get_user_pages*() mixed ints with longs + */ +#define GEM_CHECK_SIZE_OVERFLOW(sz) \ + GEM_WARN_ON((sz) >> PAGE_SHIFT > INT_MAX) + +static inline bool i915_gem_object_size_2big(u64 size) +{ + struct drm_i915_gem_object *obj; + + if (GEM_CHECK_SIZE_OVERFLOW(size)) + return true; + + if (overflows_type(size, obj->base.size)) + return true; + + return false; +} + void i915_gem_init__objects(struct drm_i915_private *i915); struct drm_i915_gem_object *i915_gem_object_alloc(void); @@ -23,7 +49,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops, - struct lock_class_key *key); + struct lock_class_key *key, + unsigned alloc_flags); struct drm_i915_gem_object * i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size); @@ -32,11 +59,21 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, const void *data, resource_size_t size); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; + void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, bool needs_clflush); +int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args); +int i915_gem_object_pread_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args); + int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align); +void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, + struct sg_table *pages); +void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, + struct sg_table *pages); void i915_gem_flush_free_objects(struct drm_i915_private *i915); @@ -107,6 +144,20 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) +/* + * If more than one potential simultaneous locker, assert held. + */ +static inline void assert_object_held_shared(struct drm_i915_gem_object *obj) +{ + /* + * Note mm list lookup is protected by + * kref_get_unless_zero(). + */ + if (IS_ENABLED(CONFIG_LOCKDEP) && + kref_read(&obj->base.refcount) > 0) + assert_object_held(obj); +} + static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, bool intr) @@ -152,11 +203,6 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) dma_resv_unlock(obj->base.resv); } -struct dma_fence * -i915_gem_object_lock_fence(struct drm_i915_gem_object *obj); -void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, - struct dma_fence *fence); - static inline void i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) { @@ -215,7 +261,7 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj, static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE); + return obj->flags & I915_BO_ALLOC_STRUCT_PAGE; } static inline bool @@ -243,12 +289,6 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj) } static inline bool -i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL); -} - -static inline bool i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) { return READ_ONCE(obj->frontbuffer); @@ -299,22 +339,22 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset); + unsigned int *offset, bool allow_alloc); static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset) + unsigned int *offset, bool allow_alloc) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset); + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc); } static inline struct scatterlist * i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset) + unsigned int *offset, bool allow_alloc) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset); + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc); } struct page * @@ -341,27 +381,10 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ - I915_MM_NORMAL = 0, - /* - * Only used by struct_mutex, when called "recursively" from - * direct-reclaim-esque. Safe because there is only every one - * struct_mutex in the entire system. - */ - I915_MM_SHRINKER = 1, - /* - * Used for obj->mm.lock when allocating pages. Safe because the object - * isn't yet on any LRU, and therefore the shrinker can't deadlock on - * it. As soon as the object has pages, obj->mm.lock nests within - * fs_reclaim. - */ - I915_MM_GET_PAGES = 1, -}; - static inline int __must_check i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - might_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); + assert_object_held(obj); if (atomic_inc_not_zero(&obj->mm.pages_pin_count)) return 0; @@ -369,6 +392,8 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) return __i915_gem_object_get_pages(obj); } +int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj); + static inline bool i915_gem_object_has_pages(struct drm_i915_gem_object *obj) { @@ -427,6 +452,9 @@ void i915_gem_object_writeback(struct drm_i915_gem_object *obj); void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type); +void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, + enum i915_map_type type); + void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, unsigned long offset, unsigned long size); @@ -495,6 +523,7 @@ int __must_check i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, u32 alignment, const struct i915_ggtt_view *view, unsigned int flags); @@ -558,4 +587,25 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); +#ifdef CONFIG_MMU_NOTIFIER +static inline bool +i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) +{ + return obj->userptr.notifier.mm; +} + +int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj); +int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj); +void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj); +int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj); +#else +static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; } + +static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } +static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } +static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); } +static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } + +#endif + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index d6dac21fce0b..df8e8c18c6c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -55,6 +55,9 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; + /* we pinned the pool, mark it as such */ + intel_gt_buffer_pool_mark_used(pool); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); @@ -277,6 +280,9 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; + /* we pinned the pool, mark it as such */ + intel_gt_buffer_pool_mark_used(pool); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 0438e00d4ca7..8e485cb3343c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -7,6 +7,8 @@ #ifndef __I915_GEM_OBJECT_TYPES_H__ #define __I915_GEM_OBJECT_TYPES_H__ +#include <linux/mmu_notifier.h> + #include <drm/drm_gem.h> #include <uapi/drm/i915_drm.h> @@ -30,12 +32,10 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_HAS_IOMEM BIT(1) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) #define I915_GEM_OBJECT_IS_PROXY BIT(3) #define I915_GEM_OBJECT_NO_MMAP BIT(4) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -171,9 +171,12 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) -#define I915_BO_READONLY BIT(2) -#define I915_TILING_QUIRK_BIT 3 /* unknown swizzling; do not release! */ +#define I915_BO_ALLOC_STRUCT_PAGE BIT(2) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ + I915_BO_ALLOC_VOLATILE | \ + I915_BO_ALLOC_STRUCT_PAGE) +#define I915_BO_READONLY BIT(3) +#define I915_TILING_QUIRK_BIT 4 /* unknown swizzling; do not release! */ /* * Is the object to be mapped as read-only to the GPU @@ -213,7 +216,6 @@ struct drm_i915_gem_object { * Protects the pages and their use. Do not use directly, but * instead go through the pin/unpin interfaces. */ - struct mutex lock; atomic_t pages_pin_count; atomic_t shrink_pin; @@ -288,13 +290,16 @@ struct drm_i915_gem_object { unsigned long *bit_17; union { +#ifdef CONFIG_MMU_NOTIFIER struct i915_gem_userptr { uintptr_t ptr; + unsigned long notifier_seq; - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; + struct mmu_interval_notifier notifier; + struct page **pvec; + int page_ref; } userptr; +#endif struct drm_mm_node *stolen; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 43028f3539a6..aed8a37ccdc9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -19,7 +19,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, bool shrinkable; int i; - lockdep_assert_held(&obj->mm.lock); + assert_object_held_shared(obj); if (i915_gem_object_is_volatile(obj)) obj->mm.madv = I915_MADV_DONTNEED; @@ -70,6 +70,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct list_head *list; unsigned long flags; + assert_object_held(obj); spin_lock_irqsave(&i915->mm.obj_lock, flags); i915->mm.shrink_count++; @@ -91,6 +92,8 @@ int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); int err; + assert_object_held_shared(obj); + if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { drm_dbg(&i915->drm, "Attempting to obtain a purgeable object\n"); @@ -114,23 +117,41 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { int err; - err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); - if (err) - return err; + assert_object_held(obj); + + assert_object_held_shared(obj); if (unlikely(!i915_gem_object_has_pages(obj))) { GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); if (err) - goto unlock; + return err; smp_mb__before_atomic(); } atomic_inc(&obj->mm.pages_pin_count); -unlock: - mutex_unlock(&obj->mm.lock); + return 0; +} + +int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_pin_pages(obj); + + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } @@ -145,7 +166,7 @@ void i915_gem_object_truncate(struct drm_i915_gem_object *obj) /* Try to discard unwanted pages */ void i915_gem_object_writeback(struct drm_i915_gem_object *obj) { - lockdep_assert_held(&obj->mm.lock); + assert_object_held_shared(obj); GEM_BUG_ON(i915_gem_object_has_pages(obj)); if (obj->ops->writeback) @@ -176,6 +197,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; + assert_object_held_shared(obj); + pages = fetch_and_zero(&obj->mm.pages); if (IS_ERR_OR_NULL(pages)) return pages; @@ -199,17 +222,12 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; - int err; if (i915_gem_object_has_pinned_pages(obj)) return -EBUSY; /* May be called by shrinker from within get_pages() (on another bo) */ - mutex_lock(&obj->mm.lock); - if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { - err = -EBUSY; - goto unlock; - } + assert_object_held_shared(obj); i915_gem_object_release_mmap_offset(obj); @@ -226,17 +244,10 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) * get_pages backends we should be better able to handle the * cancellation of the async task in a more uniform manner. */ - if (!pages && !i915_gem_object_needs_async_cancel(obj)) - pages = ERR_PTR(-EINVAL); - - if (!IS_ERR(pages)) + if (!IS_ERR_OR_NULL(pages)) obj->ops->put_pages(obj, pages); - err = 0; -unlock: - mutex_unlock(&obj->mm.lock); - - return err; + return 0; } /* The 'mapping' part of i915_gem_object_pin_map() below */ @@ -333,18 +344,15 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { enum i915_map_type has_type; - unsigned int flags; bool pinned; void *ptr; int err; - flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; - if (!i915_gem_object_type_has(obj, flags)) + if (!i915_gem_object_has_struct_page(obj) && + !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) return ERR_PTR(-ENXIO); - err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); - if (err) - return ERR_PTR(err); + assert_object_held(obj); pinned = !(type & I915_MAP_OVERRIDE); type &= ~I915_MAP_OVERRIDE; @@ -354,10 +362,8 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); - if (err) { - ptr = ERR_PTR(err); - goto out_unlock; - } + if (err) + return ERR_PTR(err); smp_mb__before_atomic(); } @@ -392,13 +398,23 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, obj->mm.mapping = page_pack_bits(ptr, type); } -out_unlock: - mutex_unlock(&obj->mm.lock); return ptr; err_unpin: atomic_dec(&obj->mm.pages_pin_count); - goto out_unlock; + return ptr; +} + +void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, + enum i915_map_type type) +{ + void *ret; + + i915_gem_object_lock(obj, NULL); + ret = i915_gem_object_pin_map(obj, type); + i915_gem_object_unlock(obj); + + return ret; } void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, @@ -448,7 +464,8 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset) + unsigned int *offset, + bool allow_alloc) { const bool dma = iter == &obj->mm.get_dma_page; struct scatterlist *sg; @@ -470,6 +487,9 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (n < READ_ONCE(iter->sg_idx)) goto lookup; + if (!allow_alloc) + goto manual_lookup; + mutex_lock(&iter->lock); /* We prefer to reuse the last sg so that repeated lookup of this @@ -519,7 +539,16 @@ scan: if (unlikely(n < idx)) /* insertion completed by another thread */ goto lookup; - /* In case we failed to insert the entry into the radixtree, we need + goto manual_walk; + +manual_lookup: + idx = 0; + sg = obj->mm.pages->sgl; + count = __sg_page_count(sg); + +manual_walk: + /* + * In case we failed to insert the entry into the radixtree, we need * to look beyond the current sg. */ while (idx + count <= n) { @@ -566,7 +595,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - sg = i915_gem_object_get_sg(obj, n, &offset); + sg = i915_gem_object_get_sg(obj, n, &offset, true); return nth_page(sg_page(sg), offset); } @@ -592,7 +621,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg_dma(obj, n, &offset); + sg = i915_gem_object_get_sg_dma(obj, n, &offset, true); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 01fe89afe8c0..81dc2bf59bc3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -76,6 +76,8 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + /* We're no longer struct page backed */ + obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE; __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -89,7 +91,7 @@ err_pci: return -ENOMEM; } -static void +void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, struct sg_table *pages) { @@ -134,9 +136,8 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, vaddr, dma); } -static int -phys_pwrite(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_pwrite *args) +int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) { void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); @@ -165,9 +166,8 @@ phys_pwrite(struct drm_i915_gem_object *obj, return 0; } -static int -phys_pread(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_pread *args) +int i915_gem_object_pread_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) { void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); @@ -186,62 +186,14 @@ phys_pread(struct drm_i915_gem_object *obj, return 0; } -static void phys_release(struct drm_i915_gem_object *obj) -{ - fput(obj->base.filp); -} - -static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { - .name = "i915_gem_object_phys", - .get_pages = i915_gem_object_get_pages_phys, - .put_pages = i915_gem_object_put_pages_phys, - - .pread = phys_pread, - .pwrite = phys_pwrite, - - .release = phys_release, -}; - -int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) +static int i915_gem_object_shmem_to_phys(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; - if (align > obj->base.size) - return -EINVAL; - - if (obj->ops == &i915_gem_phys_ops) - return 0; - - if (!i915_gem_object_is_shmem(obj)) - return -EINVAL; - - err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); - if (err) - return err; - - mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); - - if (obj->mm.madv != I915_MADV_WILLNEED) { - err = -EFAULT; - goto err_unlock; - } - - if (i915_gem_object_has_tiling_quirk(obj)) { - err = -EFAULT; - goto err_unlock; - } - - if (obj->mm.mapping) { - err = -EBUSY; - goto err_unlock; - } - pages = __i915_gem_object_unset_pages(obj); - obj->ops = &i915_gem_phys_ops; - - err = ____i915_gem_object_get_pages(obj); + err = i915_gem_object_get_pages_phys(obj); if (err) goto err_xfer; @@ -249,25 +201,57 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) __i915_gem_object_pin_pages(obj); if (!IS_ERR_OR_NULL(pages)) - i915_gem_shmem_ops.put_pages(obj, pages); + i915_gem_object_put_pages_shmem(obj, pages); i915_gem_object_release_memory_region(obj); - - mutex_unlock(&obj->mm.lock); return 0; err_xfer: - obj->ops = &i915_gem_shmem_ops; if (!IS_ERR_OR_NULL(pages)) { unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl); __i915_gem_object_set_pages(obj, pages, sg_page_sizes); } -err_unlock: - mutex_unlock(&obj->mm.lock); return err; } +int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) +{ + int err; + + assert_object_held(obj); + + if (align > obj->base.size) + return -EINVAL; + + if (!i915_gem_object_is_shmem(obj)) + return -EINVAL; + + if (!i915_gem_object_has_struct_page(obj)) + return 0; + + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (err) + return err; + + if (obj->mm.madv != I915_MADV_WILLNEED) + return -EFAULT; + + if (i915_gem_object_has_tiling_quirk(obj)) + return -EFAULT; + + if (obj->mm.mapping || i915_gem_object_has_pinned_pages(obj)) + return -EBUSY; + + if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { + drm_dbg(obj->base.dev, + "Attempting to obtain a purgeable object\n"); + return -EFAULT; + } + + return i915_gem_object_shmem_to_phys(obj); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_gem_phys.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 000e1cd8e920..8b9d7d14c4bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -116,7 +116,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) - i915_gem_shrink(i915, -1UL, NULL, ~0); + i915_gem_shrink(NULL, i915, -1UL, NULL, ~0); i915_gem_drain_freed_objects(i915); wbinvd_on_all_cpus(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 3e3dad22a683..6a84fb6dde24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -106,13 +106,11 @@ err_free_sg: } void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, - struct intel_memory_region *mem, - unsigned long flags) + struct intel_memory_region *mem) { INIT_LIST_HEAD(&obj->mm.blocks); obj->mm.region = intel_memory_region_get(mem); - obj->flags |= flags; if (obj->base.size <= mem->min_page_size) obj->flags |= I915_BO_ALLOC_CONTIGUOUS; @@ -161,17 +159,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); - /* - * XXX: There is a prevalence of the assumption that we fit the - * object's page count inside a 32bit _signed_ variable. Let's document - * this and catch if we ever need to fix it. In the meantime, if you do - * spot such a local variable, please consider fixing! - */ - - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) + if (i915_gem_object_size_2big(size)) return ERR_PTR(-E2BIG); obj = i915_gem_object_alloc(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index f2ff6f8bff74..ebddc86d78f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -17,8 +17,7 @@ void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, struct sg_table *pages); void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, - struct intel_memory_region *mem, - unsigned long flags); + struct intel_memory_region *mem); void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 680b370a8ef3..a9bfa66c8da1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -99,7 +99,7 @@ rebuild_st: goto err_sg; } - i915_gem_shrink(i915, 2 * page_count, NULL, *s++); + i915_gem_shrink(NULL, i915, 2 * page_count, NULL, *s++); /* * We've tried hard to allocate the memory by reaping @@ -296,8 +296,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, __start_cpu_write(obj); } -static void -shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) +void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) { struct sgt_iter sgt_iter; struct pagevec pvec; @@ -331,6 +330,15 @@ shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) kfree(pages); } +static void +shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) +{ + if (likely(i915_gem_object_has_struct_page(obj))) + i915_gem_object_put_pages_shmem(obj, pages); + else + i915_gem_object_put_pages_phys(obj, pages); +} + static int shmem_pwrite(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg) @@ -343,6 +351,9 @@ shmem_pwrite(struct drm_i915_gem_object *obj, /* Caller already validated user args */ GEM_BUG_ON(!access_ok(user_data, arg->size)); + if (!i915_gem_object_has_struct_page(obj)) + return i915_gem_object_pwrite_phys(obj, arg); + /* * Before we instantiate/pin the backing store for our use, we * can prepopulate the shmemfs filp efficiently using a write into @@ -421,17 +432,27 @@ shmem_pwrite(struct drm_i915_gem_object *obj, return 0; } +static int +shmem_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *arg) +{ + if (!i915_gem_object_has_struct_page(obj)) + return i915_gem_object_pread_phys(obj, arg); + + return -ENODEV; +} + static void shmem_release(struct drm_i915_gem_object *obj) { - i915_gem_object_release_memory_region(obj); + if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE) + i915_gem_object_release_memory_region(obj); fput(obj->base.filp); } const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .name = "i915_gem_object_shmem", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = shmem_get_pages, .put_pages = shmem_put_pages, @@ -439,6 +460,7 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .writeback = shmem_writeback, .pwrite = shmem_pwrite, + .pread = shmem_pread, .release = shmem_release, }; @@ -491,7 +513,8 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -515,7 +538,7 @@ static int shmem_object_init(struct intel_memory_region *mem, i915_gem_object_set_cache_coherency(obj, cache_level); - i915_gem_object_init_memory_region(obj, mem, 0); + i915_gem_object_init_memory_region(obj, mem); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index c2dba1cd9532..3e248d3bd869 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -49,9 +49,9 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, flags = I915_GEM_OBJECT_UNBIND_TEST; if (i915_gem_object_unbind(obj, flags) == 0) - __i915_gem_object_put_pages(obj); + return true; - return !i915_gem_object_has_pages(obj); + return false; } static void try_to_writeback(struct drm_i915_gem_object *obj, @@ -94,7 +94,8 @@ static void try_to_writeback(struct drm_i915_gem_object *obj, * The number of pages of backing storage actually released. */ unsigned long -i915_gem_shrink(struct drm_i915_private *i915, +i915_gem_shrink(struct i915_gem_ww_ctx *ww, + struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned int shrink) @@ -113,6 +114,7 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; + int err; trace_i915_gem_shrink(i915, target, shrink); @@ -200,25 +202,40 @@ i915_gem_shrink(struct drm_i915_private *i915, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + err = 0; if (unsafe_drop_pages(obj, shrink)) { /* May arrive from get_pages on another bo */ - mutex_lock(&obj->mm.lock); - if (!i915_gem_object_has_pages(obj)) { + if (!ww) { + if (!i915_gem_object_trylock(obj)) + goto skip; + } else { + err = i915_gem_object_lock(obj, ww); + if (err) + goto skip; + } + + if (!__i915_gem_object_put_pages(obj)) { try_to_writeback(obj, shrink); count += obj->base.size >> PAGE_SHIFT; } - mutex_unlock(&obj->mm.lock); + if (!ww) + i915_gem_object_unlock(obj); } dma_resv_prune(obj->base.resv); scanned += obj->base.size >> PAGE_SHIFT; +skip: i915_gem_object_put(obj); spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (err) + break; } list_splice_tail(&still_in_list, phase->list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + if (err) + return err; } if (shrink & I915_SHRINK_BOUND) @@ -249,7 +266,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) unsigned long freed = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - freed = i915_gem_shrink(i915, -1UL, NULL, + freed = i915_gem_shrink(NULL, i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); } @@ -295,7 +312,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) sc->nr_scanned = 0; - freed = i915_gem_shrink(i915, + freed = i915_gem_shrink(NULL, i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | @@ -304,7 +321,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) intel_wakeref_t wakeref; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - freed += i915_gem_shrink(i915, + freed += i915_gem_shrink(NULL, i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_ACTIVE | @@ -329,7 +346,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, + freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -367,7 +384,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr intel_wakeref_t wakeref; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, + freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_VMAPS); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h index b397d7785789..8512470f6fd6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h @@ -9,10 +9,12 @@ #include <linux/bits.h> struct drm_i915_private; +struct i915_gem_ww_ctx; struct mutex; /* i915_gem_shrinker.c */ -unsigned long i915_gem_shrink(struct drm_i915_private *i915, +unsigned long i915_gem_shrink(struct i915_gem_ww_ctx *ww, + struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index a1e197a6e999..b0597de206de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -630,20 +630,22 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, int err; drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class, 0); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - err = i915_gem_object_pin_pages(obj); - if (err) - return err; + if (WARN_ON(!i915_gem_object_trylock(obj))) + return -EBUSY; - i915_gem_object_init_memory_region(obj, mem, 0); + err = i915_gem_object_pin_pages(obj); + if (!err) + i915_gem_object_init_memory_region(obj, mem); + i915_gem_object_unlock(obj); - return 0; + return err; } static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, @@ -686,7 +688,7 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN], + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN_SMEM], size, I915_BO_ALLOC_CONTIGUOUS); } @@ -726,7 +728,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, resource_size_t stolen_offset, resource_size_t size) { - struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN]; + struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN_SMEM]; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index d589d3d81085..9e8945013090 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -265,7 +265,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. */ - mutex_lock(&obj->mm.lock); if (i915_gem_object_has_pages(obj) && obj->mm.madv == I915_MADV_WILLNEED && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { @@ -280,7 +279,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, i915_gem_object_set_tiling_quirk(obj); } } - mutex_unlock(&obj->mm.lock); spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index f2eaed6aca3d..a657b99ec760 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -2,10 +2,39 @@ * SPDX-License-Identifier: MIT * * Copyright © 2012-2014 Intel Corporation + * + * Based on amdgpu_mn, which bears the following notice: + * + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König <christian.koenig@amd.com> */ #include <linux/mmu_context.h> -#include <linux/mmu_notifier.h> #include <linux/mempolicy.h> #include <linux/swap.h> #include <linux/sched/mm.h> @@ -15,408 +44,121 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" -struct i915_mm_struct { - struct mm_struct *mm; - struct drm_i915_private *i915; - struct i915_mmu_notifier *mn; - struct hlist_node node; - struct kref kref; - struct rcu_work work; -}; - -#if defined(CONFIG_MMU_NOTIFIER) -#include <linux/interval_tree.h> +#ifdef CONFIG_MMU_NOTIFIER -struct i915_mmu_notifier { - spinlock_t lock; - struct hlist_node node; - struct mmu_notifier mn; - struct rb_root_cached objects; - struct i915_mm_struct *mm; -}; - -struct i915_mmu_object { - struct i915_mmu_notifier *mn; - struct drm_i915_gem_object *obj; - struct interval_tree_node it; -}; - -static void add_object(struct i915_mmu_object *mo) +/** + * i915_gem_userptr_invalidate - callback to notify about mm change + * + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() + * + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. + */ +static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb)); - interval_tree_insert(&mo->it, &mo->mn->objects); -} + struct drm_i915_gem_object *obj = container_of(mni, struct drm_i915_gem_object, userptr.notifier); + struct drm_i915_private *i915 = to_i915(obj->base.dev); + long r; -static void del_object(struct i915_mmu_object *mo) -{ - if (RB_EMPTY_NODE(&mo->it.rb)) - return; + if (!mmu_notifier_range_blockable(range)) + return false; - interval_tree_remove(&mo->it, &mo->mn->objects); - RB_CLEAR_NODE(&mo->it.rb); -} + spin_lock(&i915->mm.notifier_lock); -static void -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) -{ - struct i915_mmu_object *mo = obj->userptr.mmu_object; + mmu_interval_set_seq(mni, cur_seq); + + spin_unlock(&i915->mm.notifier_lock); /* - * During mm_invalidate_range we need to cancel any userptr that - * overlaps the range being invalidated. Doing so requires the - * struct_mutex, and that risks recursion. In order to cause - * recursion, the user must alias the userptr address space with - * a GTT mmapping (possible with a MAP_FIXED) - then when we have - * to invalidate that mmaping, mm_invalidate_range is called with - * the userptr address *and* the struct_mutex held. To prevent that - * we set a flag under the i915_mmu_notifier spinlock to indicate - * whether this object is valid. + * We don't wait when the process is exiting. This is valid + * because the object will be cleaned up anyway. + * + * This is also temporarily required as a hack, because we + * cannot currently force non-consistent batch buffers to preempt + * and reschedule by waiting on it, hanging processes on exit. */ - if (!mo) - return; + if (current->flags & PF_EXITING) + return true; - spin_lock(&mo->mn->lock); - if (value) - add_object(mo); - else - del_object(mo); - spin_unlock(&mo->mn->lock); -} - -static int -userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, - const struct mmu_notifier_range *range) -{ - struct i915_mmu_notifier *mn = - container_of(_mn, struct i915_mmu_notifier, mn); - struct interval_tree_node *it; - unsigned long end; - int ret = 0; - - if (RB_EMPTY_ROOT(&mn->objects.rb_root)) - return 0; - - /* interval ranges are inclusive, but invalidate range is exclusive */ - end = range->end - 1; - - spin_lock(&mn->lock); - it = interval_tree_iter_first(&mn->objects, range->start, end); - while (it) { - struct drm_i915_gem_object *obj; - - if (!mmu_notifier_range_blockable(range)) { - ret = -EAGAIN; - break; - } - - /* - * The mmu_object is released late when destroying the - * GEM object so it is entirely possible to gain a - * reference on an object in the process of being freed - * since our serialisation is via the spinlock and not - * the struct_mutex - and consequently use it after it - * is freed and then double free it. To prevent that - * use-after-free we only acquire a reference on the - * object if it is not in the process of being destroyed. - */ - obj = container_of(it, struct i915_mmu_object, it)->obj; - if (!kref_get_unless_zero(&obj->base.refcount)) { - it = interval_tree_iter_next(it, range->start, end); - continue; - } - spin_unlock(&mn->lock); - - ret = i915_gem_object_unbind(obj, - I915_GEM_OBJECT_UNBIND_ACTIVE | - I915_GEM_OBJECT_UNBIND_BARRIER); - if (ret == 0) - ret = __i915_gem_object_put_pages(obj); - i915_gem_object_put(obj); - if (ret) - return ret; - - spin_lock(&mn->lock); - - /* - * As we do not (yet) protect the mmu from concurrent insertion - * over this range, there is no guarantee that this search will - * terminate given a pathologic workload. - */ - it = interval_tree_iter_first(&mn->objects, range->start, end); - } - spin_unlock(&mn->lock); - - return ret; + /* we will unbind on next submission, still have userptr pins */ + r = dma_resv_wait_timeout_rcu(obj->base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + if (r <= 0) + drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); + return true; } -static const struct mmu_notifier_ops i915_gem_userptr_notifier = { - .invalidate_range_start = userptr_mn_invalidate_range_start, +static const struct mmu_interval_notifier_ops i915_gem_userptr_notifier_ops = { + .invalidate = i915_gem_userptr_invalidate, }; -static struct i915_mmu_notifier * -i915_mmu_notifier_create(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn; - - mn = kmalloc(sizeof(*mn), GFP_KERNEL); - if (mn == NULL) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&mn->lock); - mn->mn.ops = &i915_gem_userptr_notifier; - mn->objects = RB_ROOT_CACHED; - mn->mm = mm; - - return mn; -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ - struct i915_mmu_object *mo; - - mo = fetch_and_zero(&obj->userptr.mmu_object); - if (!mo) - return; - - spin_lock(&mo->mn->lock); - del_object(mo); - spin_unlock(&mo->mn->lock); - kfree(mo); -} - -static struct i915_mmu_notifier * -i915_mmu_notifier_find(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn, *old; - int err; - - mn = READ_ONCE(mm->mn); - if (likely(mn)) - return mn; - - mn = i915_mmu_notifier_create(mm); - if (IS_ERR(mn)) - return mn; - - err = mmu_notifier_register(&mn->mn, mm->mm); - if (err) { - kfree(mn); - return ERR_PTR(err); - } - - old = cmpxchg(&mm->mn, NULL, mn); - if (old) { - mmu_notifier_unregister(&mn->mn, mm->mm); - kfree(mn); - mn = old; - } - - return mn; -} - static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj) { - struct i915_mmu_notifier *mn; - struct i915_mmu_object *mo; - - if (flags & I915_USERPTR_UNSYNCHRONIZED) - return capable(CAP_SYS_ADMIN) ? 0 : -EPERM; - - if (GEM_WARN_ON(!obj->userptr.mm)) - return -EINVAL; - - mn = i915_mmu_notifier_find(obj->userptr.mm); - if (IS_ERR(mn)) - return PTR_ERR(mn); - - mo = kzalloc(sizeof(*mo), GFP_KERNEL); - if (!mo) - return -ENOMEM; - - mo->mn = mn; - mo->obj = obj; - mo->it.start = obj->userptr.ptr; - mo->it.last = obj->userptr.ptr + obj->base.size - 1; - RB_CLEAR_NODE(&mo->it.rb); - - obj->userptr.mmu_object = mo; - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ - if (mn == NULL) - return; - - mmu_notifier_unregister(&mn->mn, mm); - kfree(mn); -} - -#else - -static void -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) -{ -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ -} - -static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) -{ - if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0) - return -ENODEV; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ -} - -#endif - -static struct i915_mm_struct * -__i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real) -{ - struct i915_mm_struct *it, *mm = NULL; - - rcu_read_lock(); - hash_for_each_possible_rcu(i915->mm_structs, - it, node, - (unsigned long)real) - if (it->mm == real && kref_get_unless_zero(&it->kref)) { - mm = it; - break; - } - rcu_read_unlock(); - - return mm; + return mmu_interval_notifier_insert(&obj->userptr.notifier, current->mm, + obj->userptr.ptr, obj->base.size, + &i915_gem_userptr_notifier_ops); } -static int -i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) +static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_mm_struct *mm, *new; - int ret = 0; - - /* During release of the GEM object we hold the struct_mutex. This - * precludes us from calling mmput() at that time as that may be - * the last reference and so call exit_mmap(). exit_mmap() will - * attempt to reap the vma, and if we were holding a GTT mmap - * would then call drm_gem_vm_close() and attempt to reacquire - * the struct mutex. So in order to avoid that recursion, we have - * to defer releasing the mm reference until after we drop the - * struct_mutex, i.e. we need to schedule a worker to do the clean - * up. - */ - mm = __i915_mm_struct_find(i915, current->mm); - if (mm) - goto out; - - new = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!new) - return -ENOMEM; + struct page **pvec = NULL; - kref_init(&new->kref); - new->i915 = to_i915(obj->base.dev); - new->mm = current->mm; - new->mn = NULL; - - spin_lock(&i915->mm_lock); - mm = __i915_mm_struct_find(i915, current->mm); - if (!mm) { - hash_add_rcu(i915->mm_structs, - &new->node, - (unsigned long)new->mm); - mmgrab(current->mm); - mm = new; + spin_lock(&i915->mm.notifier_lock); + if (!--obj->userptr.page_ref) { + pvec = obj->userptr.pvec; + obj->userptr.pvec = NULL; } - spin_unlock(&i915->mm_lock); - if (mm != new) - kfree(new); - -out: - obj->userptr.mm = mm; - return ret; -} - -static void -__i915_mm_struct_free__worker(struct work_struct *work) -{ - struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work); - - i915_mmu_notifier_free(mm->mn, mm->mm); - mmdrop(mm->mm); - kfree(mm); -} - -static void -__i915_mm_struct_free(struct kref *kref) -{ - struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref); - - spin_lock(&mm->i915->mm_lock); - hash_del_rcu(&mm->node); - spin_unlock(&mm->i915->mm_lock); + GEM_BUG_ON(obj->userptr.page_ref < 0); + spin_unlock(&i915->mm.notifier_lock); - INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker); - queue_rcu_work(system_wq, &mm->work); -} - -static void -i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj) -{ - if (obj->userptr.mm == NULL) - return; + if (pvec) { + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free); - obj->userptr.mm = NULL; + unpin_user_pages(pvec, num_pages); + kvfree(pvec); + } } -struct get_pages_work { - struct work_struct work; - struct drm_i915_gem_object *obj; - struct task_struct *task; -}; - -static struct sg_table * -__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, - struct page **pvec, unsigned long num_pages) +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; unsigned int sg_page_sizes; struct scatterlist *sg; + struct page **pvec; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + + spin_lock(&i915->mm.notifier_lock); + if (GEM_WARN_ON(!obj->userptr.page_ref)) { + spin_unlock(&i915->mm.notifier_lock); + ret = -EFAULT; + goto err_free; + } + + obj->userptr.page_ref++; + pvec = obj->userptr.pvec; + spin_unlock(&i915->mm.notifier_lock); alloc_table: sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, num_pages << PAGE_SHIFT, max_segment, NULL, 0, GFP_KERNEL); if (IS_ERR(sg)) { - kfree(st); - return ERR_CAST(sg); + ret = PTR_ERR(sg); + goto err; } ret = i915_gem_gtt_prepare_pages(obj, st); @@ -428,203 +170,20 @@ alloc_table: goto alloc_table; } - kfree(st); - return ERR_PTR(ret); + goto err; } sg_page_sizes = i915_sg_page_sizes(st->sgl); __i915_gem_object_set_pages(obj, st, sg_page_sizes); - return st; -} - -static void -__i915_gem_userptr_get_pages_worker(struct work_struct *_work) -{ - struct get_pages_work *work = container_of(_work, typeof(*work), work); - struct drm_i915_gem_object *obj = work->obj; - const unsigned long npages = obj->base.size >> PAGE_SHIFT; - unsigned long pinned; - struct page **pvec; - int ret; - - ret = -ENOMEM; - pinned = 0; - - pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (pvec != NULL) { - struct mm_struct *mm = obj->userptr.mm->mm; - unsigned int flags = 0; - int locked = 0; - - if (!i915_gem_object_is_readonly(obj)) - flags |= FOLL_WRITE; - - ret = -EFAULT; - if (mmget_not_zero(mm)) { - while (pinned < npages) { - if (!locked) { - mmap_read_lock(mm); - locked = 1; - } - ret = pin_user_pages_remote - (mm, - obj->userptr.ptr + pinned * PAGE_SIZE, - npages - pinned, - flags, - pvec + pinned, NULL, &locked); - if (ret < 0) - break; - - pinned += ret; - } - if (locked) - mmap_read_unlock(mm); - mmput(mm); - } - } - - mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); - if (obj->userptr.work == &work->work) { - struct sg_table *pages = ERR_PTR(ret); - - if (pinned == npages) { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, - npages); - if (!IS_ERR(pages)) { - pinned = 0; - pages = NULL; - } - } - - obj->userptr.work = ERR_CAST(pages); - if (IS_ERR(pages)) - __i915_gem_userptr_set_active(obj, false); - } - mutex_unlock(&obj->mm.lock); - - unpin_user_pages(pvec, pinned); - kvfree(pvec); - - i915_gem_object_put(obj); - put_task_struct(work->task); - kfree(work); -} - -static struct sg_table * -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) -{ - struct get_pages_work *work; - - /* Spawn a worker so that we can acquire the - * user pages without holding our mutex. Access - * to the user pages requires mmap_lock, and we have - * a strict lock ordering of mmap_lock, struct_mutex - - * we already hold struct_mutex here and so cannot - * call gup without encountering a lock inversion. - * - * Userspace will keep on repeating the operation - * (thanks to EAGAIN) until either we hit the fast - * path or the worker completes. If the worker is - * cancelled or superseded, the task is still run - * but the results ignored. (This leads to - * complications that we may have a stray object - * refcount that we need to be wary of when - * checking for existing objects during creation.) - * If the worker encounters an error, it reports - * that error back to this function through - * obj->userptr.work = ERR_PTR. - */ - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (work == NULL) - return ERR_PTR(-ENOMEM); - - obj->userptr.work = &work->work; - - work->obj = i915_gem_object_get(obj); - - work->task = current; - get_task_struct(work->task); - - INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); - queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work); - - return ERR_PTR(-EAGAIN); -} - -static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) -{ - const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - struct mm_struct *mm = obj->userptr.mm->mm; - struct page **pvec; - struct sg_table *pages; - bool active; - int pinned; - unsigned int gup_flags = 0; - - /* If userspace should engineer that these pages are replaced in - * the vma between us binding this page into the GTT and completion - * of rendering... Their loss. If they change the mapping of their - * pages they need to create a new bo to point to the new vma. - * - * However, that still leaves open the possibility of the vma - * being copied upon fork. Which falls under the same userspace - * synchronisation issue as a regular bo, except that this time - * the process may not be expecting that a particular piece of - * memory is tied to the GPU. - * - * Fortunately, we can hook into the mmu_notifier in order to - * discard the page references prior to anything nasty happening - * to the vma (discard or cloning) which should prevent the more - * egregious cases from causing harm. - */ - - if (obj->userptr.work) { - /* active flag should still be held for the pending work */ - if (IS_ERR(obj->userptr.work)) - return PTR_ERR(obj->userptr.work); - else - return -EAGAIN; - } - - pvec = NULL; - pinned = 0; - - if (mm == current->mm) { - pvec = kvmalloc_array(num_pages, sizeof(struct page *), - GFP_KERNEL | - __GFP_NORETRY | - __GFP_NOWARN); - if (pvec) { - /* defer to worker if malloc fails */ - if (!i915_gem_object_is_readonly(obj)) - gup_flags |= FOLL_WRITE; - pinned = pin_user_pages_fast_only(obj->userptr.ptr, - num_pages, gup_flags, - pvec); - } - } - - active = false; - if (pinned < 0) { - pages = ERR_PTR(pinned); - pinned = 0; - } else if (pinned < num_pages) { - pages = __i915_gem_userptr_get_pages_schedule(obj); - active = pages == ERR_PTR(-EAGAIN); - } else { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages); - active = !IS_ERR(pages); - } - if (active) - __i915_gem_userptr_set_active(obj, true); - - if (IS_ERR(pages)) - unpin_user_pages(pvec, pinned); - kvfree(pvec); + return 0; - return PTR_ERR_OR_ZERO(pages); +err: + i915_gem_object_userptr_drop_ref(obj); +err_free: + kfree(st); + return ret; } static void @@ -634,9 +193,6 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, struct sgt_iter sgt_iter; struct page *page; - /* Cancel any inflight work and force them to restart their gup */ - obj->userptr.work = NULL; - __i915_gem_userptr_set_active(obj, false); if (!pages) return; @@ -676,42 +232,224 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, } mark_page_accessed(page); - unpin_user_page(page); } obj->mm.dirty = false; sg_free_table(pages); kfree(pages); + + i915_gem_object_userptr_drop_ref(obj); +} + +static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool get_pages) +{ + struct sg_table *pages; + int err; + + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (err) + return err; + + if (GEM_WARN_ON(i915_gem_object_has_pinned_pages(obj))) + return -EBUSY; + + assert_object_held(obj); + + pages = __i915_gem_object_unset_pages(obj); + if (!IS_ERR_OR_NULL(pages)) + i915_gem_userptr_put_pages(obj, pages); + + if (get_pages) + err = ____i915_gem_object_get_pages(obj); + + return err; +} + +int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; + struct page **pvec; + unsigned int gup_flags = 0; + unsigned long notifier_seq; + int pinned, ret; + + if (obj->userptr.notifier.mm != current->mm) + return -EFAULT; + + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + return ret; + + /* optimistically try to preserve current pages while unlocked */ + if (i915_gem_object_has_pages(obj) && + !mmu_interval_check_retry(&obj->userptr.notifier, + obj->userptr.notifier_seq)) { + spin_lock(&i915->mm.notifier_lock); + if (obj->userptr.pvec && + !mmu_interval_read_retry(&obj->userptr.notifier, + obj->userptr.notifier_seq)) { + obj->userptr.page_ref++; + + /* We can keep using the current binding, this is the fastpath */ + ret = 1; + } + spin_unlock(&i915->mm.notifier_lock); + } + + if (!ret) { + /* Make sure userptr is unbound for next attempt, so we don't use stale pages. */ + ret = i915_gem_object_userptr_unbind(obj, false); + } + i915_gem_object_unlock(obj); + if (ret < 0) + return ret; + + if (ret > 0) + return 0; + + notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); + + pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); + if (!pvec) + return -ENOMEM; + + if (!i915_gem_object_is_readonly(obj)) + gup_flags |= FOLL_WRITE; + + pinned = ret = 0; + while (pinned < num_pages) { + ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE, + num_pages - pinned, gup_flags, + &pvec[pinned]); + if (ret < 0) + goto out; + + pinned += ret; + } + ret = 0; + + spin_lock(&i915->mm.notifier_lock); + + if (mmu_interval_read_retry(&obj->userptr.notifier, + !obj->userptr.page_ref ? notifier_seq : + obj->userptr.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + if (!obj->userptr.page_ref++) { + obj->userptr.pvec = pvec; + obj->userptr.notifier_seq = notifier_seq; + + pvec = NULL; + } + +out_unlock: + spin_unlock(&i915->mm.notifier_lock); + +out: + if (pvec) { + unpin_user_pages(pvec, pinned); + kvfree(pvec); + } + + return ret; +} + +int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) +{ + if (mmu_interval_read_retry(&obj->userptr.notifier, + obj->userptr.notifier_seq)) { + /* We collided with the mmu notifier, need to retry */ + + return -EAGAIN; + } + + return 0; +} + +void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) +{ + i915_gem_object_userptr_drop_ref(obj); +} + +int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_userptr_submit_init(obj); + if (err) + return err; + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (!err) { + /* + * Since we only check validity, not use the pages, + * it doesn't matter if we collide with the mmu notifier, + * and -EAGAIN handling is not required. + */ + err = i915_gem_object_pin_pages(obj); + if (!err) + i915_gem_object_unpin_pages(obj); + + i915_gem_object_unlock(obj); + } + + i915_gem_object_userptr_submit_fini(obj); + return err; } static void i915_gem_userptr_release(struct drm_i915_gem_object *obj) { - i915_gem_userptr_release__mmu_notifier(obj); - i915_gem_userptr_release__mm_struct(obj); + GEM_WARN_ON(obj->userptr.page_ref); + + mmu_interval_notifier_remove(&obj->userptr.notifier); + obj->userptr.notifier.mm = NULL; } static int i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) { - if (obj->userptr.mmu_object) - return 0; + drm_dbg(obj->base.dev, "Exporting userptr no longer allowed\n"); + + return -EINVAL; +} + +static int +i915_gem_userptr_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + drm_dbg(obj->base.dev, "pwrite to userptr no longer allowed\n"); + + return -EINVAL; +} - return i915_gem_userptr_init__mmu_notifier(obj, 0); +static int +i915_gem_userptr_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + drm_dbg(obj->base.dev, "pread from userptr no longer allowed\n"); + + return -EINVAL; } static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .name = "i915_gem_object_userptr", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE | + .flags = I915_GEM_OBJECT_IS_SHRINKABLE | I915_GEM_OBJECT_NO_MMAP | - I915_GEM_OBJECT_ASYNC_CANCEL, + I915_GEM_OBJECT_IS_PROXY, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, .dmabuf_export = i915_gem_userptr_dmabuf_export, + .pwrite = i915_gem_userptr_pwrite, + .pread = i915_gem_userptr_pread, .release = i915_gem_userptr_release, }; +#endif + /* * Creates a new mm object that wraps some normal memory from the process * context - user memory. @@ -752,12 +490,12 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - static struct lock_class_key lock_class; + static struct lock_class_key __maybe_unused lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; - struct drm_i915_gem_object *obj; - int ret; - u32 handle; + struct drm_i915_gem_object __maybe_unused *obj; + int __maybe_unused ret; + u32 __maybe_unused handle; if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) { /* We cannot support coherent userptr objects on hw without @@ -770,21 +508,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; - /* - * XXX: There is a prevalence of the assumption that we fit the - * object's page count inside a 32bit _signed_ variable. Let's document - * this and catch if we ever need to fix it. In the meantime, if you do - * spot such a local variable, please consider fixing! - * - * Aside from our own locals (for which we have no excuse!): - * - sg_table embeds unsigned int for num_pages - * - get_user_pages*() mixed ints with longs - */ - - if (args->user_size >> PAGE_SHIFT > INT_MAX) - return -E2BIG; - - if (overflows_type(args->user_size, obj->base.size)) + if (i915_gem_object_size_2big(args->user_size)) return -E2BIG; if (!args->user_size) @@ -796,6 +520,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size)) return -EFAULT; + if (args->flags & I915_USERPTR_UNSYNCHRONIZED) + return -ENODEV; + if (args->flags & I915_USERPTR_READ_ONLY) { /* * On almost all of the older hw, we cannot tell the GPU that @@ -805,17 +532,20 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENODEV; } +#ifdef CONFIG_MMU_NOTIFIER obj = i915_gem_object_alloc(); if (obj == NULL) return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); obj->userptr.ptr = args->user_ptr; + obj->userptr.notifier_seq = ULONG_MAX; if (args->flags & I915_USERPTR_READ_ONLY) i915_gem_object_set_readonly(obj); @@ -823,9 +553,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * at binding. This means that we need to hook into the mmu_notifier * in order to detect if the mmu is destroyed. */ - ret = i915_gem_userptr_init__mm_struct(obj); - if (ret == 0) - ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags); + ret = i915_gem_userptr_init__mmu_notifier(obj); if (ret == 0) ret = drm_gem_handle_create(file, &obj->base, &handle); @@ -836,24 +564,20 @@ i915_gem_userptr_ioctl(struct drm_device *dev, args->handle = handle; return 0; +#else + return -ENODEV; +#endif } int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { - spin_lock_init(&dev_priv->mm_lock); - hash_init(dev_priv->mm_structs); - - dev_priv->mm.userptr_wq = - alloc_workqueue("i915-userptr-acquire", - WQ_HIGHPRI | WQ_UNBOUND, - 0); - if (!dev_priv->mm.userptr_wq) - return -ENOMEM; +#ifdef CONFIG_MMU_NOTIFIER + spin_lock_init(&dev_priv->mm.notifier_lock); +#endif return 0; } void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv) { - destroy_workqueue(dev_priv->mm.userptr_wq); } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 2fb501a78a85..0c8ecfdf5405 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -89,7 +89,6 @@ static void huge_put_pages(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops huge_ops = { .name = "huge-gem", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, .get_pages = huge_get_pages, .put_pages = huge_put_pages, }; @@ -115,7 +114,8 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class); + i915_gem_object_init(obj, &huge_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index aacf4856ccb4..dadd485bc52f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -140,8 +140,7 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops huge_page_ops = { .name = "huge-gem", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = get_huge_pages, .put_pages = put_huge_pages, }; @@ -168,7 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class); + i915_gem_object_init(obj, &huge_page_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); i915_gem_object_set_volatile(obj); @@ -319,9 +319,9 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) drm_gem_private_object_init(&i915->drm, &obj->base, size); if (single) - i915_gem_object_init(obj, &fake_ops_single, &lock_class); + i915_gem_object_init(obj, &fake_ops_single, &lock_class, 0); else - i915_gem_object_init(obj, &fake_ops, &lock_class); + i915_gem_object_init(obj, &fake_ops, &lock_class, 0); i915_gem_object_set_volatile(obj); @@ -589,7 +589,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) goto out_put; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_put; @@ -653,15 +653,19 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) break; } + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); } return 0; out_unpin: + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); + i915_gem_object_unlock(obj); out_put: i915_gem_object_put(obj); @@ -675,8 +679,10 @@ static void close_object_list(struct list_head *objects, list_for_each_entry_safe(obj, on, objects, st_link) { list_del(&obj->st_link); + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); } } @@ -713,7 +719,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg) break; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); break; @@ -889,7 +895,7 @@ static int igt_mock_ppgtt_64K(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_object_put; @@ -943,8 +949,10 @@ static int igt_mock_ppgtt_64K(void *arg) } i915_vma_unpin(vma); + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); } } @@ -954,7 +962,9 @@ static int igt_mock_ppgtt_64K(void *arg) out_vma_unpin: i915_vma_unpin(vma); out_object_unpin: + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); + i915_gem_object_unlock(obj); out_object_put: i915_gem_object_put(obj); @@ -1024,7 +1034,7 @@ static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val) if (err) return err; - ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(ptr)) return PTR_ERR(ptr); @@ -1304,7 +1314,7 @@ try_again: return err; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { if (err == -ENXIO || err == -E2BIG) { i915_gem_object_put(obj); @@ -1327,8 +1337,10 @@ try_again: __func__, size, i); } out_unpin: + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); out_put: i915_gem_object_put(obj); @@ -1402,7 +1414,7 @@ static int igt_ppgtt_sanity_check(void *arg) return err; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); goto out; @@ -1416,8 +1428,10 @@ static int igt_ppgtt_sanity_check(void *arg) err = igt_write_huge(ctx, obj); + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); if (err) { @@ -1462,7 +1476,7 @@ static int igt_tmpfs_fallback(void *arg) goto out_restore; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto out_put; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 6a674a7994df..d36873885cc1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -45,7 +45,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine) goto err_flush; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put; @@ -157,7 +157,7 @@ static int prepare_blit(const struct tiled_blits *t, u32 src_pitch, dst_pitch; u32 cmd, *cs; - cs = i915_gem_object_pin_map(batch, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -377,7 +377,7 @@ static int verify_buffer(const struct tiled_blits *t, y = i915_prandom_u32_max_state(t->height, prng); p = y * t->width + x; - vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(buf->vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -564,7 +564,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, int err; int i; - map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(t->scratch.vma->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 1117d2a44518..e937b6629019 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -160,7 +160,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) if (err) return err; - map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); @@ -183,7 +183,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) if (err) return err; - map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); @@ -200,17 +200,15 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cs; int err; + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); if (err) goto out_unlock; - vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_unlock; - } - rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d3f87dc4eda3..5fef592390cb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1094,7 +1094,7 @@ __read_slice_count(struct intel_context *ce, if (ret < 0) return ret; - buf = i915_gem_object_pin_map(obj, I915_MAP_WB); + buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(buf)) { ret = PTR_ERR(buf); return ret; @@ -1511,7 +1511,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (IS_ERR(obj)) return PTR_ERR(obj); - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1622,7 +1622,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1658,7 +1658,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1715,7 +1715,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out_vm; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index b6d43880b0c1..dd74bc09ec88 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -194,7 +194,7 @@ static int igt_dmabuf_import_ownership(void *arg) dma_buf_put(dmabuf); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index e1d50a5a1477..4df505e4c53a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -116,7 +116,7 @@ static int igt_gpu_reloc(void *arg) if (IS_ERR(scratch)) return PTR_ERR(scratch); - map = i915_gem_object_pin_map(scratch, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(scratch, I915_MAP_WC); if (IS_ERR(map)) { err = PTR_ERR(map); goto err_scratch; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index d429c7643ff2..5cf6df49c333 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -322,7 +322,7 @@ static int igt_partial_tiling(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("Failed to allocate %u pages (%lu total), err=%d\n", nreal, obj->base.size / PAGE_SIZE, err); @@ -459,7 +459,7 @@ static int igt_smoke_tiling(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("Failed to allocate %u pages (%lu total), err=%d\n", nreal, obj->base.size / PAGE_SIZE, err); @@ -798,7 +798,7 @@ static int wc_set(struct drm_i915_gem_object *obj) { void *vaddr; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -814,7 +814,7 @@ static int wc_check(struct drm_i915_gem_object *obj) void *vaddr; int err = 0; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -835,9 +835,8 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) return false; if (type != I915_MMAP_TYPE_GTT && - !i915_gem_object_type_has(obj, - I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) return false; return true; @@ -977,10 +976,8 @@ static const char *repr_mmap_type(enum i915_mmap_type type) static bool can_access(const struct drm_i915_gem_object *obj) { - unsigned int flags = - I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; - - return i915_gem_object_type_has(obj, flags); + return i915_gem_object_has_struct_page(obj) || + i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); } static int __igt_mmap_access(struct drm_i915_private *i915, @@ -1319,7 +1316,9 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, } if (type != I915_MMAP_TYPE_GTT) { + i915_gem_object_lock(obj, NULL); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); if (i915_gem_object_has_pages(obj)) { pr_err("Failed to put-pages object!\n"); err = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index bf853c40ec65..740ee8086a27 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -47,7 +47,7 @@ static int igt_gem_huge(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("Failed to allocate %u pages (%lu total), err=%d\n", nreal, obj->base.size / PAGE_SIZE, err); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 23b6e11bbc3e..8c335d1a8406 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -220,7 +220,7 @@ static int igt_fill_blt_thread(void *arg) return PTR_ERR(ctx); prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->sched.priority = prio; } ce = i915_gem_context_get_engine(ctx, 0); @@ -262,7 +262,7 @@ static int igt_fill_blt_thread(void *arg) goto err_flush; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put; @@ -338,7 +338,7 @@ static int igt_copy_blt_thread(void *arg) return PTR_ERR(ctx); prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->sched.priority = prio; } ce = i915_gem_context_get_engine(ctx, 0); @@ -380,7 +380,7 @@ static int igt_copy_blt_thread(void *arg) goto err_flush; } - vaddr = i915_gem_object_pin_map(src, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put_src; @@ -400,7 +400,7 @@ static int igt_copy_blt_thread(void *arg) goto err_put_src; } - vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put_dst; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 8cee68c6a6dc..3a6ce87f8b52 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,13 +25,21 @@ static int mock_phys_object(void *arg) goto out; } + if (!i915_gem_object_has_struct_page(obj)) { + err = -EINVAL; + pr_err("shmem has no struct page\n"); + goto out_obj; + } + + i915_gem_object_lock(obj, NULL); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); + i915_gem_object_unlock(obj); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; } - if (obj->ops != &i915_gem_phys_ops) { + if (i915_gem_object_has_struct_page(obj)) { pr_err("i915_gem_object_attach_phys did not create a phys object\n"); err = -EINVAL; goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index d6783061bc72..0b092c62bb34 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -55,7 +55,7 @@ igt_emit_store_dw(struct i915_vma *vma, if (IS_ERR(obj)) return ERR_CAST(obj); - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto err; |