author    | Dave Airlie <airlied@redhat.com> | 2021-01-22 03:08:57 +0300
committer | Dave Airlie <airlied@redhat.com> | 2021-01-22 03:08:58 +0300
commit    | d82afcf9caaac26ce2642511115bca9dacf30f41 (patch)
tree      | e084cc20577d3df73694ce69e8eb5f3e26bd0bf9
parent    | a6b8720c2f85143561c3453e1cf928a2f8586ac0 (diff)
parent    | 69b4b99842201bc24c98ba66b922d8879e190483 (diff)
download  | linux-d82afcf9caaac26ce2642511115bca9dacf30f41.tar.xz
Merge tag 'drm-intel-gt-next-2021-01-21-1' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Cross-subsystem Changes:
- Includes gvt-gt-next-2021-01-18 + header check fix for GVT
Driver Changes:
- Fix for #2955: Clear potentially malicious register state before
executing clear residuals security mitigation (Chris)
- Fixes that lead to marking per-engine-reset as supported on Gen7
(Chris)
- Remove per-client stats from debugfs/i915_gem_objects (Tvrtko, Chris)
- Add arbitration check before semaphore wait (Chris)
- Apply interactive priority to explicit flip fences (Chris)
- Make GEM errors non-fatal by default to help capture logs during
development (Chris)
- Fix object page offset within a region in error capture (CQ, Matt A)
- Close race between enable_breadcrumbs and cancel_breadcrumbs (Chris)
- Amalgamate clflushes on suspend/freeze to speed up S/R (Chris)
- Protect used framebuffers from casual eviction (Chris)
- Fix the sgt.pfn sanity check (Kui, Matt A)
- Reduce locking around i915_request.lock and ctx->engines_mutex (Chris)
- Simplify tracking for engine->fw_active and stats.active (Chris; see
the reader sketch after the diffstat)
- Constrain pool objects by mapping type (Chris, Matt A; see the sketch
after this list)
- Use shrinkable status for unknown swizzle quirks (Chris)
- Do not suspend bonded requests if one hangs (Chris)
- Restore "Skip over completed active execlists" optimization (Chris)
- Move stolen node into GEM object union (Chris)
- Split gem_create into its own file (Matt A)
- Convert object_create into object_init in LMEM region code (Matt A)
- Reduce test_and_set_bit to set_bit in i915_request_submit() (Chris)
- Mark up protected uses of 'i915_request_completed' (Chris)
- Remove extraneous inline modifiers (Chris)
- Add function to define defaults for GuC/HuC enable (John)
- Improve code locality by moving closer to single user (Matt A, Chris)
- Compiler warning fixes (Matt A, Chris)
- Selftest / CI improvements (Chris)
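
For reference, "Constrain pool objects by mapping type" reworks the
buffer-pool API so a caller declares the desired mapping when it takes a
node, and the node remembers that type. A minimal sketch condensed from
the execbuffer hunk in the diff below (error handling trimmed):

    struct intel_gt_buffer_pool_node *pool;
    u32 *cmd;

    /* Ask for a node whose backing store suits the mapping we want */
    pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE,
                                    cache->has_llc ?
                                    I915_MAP_WB : I915_MAP_WC);
    if (IS_ERR(pool))
        return PTR_ERR(pool);

    /* The node carries its type; no I915_MAP_FORCE_* override needed */
    cmd = i915_gem_object_pin_map(pool->obj, pool->type);
    if (IS_ERR(cmd))
        return PTR_ERR(cmd);

The apparent motivation is that reused pool objects no longer have to be
forcibly remapped between WB and WC, since a node is only handed back to
callers requesting the same mapping type.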
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210121150747.GA58732@jlahtine-mobl.ger.corp.intel.com
78 files changed, 1191 insertions, 997 deletions
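
For "Simplify tracking for engine->fw_active and stats.active", the
atomic_t counters and seqlock_t become plain integers guarded by a
seqcount_t: the execlists tasklet already serialises the writers, so only
readers retry. The read side, as it appears in
intel_engine_get_busy_time() in the diff below:

    unsigned int seq;
    ktime_t total;

    do {
        seq = read_seqcount_begin(&engine->stats.lock);
        total = __intel_engine_get_busy_time(engine, now);
    } while (read_seqcount_retry(&engine->stats.lock, seq));

    return total;

The writer side (intel_engine_context_in/out in the new
intel_engine_stats.h) only takes the seqcount for the 0<->1 transitions
of stats.active, so nested context switches on a busy engine stay
lock-free.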
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 25cd9788a4d5..be76054c01d8 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -31,10 +31,13 @@ config DRM_I915_DEBUG select DRM_DEBUG_SELFTEST select DMABUF_SELFTESTS select SW_SYNC # signaling validation framework (igt/syncobj*) + select DRM_I915_WERROR + select DRM_I915_DEBUG_GEM + select DRM_I915_DEBUG_GEM_ONCE + select DRM_I915_DEBUG_MMIO + select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST - select DRM_I915_DEBUG_RUNTIME_PM - select DRM_I915_DEBUG_MMIO default n help Choose this option to turn on extra driver debugging that may affect @@ -69,6 +72,21 @@ config DRM_I915_DEBUG_GEM If in doubt, say "N". +config DRM_I915_DEBUG_GEM_ONCE + bool "Make a GEM debug failure fatal" + default n + depends on DRM_I915_DEBUG_GEM + help + During development, we often only want the very first failure + as that would otherwise be lost in the deluge of subsequent + failures. However, more casual testers may not want to trigger + a hard BUG_ON and hope that the system remains sufficiently usable + to capture a bug report in situ. + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_ERRLOG_GEM bool "Insert extra logging (very verbose) for common GEM errors" default n diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d6ac946d0407..6673362219a2 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -136,6 +136,7 @@ gem-y += \ gem/i915_gem_clflush.o \ gem/i915_gem_client_blt.o \ gem/i915_gem_context.o \ + gem/i915_gem_create.o \ gem/i915_gem_dmabuf.o \ gem/i915_gem_domain.o \ gem/i915_gem_execbuffer.o \ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0189d379a55e..54025001ed7f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2247,7 +2247,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, */ ret = i915_vma_pin_fence(vma); if (ret != 0 && INTEL_GEN(dev_priv) < 4) { - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); vma = ERR_PTR(ret); goto err; } @@ -2265,12 +2265,9 @@ err: void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - i915_gem_object_lock(vma->obj, NULL); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); - i915_gem_object_unpin_from_display_plane(vma); - i915_gem_object_unlock(vma->obj); - + i915_vma_unpin(vma); i915_vma_put(vma); } @@ -15631,15 +15628,6 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } -static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) -{ - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), - }; - - i915_gem_object_wait_priority(obj, 0, &attr); -} - /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @_plane: drm plane to prepare for @@ -15656,6 +15644,9 @@ int intel_prepare_plane_fb(struct drm_plane *_plane, struct drm_plane_state *_new_plane_state) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), + }; struct intel_plane *plane = to_intel_plane(_plane); struct intel_plane_state *new_plane_state = to_intel_plane_state(_new_plane_state); @@ -15695,6 +15686,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, } if (new_plane_state->uapi.fence) { /* explicit 
fencing */ + i915_gem_fence_wait_priority(new_plane_state->uapi.fence, + &attr); ret = i915_sw_fence_await_dma_fence(&state->commit_ready, new_plane_state->uapi.fence, i915_fence_timeout(dev_priv), @@ -15716,7 +15709,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret) return ret; - fb_obj_bump_render_priority(obj); + i915_gem_object_wait_priority(obj, 0, &attr); i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB); if (!new_plane_state->uapi.fence) { /* implicit fencing */ diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 842c04e63214..84f853f113b9 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -256,7 +256,7 @@ static int intelfb_create(struct drm_fb_helper *helper, * If the object is stolen however, it will be full of whatever * garbage was left in there. */ - if (vma->obj->stolen && !prealloc) + if (!i915_gem_object_is_shmem(vma->obj) && !prealloc) memset_io(info->screen_base, 0, info->screen_size); /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ @@ -595,7 +595,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous * full of whatever garbage was left in there. */ if (state == FBINFO_STATE_RUNNING && - intel_fb_obj(&ifbdev->fb->base)->stolen) + !i915_gem_object_is_shmem(intel_fb_obj(&ifbdev->fb->base))) memset_io(info->screen_base, 0, info->screen_size); drm_fb_helper_set_suspend(&ifbdev->helper, state); diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index d898b370d7a4..7b38eee9980f 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -225,8 +225,10 @@ static void frontbuffer_release(struct kref *ref) struct i915_vma *vma; spin_lock(&obj->vma.lock); - for_each_ggtt_vma(vma, obj) + for_each_ggtt_vma(vma, obj) { + i915_vma_clear_scanout(vma); vma->display_alignment = I915_GTT_MIN_ALIGNMENT; + } spin_unlock(&obj->vma.lock); RCU_INIT_POINTER(obj->frontbuffer, NULL); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 6be5d8946c69..8edb9b2cdbeb 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -360,7 +360,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) intel_frontbuffer_flip_complete(overlay->i915, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); i915_vma_put(vma); } @@ -861,7 +861,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, return 0; out_unpin: - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); out_pin_section: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 68f58762d5e3..4d2f40cf237b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -408,7 +408,7 @@ __active_engine(struct i915_request *rq, struct intel_engine_cs **active) } if (i915_request_is_active(rq)) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) *active = locked; ret = true; } @@ -717,7 +717,8 @@ err_free: } static inline struct i915_gem_engines * -__context_engines_await(const struct i915_gem_context *ctx) +__context_engines_await(const struct 
i915_gem_context *ctx, + bool *user_engines) { struct i915_gem_engines *engines; @@ -726,6 +727,10 @@ __context_engines_await(const struct i915_gem_context *ctx) engines = rcu_dereference(ctx->engines); GEM_BUG_ON(!engines); + if (user_engines) + *user_engines = i915_gem_context_user_engines(ctx); + + /* successful await => strong mb */ if (unlikely(!i915_sw_fence_await(&engines->fence))) continue; @@ -749,7 +754,7 @@ context_apply_all(struct i915_gem_context *ctx, struct intel_context *ce; int err = 0; - e = __context_engines_await(ctx); + e = __context_engines_await(ctx, NULL); for_each_gem_engine(ce, e, it) { err = fn(ce, data); if (err) @@ -1075,7 +1080,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, return err; } - e = __context_engines_await(ctx); + e = __context_engines_await(ctx, NULL); if (!e) { i915_active_release(&cb->base); return -ENOENT; @@ -1838,27 +1843,6 @@ replace: return 0; } -static struct i915_gem_engines * -__copy_engines(struct i915_gem_engines *e) -{ - struct i915_gem_engines *copy; - unsigned int n; - - copy = alloc_engines(e->num_engines); - if (!copy) - return ERR_PTR(-ENOMEM); - - for (n = 0; n < e->num_engines; n++) { - if (e->engines[n]) - copy->engines[n] = intel_context_get(e->engines[n]); - else - copy->engines[n] = NULL; - } - copy->num_engines = n; - - return copy; -} - static int get_engines(struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1866,19 +1850,17 @@ get_engines(struct i915_gem_context *ctx, struct i915_context_param_engines __user *user; struct i915_gem_engines *e; size_t n, count, size; + bool user_engines; int err = 0; - err = mutex_lock_interruptible(&ctx->engines_mutex); - if (err) - return err; + e = __context_engines_await(ctx, &user_engines); + if (!e) + return -ENOENT; - e = NULL; - if (i915_gem_context_user_engines(ctx)) - e = __copy_engines(i915_gem_context_engines(ctx)); - mutex_unlock(&ctx->engines_mutex); - if (IS_ERR_OR_NULL(e)) { + if (!user_engines) { + i915_sw_fence_complete(&e->fence); args->size = 0; - return PTR_ERR_OR_ZERO(e); + return 0; } count = e->num_engines; @@ -1929,7 +1911,7 @@ get_engines(struct i915_gem_context *ctx, args->size = size; err_free: - free_engines(e); + i915_sw_fence_complete(&e->fence); return err; } @@ -2095,11 +2077,14 @@ static int copy_ring_size(struct intel_context *dst, static int clone_engines(struct i915_gem_context *dst, struct i915_gem_context *src) { - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; + struct i915_gem_engines *clone, *e; bool user_engines; unsigned long n; + e = __context_engines_await(src, &user_engines); + if (!e) + return -ENOENT; + clone = alloc_engines(e->num_engines); if (!clone) goto err_unlock; @@ -2141,9 +2126,7 @@ static int clone_engines(struct i915_gem_context *dst, } } clone->num_engines = n; - - user_engines = i915_gem_context_user_engines(src); - i915_gem_context_unlock_engines(src); + i915_sw_fence_complete(&e->fence); /* Serialised by constructor */ engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); @@ -2154,7 +2137,7 @@ static int clone_engines(struct i915_gem_context *dst, return 0; err_unlock: - i915_gem_context_unlock_engines(src); + i915_sw_fence_complete(&e->fence); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c new file mode 100644 index 000000000000..45d60e3d98e3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -0,0 +1,113 @@ +// 
SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "gem/i915_gem_ioctls.h" +#include "gem/i915_gem_region.h" + +#include "i915_drv.h" + +static int +i915_gem_create(struct drm_file *file, + struct intel_memory_region *mr, + u64 *size_p, + u32 *handle_p) +{ + struct drm_i915_gem_object *obj; + u32 handle; + u64 size; + int ret; + + GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); + size = round_up(*size_p, mr->min_page_size); + if (size == 0) + return -EINVAL; + + /* For most of the ABI (e.g. mmap) we think in system pages */ + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + /* Allocate the new object */ + obj = i915_gem_object_create_region(mr, size, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + GEM_BUG_ON(size != obj->base.size); + + ret = drm_gem_handle_create(file, &obj->base, &handle); + /* drop reference from allocate - handle holds it now */ + i915_gem_object_put(obj); + if (ret) + return ret; + + *handle_p = handle; + *size_p = size; + return 0; +} + +int +i915_gem_dumb_create(struct drm_file *file, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + enum intel_memory_type mem_type; + int cpp = DIV_ROUND_UP(args->bpp, 8); + u32 format; + + switch (cpp) { + case 1: + format = DRM_FORMAT_C8; + break; + case 2: + format = DRM_FORMAT_RGB565; + break; + case 4: + format = DRM_FORMAT_XRGB8888; + break; + default: + return -EINVAL; + } + + /* have to work out size/pitch and return them */ + args->pitch = ALIGN(args->width * cpp, 64); + + /* align stride to page size so that we can remap */ + if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format, + DRM_FORMAT_MOD_LINEAR)) + args->pitch = ALIGN(args->pitch, 4096); + + if (args->pitch < args->width) + return -EINVAL; + + args->size = mul_u32_u32(args->pitch, args->height); + + mem_type = INTEL_MEMORY_SYSTEM; + if (HAS_LMEM(to_i915(dev))) + mem_type = INTEL_MEMORY_LOCAL; + + return i915_gem_create(file, + intel_memory_region_by_type(to_i915(dev), + mem_type), + &args->size, &args->handle); +} + +/** + * Creates a new mm object and returns a handle to it. 
+ * @dev: drm device pointer + * @data: ioctl data blob + * @file: drm file pointer + */ +int +i915_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_create *args = data; + + i915_gem_flush_free_objects(i915); + + return i915_gem_create(file, + intel_memory_region_by_type(i915, + INTEL_MEMORY_SYSTEM), + &args->size, &args->handle); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index fcce6909f201..36f54cedaaeb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -5,6 +5,7 @@ */ #include "display/intel_frontbuffer.h" +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" @@ -15,13 +16,58 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct i915_vma *vma; + + assert_object_held(obj); + + if (!(obj->write_domain & flush_domains)) + return; + + switch (obj->write_domain) { + case I915_GEM_DOMAIN_GTT: + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) { + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); + } + spin_unlock(&obj->vma.lock); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); + break; + + case I915_GEM_DOMAIN_WC: + wmb(); + break; + + case I915_GEM_DOMAIN_CPU: + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + break; + + case I915_GEM_DOMAIN_RENDER: + if (gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; + break; + } + + obj->write_domain = 0; +} + static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { /* * We manually flush the CPU domain so that we can override and * force the flush for the display, and perform it asyncrhonously. 
*/ - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); if (obj->cache_dirty) i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->write_domain = 0; @@ -80,7 +126,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); + flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -141,7 +187,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); + flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -370,6 +416,7 @@ retry: } vma->display_alignment = max_t(u64, vma->display_alignment, alignment); + i915_vma_mark_scanout(vma); i915_gem_object_flush_if_display_locked(obj); @@ -387,48 +434,6 @@ err: return vma; } -static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_vma *vma; - - if (list_empty(&obj->vma.list)) - return; - - mutex_lock(&i915->ggtt.vm.mutex); - spin_lock(&obj->vma.lock); - for_each_ggtt_vma(vma, obj) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - GEM_BUG_ON(vma->vm != &i915->ggtt.vm); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); - } - spin_unlock(&obj->vma.lock); - mutex_unlock(&i915->ggtt.vm.mutex); - - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - - if (obj->mm.madv == I915_MADV_WILLNEED && - !atomic_read(&obj->mm.shrink_pin)) - list_move_tail(&obj->mm.link, &i915->mm.shrink_list); - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - } -} - -void -i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) -{ - /* Bump the LRU to try and avoid premature eviction whilst flipping */ - i915_gem_object_bump_inactive_ggtt(vma->obj); - - i915_vma_unpin(vma); -} - /** * Moves a single object to the CPU read, and possibly write domain. * @obj: object to act on @@ -451,7 +456,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { @@ -569,9 +574,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, else err = i915_gem_object_set_to_cpu_domain(obj, write_domain); - /* And bump the LRU for this access */ - i915_gem_object_bump_inactive_ggtt(obj); - i915_gem_object_unlock(obj); if (write_domain) @@ -619,7 +621,7 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). 
This @@ -670,7 +672,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b91b32195dcf..d70ca36f74f6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1276,7 +1276,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, int err; if (!pool) { - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE, + cache->has_llc ? + I915_MAP_WB : + I915_MAP_WC); if (IS_ERR(pool)) return PTR_ERR(pool); } @@ -1286,10 +1289,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_pool; - cmd = i915_gem_object_pin_map(pool->obj, - cache->has_llc ? - I915_MAP_FORCE_WB : - I915_MAP_FORCE_WC); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto err_pool; @@ -2458,7 +2458,8 @@ static int eb_parse(struct i915_execbuffer *eb) return -EINVAL; if (!pool) { - pool = intel_gt_get_buffer_pool(eb->engine->gt, len); + pool = intel_gt_get_buffer_pool(eb->engine->gt, len, + I915_MAP_WB); if (IS_ERR(pool)) return PTR_ERR(pool); eb->batch_pool = pool; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 932ee21e6609..194f35342710 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -31,18 +31,13 @@ i915_gem_object_create_lmem(struct drm_i915_private *i915, size, flags); } -struct drm_i915_gem_object * -__i915_gem_lmem_object_create(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +int __i915_gem_lmem_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags) { static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class); @@ -53,5 +48,5 @@ __i915_gem_lmem_object_create(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem, flags); - return obj; + return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index fc3f15580fe3..036d53c01de9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -21,9 +21,9 @@ i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags); -struct drm_i915_gem_object * -__i915_gem_lmem_object_create(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); +int __i915_gem_lmem_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags); #endif /* !__I915_GEM_LMEM_H */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 00d24000b5e8..83c6ee6a509a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -25,7 +25,6 @@ #include 
<linux/sched/mm.h> #include "display/intel_frontbuffer.h" -#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" @@ -313,52 +312,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) queue_work(i915->wq, &i915->mm.free_work); } -static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_level == I915_CACHE_NONE || - obj->cache_level == I915_CACHE_WT); -} - -void -i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, - unsigned int flush_domains) -{ - struct i915_vma *vma; - - assert_object_held(obj); - - if (!(obj->write_domain & flush_domains)) - return; - - switch (obj->write_domain) { - case I915_GEM_DOMAIN_GTT: - spin_lock(&obj->vma.lock); - for_each_ggtt_vma(vma, obj) { - if (i915_vma_unset_ggtt_write(vma)) - intel_gt_flush_ggtt_writes(vma->vm->gt); - } - spin_unlock(&obj->vma.lock); - - i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - break; - - case I915_GEM_DOMAIN_WC: - wmb(); - break; - - case I915_GEM_DOMAIN_CPU: - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - break; - - case I915_GEM_DOMAIN_RENDER: - if (gpu_write_needs_clflush(obj)) - obj->cache_dirty = true; - break; - } - - obj->write_domain = 0; -} - void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, enum fb_op_origin origin) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index be14486f63a7..5274e9d139b4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -188,6 +188,24 @@ i915_gem_object_set_volatile(struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_has_tiling_quirk(struct drm_i915_gem_object *obj) +{ + return test_bit(I915_TILING_QUIRK_BIT, &obj->flags); +} + +static inline void +i915_gem_object_set_tiling_quirk(struct drm_i915_gem_object *obj) +{ + set_bit(I915_TILING_QUIRK_BIT, &obj->flags); +} + +static inline void +i915_gem_object_clear_tiling_quirk(struct drm_i915_gem_object *obj) +{ + clear_bit(I915_TILING_QUIRK_BIT, &obj->flags); +} + +static inline bool i915_gem_object_type_has(const struct drm_i915_gem_object *obj, unsigned long flags) { @@ -384,14 +402,6 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); -enum i915_map_type { - I915_MAP_WB = 0, - I915_MAP_WC, -#define I915_MAP_OVERRIDE BIT(31) - I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE, - I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE, -}; - /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object * @obj: the object to map into kernel address space @@ -435,10 +445,6 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) void __i915_gem_object_release_map(struct drm_i915_gem_object *obj); -void -i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, - unsigned int flush_domains); - int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, unsigned int *needs_clflush); int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, @@ -486,7 +492,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, const struct i915_ggtt_view *view, unsigned int flags); -void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); void 
i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); @@ -512,6 +517,9 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj) obj->cache_dirty = true; } +void i915_gem_fence_wait_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr); + int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout); @@ -540,4 +548,6 @@ i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, __i915_gem_object_invalidate_frontbuffer(obj, origin); } +bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 10cac9fac79b..d6dac21fce0b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -35,7 +35,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, count = div_u64(round_up(vma->size, block_size), block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size); + pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -55,7 +55,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; - cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out_unpin; @@ -257,7 +257,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, count = div_u64(round_up(dst->size, block_size), block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size); + pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -277,7 +277,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; - cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out_unpin; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index e2d9b7e1e152..0438e00d4ca7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -67,6 +67,14 @@ struct drm_i915_gem_object_ops { const char *name; /* friendly name for debug, e.g. lockdep classes */ }; +enum i915_map_type { + I915_MAP_WB = 0, + I915_MAP_WC, +#define I915_MAP_OVERRIDE BIT(31) + I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE, + I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE, +}; + enum i915_mmap_type { I915_MMAP_TYPE_GTT = 0, I915_MMAP_TYPE_WC, @@ -142,8 +150,6 @@ struct drm_i915_gem_object { */ struct list_head obj_link; - /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; union { struct rcu_head rcu; struct llist_node freed; @@ -167,6 +173,7 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_VOLATILE BIT(1) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) #define I915_BO_READONLY BIT(2) +#define I915_TILING_QUIRK_BIT 3 /* unknown swizzling; do not release! */ /* * Is the object to be mapped as read-only to the GPU @@ -275,12 +282,6 @@ struct drm_i915_gem_object { * pages were last acquired. 
*/ bool dirty:1; - - /** - * This is set if the object has been pinned due to unknown - * swizzling. - */ - bool quirked:1; } mm; /** Record of address bit 17 of each page at last unbind. */ @@ -295,6 +296,8 @@ struct drm_i915_gem_object { struct work_struct *work; } userptr; + struct drm_mm_node *stolen; + unsigned long scratch; u64 encode; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 3db3c667c486..43028f3539a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -16,6 +16,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); unsigned long supported = INTEL_INFO(i915)->page_sizes; + bool shrinkable; int i; lockdep_assert_held(&obj->mm.lock); @@ -38,13 +39,6 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.pages = pages; - if (i915_gem_object_is_tiled(obj) && - i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - GEM_BUG_ON(obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; - } - GEM_BUG_ON(!sg_page_sizes); obj->mm.page_sizes.phys = sg_page_sizes; @@ -63,7 +57,16 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); - if (i915_gem_object_is_shrinkable(obj)) { + shrinkable = i915_gem_object_is_shrinkable(obj); + + if (i915_gem_object_is_tiled(obj) && + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_set_tiling_quirk(obj); + shrinkable = false; + } + + if (shrinkable) { struct list_head *list; unsigned long flags; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 3a4dfe2ef1da..3c0b157e2a35 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -213,7 +213,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) if (obj->ops == &i915_gem_phys_ops) return 0; - if (obj->ops != &i915_gem_shmem_ops) + if (!i915_gem_object_is_shmem(obj)) return -EINVAL; err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); @@ -227,7 +227,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) goto err_unlock; } - if (obj->mm.quirked) { + if (i915_gem_object_has_tiling_quirk(obj)) { err = -EFAULT; goto err_unlock; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 40d3e40500fa..215766cc22bf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,6 +11,13 @@ #include "i915_drv.h" +#if defined(CONFIG_X86) +#include <asm/smp.h> +#else +#define wbinvd_on_all_cpus() \ + pr_warn(DRIVER_NAME ": Missing cache flush in %s\n", __func__) +#endif + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("%s\n", dev_name(i915->drm.dev)); @@ -32,13 +39,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); } -static struct drm_i915_gem_object *first_mm_object(struct list_head *list) -{ - return list_first_entry_or_null(list, - struct drm_i915_gem_object, - mm.link); -} - void i915_gem_suspend_late(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -48,6 +48,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) NULL }, **phase; unsigned long flags; + bool flush = false; /* * Neither the BIOS, ourselves or any other kernel @@ 
-73,29 +74,15 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { - LIST_HEAD(keep); - - while ((obj = first_mm_object(*phase))) { - list_move_tail(&obj->mm.link, &keep); - - /* Beware the background _i915_gem_free_objects */ - if (!kref_get_unless_zero(&obj->base.refcount)) - continue; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - - i915_gem_object_lock(obj, NULL); - drm_WARN_ON(&i915->drm, - i915_gem_object_set_to_gtt_domain(obj, false)); - i915_gem_object_unlock(obj); - i915_gem_object_put(obj); - - spin_lock_irqsave(&i915->mm.obj_lock, flags); + list_for_each_entry(obj, *phase, mm.link) { + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0; + __start_cpu_write(obj); /* presume auto-hibernate */ } - - list_splice_tail(&keep, *phase); } spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + if (flush) + wbinvd_on_all_cpus(); } void i915_gem_resume(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 835bd01f2e5d..3e3dad22a683 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -143,6 +143,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, unsigned int flags) { struct drm_i915_gem_object *obj; + int err; /* * NB: Our use of resource_size_t for the size stems from using struct @@ -173,9 +174,18 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (overflows_type(size, obj->base.size)) return ERR_PTR(-E2BIG); - obj = mem->ops->create_object(mem, size, flags); - if (!IS_ERR(obj)) - trace_i915_gem_object_create(obj); + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + err = mem->ops->init_object(mem, obj, size, flags); + if (err) + goto err_object_free; + + trace_i915_gem_object_create(obj); return obj; + +err_object_free: + i915_gem_object_free(obj); + return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 75e8b71c18b9..cf83c208688c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -464,26 +464,21 @@ static int __create_shmem(struct drm_i915_private *i915, return 0; } -static struct drm_i915_gem_object * -create_shmem(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +static int shmem_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags) { static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; struct address_space *mapping; unsigned int cache_level; gfp_t mask; int ret; - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - ret = __create_shmem(i915, &obj->base, size); if (ret) - goto fail; + return ret; mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { @@ -522,11 +517,7 @@ create_shmem(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem, 0); - return obj; - -fail: - i915_gem_object_free(obj); - return ERR_PTR(ret); + return 0; } struct drm_i915_gem_object * @@ -611,7 +602,7 @@ static void release_shmem(struct intel_memory_region *mem) static const struct intel_memory_region_ops shmem_region_ops = { .init = init_shmem, .release = release_shmem, - .create_object = 
create_shmem, + .init_object = shmem_object_init, }; struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) @@ -621,3 +612,8 @@ struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) PAGE_SIZE, 0, &shmem_region_ops); } + +bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj) +{ + return obj->ops == &i915_gem_shmem_ops; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 41b9fbf4dbcc..551935348ad8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -621,18 +621,13 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { .release = i915_gem_object_release_stolen, }; -static struct drm_i915_gem_object * -__i915_gem_object_create_stolen(struct intel_memory_region *mem, - struct drm_mm_node *stolen) +static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + struct drm_mm_node *stolen) { static struct lock_class_key lock_class; - struct drm_i915_gem_object *obj; unsigned int cache_level; - int err = -ENOMEM; - - obj = i915_gem_object_alloc(); - if (!obj) - goto err; + int err; drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); @@ -644,55 +639,47 @@ __i915_gem_object_create_stolen(struct intel_memory_region *mem, err = i915_gem_object_pin_pages(obj); if (err) - goto cleanup; + return err; i915_gem_object_init_memory_region(obj, mem, 0); - return obj; - -cleanup: - i915_gem_object_free(obj); -err: - return ERR_PTR(err); + return 0; } -static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags) { struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; if (!drm_mm_initialized(&i915->mm.stolen)) - return ERR_PTR(-ENODEV); + return -ENODEV; if (size == 0) - return ERR_PTR(-EINVAL); + return -EINVAL; stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return ERR_PTR(-ENOMEM); + return -ENOMEM; ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096); - if (ret) { - obj = ERR_PTR(ret); + if (ret) goto err_free; - } - obj = __i915_gem_object_create_stolen(mem, stolen); - if (IS_ERR(obj)) + ret = __i915_gem_object_create_stolen(mem, obj, stolen); + if (ret) goto err_remove; - return obj; + return 0; err_remove: i915_gem_stolen_remove_node(i915, stolen); err_free: kfree(stolen); - return obj; + return ret; } struct drm_i915_gem_object * @@ -722,7 +709,7 @@ static void release_stolen(struct intel_memory_region *mem) static const struct intel_memory_region_ops i915_region_stolen_ops = { .init = init_stolen, .release = release_stolen, - .create_object = _i915_gem_object_create_stolen, + .init_object = _i915_gem_object_stolen_init, }; struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) @@ -771,16 +758,31 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, goto err_free; } - obj = __i915_gem_object_create_stolen(mem, stolen); - if (IS_ERR(obj)) + obj = i915_gem_object_alloc(); + if (!obj) { + obj = ERR_PTR(-ENOMEM); goto err_stolen; + } + + ret = __i915_gem_object_create_stolen(mem, obj, stolen); + if (ret) 
{ + obj = ERR_PTR(ret); + goto err_object_free; + } i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); return obj; +err_object_free: + i915_gem_object_free(obj); err_stolen: i915_gem_stolen_remove_node(i915, stolen); err_free: kfree(stolen); return obj; } + +bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj) +{ + return obj->ops == &i915_gem_object_stolen_ops; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index 61e028063f9f..b03489706796 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -30,6 +30,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv resource_size_t stolen_offset, resource_size_t size); +bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj); + #define I915_GEM_STOLEN_BIAS SZ_128K #endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ffcaee74a249..d589d3d81085 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -270,14 +270,14 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, obj->mm.madv == I915_MADV_WILLNEED && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (tiling == I915_TILING_NONE) { - GEM_BUG_ON(!obj->mm.quirked); - __i915_gem_object_unpin_pages(obj); - obj->mm.quirked = false; + GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_clear_tiling_quirk(obj); + i915_gem_object_make_shrinkable(obj); } if (!i915_gem_object_is_tiled(obj)) { - GEM_BUG_ON(obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; + GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_make_unshrinkable(obj); + i915_gem_object_set_tiling_quirk(obj); } } mutex_unlock(&obj->mm.lock); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index c1b13ac50d0f..4b9856d5ba14 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -5,6 +5,7 @@ */ #include <linux/dma-fence-array.h> +#include <linux/dma-fence-chain.h> #include <linux/jiffies.h> #include "gt/intel_engine.h" @@ -44,8 +45,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int count, i; int ret; - ret = dma_resv_get_fences_rcu(resv, - &excl, &count, &shared); + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); if (ret) return ret; @@ -91,8 +91,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static void fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { struct i915_request *rq; struct intel_engine_cs *engine; @@ -103,27 +103,47 @@ static void __fence_set_priority(struct dma_fence *fence, rq = to_request(fence); engine = rq->engine; - local_bh_disable(); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) engine->schedule(rq, attr); rcu_read_unlock(); - local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static inline bool __dma_fence_is_chain(const struct dma_fence *fence) +{ + return fence->ops == &dma_fence_chain_ops; +} + +void i915_gem_fence_wait_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) 
{ + if (dma_fence_is_signaled(fence)) + return; + + local_bh_disable(); + /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { struct dma_fence_array *array = to_dma_fence_array(fence); int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], attr); + fence_set_priority(array->fences[i], attr); + } else if (__dma_fence_is_chain(fence)) { + struct dma_fence *iter; + + /* The chain is ordered; if we boost the last, we boost all */ + dma_fence_chain_for_each(iter, fence) { + fence_set_priority(to_dma_fence_chain(iter)->fence, + attr); + break; + } + dma_fence_put(iter); } else { - __fence_set_priority(fence, attr); + fence_set_priority(fence, attr); } + + local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } int @@ -139,12 +159,12 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, int ret; ret = dma_resv_get_fences_rcu(obj->base.resv, - &excl, &count, &shared); + &excl, &count, &shared); if (ret) return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], attr); + i915_gem_fence_wait_priority(shared[i], attr); dma_fence_put(shared[i]); } @@ -154,7 +174,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, attr); + i915_gem_fence_wait_priority(excl, attr); dma_fence_put(excl); } return 0; diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 680bd9442eb0..e08dff376339 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -12,9 +12,9 @@ #include "intel_gt.h" /* Write pde (index) from the page directory @pd to the page table @pt */ -static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, - const unsigned int pde, - const struct i915_page_table *pt) +static void gen6_write_pde(const struct gen6_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) { dma_addr_t addr = pt ? 
px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]); @@ -27,8 +27,6 @@ void gen7_ppgtt_enable(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - enum intel_engine_id id; u32 ecochk; intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B); @@ -41,13 +39,6 @@ void gen7_ppgtt_enable(struct intel_gt *gt) ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; } intel_uncore_write(uncore, GAM_ECOCHK, ecochk); - - for_each_engine(engine, gt, id) { - /* GFX_MODE is per-ring on gen7+ */ - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - } } void gen6_ppgtt_enable(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index 94465374ca2f..8551e6de50e8 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -40,7 +40,7 @@ struct batch_vals { u32 size; }; -static inline int num_primitives(const struct batch_vals *bv) +static int num_primitives(const struct batch_vals *bv) { /* * We need to saturate the GPU with work in order to dispatch @@ -353,19 +353,21 @@ static void gen7_emit_pipeline_flush(struct batch_chunk *batch) static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 8); + u32 *cs = batch_alloc_items(batch, 0, 10); /* ivb: Stall before STATE_CACHE_INVALIDATE */ - *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = GFX_OP_PIPE_CONTROL(5); *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + *cs++ = 0; - *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = GFX_OP_PIPE_CONTROL(5); *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; *cs++ = 0; + *cs++ = 0; batch_advance(batch, cs); } @@ -390,6 +392,17 @@ static void emit_batch(struct i915_vma * const vma, &cb_kernel_ivb, desc_count); + /* Reset inherited context registers */ + gen7_emit_pipeline_invalidate(&cmds); + batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); + batch_add(&cmds, 0xffff0000); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); + batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + gen7_emit_pipeline_invalidate(&cmds); + gen7_emit_pipeline_flush(&cmds); + + /* Switch to the media pipeline and our base address */ gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); @@ -399,9 +412,11 @@ static void emit_batch(struct i915_vma * const vma, gen7_emit_state_base_address(&cmds, descriptors); gen7_emit_pipeline_invalidate(&cmds); + /* Set the clear-residual kernel state */ gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); + /* Execute the kernel on all HW threads */ for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 8066b93e6dc4..07ba524da90b 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -330,7 +330,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) return 0; } -static inline u32 preempt_address(struct intel_engine_cs *engine) +static u32 preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_PREEMPT_ADDR); @@ -488,6 +488,7 @@ static u32 *gen8_emit_wa_tail(struct 
i915_request *rq, u32 *cs) static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs) { + *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */ *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_POLL | @@ -495,6 +496,7 @@ static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = preempt_address(rq->engine); *cs++ = 0; + *cs++ = MI_NOOP; return cs; } diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index a37c968ef8f7..755522ced60d 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -109,7 +109,7 @@ static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) #define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) -static inline unsigned int +static unsigned int gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) { const int shift = gen8_pd_shift(lvl); @@ -125,7 +125,7 @@ gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) return i915_pde_index(end, shift) - *idx; } -static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) +static bool gen8_pd_contains(u64 start, u64 end, int lvl) { const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); @@ -133,7 +133,7 @@ static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) return (start ^ end) & mask && (start & ~mask) == 0; } -static inline unsigned int gen8_pt_count(u64 start, u64 end) +static unsigned int gen8_pt_count(u64 start, u64 end) { GEM_BUG_ON(start >= end); if ((start ^ end) >> gen8_pd_shift(1)) @@ -142,14 +142,13 @@ static inline unsigned int gen8_pt_count(u64 start, u64 end) return end - start; } -static inline unsigned int -gen8_pd_top_count(const struct i915_address_space *vm) +static unsigned int gen8_pd_top_count(const struct i915_address_space *vm) { unsigned int shift = __gen8_pte_shift(vm->top); return (vm->total + (1ull << shift) - 1) >> shift; } -static inline struct i915_page_directory * +static struct i915_page_directory * gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) { struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); @@ -160,7 +159,7 @@ gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); } -static inline struct i915_page_directory * +static struct i915_page_directory * gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) { return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index be2c285a0ac7..34a645d6babd 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -453,16 +453,17 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) { struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs; struct intel_context *ce = rq->context; - unsigned long flags; bool release; - if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + spin_lock(&ce->signal_lock); + if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + spin_unlock(&ce->signal_lock); return; + } - spin_lock_irqsave(&ce->signal_lock, flags); list_del_rcu(&rq->signal_link); release = remove_signaling_context(b, ce); - spin_unlock_irqrestore(&ce->signal_lock, flags); + spin_unlock(&ce->signal_lock); if (release) intel_context_put(ce); @@ -517,8 +518,8 @@ static void print_signals(struct intel_breadcrumbs *b, struct 
drm_printer *p) list_for_each_entry_rcu(rq, &ce->signals, signal_link) drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : + __i915_request_is_complete(rq) ? "!" : + __i915_request_has_started(rq) ? "*" : "", jiffies_to_msecs(jiffies - rq->emitted_jiffies)); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index fa76602f9852..fb1b1d096975 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -342,7 +342,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->schedule = NULL; ewma__engine_latency_init(&engine->latency); - seqlock_init(&engine->stats.lock); + seqcount_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1676,7 +1676,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, ktime_to_ms(intel_engine_get_busy_time(engine, &dummy))); drm_printf(m, "\tForcewake: %x domains, %d active\n", - engine->fw_domain, atomic_read(&engine->fw_active)); + engine->fw_domain, READ_ONCE(engine->fw_active)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); @@ -1754,7 +1754,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, * add it to the total. */ *now = ktime_get(); - if (atomic_read(&engine->stats.active)) + if (READ_ONCE(engine->stats.active)) total = ktime_add(total, ktime_sub(*now, engine->stats.start)); return total; @@ -1773,9 +1773,9 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) ktime_t total; do { - seq = read_seqbegin(&engine->stats.lock); + seq = read_seqcount_begin(&engine->stats.lock); total = __intel_engine_get_busy_time(engine, now); - } while (read_seqretry(&engine->stats.lock, seq)); + } while (read_seqcount_retry(&engine->stats.lock, seq)); return total; } @@ -1811,7 +1811,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) struct intel_timeline *tl = request->context->timeline; list_for_each_entry_from_reverse(request, &tl->requests, link) { - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) break; active = request; @@ -1822,10 +1822,10 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) return active; list_for_each_entry(request, &engine->active.requests, sched.link) { - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) continue; - if (!i915_request_started(request)) + if (!__i915_request_has_started(request)) continue; /* More than one preemptible request may match! 
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2843db731b7d..e67d09259dd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -79,7 +79,7 @@ static int __engine_unpark(struct intel_wakeref *wf) #if IS_ENABLED(CONFIG_LOCKDEP) -static inline unsigned long __timeline_mark_lock(struct intel_context *ce) +static unsigned long __timeline_mark_lock(struct intel_context *ce) { unsigned long flags; @@ -89,8 +89,8 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce) return flags; } -static inline void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) +static void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); local_irq_restore(flags); @@ -98,13 +98,13 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #else -static inline unsigned long __timeline_mark_lock(struct intel_context *ce) +static unsigned long __timeline_mark_lock(struct intel_context *ce) { return 0; } -static inline void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) +static void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h new file mode 100644 index 000000000000..24fbdd94351a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_ENGINE_STATS_H__ +#define __INTEL_ENGINE_STATS_H__ + +#include <linux/atomic.h> +#include <linux/ktime.h> +#include <linux/seqlock.h> + +#include "i915_gem.h" /* GEM_BUG_ON */ +#include "intel_engine.h" + +static inline void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (engine->stats.active) { + engine->stats.active++; + return; + } + + /* The writer is serialised; but the pmu reader may be from hardirq */ + local_irq_save(flags); + write_seqcount_begin(&engine->stats.lock); + + engine->stats.start = ktime_get(); + engine->stats.active++; + + write_seqcount_end(&engine->stats.lock); + local_irq_restore(flags); + + GEM_BUG_ON(!engine->stats.active); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + GEM_BUG_ON(!engine->stats.active); + if (engine->stats.active > 1) { + engine->stats.active--; + return; + } + + local_irq_save(flags); + write_seqcount_begin(&engine->stats.lock); + + engine->stats.active--; + engine->stats.total = + ktime_add(engine->stats.total, + ktime_sub(ktime_get(), engine->stats.start)); + + write_seqcount_end(&engine->stats.lock); + local_irq_restore(flags); +} + +#endif /* __INTEL_ENGINE_STATS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index df62e793e747..d2346b425547 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -319,7 +319,7 @@ struct intel_engine_cs { * as possible. */ enum forcewake_domains fw_domain; - atomic_t fw_active; + unsigned int fw_active; unsigned long context_tag; @@ -516,12 +516,12 @@ struct intel_engine_cs { /** * @active: Number of contexts currently scheduled in. */ - atomic_t active; + unsigned int active; /** * @lock: Lock protecting the below fields. 
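The new intel_engine_stats.h above can make both counters plain integers because every writer runs from the single submission tasklet; only the 0<->1 transitions of stats.active need to touch the clock, with the seqcount plus local irq disabling protecting the hardirq PMU reader. Stripped of that protection, the accounting reduces to this simplified model (illustrative only):

    #include <linux/ktime.h>

    static unsigned int active;     /* contexts currently executing */
    static ktime_t start, total;

    static void context_in(void)
    {
            if (!active++)          /* idle -> busy: start the clock */
                    start = ktime_get();
    }

    static void context_out(void)
    {
            if (!--active)          /* busy -> idle: accumulate */
                    total = ktime_add(total,
                                      ktime_sub(ktime_get(), start));
    }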
*/ - seqlock_t lock; + seqcount_t lock; /** * @total: Total time this engine was busy. diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index d7d5a58990bb..ac1be7a632d3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -115,6 +115,7 @@ #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_engine_stats.h" #include "intel_execlists_submission.h" #include "intel_gt.h" #include "intel_gt_pm.h" @@ -230,8 +231,7 @@ active_request(const struct intel_timeline * const tl, struct i915_request *rq) return __active_request(tl, rq, 0); } -static inline void -ring_set_paused(const struct intel_engine_cs *engine, int state) +static void ring_set_paused(const struct intel_engine_cs *engine, int state) { /* * We inspect HWS_PREEMPT with a semaphore inside @@ -244,12 +244,12 @@ ring_set_paused(const struct intel_engine_cs *engine, int state) wmb(); } -static inline struct i915_priolist *to_priolist(struct rb_node *rb) +static struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); } -static inline int rq_prio(const struct i915_request *rq) +static int rq_prio(const struct i915_request *rq) { return READ_ONCE(rq->sched.attr.priority); } @@ -299,8 +299,8 @@ static int virtual_prio(const struct intel_engine_execlists *el) return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; } -static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *rq) +static bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int last_prio; @@ -351,7 +351,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, queue_prio(&engine->execlists)) > last_prio; } -__maybe_unused static inline bool +__maybe_unused static bool assert_priority_queue(const struct i915_request *prev, const struct i915_request *next) { @@ -418,7 +418,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) return __unwind_incomplete_requests(engine); } -static inline void +static void execlists_context_status_change(struct i915_request *rq, unsigned long status) { /* @@ -432,39 +432,6 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -static void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (atomic_add_unless(&engine->stats.active, 1, 0)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (!atomic_add_unless(&engine->stats.active, 1, 0)) { - engine->stats.start = ktime_get(); - atomic_inc(&engine->stats.active); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - GEM_BUG_ON(!atomic_read(&engine->stats.active)); - - if (atomic_add_unless(&engine->stats.active, -1, 1)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (atomic_dec_and_test(&engine->stats.active)) { - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - static void reset_active(struct i915_request *rq, struct intel_engine_cs *engine) { @@ -503,7 +470,7 @@ static void reset_active(struct i915_request *rq, ce->lrc.lrca = lrc_update_regs(ce, engine, head); } -static inline 
struct intel_engine_cs * +static struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { struct intel_engine_cs * const engine = rq->engine; @@ -539,7 +506,7 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid |= engine->execlists.ccid; __intel_gt_pm_get(engine->gt); - if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) + if (engine->fw_domain && !engine->fw_active++) intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -549,7 +516,7 @@ __execlists_schedule_in(struct i915_request *rq) return engine; } -static inline void execlists_schedule_in(struct i915_request *rq, int idx) +static void execlists_schedule_in(struct i915_request *rq, int idx) { struct intel_context * const ce = rq->context; struct intel_engine_cs *old; @@ -608,9 +575,9 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_hi_schedule(&ve->base.execlists.tasklet); } -static inline void __execlists_schedule_out(struct i915_request *rq) +static void __execlists_schedule_out(struct i915_request * const rq, + struct intel_context * const ce) { - struct intel_context * const ce = rq->context; struct intel_engine_cs * const engine = rq->engine; unsigned int ccid; @@ -621,6 +588,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq) */ CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid); + GEM_BUG_ON(ce->inflight != engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) lrc_check_regs(ce, engine, "after"); @@ -645,7 +613,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq) lrc_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) + if (engine->fw_domain && !--engine->fw_active) intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); intel_gt_pm_put_async(engine->gt); @@ -660,10 +628,12 @@ static inline void __execlists_schedule_out(struct i915_request *rq) */ if (ce->engine != engine) kick_siblings(rq, ce); + + WRITE_ONCE(ce->inflight, NULL); + intel_context_put(ce); } -static inline void -execlists_schedule_out(struct i915_request *rq) +static inline void execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; @@ -671,12 +641,8 @@ execlists_schedule_out(struct i915_request *rq) GEM_BUG_ON(!ce->inflight); ce->inflight = ptr_dec(ce->inflight); - if (!__intel_context_inflight_count(ce->inflight)) { - GEM_BUG_ON(ce->inflight != rq->engine); - __execlists_schedule_out(rq); - WRITE_ONCE(ce->inflight, NULL); - intel_context_put(ce); - } + if (!__intel_context_inflight_count(ce->inflight)) + __execlists_schedule_out(rq, ce); i915_request_put(rq); } @@ -728,7 +694,7 @@ static u64 execlists_update_context(struct i915_request *rq) return desc; } -static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) +static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) { if (execlists->ctrl_reg) { writel(lower_32_bits(desc), execlists->submit_reg + port * 2); @@ -757,7 +723,7 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) return buf; } -static __maybe_unused void +static __maybe_unused noinline void trace_ports(const struct intel_engine_execlists *execlists, const char *msg, struct i915_request * const *ports) @@ -774,13 +740,13 @@ trace_ports(const 
struct intel_engine_execlists *execlists, dump_port(p1, sizeof(p1), ", ", ports[1])); } -static inline bool +static bool reset_in_progress(const struct intel_engine_execlists *execlists) { return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } -static __maybe_unused bool +static __maybe_unused noinline bool assert_pending_valid(const struct intel_engine_execlists *execlists, const char *msg) { @@ -1258,12 +1224,20 @@ static void set_preempt_timeout(struct intel_engine_cs *engine, active_preempt_timeout(engine, rq)); } +static bool completed(const struct i915_request *rq) +{ + if (i915_request_has_sentinel(rq)) + return false; + + return __i915_request_is_complete(rq); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; - struct i915_request *last = *execlists->active; + struct i915_request *last, * const *active; struct virtual_engine *ve; struct rb_node *rb; bool submit = false; @@ -1300,21 +1274,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * i.e. we will retrigger preemption following the ack in case * of trouble. * - * In theory we can skip over completed contexts that have not - * yet been processed by events (as those events are in flight): - * - * while ((last = *active) && i915_request_completed(last)) - * active++; - * - * However, the GPU cannot handle this as it will ultimately - * find itself trying to jump back into a context it has just - * completed and barf. */ + active = execlists->active; + while ((last = *active) && completed(last)) + active++; if (last) { - if (__i915_request_is_complete(last)) { - goto check_secondary; - } else if (need_preempt(engine, last)) { + if (need_preempt(engine, last)) { ENGINE_TRACE(engine, "preempting last=%llx:%lld, prio=%d, hint=%d\n", last->fence.context, @@ -1393,9 +1359,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * we hopefully coalesce several updates into a single * submission. */ -check_secondary: - if (!list_is_last(&last->sched.link, - &engine->active.requests)) { + if (active[1]) { /* * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. @@ -1596,7 +1560,7 @@ done: * of ordered contexts. */ if (submit && - memcmp(execlists->active, + memcmp(active, execlists->pending, (port - execlists->pending) * sizeof(*port))) { *port = NULL; @@ -1604,7 +1568,7 @@ done: execlists_schedule_in(*port, port - execlists->pending); WRITE_ONCE(execlists->yield, -1); - set_preempt_timeout(engine, *execlists->active); + set_preempt_timeout(engine, *active); execlists_submit_ports(engine); } else { ring_set_paused(engine, 0); @@ -1621,12 +1585,12 @@ static void execlists_dequeue_irq(struct intel_engine_cs *engine) local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ } -static inline void clear_ports(struct i915_request **ports, int count) +static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); } -static inline void +static void copy_ports(struct i915_request **dst, struct i915_request **src, int count) { /* A memcpy_p() would be very useful here! 
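The dequeue rework above replaces the list-based ELSP[1] check with direct inspection of the NULL-terminated execlists->active port array, first stepping past entries the GPU has already retired. Note that the completed() helper deliberately reports a sentinel request as not completed, so a sentinel is never skipped and still acts as a barrier. The invariant being relied on, in outline (names as in the hunk above):

    struct i915_request * const *port = execlists->active;

    /* skip ports whose context the GPU has finished with */
    while (*port && completed(*port))
            port++;

    /*
     * *port is now the oldest request still in flight (or NULL if the
     * engine is idle); a non-NULL port[1] means a second context still
     * occupies ELSP, so timeslicing may beat queueing more work.
     */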
*/ @@ -1660,8 +1624,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists, return inactive; } -static inline void -invalidate_csb_entries(const u64 *first, const u64 *last) +static void invalidate_csb_entries(const u64 *first, const u64 *last) { clflush((void *)first); clflush((void *)last); @@ -1693,7 +1656,7 @@ invalidate_csb_entries(const u64 *first, const u64 *last) * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from */ -static inline bool gen12_csb_parse(const u64 csb) +static bool gen12_csb_parse(const u64 csb) { bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb)); bool new_queue = @@ -1720,7 +1683,7 @@ static inline bool gen12_csb_parse(const u64 csb) return false; } -static inline bool gen8_csb_parse(const u64 csb) +static bool gen8_csb_parse(const u64 csb) { return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } @@ -1759,8 +1722,7 @@ wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) return entry; } -static inline u64 -csb_read(const struct intel_engine_cs *engine, u64 * const csb) +static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb) { u64 entry = READ_ONCE(*csb); @@ -2026,6 +1988,9 @@ static void __execlists_hold(struct i915_request *rq) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; @@ -2124,6 +2089,9 @@ static void __execlists_unhold(struct i915_request *rq) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Propagate any change in error status */ if (rq->fence.error) i915_request_set_error_once(w, rq->fence.error); @@ -3180,8 +3148,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } } -static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine) +static void logical_ring_default_irqs(struct intel_engine_cs *engine) { unsigned int shift = 0; @@ -3296,7 +3263,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) old = fetch_and_zero(&ve->request); if (old) { - GEM_BUG_ON(!i915_request_completed(old)); + GEM_BUG_ON(!__i915_request_is_complete(old)); __i915_request_submit(old); i915_request_put(old); } @@ -3573,7 +3540,7 @@ static void virtual_submit_request(struct i915_request *rq) } if (ve->request) { /* background completion from preempt-to-busy */ - GEM_BUG_ON(!i915_request_completed(ve->request)); + GEM_BUG_ON(!__i915_request_is_complete(ve->request)); __i915_request_submit(ve->request); i915_request_put(ve->request); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 104cb30e8c13..06d84cf09570 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -145,7 +145,8 @@ static void pool_retire(struct i915_active *ref) } static struct intel_gt_buffer_pool_node * -node_create(struct intel_gt_buffer_pool *pool, size_t sz) +node_create(struct intel_gt_buffer_pool *pool, size_t sz, + enum i915_map_type type) { struct intel_gt *gt = to_gt(pool); struct intel_gt_buffer_pool_node *node; @@ -169,12 +170,14 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz) i915_gem_object_set_readonly(obj); + node->type = type; node->obj = obj; return node; } struct intel_gt_buffer_pool_node * -intel_gt_get_buffer_pool(struct 
intel_gt *gt, size_t size) +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size, + enum i915_map_type type) { struct intel_gt_buffer_pool *pool = &gt->buffer_pool; struct intel_gt_buffer_pool_node *node; @@ -191,6 +194,9 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) if (node->obj->base.size < size) continue; + if (node->type != type) + continue; + age = READ_ONCE(node->age); if (!age) continue; @@ -205,7 +211,7 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) rcu_read_unlock(); if (&node->link == list) { - node = node_create(pool, size); + node = node_create(pool, size, type); if (IS_ERR(node)) return node; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h index 42cbac003e8a..6068f8f1762e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h @@ -15,7 +15,8 @@ struct intel_gt; struct i915_request; struct intel_gt_buffer_pool_node * -intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size); +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size, + enum i915_map_type type); static inline int intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index bcf1658c9633..d8d82c890da8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -11,10 +11,9 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include "gem/i915_gem_object_types.h" #include "i915_active_types.h" -struct drm_i915_gem_object; - struct intel_gt_buffer_pool { spinlock_t lock; struct list_head cache_list[4]; @@ -31,6 +30,7 @@ struct intel_gt_buffer_pool_node { struct rcu_head rcu; }; unsigned long age; + enum i915_map_type type; }; #endif /* INTEL_GT_BUFFER_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a0fc78c89b61..94f485b591af 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1035,7 +1035,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) return cs; } -static inline u32 context_wa_bb_offset(const struct intel_context *ce) +static u32 context_wa_bb_offset(const struct intel_context *ce) { return PAGE_SIZE * ce->wa_bb_page; } @@ -1098,7 +1098,7 @@ setup_indirect_ctx_bb(const struct intel_context *ce, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc.
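With enum i915_map_type folded into the cache key above, a pooled object that was last mapped write-back can no longer be handed to a user wanting it write-combined (or vice versa), presumably avoiding thrashing an existing kernel mapping of the other type. A hypothetical call site for the new signature:

    struct intel_gt_buffer_pool_node *node;

    /* ask for a scratch object of at least one page, to be mapped WC;
     * only cached nodes created with the same mapping type will match */
    node = intel_gt_get_buffer_pool(gt, PAGE_SIZE, I915_MAP_WC);
    if (IS_ERR(node))
            return PTR_ERR(node);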
*/ -static inline u32 lrc_descriptor(const struct intel_context *ce) +static u32 lrc_descriptor(const struct intel_context *ce) { u32 desc; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index c4512ee4daf2..8acb84960cd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -472,7 +472,7 @@ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, return table->table[I915_MOCS_PTE].l3cc_value; } -static inline u32 l3cc_combine(u16 low, u16 high) +static u32 l3cc_combine(u16 low, u16 high) { return low | (u32)high << 16; } diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 46d9aceda64c..96b85a10ef33 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -80,7 +80,7 @@ void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl) kfree(pt); } -static inline void +static void write_dma_entry(struct drm_i915_gem_object * const pdma, const unsigned short idx, const u64 encoded_entry) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index d7b8e4457fc2..35504c97f11d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -49,7 +49,7 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) return rc6_to_gt(rc)->i915; } -static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) { intel_uncore_write_fw(uncore, reg, val); } diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 83992312a34b..60393ce5614d 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -98,7 +98,7 @@ region_lmem_init(struct intel_memory_region *mem) static const struct intel_memory_region_ops intel_region_lmem_ops = { .init = region_lmem_init, .release = region_lmem_release, - .create_object = __i915_gem_lmem_object_create, + .init_object = __i915_gem_lmem_object_init, }; struct intel_memory_region * diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 9d177297db79..61410cd62927 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -151,8 +151,7 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { RQ_TRACE(rq, "guilty? %s\n", yesno(guilty)); - - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rcu_read_lock(); /* protect the GEM context */ if (guilty) { @@ -1110,7 +1109,7 @@ error: goto finish; } -static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) +static int intel_gt_reset_engine(struct intel_engine_cs *engine) { return __intel_gt_reset(engine->gt, engine->mask); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 06385550450c..78d1360caa0f 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -42,7 +42,7 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
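The open-coded vma->obj->stolen test in intel_ring_pin() above becomes i915_gem_object_is_stolen() because the stolen node now shares a union with other backing-store state, so peeking at the field directly is no longer a reliable discriminator. The predicate itself is not shown in this section; a plausible shape, purely as a sketch, would key off the object's ops table:

    /* hypothetical sketch - the real helper lives in the stolen code */
    static inline bool
    i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj)
    {
            return obj->ops == &i915_gem_object_stolen_ops; /* assumed name */
    }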
*/ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - if (vma->obj->stolen) + if (i915_gem_object_is_stolen(vma->obj)) flags |= PIN_MAPPABLE; else flags |= PIN_HIGH; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 20f42722be8b..4984ff565424 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -122,31 +122,27 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) hwsp = RING_HWS_PGA(engine->mmio_base); } - intel_uncore_write(engine->uncore, hwsp, offset); - intel_uncore_posting_read(engine->uncore, hwsp); + intel_uncore_write_fw(engine->uncore, hwsp, offset); + intel_uncore_posting_read_fw(engine->uncore, hwsp); } static void flush_cs_tlb(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - - if (!IS_GEN_RANGE(dev_priv, 6, 7)) + if (!IS_GEN_RANGE(engine->i915, 6, 7)) return; /* ring should be idle before issuing a sync flush*/ - drm_WARN_ON(&dev_priv->drm, - (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - - ENGINE_WRITE(engine, RING_INSTPM, - _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | - INSTPM_SYNC_FLUSH)); - if (intel_wait_for_register(engine->uncore, - RING_INSTPM(engine->mmio_base), - INSTPM_SYNC_FLUSH, 0, - 1000)) - drm_err(&dev_priv->drm, - "%s: wait for SyncFlush to complete for TLB invalidation timed out\n", - engine->name); + GEM_DEBUG_WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + + ENGINE_WRITE_FW(engine, RING_INSTPM, + _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | + INSTPM_SYNC_FLUSH)); + if (__intel_wait_for_register_fw(engine->uncore, + RING_INSTPM(engine->mmio_base), + INSTPM_SYNC_FLUSH, 0, + 2000, 0, NULL)) + ENGINE_TRACE(engine, + "wait for SyncFlush to complete for TLB invalidation timed out\n"); } static void ring_setup_status_page(struct intel_engine_cs *engine) @@ -157,21 +153,6 @@ static void ring_setup_status_page(struct intel_engine_cs *engine) flush_cs_tlb(engine); } -static bool stop_ring(struct intel_engine_cs *engine) -{ - intel_engine_stop_cs(engine); - - ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL)); - - ENGINE_WRITE(engine, RING_HEAD, 0); - ENGINE_WRITE(engine, RING_TAIL, 0); - - /* The ring must be empty before it is disabled */ - ENGINE_WRITE(engine, RING_CTL, 0); - - return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; -} - static struct i915_address_space *vm_alias(struct i915_address_space *vm) { if (i915_is_ggtt(vm)) @@ -189,9 +170,16 @@ static void set_pp_dir(struct intel_engine_cs *engine) { struct i915_address_space *vm = vm_alias(engine->gt->vm); - if (vm) { - ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); - ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm)); + if (!vm) + return; + + ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); + ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm)); + + if (INTEL_GEN(engine->i915) >= 7) { + ENGINE_WRITE_FW(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); } } @@ -199,38 +187,10 @@ static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->legacy.ring; - int ret = 0; ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", ring->head, ring->tail); - intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - - /* WaClearRingBufHeadRegAtInit:ctg,elk */ - if (!stop_ring(engine)) { - /* G45 ring initialization often fails to reset head to zero */ - drm_dbg(&dev_priv->drm, "%s 
head not reset to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_HEAD), - ENGINE_READ(engine, RING_TAIL), - ENGINE_READ(engine, RING_START)); - - if (!stop_ring(engine)) { - drm_err(&dev_priv->drm, - "failed to set %s head to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_HEAD), - ENGINE_READ(engine, RING_TAIL), - ENGINE_READ(engine, RING_START)); - ret = -EIO; - goto out; - } - } - if (HWS_NEEDS_PHYSICAL(dev_priv)) ring_setup_phys_status_page(engine); else @@ -247,7 +207,7 @@ static int xcs_resume(struct intel_engine_cs *engine) * also enforces ordering), otherwise the hw might lose the new ring * register values. */ - ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma)); + ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma)); /* Check that the ring offsets point within the ring! */ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); @@ -257,46 +217,44 @@ static int xcs_resume(struct intel_engine_cs *engine) set_pp_dir(engine); /* First wake the ring up to an empty/idle ring */ - ENGINE_WRITE(engine, RING_HEAD, ring->head); - ENGINE_WRITE(engine, RING_TAIL, ring->head); + ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); + ENGINE_WRITE_FW(engine, RING_TAIL, ring->head); ENGINE_POSTING_READ(engine, RING_TAIL); - ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID); + ENGINE_WRITE_FW(engine, RING_CTL, + RING_CTL_SIZE(ring->size) | RING_VALID); /* If the head is still not zero, the ring is dead */ - if (intel_wait_for_register(engine->uncore, - RING_CTL(engine->mmio_base), - RING_VALID, RING_VALID, - 50)) { - drm_err(&dev_priv->drm, "%s initialization failed " - "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & RING_VALID, - ENGINE_READ(engine, RING_HEAD), ring->head, - ENGINE_READ(engine, RING_TAIL), ring->tail, - ENGINE_READ(engine, RING_START), - i915_ggtt_offset(ring->vma)); - ret = -EIO; - goto out; + if (__intel_wait_for_register_fw(engine->uncore, + RING_CTL(engine->mmio_base), + RING_VALID, RING_VALID, + 5000, 0, NULL)) { + drm_err(&dev_priv->drm, + "%s initialization failed; " + "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", + engine->name, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & RING_VALID, + ENGINE_READ(engine, RING_HEAD), ring->head, + ENGINE_READ(engine, RING_TAIL), ring->tail, + ENGINE_READ(engine, RING_START), + i915_ggtt_offset(ring->vma)); + return -EIO; } if (INTEL_GEN(dev_priv) > 2) - ENGINE_WRITE(engine, - RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_WRITE_FW(engine, + RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); /* Now awake, let it get started */ if (ring->tail != ring->head) { - ENGINE_WRITE(engine, RING_TAIL, ring->tail); + ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail); ENGINE_POSTING_READ(engine, RING_TAIL); } /* Papering over lost _interrupts_ immediately following the restart */ intel_engine_signal_breadcrumbs(engine); -out: - intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - - return ret; + return 0; } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -332,11 +290,25 @@ static void xcs_sanitize(struct intel_engine_cs *engine) clflush_cache_range(engine->status_page.addr, PAGE_SIZE); } -static void reset_prepare(struct intel_engine_cs *engine) +static bool stop_ring(struct intel_engine_cs *engine) { - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; + /* Empty the ring by skipping to the end */ + ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL)); + ENGINE_POSTING_READ(engine, RING_HEAD); + /* The ring must be empty before it is disabled */ + ENGINE_WRITE_FW(engine, RING_CTL, 0); + ENGINE_POSTING_READ(engine, RING_CTL); + + /* Then reset the disabled ring */ + ENGINE_WRITE_FW(engine, RING_HEAD, 0); + ENGINE_WRITE_FW(engine, RING_TAIL, 0); + + return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0; +} + +static void reset_prepare(struct intel_engine_cs *engine) +{ /* * We stop engines, otherwise we might get failed reset and a * dead gpu (on elk). 
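Several writes in these hunks use the masked-bit helpers. RING_MI_MODE and similar registers are "masked": the upper 16 bits of a write select which of the lower 16 bits take effect, so individual bits can be set or cleared atomically without a read-modify-write cycle. The helpers expand to roughly:

    /* bits [31:16] = write-enable mask for bits [15:0] */
    #define _MASKED_BIT_ENABLE(b)   (((b) << 16) | (b))   /* set bit(s) */
    #define _MASKED_BIT_DISABLE(b)  ((b) << 16)           /* clear bit(s) */

    /* e.g. park the ring, then release it again */
    ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
    ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));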
Also as modern gpu as kbl can suffer @@ -348,30 +320,35 @@ static void reset_prepare(struct intel_engine_cs *engine) * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) * * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * WaClearRingBufHeadRegAtInit:ctg,elk * * FIXME: Wa for more modern gens needs to be validated */ ENGINE_TRACE(engine, "\n"); + intel_engine_stop_cs(engine); - if (intel_engine_stop_cs(engine)) - ENGINE_TRACE(engine, "timed out on STOP_RING\n"); - - intel_uncore_write_fw(uncore, - RING_HEAD(base), - intel_uncore_read_fw(uncore, RING_TAIL(base))); - intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ - - intel_uncore_write_fw(uncore, RING_HEAD(base), 0); - intel_uncore_write_fw(uncore, RING_TAIL(base), 0); - intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - intel_uncore_write_fw(uncore, RING_CTL(base), 0); + if (!stop_ring(engine)) { + /* G45 ring initialization often fails to reset head to zero */ + drm_dbg(&engine->i915->drm, + "%s head not reset to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ_FW(engine, RING_CTL), + ENGINE_READ_FW(engine, RING_HEAD), + ENGINE_READ_FW(engine, RING_TAIL), + ENGINE_READ_FW(engine, RING_START)); + } - /* Check acts as a post */ - if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - ENGINE_TRACE(engine, "ring head [%x] not parked\n", - intel_uncore_read_fw(uncore, RING_HEAD(base))); + if (!stop_ring(engine)) { + drm_err(&engine->i915->drm, + "failed to set %s head to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ_FW(engine, RING_CTL), + ENGINE_READ_FW(engine, RING_HEAD), + ENGINE_READ_FW(engine, RING_TAIL), + ENGINE_READ_FW(engine, RING_START)); + } } static void reset_rewind(struct intel_engine_cs *engine, bool stalled) @@ -382,12 +359,14 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) rq = NULL; spin_lock_irqsave(&engine->active.lock, flags); + rcu_read_lock(); list_for_each_entry(pos, &engine->active.requests, sched.link) { - if (!i915_request_completed(pos)) { + if (!__i915_request_is_complete(pos)) { rq = pos; break; } } + rcu_read_unlock(); /* * The guilty request will get skipped on a hung engine. 
@@ -663,9 +642,9 @@ static int load_pd_dir(struct i915_request *rq, return rq->engine->emit_flush(rq, EMIT_FLUSH); } -static inline int mi_set_context(struct i915_request *rq, - struct intel_context *ce, - u32 flags) +static int mi_set_context(struct i915_request *rq, + struct intel_context *ce, + u32 flags) { struct intel_engine_cs *engine = rq->engine; struct drm_i915_private *i915 = engine->i915; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 69e1bd46cc46..ee5835c29c03 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -43,7 +43,7 @@ static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) return mask & ~rps->pm_intrmsk_mbz; } -static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) { intel_uncore_write_fw(uncore, reg, val); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7fe05918a76e..037b0e3ccbed 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -582,11 +582,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, rcu_read_lock(); cl = rcu_dereference(from->hwsp_cacheline); - if (i915_request_completed(from)) /* confirm cacheline is valid */ + if (i915_request_signaled(from)) /* confirm cacheline is valid */ goto unlock; if (unlikely(!i915_active_acquire_if_busy(&cl->active))) goto unlock; /* seqno wrapped and completed! */ - if (unlikely(i915_request_completed(from))) + if (unlikely(__i915_request_is_complete(from))) goto release; rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d99773e6776e..3fdcd5ff71dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1304,7 +1304,7 @@ bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) } __maybe_unused -static inline bool is_nonpriv_flags_valid(u32 flags) +static bool is_nonpriv_flags_valid(u32 flags) { /* Check only valid flag bits are set */ if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 460c3e9542f4..463bb6a700c8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -704,6 +704,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; + unsigned long count; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -721,6 +722,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); + count = 0; do { if (active) { struct i915_request *rq; @@ -770,9 +772,13 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) err = -EINVAL; break; } + + count++; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); st_engine_heartbeat_enable(engine); + pr_info("%s: Completed %lu %s resets\n", + engine->name, count, active ?
"active" : "idle"); if (err) break; @@ -1623,7 +1629,8 @@ static int igt_reset_queue(void *arg) prev = rq; count++; } while (time_before(jiffies, end_time)); - pr_info("%s: Completed %d resets\n", engine->name, count); + pr_info("%s: Completed %d queued resets\n", + engine->name, count); *h.batch = MI_BATCH_BUFFER_END; intel_gt_chipset_flush(engine->gt); @@ -1720,7 +1727,8 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", engine->name, mode, p->name); - tasklet_disable(t); + if (t->func) + tasklet_disable(t); if (strcmp(p->name, "softirq")) local_bh_disable(); p->critical_section_begin(); @@ -1730,8 +1738,10 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, p->critical_section_end(); if (strcmp(p->name, "softirq")) local_bh_enable(); - tasklet_enable(t); - tasklet_hi_schedule(t); + if (t->func) { + tasklet_enable(t); + tasklet_hi_schedule(t); + } if (err) pr_err("i915_reset_engine(%s:%s) failed under %s\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index b7befcfbdcde..8784257ec808 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -321,7 +321,10 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - tasklet_disable(&engine->execlists.tasklet); + struct tasklet_struct *t = &engine->execlists.tasklet; + + if (t->func) + tasklet_disable(t); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { @@ -345,8 +348,10 @@ static int igt_atomic_engine_reset(void *arg) } intel_engine_pm_put(engine); - tasklet_enable(&engine->execlists.tasklet); - tasklet_hi_schedule(&engine->execlists.tasklet); + if (t->func) { + tasklet_enable(t); + tasklet_hi_schedule(t); + } if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 5982b62f913d..a4d8fc9e2374 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -33,7 +33,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) struct file *file; void *ptr; - if (obj->ops == &i915_gem_shmem_ops) { + if (i915_gem_object_is_shmem(obj)) { file = obj->base.filp; atomic_long_inc(&file->f_count); return file; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6a0452815c41..6abb8f2dc33d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -15,6 +15,29 @@ static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; +static void uc_expand_default_options(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + + if (i915->params.enable_guc != -1) + return; + + /* Don't enable GuC/HuC on pre-Gen12 */ + if (INTEL_GEN(i915) < 12) { + i915->params.enable_guc = 0; + return; + } + + /* Don't enable GuC/HuC on older Gen12 platforms */ + if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + i915->params.enable_guc = 0; + return; + } + + /* Default: enable HuC authentication only */ + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; +} + /* Reset GuC providing us with fresh state for both GuC and HuC. 
*/ static int __intel_uc_reset_hw(struct intel_uc *uc) @@ -52,9 +75,6 @@ static void __confirm_options(struct intel_uc *uc) yesno(intel_uc_wants_guc_submission(uc)), yesno(intel_uc_wants_huc(uc))); - if (i915->params.enable_guc == -1) - return; - if (i915->params.enable_guc == 0) { GEM_BUG_ON(intel_uc_wants_guc(uc)); GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); @@ -79,8 +99,7 @@ static void __confirm_options(struct intel_uc *uc) "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC submission is N/A"); - if (i915->params.enable_guc & ~(ENABLE_GUC_SUBMISSION | - ENABLE_GUC_LOAD_HUC)) + if (i915->params.enable_guc & ~ENABLE_GUC_MASK) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "undocumented flag"); @@ -88,6 +107,8 @@ static void __confirm_options(struct intel_uc *uc) void intel_uc_init_early(struct intel_uc *uc) { + uc_expand_default_options(uc); + intel_guc_init_early(&uc->guc); intel_huc_init_early(&uc->huc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 602f1a0bc587..67b06fde1225 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -152,16 +152,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) uc_fw->path = NULL; } } - - /* We don't want to enable GuC/HuC on pre-Gen11 by default */ - if (i915->params.enable_guc == -1 && p < INTEL_ICELAKE) - uc_fw->path = NULL; } static const char *__override_guc_firmware_path(struct drm_i915_private *i915) { - if (i915->params.enable_guc & (ENABLE_GUC_SUBMISSION | - ENABLE_GUC_LOAD_HUC)) + if (i915->params.enable_guc & ENABLE_GUC_MASK) return i915->params.guc_firmware_path; return ""; } diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 3fea967ee817..7fb91de06557 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -38,11 +38,17 @@ #include "i915_drv.h" #include "gt/intel_gpu_commands.h" +#include "gt/intel_lrc.h" #include "gt/intel_ring.h" +#include "gt/intel_gt_requests.h" #include "gvt.h" #include "i915_pvinfo.h" #include "trace.h" +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" + #define INVALID_OP (~0U) #define OP_LEN_MI 9 @@ -455,6 +461,7 @@ enum { RING_BUFFER_INSTRUCTION, BATCH_BUFFER_INSTRUCTION, BATCH_BUFFER_2ND_LEVEL, + RING_BUFFER_CTX, }; enum { @@ -496,6 +503,7 @@ struct parser_exec_state { */ int saved_buf_addr_type; bool is_ctx_wa; + bool is_init_ctx; const struct cmd_info *info; @@ -709,6 +717,11 @@ static inline u32 cmd_val(struct parser_exec_state *s, int index) return *cmd_ptr(s, index); } +static inline bool is_init_ctx(struct parser_exec_state *s) +{ + return (s->buf_type == RING_BUFFER_CTX && s->is_init_ctx); +} + static void parser_exec_state_dump(struct parser_exec_state *s) { int cnt = 0; @@ -722,7 +735,8 @@ static void parser_exec_state_dump(struct parser_exec_state *s) gvt_dbg_cmd(" %s %s ip_gma(%08lx) ", s->buf_type == RING_BUFFER_INSTRUCTION ? - "RING_BUFFER" : "BATCH_BUFFER", + "RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ? + "CTX_BUFFER" : "BATCH_BUFFER"), s->buf_addr_type == GTT_BUFFER ? 
"GTT" : "PPGTT", s->ip_gma); @@ -757,7 +771,8 @@ static inline void update_ip_va(struct parser_exec_state *s) if (WARN_ON(s->ring_head == s->ring_tail)) return; - if (s->buf_type == RING_BUFFER_INSTRUCTION) { + if (s->buf_type == RING_BUFFER_INSTRUCTION || + s->buf_type == RING_BUFFER_CTX) { unsigned long ring_top = s->ring_start + s->ring_size; if (s->ring_head > s->ring_tail) { @@ -821,68 +836,12 @@ static inline int cmd_length(struct parser_exec_state *s) *addr = val; \ } while (0) -static bool is_shadowed_mmio(unsigned int offset) -{ - bool ret = false; - - if ((offset == 0x2168) || /*BB current head register UDW */ - (offset == 0x2140) || /*BB current header register */ - (offset == 0x211c) || /*second BB header register UDW */ - (offset == 0x2114)) { /*second BB header register UDW */ - ret = true; - } - return ret; -} - -static inline bool is_force_nonpriv_mmio(unsigned int offset) -{ - return (offset >= 0x24d0 && offset < 0x2500); -} - -static int force_nonpriv_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index, char *cmd) -{ - struct intel_gvt *gvt = s->vgpu->gvt; - unsigned int data; - u32 ring_base; - u32 nopid; - - if (!strcmp(cmd, "lri")) - data = cmd_val(s, index + 1); - else { - gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n", - offset, cmd); - return -EINVAL; - } - - ring_base = s->engine->mmio_base; - nopid = i915_mmio_reg_offset(RING_NOPID(ring_base)); - - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) && - data != nopid) { - gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", - offset, data); - patch_value(s, cmd_ptr(s, index), nopid); - return 0; - } - return 0; -} - static inline bool is_mocs_mmio(unsigned int offset) { return ((offset >= 0xc800) && (offset <= 0xcff8)) || ((offset >= 0xb020) && (offset <= 0xb0a0)); } -static int mocs_cmd_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index) -{ - if (!is_mocs_mmio(offset)) - return -EINVAL; - vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1); - return 0; -} - static int is_cmd_update_pdps(unsigned int offset, struct parser_exec_state *s) { @@ -930,6 +889,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, struct intel_vgpu *vgpu = s->vgpu; struct intel_gvt *gvt = vgpu->gvt; u32 ctx_sr_ctl; + u32 *vreg, vreg_old; if (offset + 4 > gvt->device_info.mmio_size) { gvt_vgpu_err("%s access to (%x) outside of MMIO range\n", @@ -937,34 +897,101 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EFAULT; } + if (is_init_ctx(s)) { + struct intel_gvt_mmio_info *mmio_info; + + intel_gvt_mmio_set_cmd_accessible(gvt, offset); + mmio_info = intel_gvt_find_mmio_info(gvt, offset); + if (mmio_info && mmio_info->write) + intel_gvt_mmio_set_cmd_write_patch(gvt, offset); + return 0; + } + if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { gvt_vgpu_err("%s access to non-render register (%x)\n", cmd, offset); return -EBADRQC; } - if (is_shadowed_mmio(offset)) { - gvt_vgpu_err("found access of shadowed MMIO %x\n", offset); - return 0; + if (!strncmp(cmd, "srm", 3) || + !strncmp(cmd, "lrm", 3)) { + if (offset != i915_mmio_reg_offset(GEN8_L3SQCREG4) && + offset != 0x21f0) { + gvt_vgpu_err("%s access to register (%x)\n", + cmd, offset); + return -EPERM; + } else + return 0; } - if (is_mocs_mmio(offset) && - mocs_cmd_reg_handler(s, offset, index)) - return -EINVAL; + if (!strncmp(cmd, "lrr-src", 7) || + !strncmp(cmd, "lrr-dst", 7)) { + gvt_vgpu_err("not allowed cmd %s\n", cmd); + return -EPERM; + } + + if (!strncmp(cmd, "pipe_ctrl", 
9)) { + /* TODO: add LRI POST logic here */ + return 0; + } - if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index, cmd)) + if (strncmp(cmd, "lri", 3)) return -EPERM; + /* below are all lri handlers */ + vreg = &vgpu_vreg(s->vgpu, offset); + if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { + gvt_vgpu_err("%s access to non-render register (%x)\n", + cmd, offset); + return -EBADRQC; + } + + if (is_cmd_update_pdps(offset, s) && + cmd_pdp_mmio_update_handler(s, offset, index)) + return -EINVAL; + if (offset == i915_mmio_reg_offset(DERRMR) || offset == i915_mmio_reg_offset(FORCEWAKE_MT)) { /* Writing to HW VGT_PVINFO_PAGE offset will be discarded */ patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE); } - if (is_cmd_update_pdps(offset, s) && - cmd_pdp_mmio_update_handler(s, offset, index)) - return -EINVAL; + if (is_mocs_mmio(offset)) + *vreg = cmd_val(s, index + 1); + + vreg_old = *vreg; + + if (intel_gvt_mmio_is_cmd_write_patch(gvt, offset)) { + u32 cmdval_new, cmdval; + struct intel_gvt_mmio_info *mmio_info; + + cmdval = cmd_val(s, index + 1); + + mmio_info = intel_gvt_find_mmio_info(gvt, offset); + if (!mmio_info) { + cmdval_new = cmdval; + } else { + u64 ro_mask = mmio_info->ro_mask; + int ret; + + if (likely(!ro_mask)) + ret = mmio_info->write(s->vgpu, offset, + &cmdval, 4); + else { + gvt_vgpu_err("try to write RO reg %x\n", + offset); + ret = -EBADRQC; + } + if (ret) + return ret; + cmdval_new = *vreg; + } + if (cmdval_new != cmdval) + patch_value(s, cmd_ptr(s, index+1), cmdval_new); + } + + /* only patch cmd. restore vreg value if changed in mmio write handler*/ + *vreg = vreg_old; /* TODO * In order to let workload with inhibit context to generate @@ -1216,6 +1243,8 @@ static int cmd_handler_mi_batch_buffer_end(struct parser_exec_state *s) s->buf_type = BATCH_BUFFER_INSTRUCTION; ret = ip_gma_set(s, s->ret_ip_gma_bb); s->buf_addr_type = s->saved_buf_addr_type; + } else if (s->buf_type == RING_BUFFER_CTX) { + ret = ip_gma_set(s, s->ring_tail); } else { s->buf_type = RING_BUFFER_INSTRUCTION; s->buf_addr_type = GTT_BUFFER; @@ -2764,7 +2793,8 @@ static int command_scan(struct parser_exec_state *s, gma_bottom = rb_start + rb_len; while (s->ip_gma != gma_tail) { - if (s->buf_type == RING_BUFFER_INSTRUCTION) { + if (s->buf_type == RING_BUFFER_INSTRUCTION || + s->buf_type == RING_BUFFER_CTX) { if (!(s->ip_gma >= rb_start) || !(s->ip_gma < gma_bottom)) { gvt_vgpu_err("ip_gma %lx out of ring scope." @@ -3057,6 +3087,171 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } +/* generate dummy contexts by sending empty requests to HW, and let + * the HW to fill Engine Contexts. 
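In the rewritten cmd_reg_handler() above, srm/lrm access is restricted to two whitelisted registers, lrr is rejected outright, and every remaining command must be an lri. For registers flagged F_CMD_WRITE_PATCH, the emulated MMIO write handler is invoked against the shadow vreg purely to learn what value it would have produced, and the guest's command stream is patched to that value. Condensed to its core, with read-only masks and error handling elided (same names as the hunk):

    u32 vreg_old = *vreg;
    u32 cmdval = cmd_val(s, index + 1);

    /* run the emulated MMIO write handler against the shadow vreg */
    mmio_info->write(s->vgpu, offset, &cmdval, 4);

    /* if the handler massaged the value, patch the guest's LRI */
    if (*vreg != cmdval)
            patch_value(s, cmd_ptr(s, index + 1), *vreg);

    *vreg = vreg_old;   /* only the command is patched; vreg is restored */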
This dummy contexts are used for + * initialization purpose (update reg whitelist), so referred to as + * init context here + */ +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) +{ + struct intel_gvt *gvt = vgpu->gvt; + struct drm_i915_private *dev_priv = gvt->gt->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const unsigned long start = LRC_STATE_PN * PAGE_SIZE; + struct i915_request *rq; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_request *requests[I915_NUM_ENGINES] = {}; + bool is_ctx_pinned[I915_NUM_ENGINES] = {}; + int ret; + + if (gvt->is_reg_whitelist_updated) + return; + + for_each_engine(engine, &dev_priv->gt, id) { + ret = intel_context_pin(s->shadow[id]); + if (ret) { + gvt_vgpu_err("fail to pin shadow ctx\n"); + goto out; + } + is_ctx_pinned[id] = true; + + rq = i915_request_create(s->shadow[id]); + if (IS_ERR(rq)) { + gvt_vgpu_err("fail to alloc default request\n"); + ret = -EIO; + goto out; + } + requests[id] = i915_request_get(rq); + i915_request_add(rq); + } + + if (intel_gt_wait_for_idle(&dev_priv->gt, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + ret = -EIO; + goto out; + } + + /* scan init ctx to update cmd accessible list */ + for_each_engine(engine, &dev_priv->gt, id) { + int size = engine->context_size - PAGE_SIZE; + void *vaddr; + struct parser_exec_state s; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + + rq = requests[id]; + GEM_BUG_ON(!i915_request_completed(rq)); + GEM_BUG_ON(!intel_context_is_pinned(rq->context)); + obj = rq->context->state->obj; + + if (!obj) { + ret = -EIO; + goto out; + } + + i915_gem_object_set_cache_coherency(obj, + I915_CACHE_LLC); + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + gvt_err("failed to pin init ctx obj, ring=%d, err=%lx\n", + id, PTR_ERR(vaddr)); + goto out; + } + + s.buf_type = RING_BUFFER_CTX; + s.buf_addr_type = GTT_BUFFER; + s.vgpu = vgpu; + s.engine = engine; + s.ring_start = 0; + s.ring_size = size; + s.ring_head = 0; + s.ring_tail = size; + s.rb_va = vaddr + start; + s.workload = NULL; + s.is_ctx_wa = false; + s.is_init_ctx = true; + + /* skipping the first RING_CTX_SIZE(0x50) dwords */ + ret = ip_gma_set(&s, RING_CTX_SIZE); + if (ret) { + i915_gem_object_unpin_map(obj); + goto out; + } + + ret = command_scan(&s, 0, size, 0, size); + if (ret) + gvt_err("Scan init ctx error\n"); + + i915_gem_object_unpin_map(obj); + } + +out: + if (!ret) + gvt->is_reg_whitelist_updated = true; + + for (id = 0; id < I915_NUM_ENGINES ; id++) { + if (requests[id]) + i915_request_put(requests[id]); + + if (is_ctx_pinned[id]) + intel_context_unpin(s->shadow[id]); + } +} + +int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + unsigned long gma_head, gma_tail, gma_start, ctx_size; + struct parser_exec_state s; + int ring_id = workload->engine->id; + struct intel_context *ce = vgpu->submission.shadow[ring_id]; + int ret; + + GEM_BUG_ON(atomic_read(&ce->pin_count) < 0); + + ctx_size = workload->engine->context_size - PAGE_SIZE; + + /* Only ring contxt is loaded to HW for inhibit context, no need to + * scan engine context + */ + if (is_inhibit_context(ce)) + return 0; + + gma_start = i915_ggtt_offset(ce->state) + LRC_STATE_PN*PAGE_SIZE; + gma_head = 0; + gma_tail = ctx_size; + + s.buf_type = RING_BUFFER_CTX; + s.buf_addr_type = GTT_BUFFER; + s.vgpu = workload->vgpu; + s.engine = workload->engine; + s.ring_start = gma_start; + s.ring_size = ctx_size; + s.ring_head = gma_start + 
gma_head; + s.ring_tail = gma_start + gma_tail; + s.rb_va = ce->lrc_reg_state; + s.workload = workload; + s.is_ctx_wa = false; + s.is_init_ctx = false; + + /* don't scan the first RING_CTX_SIZE(0x50) dwords, as it's ring + * context + */ + ret = ip_gma_set(&s, gma_start + gma_head + RING_CTX_SIZE); + if (ret) + goto out; + + ret = command_scan(&s, gma_head, gma_tail, + gma_start, ctx_size); +out: + if (ret) + gvt_vgpu_err("scan shadow ctx error\n"); + + return ret; +} + static int init_cmd_table(struct intel_gvt *gvt) { unsigned int gen_type = intel_gvt_get_device_type(gvt); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h index ab25d151932a..416d345e2816 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.h +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h @@ -40,6 +40,7 @@ struct intel_gvt; struct intel_shadow_wa_ctx; +struct intel_vgpu; struct intel_vgpu_workload; void intel_gvt_clean_cmd_parser(struct intel_gvt *gvt); @@ -50,4 +51,8 @@ int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload); int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu); + +int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload); + #endif diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 62a4807424bb..03c993d68f10 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -248,7 +248,7 @@ struct gvt_mmio_block { #define INTEL_GVT_MMIO_HASH_BITS 11 struct intel_gvt_mmio { - u8 *mmio_attribute; + u16 *mmio_attribute; /* Register contains RO bits */ #define F_RO (1 << 0) /* Register contains graphics address */ @@ -267,6 +267,8 @@ struct intel_gvt_mmio { * logical context image */ #define F_SR_IN_CTX (1 << 7) +/* Value of command write of this reg needs to be patched */ +#define F_CMD_WRITE_PATCH (1 << 8) struct gvt_mmio_block *mmio_block; unsigned int num_mmio_block; @@ -333,6 +335,7 @@ struct intel_gvt { u32 *mocs_mmio_offset_list; u32 mocs_mmio_offset_list_cnt; } engine_mmio_list; + bool is_reg_whitelist_updated; struct dentry *debugfs_root; }; @@ -416,6 +419,9 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt); #define vgpu_fence_base(vgpu) (vgpu->fence.base) #define vgpu_fence_sz(vgpu) (vgpu->fence.size) +/* ring context size i.e. 
the first 0x50 dwords*/ +#define RING_CTX_SIZE 320 + struct intel_vgpu_creation_params { __u64 handle; __u64 low_gm_sz; /* in MB */ @@ -687,6 +693,35 @@ static inline void intel_gvt_mmio_set_sr_in_ctx( } void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); +/** + * intel_gvt_mmio_set_cmd_write_patch - + * mark an MMIO if its cmd write needs to be + * patched + * @gvt: a GVT device + * @offset: register offset + * + */ +static inline void intel_gvt_mmio_set_cmd_write_patch( + struct intel_gvt *gvt, unsigned int offset) +{ + gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_WRITE_PATCH; +} + +/** + * intel_gvt_mmio_is_cmd_write_patch - check if an mmio's cmd access needs to + * be patched + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * True if GPU commmand write to an MMIO should be patched + */ +static inline bool intel_gvt_mmio_is_cmd_write_patch( + struct intel_gvt *gvt, unsigned int offset) +{ + return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_WRITE_PATCH; +} + void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu); void intel_gvt_debugfs_init(struct intel_gvt *gvt); void intel_gvt_debugfs_clean(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 0d124ced5f94..6eeaeecb7f85 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -83,7 +83,7 @@ static void write_vreg(struct intel_vgpu *vgpu, unsigned int offset, memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes); } -static struct intel_gvt_mmio_info *find_mmio_info(struct intel_gvt *gvt, +struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, unsigned int offset) { struct intel_gvt_mmio_info *e; @@ -96,7 +96,7 @@ static struct intel_gvt_mmio_info *find_mmio_info(struct intel_gvt *gvt, } static int new_mmio_info(struct intel_gvt *gvt, - u32 offset, u8 flags, u32 size, + u32 offset, u16 flags, u32 size, u32 addr_mask, u32 ro_mask, u32 device, gvt_mmio_func read, gvt_mmio_func write) { @@ -118,7 +118,7 @@ static int new_mmio_info(struct intel_gvt *gvt, return -ENOMEM; info->offset = i; - p = find_mmio_info(gvt, info->offset); + p = intel_gvt_find_mmio_info(gvt, info->offset); if (p) { WARN(1, "dup mmio definition offset %x\n", info->offset); @@ -1965,7 +1965,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) /* RING MODE */ #define RING_REG(base) _MMIO((base) + 0x29c) - MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + MMIO_RING_DFH(RING_REG, D_ALL, + F_MODE_MASK | F_CMD_ACCESS | F_CMD_WRITE_PATCH, NULL, ring_mode_mmio_write); #undef RING_REG @@ -2885,8 +2886,8 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(_MMIO(0xb110), D_BDW); - MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, - NULL, force_nonpriv_write); + MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS | F_CMD_WRITE_PATCH, 0, 0, + D_BDW_PLUS, NULL, force_nonpriv_write); MMIO_D(_MMIO(0x44484), D_BDW_PLUS); MMIO_D(_MMIO(0x4448c), D_BDW_PLUS); @@ -3626,7 +3627,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, /* * Normal tracked MMIOs. 
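One side effect visible in gvt.h above: F_CMD_WRITE_PATCH is the ninth attribute bit, and the existing eight flags already filled the per-register byte, hence the u8 to u16 widening of mmio_attribute and of the flags argument to new_mmio_info(). The layout, abbreviated as a reading aid:

    /* per-register attribute bits, one entry per MMIO dword */
    #define F_RO                  (1 << 0)  /* register contains RO bits */
    /* bits 1..6: GA, cmd-access, mode-mask and friends      */
    #define F_SR_IN_CTX           (1 << 7)  /* last bit of the old u8    */
    #define F_CMD_WRITE_PATCH     (1 << 8)  /* forces the u16 widening   */

    /* lookups scale the register offset down to a dword index */
    if (gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_WRITE_PATCH)
            /* command writes to this register must be patched */;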
@@ -3626,7 +3627,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset,
 	/*
 	 * Normal tracked MMIOs.
 	 */
-	mmio_info = find_mmio_info(gvt, offset);
+	mmio_info = intel_gvt_find_mmio_info(gvt, offset);
 	if (!mmio_info) {
 		gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes);
 		goto default_rw;
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
index 9e862dc73579..7c26af39fbfc 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -80,6 +80,9 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
 	int (*handler)(struct intel_gvt *gvt, u32 offset, void *data),
 	void *data);
 
+struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
+						     unsigned int offset);
+
 int intel_vgpu_init_mmio(struct intel_vgpu *vgpu);
 void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr);
 void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu);
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index b58860dee970..244cc7320b54 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -133,4 +133,6 @@
 #define RING_GFX_MODE(base)	_MMIO((base) + 0x29c)
 #define VF_GUARDBAND	_MMIO(0x83a4)
 
+
+#define BCS_TILE_REGISTER_VAL_OFFSET (0x43*4)
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6af5c06caee0..43f31c2eab14 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -137,6 +137,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	int i;
 	bool skip = false;
 	int ring_id = workload->engine->id;
+	int ret;
 
 	GEM_BUG_ON(!intel_context_is_pinned(ctx));
 
@@ -163,16 +164,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 		COPY_REG(bb_per_ctx_ptr);
 		COPY_REG(rcs_indirect_ctx);
 		COPY_REG(rcs_indirect_ctx_offset);
-	}
+	} else if (workload->engine->id == BCS0)
+		intel_gvt_hypervisor_read_gpa(vgpu,
+				workload->ring_context_gpa +
+				BCS_TILE_REGISTER_VAL_OFFSET,
+				(void *)shadow_ring_context +
+				BCS_TILE_REGISTER_VAL_OFFSET, 4);
 #undef COPY_REG
 #undef COPY_REG_MASKED
 
+	/* don't copy the Ring Context (the first 0x50 dwords),
+	 * only copy the Engine Context part from the guest
+	 */
 	intel_gvt_hypervisor_read_gpa(vgpu,
 			workload->ring_context_gpa +
-			sizeof(*shadow_ring_context),
+			RING_CTX_SIZE,
 			(void *)shadow_ring_context +
-			sizeof(*shadow_ring_context),
-			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+			RING_CTX_SIZE,
+			I915_GTT_PAGE_SIZE - RING_CTX_SIZE);
 
 	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
 
@@ -239,6 +248,11 @@ read:
 		gpa_size = I915_GTT_PAGE_SIZE;
 		dst = context_base + (i << I915_GTT_PAGE_SHIFT);
 	}
+	ret = intel_gvt_scan_engine_context(workload);
+	if (ret) {
+		gvt_vgpu_err("invalid cmd found in guest context pages\n");
+		return ret;
+	}
 	s->last_ctx[ring_id].valid = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index e49944fde333..5a7226339cf4 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -500,9 +500,11 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	mutex_lock(&gvt->lock);
 	vgpu = __intel_gvt_create_vgpu(gvt, &param);
-	if (!IS_ERR(vgpu))
+	if (!IS_ERR(vgpu)) {
 		/* calculate left instance change for types */
 		intel_gvt_update_vgpu_types(gvt);
+		intel_gvt_update_reg_whitelist(vgpu);
+	}
 	mutex_unlock(&gvt->lock);
 
 	return vgpu;
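populate_shadow_context() above now copies only the engine-context portion of each guest page, leaving the first RING_CTX_SIZE bytes (0x50 dwords) of ring context under host control. A toy model of that offset arithmetic, with a plain memcpy standing in for intel_gvt_hypervisor_read_gpa():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE     4096
#define RING_CTX_SIZE 320 /* 0x50 dwords * 4 bytes */

/* Copy the engine-context part of a guest context page into the
 * shadow page, preserving the shadow's ring-context header. */
static void copy_engine_context(uint8_t *shadow, const uint8_t *guest)
{
	memcpy(shadow + RING_CTX_SIZE,
	       guest + RING_CTX_SIZE,
	       PAGE_SIZE - RING_CTX_SIZE);
}

int main(void)
{
	static uint8_t shadow[PAGE_SIZE], guest[PAGE_SIZE];

	memset(shadow, 0xaa, sizeof(shadow)); /* host-owned ring context */
	memset(guest, 0x55, sizeof(guest));   /* guest-provided image */

	copy_engine_context(shadow, guest);
	/* dword 0x4f is still host data, dword 0x50 is guest data */
	printf("shadow[0x4f*4]=%#x shadow[0x50*4]=%#x\n",
	       shadow[0x4f * 4], shadow[0x50 * 4]);
	return 0;
}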
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 82d0f19e86df..ced9a96d7c34 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1143,7 +1143,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	void *dst, *src;
 	int ret;
 
-	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
+	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
 	if (IS_ERR(dst))
 		return dst;
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index de8e0e44cfb6..88336ff4bf09 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -210,7 +210,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	spin_unlock(&obj->vma.lock);
 
 	seq_printf(m, " (pinned x %d)", pin_count);
-	if (obj->stolen)
+	if (i915_gem_object_is_stolen(obj))
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
 	if (i915_gem_object_is_framebuffer(obj))
 		seq_printf(m, " (fb)");
@@ -220,145 +220,6 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			seq_printf(m, " (%s)", engine->name);
 }
 
-struct file_stats {
-	struct i915_address_space *vm;
-	unsigned long count;
-	u64 total;
-	u64 active, inactive;
-	u64 closed;
-};
-
-static int per_file_stats(int id, void *ptr, void *data)
-{
-	struct drm_i915_gem_object *obj = ptr;
-	struct file_stats *stats = data;
-	struct i915_vma *vma;
-
-	if (IS_ERR_OR_NULL(obj) || !kref_get_unless_zero(&obj->base.refcount))
-		return 0;
-
-	stats->count++;
-	stats->total += obj->base.size;
-
-	spin_lock(&obj->vma.lock);
-	if (!stats->vm) {
-		for_each_ggtt_vma(vma, obj) {
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
-
-			if (i915_vma_is_active(vma))
-				stats->active += vma->node.size;
-			else
-				stats->inactive += vma->node.size;
-
-			if (i915_vma_is_closed(vma))
-				stats->closed += vma->node.size;
-		}
-	} else {
-		struct rb_node *p = obj->vma.tree.rb_node;
-
-		while (p) {
-			long cmp;
-
-			vma = rb_entry(p, typeof(*vma), obj_node);
-			cmp = i915_vma_compare(vma, stats->vm, NULL);
-			if (cmp == 0) {
-				if (drm_mm_node_allocated(&vma->node)) {
-					if (i915_vma_is_active(vma))
-						stats->active += vma->node.size;
-					else
-						stats->inactive += vma->node.size;
-
-					if (i915_vma_is_closed(vma))
-						stats->closed += vma->node.size;
-				}
-				break;
-			}
-			if (cmp < 0)
-				p = p->rb_right;
-			else
-				p = p->rb_left;
-		}
-	}
-	spin_unlock(&obj->vma.lock);
-
-	i915_gem_object_put(obj);
-	return 0;
-}
-
-#define print_file_stats(m, name, stats) do { \
-	if (stats.count) \
-		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu closed)\n", \
-			   name, \
-			   stats.count, \
-			   stats.total, \
-			   stats.active, \
-			   stats.inactive, \
-			   stats.closed); \
-} while (0)
-
-static void print_context_stats(struct seq_file *m,
-				struct drm_i915_private *i915)
-{
-	struct file_stats kstats = {};
-	struct i915_gem_context *ctx, *cn;
-
-	spin_lock(&i915->gem.contexts.lock);
-	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
-		struct i915_gem_engines_iter it;
-		struct intel_context *ce;
-
-		if (!kref_get_unless_zero(&ctx->ref))
-			continue;
-
-		spin_unlock(&i915->gem.contexts.lock);
-
-		for_each_gem_engine(ce,
-				    i915_gem_context_lock_engines(ctx), it) {
-			if (intel_context_pin_if_active(ce)) {
-				rcu_read_lock();
-				if (ce->state)
-					per_file_stats(0,
-						       ce->state->obj, &kstats);
-				per_file_stats(0, ce->ring->vma->obj, &kstats);
-				rcu_read_unlock();
-				intel_context_unpin(ce);
-			}
-		}
-		i915_gem_context_unlock_engines(ctx);
-
-		mutex_lock(&ctx->mutex);
-		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-			struct file_stats stats = {
-				.vm = rcu_access_pointer(ctx->vm),
-			};
-			struct drm_file *file = ctx->file_priv->file;
-			struct task_struct *task;
-			char name[80];
-
-			rcu_read_lock();
-			idr_for_each(&file->object_idr, per_file_stats, &stats);
-			rcu_read_unlock();
-
-			rcu_read_lock();
-			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
-			snprintf(name, sizeof(name), "%s",
-				 task ? task->comm : "<unknown>");
-			rcu_read_unlock();
-
-			print_file_stats(m, name, stats);
-		}
-		mutex_unlock(&ctx->mutex);
-
-		spin_lock(&i915->gem.contexts.lock);
-		list_safe_reset_next(ctx, cn, link);
-		i915_gem_context_put(ctx);
-	}
-	spin_unlock(&i915->gem.contexts.lock);
-
-	print_file_stats(m, "[k]contexts", kstats);
-}
-
 static int i915_gem_object_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -372,9 +233,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	for_each_memory_region(mr, i915, id)
 		seq_printf(m, "%s: total:%pa, available:%pa bytes\n",
 			   mr->name, &mr->total, &mr->avail);
-	seq_putc(m, '\n');
-
-	print_context_stats(m, i915);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 17a4636ee542..9b04dff5eb32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -180,108 +180,6 @@ try_again:
 }
 
 static int
-i915_gem_create(struct drm_file *file,
-		struct intel_memory_region *mr,
-		u64 *size_p,
-		u32 *handle_p)
-{
-	struct drm_i915_gem_object *obj;
-	u32 handle;
-	u64 size;
-	int ret;
-
-	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
-	size = round_up(*size_p, mr->min_page_size);
-	if (size == 0)
-		return -EINVAL;
-
-	/* For most of the ABI (e.g. mmap) we think in system pages */
-	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
-
-	/* Allocate the new object */
-	obj = i915_gem_object_create_region(mr, size, 0);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	ret = drm_gem_handle_create(file, &obj->base, &handle);
-	/* drop reference from allocate - handle holds it now */
-	i915_gem_object_put(obj);
-	if (ret)
-		return ret;
-
-	*handle_p = handle;
-	*size_p = size;
-	return 0;
-}
-
-int
-i915_gem_dumb_create(struct drm_file *file,
-		     struct drm_device *dev,
-		     struct drm_mode_create_dumb *args)
-{
-	enum intel_memory_type mem_type;
-	int cpp = DIV_ROUND_UP(args->bpp, 8);
-	u32 format;
-
-	switch (cpp) {
-	case 1:
-		format = DRM_FORMAT_C8;
-		break;
-	case 2:
-		format = DRM_FORMAT_RGB565;
-		break;
-	case 4:
-		format = DRM_FORMAT_XRGB8888;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* have to work out size/pitch and return them */
-	args->pitch = ALIGN(args->width * cpp, 64);
-
-	/* align stride to page size so that we can remap */
-	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
-						    DRM_FORMAT_MOD_LINEAR))
-		args->pitch = ALIGN(args->pitch, 4096);
-
-	if (args->pitch < args->width)
-		return -EINVAL;
-
-	args->size = mul_u32_u32(args->pitch, args->height);
-
-	mem_type = INTEL_MEMORY_SYSTEM;
-	if (HAS_LMEM(to_i915(dev)))
-		mem_type = INTEL_MEMORY_LOCAL;
-
-	return i915_gem_create(file,
-			       intel_memory_region_by_type(to_i915(dev),
-							   mem_type),
-			       &args->size, &args->handle);
-}
-
-/**
- * Creates a new mm object and returns a handle to it.
- * @dev: drm device pointer
- * @data: ioctl data blob
- * @file: drm file pointer
- */
-int
-i915_gem_create_ioctl(struct drm_device *dev, void *data,
-		      struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_create *args = data;
-
-	i915_gem_flush_free_objects(i915);
-
-	return i915_gem_create(file,
-			       intel_memory_region_by_type(i915,
-							   INTEL_MEMORY_SYSTEM),
-			       &args->size, &args->handle);
-}
-
-static int
 shmem_pread(struct page *page, int offset, int len,
 	    char __user *user_data, bool needs_clflush)
 {
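The dumb-create path removed here (this series moves it to gem/i915_gem_create.c) derives pitch and size from width/height/bpp: pitch is aligned to 64 bytes, bumped to page alignment once it exceeds the plane's maximum remappable stride, and size is pitch * height. The arithmetic in isolation, with the max-stride threshold as a placeholder value (the real limit is platform- and format-dependent):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

/* Placeholder for intel_plane_fb_max_stride(); assumed value. */
#define MAX_FB_STRIDE 32768

static int dumb_size(uint32_t width, uint32_t height, uint32_t bpp,
		     uint32_t *pitch, uint64_t *size)
{
	uint32_t cpp = (bpp + 7) / 8; /* bytes per pixel, rounded up */

	*pitch = ALIGN_UP(width * cpp, 64);
	/* past the plane's max stride the fb will be remapped,
	 * so the stride must be page aligned */
	if (*pitch > MAX_FB_STRIDE)
		*pitch = ALIGN_UP(*pitch, 4096);
	if (*pitch < width)
		return -1; /* overflow check, as in the removed code */

	*size = (uint64_t)*pitch * height;
	return 0;
}

int main(void)
{
	uint32_t pitch;
	uint64_t size;

	if (!dumb_size(1920, 1080, 32, &pitch, &size))
		printf("pitch=%u size=%llu\n", pitch,
		       (unsigned long long)size);
	return 0;
}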
@@ -1059,14 +957,14 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	    i915_gem_object_is_tiled(obj) &&
 	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		if (obj->mm.madv == I915_MADV_WILLNEED) {
-			GEM_BUG_ON(!obj->mm.quirked);
-			__i915_gem_object_unpin_pages(obj);
-			obj->mm.quirked = false;
+			GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj));
+			i915_gem_object_clear_tiling_quirk(obj);
+			i915_gem_object_make_shrinkable(obj);
 		}
 		if (args->madv == I915_MADV_WILLNEED) {
-			GEM_BUG_ON(obj->mm.quirked);
-			__i915_gem_object_pin_pages(obj);
-			obj->mm.quirked = true;
+			GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
+			i915_gem_object_make_unshrinkable(obj);
+			i915_gem_object_set_tiling_quirk(obj);
 		}
 	}
 
@@ -1277,19 +1175,13 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 	 * the objects as well, see i915_gem_freeze()
 	 */
 
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	i915_gem_shrink(i915, -1UL, NULL, ~0);
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		i915_gem_shrink(i915, -1UL, NULL, ~0);
 	i915_gem_drain_freed_objects(i915);
 
-	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
-		i915_gem_object_lock(obj, NULL);
-		drm_WARN_ON(&i915->drm,
-			    i915_gem_object_set_to_cpu_domain(obj, true));
-		i915_gem_object_unlock(obj);
-	}
-
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	wbinvd_on_all_cpus();
+	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link)
+		__start_cpu_write(obj);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index a4cad3f154ca..e622aee6e4be 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -38,11 +38,18 @@ struct drm_i915_private;
 
 #define GEM_SHOW_DEBUG() drm_debug_enabled(DRM_UT_DRIVER)
 
+#ifdef CONFIG_DRM_I915_DEBUG_GEM_ONCE
+#define __GEM_BUG(cond) BUG()
+#else
+#define __GEM_BUG(cond) \
+	WARN(1, "%s:%d GEM_BUG_ON(%s)\n", __func__, __LINE__, __stringify(cond))
+#endif
+
 #define GEM_BUG_ON(condition) do { if (unlikely((condition))) {	\
 		GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \
 			      __func__, __LINE__, __stringify(condition)); \
 		GEM_TRACE_DUMP(); \
-		BUG(); \
+		__GEM_BUG(condition); \
 		} \
 	} while(0)
 
 #define GEM_WARN_ON(expr) WARN_ON(expr)
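With the __GEM_BUG() indirection above, GEM_BUG_ON() is fatal only under CONFIG_DRM_I915_DEBUG_GEM_ONCE; otherwise it warns and lets the system limp on so logs can be captured, per the Kconfig help text. A userspace analogue of the same compile-time switch, using -DGEM_DEBUG_ONCE as a stand-in for the Kconfig option:

#include <stdio.h>
#include <stdlib.h>

/* Compile with -DGEM_DEBUG_ONCE to make the first failure fatal,
 * mirroring CONFIG_DRM_I915_DEBUG_GEM_ONCE. */
#ifdef GEM_DEBUG_ONCE
#define __GEM_BUG(cond) abort()
#else
#define __GEM_BUG(cond) \
	fprintf(stderr, "%s:%d GEM_BUG_ON(%s)\n", __func__, __LINE__, #cond)
#endif

#define GEM_BUG_ON(cond) do { \
	if (cond) \
		__GEM_BUG(cond); \
} while (0)

int main(void)
{
	GEM_BUG_ON(1 + 1 != 2); /* passes silently */
	GEM_BUG_ON(2 + 2 == 4); /* warns, or aborts under GEM_DEBUG_ONCE */
	printf("still running: the failure was non-fatal\n");
	return 0;
}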
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index e1a66c8245b8..4d2d59a9942b 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -61,6 +61,17 @@ mark_free(struct drm_mm_scan *scan,
 	return drm_mm_scan_add_block(scan, &vma->node);
 }
 
+static bool defer_evict(struct i915_vma *vma)
+{
+	if (i915_vma_is_active(vma))
+		return true;
+
+	if (i915_vma_is_scanout(vma))
+		return true;
+
+	return false;
+}
+
 /**
  * i915_gem_evict_something - Evict vmas to make room for binding a new one
  * @vm: address space to evict from
@@ -150,7 +161,7 @@ search_again:
 	 * To notice when we complete one full cycle, we record the
 	 * first active element seen, before moving it to the tail.
 	 */
-	if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) {
+	if (active != ERR_PTR(-EAGAIN) && defer_evict(vma)) {
 		if (!active)
 			active = vma;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8b163ee1b86d..f962693404b7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1051,7 +1051,9 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 		for_each_sgt_daddr(dma, iter, vma->pages) {
 			void __iomem *s;
 
-			s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
+			s = io_mapping_map_wc(&mem->iomap,
+					      dma - mem->region.start,
+					      PAGE_SIZE);
 			ret = compress_page(compress, (void __force *)s, dst, true);
 
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 43039dc8c607..666808cb3a32 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -62,7 +62,7 @@ static int remap_sg(pte_t *pte, unsigned long addr, void *data)
 {
 	struct remap_pfn *r = data;
 
-	if (GEM_WARN_ON(!r->sgt.pfn))
+	if (GEM_WARN_ON(!r->sgt.sgp))
 		return -EINVAL;
 
 	/* Special PTE are not associated with any struct page */
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 330c03e2b4f7..f031966af5b7 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -32,6 +32,7 @@ struct drm_printer;
 
 #define ENABLE_GUC_SUBMISSION		BIT(0)
 #define ENABLE_GUC_LOAD_HUC		BIT(1)
+#define ENABLE_GUC_MASK			GENMASK(1, 0)
 
 /*
  * Invoke param, a function-like macro, for each i915 param, with arguments:
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 39608381b4a4..020b5f561f07 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -455,6 +455,7 @@ static const struct intel_device_info snb_m_gt2_info = {
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
+	.has_reset_engine = true, \
 	.has_rps = true, \
 	.dma_mask_size = 40, \
 	.ppgtt_type = INTEL_PPGTT_ALIASING, \
@@ -513,6 +514,7 @@ static const struct intel_device_info vlv_info = {
 	.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B),
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
+	.has_reset_engine = true,
 	.has_rps = true,
 	.display.has_gmch = 1,
 	.display.has_hotplug = 1,
@@ -571,8 +573,7 @@ static const struct intel_device_info hsw_gt3_info = {
 	.dma_mask_size = 39, \
 	.ppgtt_type = INTEL_PPGTT_FULL, \
 	.ppgtt_size = 48, \
-	.has_64bit_reloc = 1, \
-	.has_reset_engine = 1
+	.has_64bit_reloc = 1
 
 #define BDW_PLATFORM \
 	GEN8_FEATURES, \
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0b1a46a0d866..22e39d938f17 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -276,7 +276,7 @@ static void remove_from_engine(struct i915_request *rq)
 bool i915_request_retire(struct i915_request *rq)
 {
-	if (!i915_request_completed(rq))
+	if (!__i915_request_is_complete(rq))
 		return false;
 
 	RQ_TRACE(rq, "\n");
@@ -342,8 +342,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 	struct i915_request *tmp;
 
 	RQ_TRACE(rq, "\n");
-
-	GEM_BUG_ON(!i915_request_completed(rq));
+	GEM_BUG_ON(!__i915_request_is_complete(rq));
 
 	do {
 		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
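The conversions to __i915_request_is_complete() here rely on the usual wraparound-safe seqno comparison between a request's seqno and the breadcrumb last written to the HWSP. The driver's helper for that comparison is i915_seqno_passed(); the sketch below shows only the idiom, and everything around it is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "a has passed b": true iff a is at or after b,
 * even across the u32 boundary. */
static int seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

int main(void)
{
	uint32_t hwsp = 0x00000002; /* last breadcrumb the GPU wrote */

	printf("seqno 1 complete: %d\n", seqno_passed(hwsp, 1));
	printf("seqno 5 complete: %d\n", seqno_passed(hwsp, 5));
	/* near the wrap: 0x00000001 has passed 0xfffffffe */
	printf("wrapped: %d\n", seqno_passed(1, 0xfffffffeu));
	return 0;
}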
@@ -552,8 +551,10 @@ bool __i915_request_submit(struct i915_request *request)
 	 * dropped upon retiring. (Otherwise if resubmit a *retired*
 	 * request, this would be a horrible use-after-free.)
 	 */
-	if (i915_request_completed(request))
-		goto xfer;
+	if (__i915_request_is_complete(request)) {
+		list_del_init(&request->sched.link);
+		goto active;
+	}
 
 	if (unlikely(intel_context_is_banned(request->context)))
 		i915_request_set_error_once(request, -EIO);
@@ -588,11 +589,11 @@ bool __i915_request_submit(struct i915_request *request)
 	engine->serial++;
 	result = true;
 
-xfer:
-	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
-		list_move_tail(&request->sched.link, &engine->active.requests);
-		clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
-	}
+	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
+	list_move_tail(&request->sched.link, &engine->active.requests);
+active:
+	clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
 	/*
 	 * XXX Rollback bonded-execution on __i915_request_unsubmit()?
@@ -652,7 +653,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 		i915_request_cancel_breadcrumb(request);
 
 	/* We've already spun, don't charge on resubmitting. */
-	if (request->sched.semaphores && i915_request_started(request))
+	if (request->sched.semaphores && __i915_request_has_started(request))
 		request->sched.semaphores = 0;
 
 	/*
@@ -864,7 +865,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	RCU_INIT_POINTER(rq->timeline, tl);
 	RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
-	GEM_BUG_ON(i915_request_completed(rq));
+	GEM_BUG_ON(__i915_request_is_complete(rq));
 
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
@@ -970,15 +971,22 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 	if (i915_request_started(signal))
 		return 0;
 
+	/*
+	 * The caller holds a reference on @signal, but we do not serialise
+	 * against it being retired and removed from the lists.
+	 *
+	 * We do not hold a reference to the request before @signal, and
+	 * so must be very careful to ensure that it is not _recycled_ as
+	 * we follow the link backwards.
+	 */
 	fence = NULL;
 	rcu_read_lock();
-	spin_lock_irq(&signal->lock);
 	do {
 		struct list_head *pos = READ_ONCE(signal->link.prev);
 		struct i915_request *prev;
 
 		/* Confirm signal has not been retired, the link is valid */
-		if (unlikely(i915_request_started(signal)))
+		if (unlikely(__i915_request_has_started(signal)))
 			break;
 
 		/* Is signal the earliest request on its timeline? */
@@ -1003,7 +1011,6 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 		fence = &prev->fence;
 	} while (0);
-	spin_unlock_irq(&signal->lock);
 	rcu_read_unlock();
 	if (!fence)
 		return 0;
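Dropping signal->lock from i915_request_await_start() leaves only RCU protecting the backward walk, hence the load-then-revalidate dance in the hunk above. A loose userspace analogue using C11 atomics; note that in the driver it is RCU, not atomics, that keeps the neighbour's memory alive, so treat this purely as the shape of the revalidation pattern:

#include <stdatomic.h>
#include <stdio.h>

struct req {
	_Atomic(struct req *) prev;
	atomic_uint started; /* once set, the link may be stale */
	unsigned int seqno;
};

/* Sketch of the lock-free backward peek: load the link, then
 * re-validate the anchor before trusting the result. */
static struct req *peek_prev(struct req *signal)
{
	struct req *prev = atomic_load(&signal->prev);

	if (atomic_load(&signal->started))
		return NULL; /* retired/recycled: link cannot be trusted */
	return prev;
}

int main(void)
{
	struct req a = { .seqno = 1 }, b = { .seqno = 2 };

	atomic_store(&b.prev, &a);
	printf("prev of 2: %u\n", peek_prev(&b) ? peek_prev(&b)->seqno : 0);
	atomic_store(&b.started, 1);
	printf("after start: %p\n", (void *)peek_prev(&b));
	return 0;
}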
@@ -1520,7 +1527,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 */
 	prev = to_request(__i915_active_fence_set(&timeline->last_request,
 						  &rq->fence));
-	if (prev && !i915_request_completed(prev)) {
+	if (prev && !__i915_request_is_complete(prev)) {
 		/*
 		 * The requests are supposed to be kept in order. However,
 		 * we need to be wary in case the timeline->last_request
@@ -1897,10 +1904,10 @@ static char queue_status(const struct i915_request *rq)
 
 static const char *run_status(const struct i915_request *rq)
 {
-	if (i915_request_completed(rq))
+	if (__i915_request_is_complete(rq))
 		return "!";
 
-	if (i915_request_started(rq))
+	if (__i915_request_has_started(rq))
 		return "*";
 
 	if (!i915_sw_fence_signaled(&rq->semaphore))
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 318e359bf5c3..7144239f08df 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -520,7 +520,7 @@ void i915_request_show_with_schedule(struct drm_printer *m,
 		if (signaler->timeline == rq->timeline)
 			continue;
 
-		if (i915_request_completed(signaler))
+		if (__i915_request_is_complete(signaler))
 			continue;
 
 		i915_request_show(m, signaler, prefix, indent + 2);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5b3a3c653454..a64adc8c883b 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -363,6 +363,21 @@ i915_vma_unpin_fence(struct i915_vma *vma)
 
 void i915_vma_parked(struct intel_gt *gt);
 
+static inline bool i915_vma_is_scanout(const struct i915_vma *vma)
+{
+	return test_bit(I915_VMA_SCANOUT_BIT, __i915_vma_flags(vma));
+}
+
+static inline void i915_vma_mark_scanout(struct i915_vma *vma)
+{
+	set_bit(I915_VMA_SCANOUT_BIT, __i915_vma_flags(vma));
+}
+
+static inline void i915_vma_clear_scanout(struct i915_vma *vma)
+{
+	clear_bit(I915_VMA_SCANOUT_BIT, __i915_vma_flags(vma));
+}
+
 #define for_each_until(cond) if (cond) break; else
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 9e9082dc8f4b..f5cb848b7a7e 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -249,6 +249,9 @@ struct i915_vma {
 #define I915_VMA_USERFAULT	((int)BIT(I915_VMA_USERFAULT_BIT))
 #define I915_VMA_GGTT_WRITE	((int)BIT(I915_VMA_GGTT_WRITE_BIT))
 
+#define I915_VMA_SCANOUT_BIT	18
+#define I915_VMA_SCANOUT	((int)BIT(I915_VMA_SCANOUT_BIT))
+
 	struct i915_active active;
 
 #define I915_VMA_PAGES_BIAS 24
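The new I915_VMA_SCANOUT bit is what eviction consults, via defer_evict() in the earlier i915_gem_evict.c hunk, to keep in-use framebuffers from being casually evicted. A compact model of just that policy decision, with invented minimal types:

#include <stdio.h>

struct vma {
	const char *name;
	unsigned int active:1;
	unsigned int scanout:1;
};

/* Mirrors defer_evict(): active or scanout vmas are poor eviction
 * candidates and are revisited only after everything else. */
static int defer_evict(const struct vma *vma)
{
	return vma->active || vma->scanout;
}

int main(void)
{
	struct vma vmas[] = {
		{ "batch",  1, 0 },
		{ "cached", 0, 0 },
		{ "fb",     0, 1 },
	};
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("%-6s -> %s\n", vmas[i].name,
		       defer_evict(&vmas[i]) ? "defer" : "evict now");
	return 0;
}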
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 6590d55df6cb..6ffc0673f005 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -57,10 +57,10 @@ struct intel_memory_region_ops {
 	int (*init)(struct intel_memory_region *mem);
 	void (*release)(struct intel_memory_region *mem);
 
-	struct drm_i915_gem_object *
-	(*create_object)(struct intel_memory_region *mem,
-			 resource_size_t size,
-			 unsigned int flags);
+	int (*init_object)(struct intel_memory_region *mem,
+			   struct drm_i915_gem_object *obj,
+			   resource_size_t size,
+			   unsigned int flags);
 };
 
 struct intel_memory_region {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 3512bb8433cf..f99bb0113726 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -38,8 +38,8 @@ static void quirk_add(struct drm_i915_gem_object *obj,
 		      struct list_head *objects)
 {
 	/* quirk is only for live tiled objects, use it to declare ownership */
-	GEM_BUG_ON(obj->mm.quirked);
-	obj->mm.quirked = true;
+	GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
+	i915_gem_object_set_tiling_quirk(obj);
 	list_add(&obj->st_link, objects);
 }
 
@@ -85,7 +85,7 @@ static void unpin_ggtt(struct i915_ggtt *ggtt)
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
-		if (vma->obj->mm.quirked)
+		if (i915_gem_object_has_tiling_quirk(vma->obj))
 			i915_vma_unpin(vma);
 }
 
@@ -94,8 +94,8 @@ static void cleanup_objects(struct i915_ggtt *ggtt, struct list_head *list)
 	struct drm_i915_gem_object *obj, *on;
 
 	list_for_each_entry_safe(obj, on, list, st_link) {
-		GEM_BUG_ON(!obj->mm.quirked);
-		obj->mm.quirked = false;
+		GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj));
+		i915_gem_object_clear_tiling_quirk(obj);
 		i915_gem_object_put(obj);
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
index 979d96f27c43..3c6021415274 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -15,21 +15,16 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = {
 	.release = i915_gem_object_release_memory_region,
 };
 
-static struct drm_i915_gem_object *
-mock_object_create(struct intel_memory_region *mem,
-		   resource_size_t size,
-		   unsigned int flags)
+static int mock_object_init(struct intel_memory_region *mem,
+			    struct drm_i915_gem_object *obj,
+			    resource_size_t size,
+			    unsigned int flags)
 {
 	static struct lock_class_key lock_class;
 	struct drm_i915_private *i915 = mem->i915;
-	struct drm_i915_gem_object *obj;
 
 	if (size > mem->mm.size)
-		return ERR_PTR(-E2BIG);
-
-	obj = i915_gem_object_alloc();
-	if (!obj)
-		return ERR_PTR(-ENOMEM);
+		return -E2BIG;
 
 	drm_gem_private_object_init(&i915->drm, &obj->base, size);
 	i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class);
@@ -40,13 +35,13 @@ mock_object_create(struct intel_memory_region *mem,
 
 	i915_gem_object_init_memory_region(obj, mem, flags);
 
-	return obj;
+	return 0;
 }
 
 static const struct intel_memory_region_ops mock_region_ops = {
 	.init = intel_memory_region_init_buddy,
 	.release = intel_memory_region_release_buddy,
-	.create_object = mock_object_create,
+	.init_object = mock_object_init,
};
 
 struct intel_memory_region *
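The listing is cut off above in the source, but the mock_region conversion already shows the shape of the create_object to init_object refactor: allocation and error unwinding move into one shared create path, and each region backend only initialises a caller-allocated object. A simplified sketch of that split, with invented minimal types standing in for the driver's:

#include <stdio.h>
#include <stdlib.h>

struct gem_object { size_t size; };

struct region_ops {
	/* backends initialise a caller-allocated object... */
	int (*init_object)(struct gem_object *obj, size_t size);
};

struct region {
	const struct region_ops *ops;
	size_t capacity;
};

static int mock_init_object(struct gem_object *obj, size_t size)
{
	obj->size = size; /* region-specific setup would go here */
	return 0;
}

/* ...while allocation and error unwinding live in one shared path. */
static struct gem_object *region_create_object(struct region *mem,
					       size_t size)
{
	struct gem_object *obj;

	if (size > mem->capacity)
		return NULL; /* -E2BIG in the driver */

	obj = malloc(sizeof(*obj));
	if (!obj)
		return NULL;

	if (mem->ops->init_object(obj, size)) {
		free(obj);
		return NULL;
	}
	return obj;
}

static const struct region_ops mock_ops = { .init_object = mock_init_object };

int main(void)
{
	struct region mem = { &mock_ops, 1 << 20 };
	struct gem_object *obj = region_create_object(&mem, 4096);

	printf("created: %s\n", obj ? "ok" : "failed");
	free(obj);
	return 0;
}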