Diffstat (limited to 'drivers/gpu/drm/i915/gt')
72 files changed, 2293 insertions, 972 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 890191f286e3..6e9292918bfc 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -185,7 +185,6 @@ static void gen6_alloc_va_range(struct i915_address_space *vm, pt = stash->pt[0]; __i915_gem_object_pin_pages(pt->base); - i915_gem_object_make_unshrinkable(pt->base); fill32_px(pt, vm->scratch[0]->encode); @@ -262,30 +261,14 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - __i915_vma_put(ppgtt->vma); - gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); mutex_destroy(&ppgtt->flush); - mutex_destroy(&ppgtt->pin_mutex); free_pd(&ppgtt->base.vm, ppgtt->base.pd); } -static int pd_vma_set_pages(struct i915_vma *vma) -{ - vma->pages = ERR_PTR(-ENODEV); - return 0; -} - -static void pd_vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - vma->pages = NULL; -} - static void pd_vma_bind(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma *vma, @@ -325,43 +308,10 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) } static const struct i915_vma_ops pd_vma_ops = { - .set_pages = pd_vma_set_pages, - .clear_pages = pd_vma_clear_pages, .bind_vma = pd_vma_bind, .unbind_vma = pd_vma_unbind, }; -static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) -{ - struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; - struct i915_vma *vma; - - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(size > ggtt->vm.total); - - vma = i915_vma_alloc(); - if (!vma) - return ERR_PTR(-ENOMEM); - - i915_active_init(&vma->active, NULL, NULL, 0); - - kref_init(&vma->ref); - mutex_init(&vma->pages_mutex); - vma->vm = i915_vm_get(&ggtt->vm); - vma->ops = &pd_vma_ops; - vma->private = ppgtt; - - vma->size = size; - vma->fence_size = size; - atomic_set(&vma->flags, I915_VMA_GGTT); - vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ - - INIT_LIST_HEAD(&vma->obj_link); - INIT_LIST_HEAD(&vma->closed_link); - - return vma; -} - int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); @@ -378,42 +328,92 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) return 0; - if (mutex_lock_interruptible(&ppgtt->pin_mutex)) - return -EINTR; + /* grab the ppgtt resv to pin the object */ + err = i915_vm_lock_objects(&ppgtt->base.vm, ww); + if (err) + return err; /* * PPGTT PDEs reside in the GGTT and consists of 512 entries. The * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. 
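
The gen6_ppgtt_pin() hunk above replaces the dedicated pin_mutex with the ww-lock dance plus an atomic fast path. A minimal, self-contained sketch of that pattern in plain C11 (the helper names are stand-ins, not driver API):

	#include <stdatomic.h>

	static atomic_int pin_count;

	static int take_vm_locks(void) { return 0; }	/* stand-in for i915_vm_lock_objects() */
	static int do_pin(void)        { return 0; }	/* stand-in for i915_ggtt_pin() */

	int optimistic_pin(void)
	{
		int old = atomic_load(&pin_count);
		int err;

		/* Fast path, like atomic_add_unless(): bump only if already pinned. */
		while (old > 0)
			if (atomic_compare_exchange_weak(&pin_count, &old, old + 1))
				return 0;

		/* Slow path: take the heavyweight locks, then re-check under them. */
		err = take_vm_locks();
		if (err)
			return err;

		if (atomic_load(&pin_count) == 0)
			err = do_pin();
		if (!err)
			atomic_fetch_add(&pin_count, 1);
		return err;
	}

The re-check under the lock is what makes dropping the mutex safe: a caller that loses the fast-path race finds pin_count already non-zero once it holds the locks and only increments.
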
*/ - err = 0; - if (!atomic_read(&ppgtt->pin_count)) + if (!atomic_read(&ppgtt->pin_count)) { err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); + + GEM_BUG_ON(ppgtt->vma->fence); + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma)); + } if (!err) atomic_inc(&ppgtt->pin_count); - mutex_unlock(&ppgtt->pin_mutex); return err; } -void gen6_ppgtt_unpin(struct i915_ppgtt *base) +static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj) { - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + obj->mm.pages = ZERO_SIZE_PTR; + return 0; +} - GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); - if (atomic_dec_and_test(&ppgtt->pin_count)) - i915_vma_unpin(ppgtt->vma); +static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ } -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) +static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { + .name = "pd_dummy_obj", + .get_pages = pd_dummy_obj_get_pages, + .put_pages = pd_dummy_obj_put_pages, +}; + +static struct i915_page_directory * +gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt; + struct i915_page_directory *pd; + int err; - if (!atomic_read(&ppgtt->pin_count)) - return; + pd = __alloc_pd(I915_PDES); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); - i915_vma_unpin(ppgtt->vma); - atomic_set(&ppgtt->pin_count, 0); + pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, + &pd_dummy_obj_ops, + I915_PDES * SZ_4K); + if (IS_ERR(pd->pt.base)) { + err = PTR_ERR(pd->pt.base); + pd->pt.base = NULL; + goto err_pd; + } + + pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm); + pd->pt.base->shares_resv_from = &ppgtt->base.vm; + + ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL); + if (IS_ERR(ppgtt->vma)) { + err = PTR_ERR(ppgtt->vma); + ppgtt->vma = NULL; + goto err_pd; + } + + /* The dummy object we create is special, override ops.. 
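
The pd_dummy_obj_get_pages() trick above leans on a slab convention: ZERO_SIZE_PTR is the kernel's "valid but empty" sentinel (what kmalloc(0) returns), defined in <linux/slab.h> as roughly:

	#define ZERO_SIZE_PTR ((void *)16)	/* non-NULL, but no storage behind it */

Assigning it to obj->mm.pages lets the rest of the object machinery treat the dummy page directory as having its pages "allocated" without any real backing store, which is all that is needed for a GEM object whose only job is to give the top-level PD a normal i915_vma.
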
*/ + ppgtt->vma->ops = &pd_vma_ops; + ppgtt->vma->private = ppgtt; + return pd; + +err_pd: + free_pd(&ppgtt->base.vm, pd); + return ERR_PTR(err); +} + +void gen6_ppgtt_unpin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); } struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) @@ -427,7 +427,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) return ERR_PTR(-ENOMEM); mutex_init(&ppgtt->flush); - mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, gt, 0); ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); @@ -440,21 +439,16 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma; ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->base.pd = __alloc_pd(I915_PDES); - if (!ppgtt->base.pd) { - err = -ENOMEM; - goto err_free; - } - err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_pd; + goto err_free; - ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); - if (IS_ERR(ppgtt->vma)) { - err = PTR_ERR(ppgtt->vma); + ppgtt->base.pd = gen6_alloc_top_pd(ppgtt); + if (IS_ERR(ppgtt->base.pd)) { + err = PTR_ERR(ppgtt->base.pd); goto err_scratch; } @@ -462,10 +456,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) err_scratch: free_scratch(&ppgtt->base.vm); -err_pd: - free_pd(&ppgtt->base.vm, ppgtt->base.pd); err_free: - mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h index 6a61a5c3a85a..5e5cf2ec3309 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -19,7 +19,6 @@ struct gen6_ppgtt { u32 pp_dir; atomic_t pin_count; - struct mutex pin_mutex; bool scan_for_unused_pt; }; @@ -71,7 +70,6 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww); void gen6_ppgtt_unpin(struct i915_ppgtt *base); -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); void gen6_ppgtt_enable(struct intel_gt *gt); void gen7_ppgtt_enable(struct intel_gt *gt); struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 461844dffd7e..e320610dd0b8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0)) dc_flush_wa = true; } diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 037a9a6e4889..b012c50f7ce7 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -18,7 +18,7 @@ static u64 gen8_pde_encode(const dma_addr_t addr, const enum i915_cache_level level) { - u64 pde = addr | _PAGE_PRESENT | _PAGE_RW; + u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (level != I915_CACHE_NONE) pde |= PPAT_CACHED_PDE; @@ -32,10 +32,10 @@ static u64 gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) { - gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + gen8_pte_t pte = addr | 
GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~_PAGE_RW; + pte &= ~GEN8_PAGE_RW; if (flags & PTE_LM) pte |= GEN12_PPGTT_PTE_LM; @@ -301,7 +301,6 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, pt = stash->pt[!!lvl]; __i915_gem_object_pin_pages(pt->base); - i915_gem_object_make_unshrinkable(pt->base); fill_px(pt, vm->scratch[lvl]->encode); @@ -652,7 +651,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) vm->scratch[0]->encode = gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_LLC, pte_flags); + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -668,7 +667,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) } fill_px(obj, vm->scratch[i - 1]->encode); - obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC); + obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE); vm->scratch[i] = obj; } @@ -777,10 +776,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); - if (HAS_LMEM(gt->i915)) + if (HAS_LMEM(gt->i915)) { ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; - else + + /* + * On some platforms the hw has dropped support for 4K GTT pages + * when dealing with LMEM, and due to the design of 64K GTT + * pages in the hw, we can only mark the *entire* page-table as + * operating in 64K GTT mode, since the enable bit is still on + * the pde, and not the pte. And since we still need to allow + * 4K GTT pages for SMEM objects, we can't have a "normal" 4K + * page-table with scratch pointing to LMEM, since that's + * undefined from the hw pov. The simplest solution is to just + * move the 64K scratch page to SMEM on such platforms and call + * it a day, since that should work for all configurations. 
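
Condensed, the allocator wiring this comment justifies (a restatement of the surrounding hunk, not new behaviour):

	if (HAS_LMEM(gt->i915)) {
		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
		/* 64K-page platforms keep scratch in SMEM, per the note above */
		ppgtt->vm.alloc_scratch_dma = HAS_64K_PAGES(gt->i915) ?
					      alloc_pt_dma : alloc_pt_lmem;
	} else {
		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
		ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
	}

The same split shows up in setup_scratch_page() (see the intel_gtt.c hunk later in this diff), which now allocates through vm->alloc_scratch_dma and refuses to fall back to 4K scratch entries on 64K-page hardware.
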
+ */ + if (HAS_64K_PAGES(gt->i915)) + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; + else + ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem; + } else { ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; + } err = gen8_init_scratch(&ppgtt->vm); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..ba083d800a08 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -219,7 +219,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce, */ err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww); - if (!err && ce->ring->vma->obj) + if (!err) err = i915_gem_object_lock(ce->ring->vma->obj, ww); if (!err && ce->state) err = i915_gem_object_lock(ce->state->obj, ww); @@ -228,17 +228,17 @@ int __intel_context_do_pin_ww(struct intel_context *ce, if (err) return err; - err = i915_active_acquire(&ce->active); + err = ce->ops->pre_pin(ce, ww, &vaddr); if (err) goto err_ctx_unpin; - err = ce->ops->pre_pin(ce, ww, &vaddr); + err = i915_active_acquire(&ce->active); if (err) - goto err_release; + goto err_post_unpin; err = mutex_lock_interruptible(&ce->pin_mutex); if (err) - goto err_post_unpin; + goto err_release; intel_engine_pm_might_get(ce->engine); @@ -273,11 +273,11 @@ int __intel_context_do_pin_ww(struct intel_context *ce, err_unlock: mutex_unlock(&ce->pin_mutex); +err_release: + i915_active_release(&ce->active); err_post_unpin: if (!handoff) ce->ops->post_unpin(ce); -err_release: - i915_active_release(&ce->active); err_ctx_unpin: intel_context_post_unpin(ce); @@ -364,7 +364,7 @@ static int __intel_context_active(struct i915_active *active) return 0; } -static int __i915_sw_fence_call +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, enum i915_sw_fence_notify state) { diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 246c37d72cd7..d8c74bbf9aae 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -211,7 +211,8 @@ static inline void intel_context_enter(struct intel_context *ce) static inline void intel_context_mark_active(struct intel_context *ce) { - lockdep_assert_held(&ce->timeline->mutex); + lockdep_assert(lockdep_is_held(&ce->timeline->mutex) || + test_bit(CONTEXT_IS_PARKING, &ce->flags)); ++ce->active_count; } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 9e0177dc5484..30cd81ad8911 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -118,6 +118,7 @@ struct intel_context { #define CONTEXT_LRCA_DIRTY 9 #define CONTEXT_GUC_INIT 10 #define CONTEXT_PERMA_PIN 11 +#define CONTEXT_IS_PARKING 12 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ff6753ccb129..352254e001b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -325,6 +325,38 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); + if (GRAPHICS_VER(gt->i915) >= 11) { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] 
= GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + engine->reset_domain = engine_reset_domains[id]; + } else { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + engine->reset_domain = engine_reset_domains[id]; + } engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; @@ -363,7 +395,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, DRIVER_CAPS(i915)->has_logical_contexts = true; ewma__engine_latency_init(&engine->latency); - seqcount_init(&engine->stats.lock); + seqcount_init(&engine->stats.execlists.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1676,14 +1708,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, static void print_request_ring(struct drm_printer *m, struct i915_request *rq) { + struct i915_vma_snapshot *vsnap = &rq->batch_snapshot; void *ring; int size; + if (!i915_vma_snapshot_present(vsnap)) + vsnap = NULL; + drm_printf(m, "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u, + vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u); size = rq->tail - rq->head; if (rq->tail < rq->head) @@ -1915,22 +1951,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, - ktime_t *now) -{ - ktime_t total = engine->stats.total; - - /* - * If the engine is executing something at the moment - * add it to the total. 
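
Note that the __intel_engine_get_busy_time() helper being deleted here is not lost functionality: it reappears later in this diff as execlists_engine_busyness() in intel_execlists_submission.c, reached through the new engine->busyness() hook so that the GuC backend can supply its own implementation. The wrapper reduces to:

	ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
	{
		return engine->busyness(engine, now);
	}
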
- */ - *now = ktime_get(); - if (READ_ONCE(engine->stats.active)) - total = ktime_add(total, ktime_sub(*now, engine->stats.start)); - - return total; -} - /** * intel_engine_get_busy_time() - Return current accumulated engine busyness * @engine: engine to report on @@ -1940,15 +1960,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) { - unsigned int seq; - ktime_t total; - - do { - seq = read_seqcount_begin(&engine->stats.lock); - total = __intel_engine_get_busy_time(engine, now); - } while (read_seqcount_retry(&engine->stats.lock, seq)); - - return total; + return engine->busyness(engine, now); } struct intel_context * diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index a1334b48dde7..b0a4a2dbe3ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -26,7 +26,7 @@ static void dbg_poison_ce(struct intel_context *ce) int type = i915_coherent_map_type(ce->engine->i915, obj, true); void *map; - if (!i915_gem_object_trylock(obj)) + if (!i915_gem_object_trylock(obj, NULL)) return; map = i915_gem_object_pin_map(obj, type); @@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf) return 0; } -#if IS_ENABLED(CONFIG_LOCKDEP) - -static unsigned long __timeline_mark_lock(struct intel_context *ce) -{ - unsigned long flags; - - local_irq_save(flags); - mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); - - return flags; -} - -static void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) -{ - mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); - local_irq_restore(flags); -} - -#else - -static unsigned long __timeline_mark_lock(struct intel_context *ce) -{ - return 0; -} - -static void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) -{ -} - -#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ - static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) { struct i915_request *rq = to_request(fence); @@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) { struct intel_context *ce = engine->kernel_context; struct i915_request *rq; - unsigned long flags; bool result = true; /* @@ -214,7 +180,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * engine->wakeref.count, we may see the request completion and retire * it causing an underflow of the engine->wakeref. 
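
Condensed from the surrounding hunks and the intel_context.h change earlier in this diff, the new annotation scheme replaces the hand-rolled lockdep acquire/release with a context flag:

	/* parking path: the timeline mutex is deliberately not taken */
	set_bit(CONTEXT_IS_PARKING, &ce->flags);
	rq = __i915_request_create(ce, GFP_NOWAIT);
	...
	clear_bit(CONTEXT_IS_PARKING, &ce->flags);

	/* and the assertion that now tolerates it */
	lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
		       test_bit(CONTEXT_IS_PARKING, &ce->flags));

This drops the CONFIG_LOCKDEP-only __timeline_mark_lock()/__timeline_mark_unlock() pair and the irq-flags juggling that came with it.
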
*/ - flags = __timeline_mark_lock(ce); + set_bit(CONTEXT_IS_PARKING, &ce->flags); GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); rq = __i915_request_create(ce, GFP_NOWAIT); @@ -246,7 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) result = false; out_unlock: - __timeline_mark_unlock(ce, flags); + clear_bit(CONTEXT_IS_PARKING, &ce->flags); return result; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h index 24fbdd94351a..8e762d683e50 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -15,45 +15,46 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) { + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; unsigned long flags; - if (engine->stats.active) { - engine->stats.active++; + if (stats->active) { + stats->active++; return; } /* The writer is serialised; but the pmu reader may be from hardirq */ local_irq_save(flags); - write_seqcount_begin(&engine->stats.lock); + write_seqcount_begin(&stats->lock); - engine->stats.start = ktime_get(); - engine->stats.active++; + stats->start = ktime_get(); + stats->active++; - write_seqcount_end(&engine->stats.lock); + write_seqcount_end(&stats->lock); local_irq_restore(flags); - GEM_BUG_ON(!engine->stats.active); + GEM_BUG_ON(!stats->active); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) { + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; unsigned long flags; - GEM_BUG_ON(!engine->stats.active); - if (engine->stats.active > 1) { - engine->stats.active--; + GEM_BUG_ON(!stats->active); + if (stats->active > 1) { + stats->active--; return; } local_irq_save(flags); - write_seqcount_begin(&engine->stats.lock); + write_seqcount_begin(&stats->lock); - engine->stats.active--; - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); + stats->active--; + stats->total = ktime_add(stats->total, + ktime_sub(ktime_get(), stats->start)); - write_seqcount_end(&engine->stats.lock); + write_seqcount_end(&stats->lock); local_irq_restore(flags); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e0f773585c29..36365bdbe1ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -257,6 +257,55 @@ struct intel_engine_execlists { #define INTEL_ENGINE_CS_MAX_NAME 8 +struct intel_engine_execlists_stats { + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + + /** + * @lock: Lock protecting the below fields. + */ + seqcount_t lock; + + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases where + * engine is currently busy (active > 0). + */ + ktime_t total; + + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; +}; + +struct intel_engine_guc_stats { + /** + * @running: Active state of the engine when busyness was last sampled. + */ + bool running; + + /** + * @prev_total: Previous value of total runtime clock cycles. + */ + u32 prev_total; + + /** + * @total_gt_clks: Total gt clock cycles this engine was busy. + */ + u64 total_gt_clks; + + /** + * @start_gt_clk: GT clock time of last idle to active transition. 
+ */ + u64 start_gt_clk; +}; + struct intel_engine_cs { struct drm_i915_private *i915; struct intel_gt *gt; @@ -269,6 +318,7 @@ struct intel_engine_cs { unsigned int guc_id; intel_engine_mask_t mask; + u32 reset_domain; /** * @logical_mask: logical mask of engine, reported to user space via * query IOCTL and used to communicate with the GuC in logical space. @@ -439,6 +489,12 @@ struct intel_engine_cs { void (*add_active_request)(struct i915_request *rq); void (*remove_active_request)(struct i915_request *rq); + /* + * Get engine busyness and the time at which the busyness was sampled. + */ + ktime_t (*busyness)(struct intel_engine_cs *engine, + ktime_t *now); + struct intel_engine_execlists execlists; /* @@ -488,30 +544,10 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); struct { - /** - * @active: Number of contexts currently scheduled in. - */ - unsigned int active; - - /** - * @lock: Lock protecting the below fields. - */ - seqcount_t lock; - - /** - * @total: Total time this engine was busy. - * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). - */ - ktime_t total; - - /** - * @start: Timestamp of the last idle to active transition. - * - * Idle is defined as active == 0, active is active > 0. - */ - ktime_t start; + union { + struct intel_engine_execlists_stats execlists; + struct intel_engine_guc_stats guc; + }; /** * @rps: Utilisation at last RPS sampling. diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 8f8bea08e734..9ce85a845105 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -116,7 +116,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915) disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); - if (intel_uc_uses_guc_submission(&i915->gt.uc)) + if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; for (i = 0; i < ARRAY_SIZE(map); i++) { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..a69df5e9e77a 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2186,7 +2186,8 @@ struct execlists_capture { static void execlists_capture_work(struct work_struct *work) { struct execlists_capture *cap = container_of(work, typeof(*cap), work); - const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | + __GFP_NOWARN; struct intel_engine_cs *engine = cap->rq->engine; struct intel_gt_coredump *gt = cap->error->gt; struct intel_engine_capture_vma *vma; @@ -3293,6 +3294,38 @@ static void execlists_release(struct intel_engine_cs *engine) lrc_fini_wa_ctx(engine); } +static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine, + ktime_t *now) +{ + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; + ktime_t total = stats->total; + + /* + * If the engine is executing something at the moment + * add it to the total. 
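
The execlists busyness sampling that follows relies on the classic seqcount read-retry scheme. A minimal stand-alone analogue in plain C11 (simplified: the kernel's seqcount_t adds lockdep tracking and access annotations this sketch omits, and the writer is assumed externally serialised, as it is in the driver):

	#include <stdatomic.h>
	#include <stdint.h>

	static atomic_uint seq;			/* even = stable, odd = write in progress */
	static uint64_t total_ns, start_ns;	/* payload read as one consistent pair */

	static void write_begin(void)
	{
		atomic_fetch_add_explicit(&seq, 1, memory_order_relaxed);
		atomic_thread_fence(memory_order_release);
	}

	static void write_end(void)
	{
		atomic_thread_fence(memory_order_release);
		atomic_fetch_add_explicit(&seq, 1, memory_order_relaxed);
	}

	static void read_sample(uint64_t *total, uint64_t *start)
	{
		unsigned int s;

		do {
			s = atomic_load_explicit(&seq, memory_order_acquire);
			*total = total_ns;
			*start = start_ns;
			atomic_thread_fence(memory_order_acquire);
		} while ((s & 1) ||
			 atomic_load_explicit(&seq, memory_order_relaxed) != s);
	}

Readers never block the writer; they simply retry if a write overlapped the sample, which is exactly what the read_seqcount_begin()/read_seqcount_retry() loop in execlists_engine_busyness() below does.
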
+ */ + *now = ktime_get(); + if (READ_ONCE(stats->active)) + total = ktime_add(total, ktime_sub(*now, stats->start)); + + return total; +} + +static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine, + ktime_t *now) +{ + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; + unsigned int seq; + ktime_t total; + + do { + seq = read_seqcount_begin(&stats->lock); + total = __execlists_engine_busyness(engine, now); + } while (read_seqcount_retry(&stats->lock, seq)); + + return total; +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { @@ -3349,6 +3382,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_bb_start = gen8_emit_bb_start; else engine->emit_bb_start = gen8_emit_bb_start_noarb; + + engine->busyness = execlists_engine_busyness; } static void logical_ring_default_irqs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 57c97554393b..5263dda7f8d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -3,12 +3,14 @@ * Copyright © 2020 Intel Corporation */ +#include <linux/agp_backend.h> #include <linux/stop_machine.h> #include <asm/set_memory.h> #include <asm/smp.h> #include <drm/i915_drm.h> +#include <drm/intel-gtt.h> #include "gem/i915_gem_lmem.h" @@ -20,9 +22,6 @@ #include "intel_gtt.h" #include "gen8_ppgtt.h" -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma); - static void i915_ggtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -104,7 +103,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915) * Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if (!intel_vtd_active()) + if (!intel_vtd_active(i915)) return false; if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) @@ -116,17 +115,26 @@ static bool needs_idle_maps(struct drm_i915_private *i915) return false; } -void i915_ggtt_suspend(struct i915_ggtt *ggtt) +/** + * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM + * @vm: The VM to suspend the mappings for + * + * Suspend the memory mappings for all objects mapped to HW via the GGTT or a + * DPT page table. + */ +void i915_ggtt_suspend_vm(struct i915_address_space *vm) { struct i915_vma *vma, *vn; int open; - mutex_lock(&ggtt->vm.mutex); + drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); + + mutex_lock(&vm->mutex); /* Skip rewriting PTE on VMA unbind. 
*/ - open = atomic_xchg(&ggtt->vm.open, 0); + open = atomic_xchg(&vm->open, 0); - list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); i915_vma_wait_for_bind(vma); @@ -139,11 +147,17 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt) } } - ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->invalidate(ggtt); - atomic_set(&ggtt->vm.open, open); + vm->clear_range(vm, 0, vm->total); - mutex_unlock(&ggtt->vm.mutex); + atomic_set(&vm->open, open); + + mutex_unlock(&vm->mutex); +} + +void i915_ggtt_suspend(struct i915_ggtt *ggtt) +{ + i915_ggtt_suspend_vm(&ggtt->vm); + ggtt->invalidate(ggtt); intel_gt_check_and_clear_faults(ggtt->vm.gt); } @@ -192,7 +206,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) { - gen8_pte_t pte = addr | _PAGE_PRESENT; + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; if (flags & PTE_LM) pte |= GEN12_GGTT_PTE_LM; @@ -875,21 +889,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return 0; } -int ggtt_set_pages(struct i915_vma *vma) -{ - int ret; - - GEM_BUG_ON(vma->pages); - - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - - vma->page_sizes = vma->obj->mm.page_sizes; - - return 0; -} - static void gen6_gmch_remove(struct i915_address_space *vm) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); @@ -924,6 +923,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) size = gen8_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; @@ -950,8 +950,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; ggtt->vm.pte_encode = gen8_ggtt_pte_encode; @@ -1077,6 +1075,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.clear_range = nop_clear_range; if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) @@ -1100,8 +1099,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; return ggtt_probe_common(ggtt, size); } @@ -1129,6 +1126,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; if (needs_idle_maps(i915)) { drm_notice(&i915->drm, @@ -1145,8 +1143,6 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; if (unlikely(ggtt->do_idle_maps)) drm_notice(&i915->drm, @@ -1212,11 +1208,11 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) { int ret; - ret = ggtt_probe_hw(&i915->ggtt, &i915->gt); + ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915)); if (ret) return ret; - if (intel_vtd_active()) + if (intel_vtd_active(i915)) drm_info(&i915->drm, "VT-d active 
for gfx access\n"); return 0; @@ -1253,368 +1249,66 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) ggtt->invalidate(ggtt); } -void i915_ggtt_resume(struct i915_ggtt *ggtt) +/** + * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM + * @vm: The VM to restore the mappings for + * + * Restore the memory mappings for all objects mapped to HW via the GGTT or a + * DPT page table. + * + * Returns %true if restoring the mapping for any object that was in a write + * domain before suspend. + */ +bool i915_ggtt_resume_vm(struct i915_address_space *vm) { struct i915_vma *vma; - bool flush = false; + bool write_domain_objs = false; int open; - intel_gt_check_and_clear_faults(ggtt->vm.gt); + drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); /* First fill our portion of the GTT with scratch pages */ - ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); + vm->clear_range(vm, 0, vm->total); /* Skip rewriting PTE on VMA unbind. */ - open = atomic_xchg(&ggtt->vm.open, 0); + open = atomic_xchg(&vm->open, 0); /* clflush objects bound into the GGTT and rebind them. */ - list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { + list_for_each_entry(vma, &vm->bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; unsigned int was_bound = atomic_read(&vma->flags) & I915_VMA_BIND_MASK; GEM_BUG_ON(!was_bound); - vma->ops->bind_vma(&ggtt->vm, NULL, vma, + vma->ops->bind_vma(vm, NULL, vma, obj ? obj->cache_level : 0, was_bound); if (obj) { /* only used during resume => exclusive access */ - flush |= fetch_and_zero(&obj->write_domain); + write_domain_objs |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; } } - atomic_set(&ggtt->vm.open, open); - ggtt->invalidate(ggtt); - - if (flush) - wbinvd_on_all_cpus(); - - if (GRAPHICS_VER(ggtt->vm.i915) >= 8) - setup_private_pat(ggtt->vm.gt->uncore); - - intel_ggtt_restore_fences(ggtt); -} - -static struct scatterlist * -rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, - unsigned int width, unsigned int height, - unsigned int src_stride, unsigned int dst_stride, - struct sg_table *st, struct scatterlist *sg) -{ - unsigned int column, row; - unsigned int src_idx; - - for (column = 0; column < width; column++) { - unsigned int left; - - src_idx = src_stride * (height - 1) + column + offset; - for (row = 0; row < height; row++) { - st->nents++; - /* - * We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); - sg_dma_address(sg) = - i915_gem_object_get_dma_address(obj, src_idx); - sg_dma_len(sg) = I915_GTT_PAGE_SIZE; - sg = sg_next(sg); - src_idx -= src_stride; - } - - left = (dst_stride - height) * I915_GTT_PAGE_SIZE; - - if (!left) - continue; - - st->nents++; - - /* - * The DE ignores the PTEs for the padding tiles, the sg entry - * here is just a conenience to indicate how many padding PTEs - * to insert at this spot. - */ - sg_set_page(sg, NULL, left, 0); - sg_dma_address(sg) = 0; - sg_dma_len(sg) = left; - sg = sg_next(sg); - } - - return sg; -} - -static noinline struct sg_table * -intel_rotate_pages(struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_rotation_info_size(rot_info); - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. 
*/ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) - sg = rotate_pages(obj, rot_info->plane[i].offset, - rot_info->plane[i].width, rot_info->plane[i].height, - rot_info->plane[i].src_stride, - rot_info->plane[i].dst_stride, - st, sg); - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - - drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, - rot_info->plane[0].height, size); - - return ERR_PTR(ret); -} - -static struct scatterlist * -remap_pages(struct drm_i915_gem_object *obj, - unsigned int offset, unsigned int alignment_pad, - unsigned int width, unsigned int height, - unsigned int src_stride, unsigned int dst_stride, - struct sg_table *st, struct scatterlist *sg) -{ - unsigned int row; - - if (!width || !height) - return sg; - - if (alignment_pad) { - st->nents++; - - /* - * The DE ignores the PTEs for the padding tiles, the sg entry - * here is just a convenience to indicate how many padding PTEs - * to insert at this spot. - */ - sg_set_page(sg, NULL, alignment_pad * 4096, 0); - sg_dma_address(sg) = 0; - sg_dma_len(sg) = alignment_pad * 4096; - sg = sg_next(sg); - } - - for (row = 0; row < height; row++) { - unsigned int left = width * I915_GTT_PAGE_SIZE; - - while (left) { - dma_addr_t addr; - unsigned int length; - - /* - * We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - - addr = i915_gem_object_get_dma_address_len(obj, offset, &length); - - length = min(left, length); - - st->nents++; - - sg_set_page(sg, NULL, length, 0); - sg_dma_address(sg) = addr; - sg_dma_len(sg) = length; - sg = sg_next(sg); - - offset += length / I915_GTT_PAGE_SIZE; - left -= length; - } - - offset += src_stride - width; - - left = (dst_stride - width) * I915_GTT_PAGE_SIZE; - - if (!left) - continue; - - st->nents++; - - /* - * The DE ignores the PTEs for the padding tiles, the sg entry - * here is just a conenience to indicate how many padding PTEs - * to insert at this spot. - */ - sg_set_page(sg, NULL, left, 0); - sg_dma_address(sg) = 0; - sg_dma_len(sg) = left; - sg = sg_next(sg); - } - - return sg; -} - -static noinline struct sg_table * -intel_remap_pages(struct intel_remapped_info *rem_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_remapped_info_size(rem_info); - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int gtt_offset = 0; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. 
*/ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { - unsigned int alignment_pad = 0; - - if (rem_info->plane_alignment) - alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset; - - sg = remap_pages(obj, - rem_info->plane[i].offset, alignment_pad, - rem_info->plane[i].width, rem_info->plane[i].height, - rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride, - st, sg); - - gtt_offset += alignment_pad + - rem_info->plane[i].dst_stride * rem_info->plane[i].height; - } - - i915_sg_trim(st); - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: + atomic_set(&vm->open, open); - drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rem_info->plane[0].width, - rem_info->plane[0].height, size); - - return ERR_PTR(ret); + return write_domain_objs; } -static noinline struct sg_table * -intel_partial_pages(const struct i915_ggtt_view *view, - struct drm_i915_gem_object *obj) -{ - struct sg_table *st; - struct scatterlist *sg, *iter; - unsigned int count = view->partial.size; - unsigned int offset; - int ret = -ENOMEM; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, count, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); - GEM_BUG_ON(!iter); - - sg = st->sgl; - st->nents = 0; - do { - unsigned int len; - - len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT), - count << PAGE_SHIFT); - sg_set_page(sg, NULL, len, 0); - sg_dma_address(sg) = - sg_dma_address(iter) + (offset << PAGE_SHIFT); - sg_dma_len(sg) = len; - - st->nents++; - count -= len >> PAGE_SHIFT; - if (count == 0) { - sg_mark_end(sg); - i915_sg_trim(st); /* Drop any unused tail entries. */ - - return st; - } - - sg = __sg_next(sg); - iter = __sg_next(iter); - offset = 0; - } while (1); - -err_sg_alloc: - kfree(st); -err_st_alloc: - return ERR_PTR(ret); -} - -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma) +void i915_ggtt_resume(struct i915_ggtt *ggtt) { - int ret; + bool flush; - /* - * The vma->pages are only valid within the lifespan of the borrowed - * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so - * must be the vma->pages. A simple rule is that vma->pages must only - * be accessed when the obj->mm.pages are pinned. 
- */ - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); + intel_gt_check_and_clear_faults(ggtt->vm.gt); - switch (vma->ggtt_view.type) { - default: - GEM_BUG_ON(vma->ggtt_view.type); - fallthrough; - case I915_GGTT_VIEW_NORMAL: - vma->pages = vma->obj->mm.pages; - return 0; + flush = i915_ggtt_resume_vm(&ggtt->vm); - case I915_GGTT_VIEW_ROTATED: - vma->pages = - intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); - break; + ggtt->invalidate(ggtt); - case I915_GGTT_VIEW_REMAPPED: - vma->pages = - intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); - break; + if (flush) + wbinvd_on_all_cpus(); - case I915_GGTT_VIEW_PARTIAL: - vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - break; - } + if (GRAPHICS_VER(ggtt->vm.i915) >= 8) + setup_private_pat(ggtt->vm.gt->uncore); - ret = 0; - if (IS_ERR(vma->pages)) { - ret = PTR_ERR(vma->pages); - vma->pages = NULL; - drm_err(&vma->vm->i915->drm, - "Failed to get pages for VMA view type %u (%d)!\n", - vma->ggtt_view.type, ret); - } - return ret; + intel_ggtt_restore_fences(ggtt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 1cb1948ac959..35d0fcd3a86c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -3,6 +3,8 @@ * Copyright © 2019 Intel Corporation */ +#include <drm/intel-gtt.h> + #include "intel_gt_debugfs.h" #include "gem/i915_gem_lmem.h" @@ -23,13 +25,12 @@ #include "shmem_utils.h" #include "pxp/intel_pxp.h" -void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { - gt->i915 = i915; - gt->uncore = &i915->uncore; - spin_lock_init(&gt->irq_lock); + mutex_init(&gt->tlb_invalidate_lock); + INIT_LIST_HEAD(&gt->closed_vma); spin_lock_init(&gt->closed_lock); @@ -46,6 +47,12 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_rps_init_early(&gt->rps); } +void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +{ + gt->i915 = i915; + gt->uncore = &i915->uncore; +} + int intel_gt_probe_lmem(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -907,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs;
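
To make get_reg_and_bit() above concrete (the engine pointers here are illustrative only):

	/* gen8 video decode: one register per instance, bit stays 0 */
	rb = get_reg_and_bit(vcs1, true, gen8_regs, ARRAY_SIZE(gen8_regs));
	/* rb.reg is GEN8_M1TCR + 4 (i.e. GEN8_M2TCR), rb.bit == BIT(0) */

	/* gen12: one register per class, one bit per instance */
	rb = get_reg_and_bit(vecs2, false, gen12_regs, ARRAY_SIZE(gen12_regs));
	/* rb.reg is GEN12_VE_TLB_INV_CR, rb.bit == BIT(2) */

Gen8 hardware spreads the video-decode engines across consecutive registers (hence the 4 * engine->instance offset and the GEN8_M2TCR comment), while every other case selects the engine within its class register by bit position.
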
+ unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (GRAPHICS_VER(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(&gt->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(&gt->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + /* + * Use delayed put since a) we mostly expect a flurry of TLB + * invalidations so it is good to avoid paying the forcewake cost and + * b) it works around a bug in Icelake which cannot cope with too rapid + * transitions. + */ + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(&gt->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 74e771871a9b..a913fb6ffec3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -35,6 +35,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); +void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); int intel_gt_probe_lmem(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); @@ -90,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index acc49c56a9f3..9db3dcbd917f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -9,11 +9,6 @@ #include "intel_engine_pm.h" #include "intel_gt_buffer_pool.h" -static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool) -{ - return container_of(pool, struct intel_gt, buffer_pool); -} - static struct list_head * bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz) { @@ -141,7 +136,7 @@ static struct intel_gt_buffer_pool_node * node_create(struct intel_gt_buffer_pool *pool, size_t sz, enum i915_map_type type) { - struct intel_gt *gt = to_gt(pool); + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); struct intel_gt_buffer_pool_node *node; struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h index e307ceb99031..17e79b735cfe 100644 ---
a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h @@ -10,11 +10,7 @@ struct intel_gt; -#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \ - static int __name ## _open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, __name ## _show, inode->i_private); \ -} \ +#define __GT_DEBUGFS_ATTRIBUTE_FOPS(__name) \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ @@ -23,6 +19,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ +__GT_DEBUGFS_ATTRIBUTE_FOPS(__name) + +#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(__name, __size_vf) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open_size(file, __name ## _show, inode->i_private, \ + __size_vf(inode->i_private)); \ +} \ +__GT_DEBUGFS_ATTRIBUTE_FOPS(__name) + void intel_gt_debugfs_register(struct intel_gt *gt); struct intel_gt_debugfs_file { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 795689eb3fc7..c0fa41e4c803 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -86,6 +86,7 @@ static int __gt_unpark(struct intel_wakeref *wf) intel_rc6_unpark(&gt->rc6); intel_rps_unpark(&gt->rps); i915_pmu_gt_unparked(i915); + intel_guc_busyness_unpark(gt); intel_gt_unpark_requests(gt); runtime_begin(gt); @@ -104,6 +105,7 @@ static int __gt_park(struct intel_wakeref *wf) runtime_end(gt); intel_gt_park_requests(gt); + intel_guc_busyness_park(gt); i915_vma_parked(gt); i915_pmu_gt_parked(i915); intel_rps_park(&gt->rps); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 14216cc471b1..f20687796490 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -73,6 +73,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct i915_wa_list wa_list; struct intel_gt_timelines { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index b67f620c3d93..a94be0306464 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -8,6 +8,8 @@ #include <linux/fault-inject.h> #include <linux/sched/mm.h> +#include <drm/drm_cache.h> + #include "gem/i915_gem_lmem.h" #include "i915_trace.h" #include "intel_gt.h" @@ -222,19 +224,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) INIT_LIST_HEAD(&vm->bound_list); } -void clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - if (vma->pages != vma->obj->mm.pages) { - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - - memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); -} - void *__px_vaddr(struct drm_i915_gem_object *p) { enum i915_map_type type; @@ -274,6 +263,7 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch) val = POISON_FREE; memset(vaddr, val, scratch->base.size); + drm_clflush_virt_range(vaddr, scratch->base.size); } int setup_scratch_page(struct i915_address_space *vm) @@ -299,7 +289,7 @@ int setup_scratch_page(struct i915_address_space *vm) do { struct drm_i915_gem_object *obj; - obj = vm->alloc_pt_dma(vm, size); + obj = vm->alloc_scratch_dma(vm, size); + if
(IS_ERR(obj)) goto skip; @@ -335,6 +325,18 @@ skip: if (size == I915_GTT_PAGE_SIZE_4K) return -ENOMEM; + /* + * If we need 64K minimum GTT pages for device local-memory, + * like on XEHPSDV, then we need to fail the allocation here, + * otherwise we can't safely support the insertion of + * local-memory pages for this vm, since the HW expects the + * correct physical alignment and size when the page-table is + * operating in 64K GTT mode, which includes any scratch PTEs, + * since userspace can still touch them. + */ + if (HAS_64K_PAGES(vm->i915)) + return -ENOMEM; + size = I915_GTT_PAGE_SIZE_4K; } while (1); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index bc6750263359..177b42b935a1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -135,6 +135,9 @@ typedef u64 gen8_pte_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN8_PAGE_PRESENT BIT_ULL(0) +#define GEN8_PAGE_RW BIT_ULL(1) + #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) @@ -206,9 +209,6 @@ struct i915_vma_ops { */ void (*unbind_vma)(struct i915_address_space *vm, struct i915_vma *vma); - - int (*set_pages)(struct i915_vma *vma); - void (*clear_pages)(struct i915_vma *vma); }; struct i915_address_space { @@ -265,6 +265,8 @@ struct i915_address_space { struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); + struct drm_i915_gem_object * + (*alloc_scratch_dma)(struct i915_address_space *vm, int sz); u64 (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -544,6 +546,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt); struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags); +void i915_ggtt_suspend_vm(struct i915_address_space *vm); +bool i915_ggtt_resume_vm(struct i915_address_space *vm); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); @@ -594,10 +598,6 @@ release_pd_entry(struct i915_page_directory * const pd, const struct drm_i915_gem_object * const scratch); void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); -int ggtt_set_pages(struct i915_vma *vma); -int ppgtt_set_pages(struct i915_vma *vma); -void clear_pages(struct i915_vma *vma); - void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 56156cf18c41..b3489599e4de 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1167,6 +1167,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); + /* Wa_16013000631:dg2 */ + if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(ce->engine->i915)) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index afb1cce9a352..18b44af56969 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -13,7 +13,6 @@ struct insert_pte_data { u64 offset; - bool is_lmem; }; #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ @@ -40,7 +39,7 @@ static void insert_pte(struct i915_address_space *vm, struct insert_pte_data *d = data; vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, - d->is_lmem 
? PTE_LM : 0); + i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0); d->offset += PAGE_SIZE; } @@ -134,8 +133,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) goto err_vm; /* Now allow the GPU to rewrite the PTE via its own ppGTT */ - d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]); - vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d); + vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d); } return &vm->vm; @@ -281,10 +279,10 @@ static int emit_pte(struct i915_request *rq, GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); /* Compute the page directory offset for the target address range */ - offset += (u64)rq->engine->instance << 32; offset >>= 12; offset *= sizeof(u64); offset += 2 * CHUNK_SZ; + offset += (u64)rq->engine->instance << 32; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -406,7 +404,7 @@ static int emit_copy(struct i915_request *rq, int size) int intel_context_migrate_copy(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -433,8 +431,8 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_ce; } - if (await) { - err = i915_request_await_dma_fence(rq, await); + if (deps) { + err = i915_request_await_deps(rq, deps); if (err) goto out_rq; @@ -444,7 +442,7 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_rq; } - await = NULL; + deps = NULL; } /* The PTE updates + copy must not be interrupted. */ @@ -527,7 +525,7 @@ static int emit_clear(struct i915_request *rq, int size, u32 value) int intel_context_migrate_clear(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -552,8 +550,8 @@ intel_context_migrate_clear(struct intel_context *ce, goto out_ce; } - if (await) { - err = i915_request_await_dma_fence(rq, await); + if (deps) { + err = i915_request_await_deps(rq, deps); if (err) goto out_rq; @@ -563,7 +561,7 @@ intel_context_migrate_clear(struct intel_context *ce, goto out_rq; } - await = NULL; + deps = NULL; } /* The PTE updates + clear must not be interrupted. 
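
A hypothetical call site for the reworked clear API, close to how the migrate selftests drive it: deps replaces the single await fence and may be NULL when there is nothing to wait on:

	struct i915_request *rq = NULL;
	int err;

	err = intel_context_migrate_clear(ce, NULL /* deps */,
					  obj->mm.pages->sgl,
					  obj->cache_level,
					  i915_gem_object_is_lmem(obj),
					  0 /* value */, &rq);
	if (rq) {
		if (!err && i915_request_wait(rq, 0, HZ) < 0)
			err = -ETIME;
		i915_request_put(rq);
	}
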
*/ @@ -601,7 +599,7 @@ out_ce: int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -626,7 +624,7 @@ int intel_migrate_copy(struct intel_migrate *m, if (err) goto out; - err = intel_context_migrate_copy(ce, await, + err = intel_context_migrate_copy(ce, deps, src, src_cache_level, src_is_lmem, dst, dst_cache_level, dst_is_lmem, out); @@ -640,7 +638,7 @@ out: int intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -663,7 +661,7 @@ intel_migrate_clear(struct intel_migrate *m, if (err) goto out; - err = intel_context_migrate_clear(ce, await, sg, cache_level, + err = intel_context_migrate_clear(ce, deps, sg, cache_level, is_lmem, value, out); intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h index 4e18e755a00b..ccc677ec4aa3 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.h +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -11,6 +11,7 @@ #include "intel_migrate_types.h" struct dma_fence; +struct i915_deps; struct i915_request; struct i915_gem_ww_ctx; struct intel_gt; @@ -23,7 +24,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m); int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -33,7 +34,7 @@ int intel_migrate_copy(struct intel_migrate *m, struct i915_request **out); int intel_context_migrate_copy(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -45,7 +46,7 @@ int intel_context_migrate_copy(struct intel_context *ce, int intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -53,7 +54,7 @@ intel_migrate_clear(struct intel_migrate *m, struct i915_request **out); int intel_context_migrate_clear(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 15f9ada28a7a..9c253ba593c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -424,7 +424,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->unused_entries_index = I915_MOCS_PTE; if (IS_DG2(i915)) { - if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); table->table = dg2_mocs_table_g10_ax; } else { diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 4396bfd630d8..083b3090c69c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -289,16 +289,6 @@ void i915_vm_free_pt_stash(struct i915_address_space *vm, } } -int ppgtt_set_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(vma->pages); - - vma->pages = vma->obj->mm.pages; - vma->page_sizes = 
vma->obj->mm.page_sizes; - - return 0; -} - void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, unsigned long lmem_pt_obj_flags) { @@ -315,6 +305,4 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; - ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; - ppgtt->vm.vma_ops.clear_pages = clear_pages; } diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 43093dd2d0c9..c3155ee58689 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,10 +117,17 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + /* Wa_16011777198 - Render powergating must remain disabled */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + pg_enable = + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; + else + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index afb35d2e5c73..fde2dcb59809 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -66,12 +66,16 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem) DMA_ATTR_FORCE_CONTIGUOUS); } -static void +static int region_lmem_release(struct intel_memory_region *mem) { - intel_region_ttm_fini(mem); + int ret; + + ret = intel_region_ttm_fini(mem); io_mapping_fini(&mem->iomap); release_fake_lmem_bar(mem); + + return ret; } static int @@ -158,7 +162,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { - if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0)) + if (!IS_DG1_GRAPHICS_STEP(uncore->i915, STEP_A0, STEP_C0)) return false; *start = 0; @@ -193,6 +197,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_memory_region *mem; + resource_size_t min_page_size; resource_size_t io_start; resource_size_t lmem_size; int err; @@ -207,10 +212,12 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) return ERR_PTR(-ENODEV); + min_page_size = HAS_64K_PAGES(i915) ? 
I915_GTT_PAGE_SIZE_64K :
+						I915_GTT_PAGE_SIZE_4K;
 	mem = intel_memory_region_create(i915,
 					 0,
 					 lmem_size,
-					 I915_GTT_PAGE_SIZE_4K,
+					 min_page_size,
 					 io_start,
 					 INTEL_MEMORY_LOCAL,
 					 0,
@@ -231,7 +238,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
 	return mem;
 
 err_region_put:
-	intel_memory_region_put(mem);
+	intel_memory_region_destroy(mem);
 	return ERR_PTR(err);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 91200c43951f..7be0002d9d70 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -6,7 +6,7 @@
 #include <linux/sched/mm.h>
 #include <linux/stop_machine.h>
 
-#include "display/intel_display_types.h"
+#include "display/intel_display.h"
 #include "display/intel_overlay.h"
 
 #include "gem/i915_gem_context.h"
@@ -297,13 +297,6 @@ static int gen6_reset_engines(struct intel_gt *gt,
 			      intel_engine_mask_t engine_mask,
 			      unsigned int retry)
 {
-	static const u32 hw_engine_mask[] = {
-		[RCS0]  = GEN6_GRDOM_RENDER,
-		[BCS0]  = GEN6_GRDOM_BLT,
-		[VCS0]  = GEN6_GRDOM_MEDIA,
-		[VCS1]  = GEN8_GRDOM_MEDIA2,
-		[VECS0] = GEN6_GRDOM_VECS,
-	};
 	struct intel_engine_cs *engine;
 	u32 hw_mask;
 
@@ -314,8 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
 		hw_mask = 0;
 		for_each_engine_masked(engine, gt, engine_mask, tmp) {
-			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
-			hw_mask |= hw_engine_mask[engine->id];
+			hw_mask |= engine->reset_domain;
 		}
 	}
 
@@ -492,22 +484,6 @@ static int gen11_reset_engines(struct intel_gt *gt,
 			       intel_engine_mask_t engine_mask,
 			       unsigned int retry)
 {
-	static const u32 hw_engine_mask[] = {
-		[RCS0]  = GEN11_GRDOM_RENDER,
-		[BCS0]  = GEN11_GRDOM_BLT,
-		[VCS0]  = GEN11_GRDOM_MEDIA,
-		[VCS1]  = GEN11_GRDOM_MEDIA2,
-		[VCS2]  = GEN11_GRDOM_MEDIA3,
-		[VCS3]  = GEN11_GRDOM_MEDIA4,
-		[VCS4]  = GEN11_GRDOM_MEDIA5,
-		[VCS5]  = GEN11_GRDOM_MEDIA6,
-		[VCS6]  = GEN11_GRDOM_MEDIA7,
-		[VCS7]  = GEN11_GRDOM_MEDIA8,
-		[VECS0] = GEN11_GRDOM_VECS,
-		[VECS1] = GEN11_GRDOM_VECS2,
-		[VECS2] = GEN11_GRDOM_VECS3,
-		[VECS3] = GEN11_GRDOM_VECS4,
-	};
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t tmp;
 	u32 reset_mask, unlock_mask = 0;
@@ -518,8 +494,7 @@ static int gen11_reset_engines(struct intel_gt *gt,
 	} else {
 		reset_mask = 0;
 		for_each_engine_masked(engine, gt, engine_mask, tmp) {
-			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
-			reset_mask |= hw_engine_mask[engine->id];
+			reset_mask |= engine->reset_domain;
 			ret = gen11_lock_sfc(engine, &reset_mask, &unlock_mask);
 			if (ret)
 				goto sfc_unlock;
@@ -1367,20 +1342,27 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
 	synchronize_rcu_expedited();
 
-	/* Prevent any other reset-engine attempt. */
-	for_each_engine(engine, gt, tmp) {
-		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					&gt->reset.flags))
-			wait_on_bit(&gt->reset.flags,
-				    I915_RESET_ENGINE + engine->id,
-				    TASK_UNINTERRUPTIBLE);
+	/*
+	 * Prevent any other reset-engine attempt. We don't do this for GuC
+	 * submission, as the GuC owns the per-engine reset, not the i915.
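+	 *
+	 * With the per-generation hw_engine_mask[] tables above removed,
+	 * each engine now carries its reset domain directly. A minimal
+	 * sketch of the idea, assuming a helper of roughly this shape at
+	 * engine-setup time (the helper is not part of this hunk):
+	 *
+	 *	engine->reset_domain = get_reset_domain(GRAPHICS_VER(i915),
+	 *						engine->id);
+	 *
+	 * returning e.g. GEN11_GRDOM_RENDER for RCS0 on ver >= 11 and
+	 * GEN6_GRDOM_RENDER before that.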
+	 */
+	if (!intel_uc_uses_guc_submission(&gt->uc)) {
+		for_each_engine(engine, gt, tmp) {
+			while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+						&gt->reset.flags))
+				wait_on_bit(&gt->reset.flags,
+					    I915_RESET_ENGINE + engine->id,
+					    TASK_UNINTERRUPTIBLE);
+		}
 	}
 
 	intel_gt_reset_global(gt, engine_mask, msg);
 
-	for_each_engine(engine, gt, tmp)
-		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
-				 &gt->reset.flags);
+	if (!intel_uc_uses_guc_submission(&gt->uc)) {
+		for_each_engine(engine, gt, tmp)
+			clear_bit_unlock(I915_RESET_ENGINE + engine->id,
+					 &gt->reset.flags);
+	}
 	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
 	smp_mb__after_atomic();
 	wake_up_all(&gt->reset.queue);
@@ -1441,6 +1423,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
 	BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
 		     I915_WEDGED_ON_INIT);
 	intel_gt_set_wedged(gt);
+	i915_disable_error_state(gt->i915, -ENODEV);
 	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
 
 	/* Wedged on init is non-recoverable */
@@ -1450,6 +1433,7 @@ void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
 {
 	intel_gt_set_wedged(gt);
+	i915_disable_error_state(gt->i915, -ENODEV);
 	set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
 	intel_gt_retire_requests(gt); /* cleanup any wedged requests */
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 586dca1731ce..3e6fac0340ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1357,7 +1357,7 @@ retry:
 	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
 	if (!err && gen7_wa_vma)
 		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
-	if (!err && engine->legacy.ring->vma->obj)
+	if (!err)
 		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
 	if (!err)
 		err = intel_timeline_pin(timeline, &ww);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 5e275f8dda8c..54e7df788dbf 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -936,8 +936,70 @@ void intel_rps_park(struct intel_rps *rps)
 	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 }
 
+u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc;
+
+	if (rps_uses_slpc(rps)) {
+		slpc = rps_to_slpc(rps);
+
+		return slpc->boost_freq;
+	} else {
+		return intel_gpu_freq(rps, rps->boost_freq);
+	}
+}
+
+static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
+{
+	bool boost = false;
+
+	/* Validate against (static) hardware limits */
+	val = intel_freq_opcode(rps, val);
+	if (val < rps->min_freq || val > rps->max_freq)
+		return -EINVAL;
+
+	mutex_lock(&rps->lock);
+	if (val != rps->boost_freq) {
+		rps->boost_freq = val;
+		boost = atomic_read(&rps->num_waiters);
+	}
+	mutex_unlock(&rps->lock);
+	if (boost)
+		schedule_work(&rps->work);
+
+	return 0;
+}
+
+int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
+{
+	struct intel_guc_slpc *slpc;
+
+	if (rps_uses_slpc(rps)) {
+		slpc = rps_to_slpc(rps);
+
+		return intel_guc_slpc_set_boost_freq(slpc, freq);
+	} else {
+		return rps_set_boost_freq(rps, freq);
+	}
+}
+
+void intel_rps_dec_waiters(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc;
+
+	if (rps_uses_slpc(rps)) {
+		slpc = rps_to_slpc(rps);
+
+		intel_guc_slpc_dec_waiters(slpc);
+	} else {
+		atomic_dec(&rps->num_waiters);
+	}
+}
+
 void intel_rps_boost(struct i915_request *rq)
 {
+	struct intel_guc_slpc *slpc;
+
 	if (i915_request_signaled(rq) ||
i915_request_has_waitboost(rq)) return; @@ -945,6 +1007,16 @@ void intel_rps_boost(struct i915_request *rq) if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + /* Return if old value is non zero */ + if (!atomic_fetch_inc(&slpc->num_waiters)) + schedule_work(&slpc->boost_work); + + return; + } + if (atomic_fetch_inc(&rps->num_waiters)) return; @@ -2154,6 +2226,65 @@ u32 intel_rps_read_state_cap(struct intel_rps *rps) return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); } +static void intel_rps_set_manual(struct intel_rps *rps, bool enable) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE; + + /* Allow punit to process software requests */ + intel_uncore_write(uncore, GEN6_RP_CONTROL, state); +} + +void intel_rps_raise_unslice(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp0_unslice_req; + + mutex_lock(&rps->lock); + + if (rps_uses_slpc(rps)) { + /* RP limits have not been initialized yet for SLPC path */ + rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0) + & 0xff) * GEN9_FREQ_SCALER; + + intel_rps_set_manual(rps, true); + intel_uncore_write(uncore, GEN6_RPNSWREQ, + ((rp0_unslice_req << + GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | + GEN9_IGNORE_SLICE_RATIO)); + intel_rps_set_manual(rps, false); + } else { + intel_rps_set(rps, rps->rp0_freq); + } + + mutex_unlock(&rps->lock); +} + +void intel_rps_lower_unslice(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rpn_unslice_req; + + mutex_lock(&rps->lock); + + if (rps_uses_slpc(rps)) { + /* RP limits have not been initialized yet for SLPC path */ + rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16) + & 0xff) * GEN9_FREQ_SCALER; + + intel_rps_set_manual(rps, true); + intel_uncore_write(uncore, GEN6_RPNSWREQ, + ((rpn_unslice_req << + GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | + GEN9_IGNORE_SLICE_RATIO)); + intel_rps_set_manual(rps, false); + } else { + intel_rps_set(rps, rps->min_freq); + } + + mutex_unlock(&rps->lock); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; @@ -2230,7 +2361,7 @@ unsigned long i915_read_mch_val(void) return 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - struct intel_ips *ips = &i915->gt.rps.ips; + struct intel_ips *ips = &to_gt(i915)->rps.ips; spin_lock_irq(&mchdev_lock); chipset_val = __ips_chipset_val(ips); @@ -2257,7 +2388,7 @@ bool i915_gpu_raise(void) if (!i915) return false; - rps = &i915->gt.rps; + rps = &to_gt(i915)->rps; spin_lock_irq(&mchdev_lock); if (rps->max_freq_softlimit < rps->max_freq) @@ -2284,7 +2415,7 @@ bool i915_gpu_lower(void) if (!i915) return false; - rps = &i915->gt.rps; + rps = &to_gt(i915)->rps; spin_lock_irq(&mchdev_lock); if (rps->max_freq_softlimit > rps->min_freq) @@ -2310,7 +2441,7 @@ bool i915_gpu_busy(void) if (!i915) return false; - ret = i915->gt.awake; + ret = to_gt(i915)->awake; drm_dev_put(&i915->drm); return ret; @@ -2333,11 +2464,11 @@ bool i915_gpu_turbo_disable(void) if (!i915) return false; - rps = &i915->gt.rps; + rps = &to_gt(i915)->rps; spin_lock_irq(&mchdev_lock); rps->max_freq_softlimit = rps->min_freq; - ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq); + ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq); spin_unlock_irq(&mchdev_lock); drm_dev_put(&i915->drm); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h index 11960d64ca82..c6d76a3d1331 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -23,6 +23,9 @@ void intel_rps_disable(struct intel_rps *rps); void intel_rps_park(struct intel_rps *rps); void intel_rps_unpark(struct intel_rps *rps); void intel_rps_boost(struct i915_request *rq); +void intel_rps_dec_waiters(struct intel_rps *rps); +u32 intel_rps_get_boost_frequency(struct intel_rps *rps); +int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq); int intel_rps_set(struct intel_rps *rps, u8 val); void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); @@ -42,6 +45,8 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_state_cap(struct intel_rps *rps); +void intel_rps_raise_unslice(struct intel_rps *rps); +void intel_rps_lower_unslice(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 2400d6423ba5..ab3277a3d593 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -482,7 +482,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER)) + if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER)) wa_masked_en(wal, COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -560,6 +560,22 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* * These settings aren't actually workarounds, but general tuning settings that + * need to be programmed on dg2 platform. + */ +static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); + wa_add(wal, + FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, + 0, false); +} + +/* + * These settings aren't actually workarounds, but general tuning settings that * need to be programmed on several platforms. 
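 *
 * A concrete reading of the dg2 variant above, hedged (the exact field
 * width is not shown in this hunk): if L3_PWM_TIMER_INIT_VAL_MASK is a
 * contiguous field such as GENMASK(9, 0), then
 *
 *	REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)
 *
 * shifts 0x7f into that field, and wa_write_clr_set() clears the field
 * before or-ing the new value in, leaving the other GEN11_L3SQCREG5
 * bits untouched.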
 */
 static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
@@ -637,6 +653,42 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
 			     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
 }
 
+static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
+{
+	dg2_ctx_gt_tuning_init(engine, wal);
+
+	/* Wa_16011186671:dg2_g11 */
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+		wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+		wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+	}
+
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+		/* Wa_14010469329:dg2_g10 */
+		wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
+			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+
+		/*
+		 * Wa_22010465075:dg2_g10
+		 * Wa_22010613112:dg2_g10
+		 * Wa_14010698770:dg2_g10
+		 */
+		wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
+			     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+	}
+
+	/* Wa_16013271637:dg2 */
+	wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
+		     MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+
+	/* Wa_22012532006:dg2 */
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+}
+
 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
 					 struct i915_wa_list *wal)
 {
@@ -723,7 +775,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 	if (engine->class != RENDER_CLASS)
 		goto done;
 
-	if (IS_DG1(i915))
+	if (IS_DG2(i915))
+		dg2_ctx_workarounds_init(engine, wal);
+	else if (IS_XEHPSDV(i915))
+		; /* noop; none at this time */
+	else if (IS_DG1(i915))
 		dg1_ctx_workarounds_init(engine, wal);
 	else if (GRAPHICS_VER(i915) == 12)
 		gen12_ctx_workarounds_init(engine, wal);
@@ -871,10 +927,51 @@ hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 }
 
 static void
+gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+	const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
+	unsigned int slice, subslice;
+	u32 mcr, mcr_mask;
+
+	GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
+
+	/*
+	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
+	 * Before any MMIO read into slice/subslice specific registers, MCR
+	 * packet control register needs to be programmed to point to any
+	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
+	 * This means each subsequent MMIO read will be forwarded to a
+	 * specific s/ss combination, but this is OK since these registers
+	 * are consistent across s/ss in almost all cases. In the rare
+	 * occasions, such as INSTDONE, where this value is dependent
+	 * on s/ss combo, the read should be done with read_subslice_reg.
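+	 *
+	 * Worked example (illustrative values, not from this diff): with
+	 * slice_mask = 0x1 and subslice mask 0x7 enabled on slice 0,
+	 *
+	 *	slice    = ffs(0x1) - 1 = 0;
+	 *	subslice = ffs(0x7) - 1 = 0;
+	 *	mcr      = GEN8_MCR_SLICE(0) | GEN8_MCR_SUBSLICE(0);
+	 *
+	 * so the steering register points at s0/ss0, which the masks above
+	 * guarantee is an enabled pair.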
+	 */
+	slice = ffs(sseu->slice_mask) - 1;
+	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+	subslice = ffs(intel_sseu_get_subslices(sseu, slice));
+	GEM_BUG_ON(!subslice);
+	subslice--;
+
+	/*
+	 * We use GEN8_MCR..() macros to calculate the |mcr| value for
+	 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
+	 */
+	mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+	mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+
+	drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
+
+	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
+}
+
+static void
 gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = gt->i915;
 
+	/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
+	gen9_wa_init_mcr(i915, wal);
+
 	/* WaDisableKillLogic:bxt,skl,kbl */
 	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
 		wa_write_or(wal,
@@ -909,7 +1006,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 			    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaInPlaceDecompressionHang:skl */
-	if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0))
+	if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
 		wa_write_or(wal,
 			    GEN9_GAMT_ECO_REG_RW_IA,
 			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
@@ -921,7 +1018,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	gen9_gt_workarounds_init(gt, wal);
 
 	/* WaDisableDynamicCreditSharing:kbl */
-	if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0))
+	if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
 		wa_write_or(wal,
 			    GAMT_CHKN_BIT_REG,
 			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -1138,7 +1235,7 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 
 	/* Wa_1607087056:icl,ehl,jsl */
 	if (IS_ICELAKE(i915) ||
-	    IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0))
+	    IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
 		wa_write_or(wal,
 			    SLICE_UNIT_LEVEL_CLKGATE,
 			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1192,19 +1289,19 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	gen12_gt_workarounds_init(gt, wal);
 
 	/* Wa_1409420604:tgl */
-	if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
 		wa_write_or(wal,
 			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
 			    CPSSUNIT_CLKGATE_DIS);
 
 	/* Wa_1607087056:tgl also known as BUG:1409180338 */
-	if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
 		wa_write_or(wal,
 			    SLICE_UNIT_LEVEL_CLKGATE,
 			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
 	/* Wa_1408615072:tgl[a0] */
-	if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
 		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
 			    VSUNIT_CLKGATE_DIS_TGL);
 }
@@ -1217,7 +1314,7 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	gen12_gt_workarounds_init(gt, wal);
 
 	/* Wa_1607087056:dg1 */
-	if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0))
+	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
 		wa_write_or(wal,
 			    SLICE_UNIT_LEVEL_CLKGATE,
 			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1238,7 +1335,179 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 static void
 xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
+	struct drm_i915_private *i915 = gt->i915;
+
+	xehp_init_mcr(gt, wal);
+
+	/* Wa_1409757795:xehpsdv */
+	wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
+
+	/* Wa_18011725039:xehpsdv */
+	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
+		wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+		wa_write_or(wal,
L3SQCREG1_CCS0, FLUSHALLNONCOH); + } + + /* Wa_16011155590:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + TSGUNIT_CLKGATE_DIS); + + /* Wa_14011780169:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) { + wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | + GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | + GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | + GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | + GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | + GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS); + wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | + GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | + GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | + GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | + GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | + GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | + GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | + GAMTLBVEBOX0_CLKGATE_DIS); + } + + /* Wa_14012362059:xehpsdv */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_16012725990:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER)) + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS); + + /* Wa_14011060649:xehpsdv */ + wa_14011060649(gt, wal); + + /* Wa_14014368820:xehpsdv */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); +} + +static void +dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct intel_engine_cs *engine; + int id; + xehp_init_mcr(gt, wal); + + /* Wa_14011060649:dg2 */ + wa_14011060649(gt, wal); + + /* + * Although there are per-engine instances of these registers, + * they technically exist outside the engine itself and are not + * impacted by engine resets. Furthermore, they're part of the + * GuC blacklist so trying to treat them as engine workarounds + * will result in GuC initialization failure and a wedged GPU. 
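+	 *
+	 * The loop below shows the resulting pattern: the register is
+	 * addressed per engine via its mmio_base yet recorded on the GT
+	 * list, e.g.
+	 *
+	 *	wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
+	 *		    ALNUNIT_CLKGATE_DIS);
+	 *
+	 * so the write is re-applied on a full GT reset but never appears
+	 * in an engine save/restore set that GuC would reject.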
+ */ + for_each_engine(engine, gt, id) { + if (engine->class != VIDEO_DECODE_CLASS) + continue; + + /* Wa_16010515920:dg2_g10 */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) + wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), + ALNUNIT_CLKGATE_DIS); + } + + if (IS_DG2_G10(gt->i915)) { + /* Wa_22010523718:dg2 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + CG3DDISCFEG_CLKGATE_DIS); + + /* Wa_14011006942:dg2 */ + wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE, + DSS_ROUTER_CLKGATE_DIS); + } + + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_14010680813:dg2_g10 */ + wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS | + EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS); + + /* Wa_14010948348:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); + + /* Wa_14011037102:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); + + /* Wa_14011371254:dg2_g10 */ + wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); + + /* Wa_14011431319:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | + GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | + GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | + GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | + GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | + GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS); + wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | + GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | + GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | + GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | + GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | + GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | + GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | + GAMTLBVEBOX0_CLKGATE_DIS); + + /* Wa_14010569222:dg2_g10 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + GAMEDIA_CLKGATE_DIS); + + /* Wa_14011028019:dg2_g10 */ + wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); + } + + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14012362059:dg2 */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + } + + /* Wa_1509235366:dg2 */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); + + /* Wa_14014830051:dg2 */ + wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + + /* + * The following are not actually "workarounds" but rather + * recommended tuning settings documented in the bspec's + * performance guide section. 
+ */ + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); } static void @@ -1246,7 +1515,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_XEHPSDV(i915)) + if (IS_DG2(i915)) + dg2_gt_workarounds_init(gt, wal); + else if (IS_XEHPSDV(i915)) xehpsdv_gt_workarounds_init(gt, wal); else if (IS_DG1(i915)) dg1_gt_workarounds_init(gt, wal); @@ -1520,7 +1791,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_RANGE_4); } -static void cml_whitelist_build(struct intel_engine_cs *engine) +static void allow_read_ctx_timestamp(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1528,6 +1799,11 @@ static void cml_whitelist_build(struct intel_engine_cs *engine) whitelist_reg_ext(w, RING_CTX_TIMESTAMP(engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); +} + +static void cml_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); cfl_whitelist_build(engine); } @@ -1536,6 +1812,8 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; + allow_read_ctx_timestamp(engine); + switch (engine->class) { case RENDER_CLASS: /* WaAllowUMDToModifyHalfSliceChicken7:icl */ @@ -1571,15 +1849,9 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* hucStatus2RegOffset */ whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; default: - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1588,6 +1860,8 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; + allow_read_ctx_timestamp(engine); + switch (engine->class) { case RENDER_CLASS: /* @@ -1604,16 +1878,17 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4); - /* Wa_1808121037:tgl */ + /* + * Wa_1808121037:tgl + * Wa_14012131227:dg1 + * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p + */ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); /* Wa_1806527549:tgl */ whitelist_reg(w, HIZ_CHICKEN); break; default: - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1625,13 +1900,46 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine) tgl_whitelist_build(engine); /* GEN:BUG:1409280441:dg1 */ - if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) && + if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) && (engine->class == RENDER_CLASS || engine->class == COPY_ENGINE_CLASS)) whitelist_reg_ext(w, RING_ID(engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); } +static void xehpsdv_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); +} + +static void dg2_whitelist_build(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + allow_read_ctx_timestamp(engine); + + switch (engine->class) { + case RENDER_CLASS: + /* + * Wa_1507100340:dg2_g10 + * + * This covers 4 registers which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + 
RING_FORCE_TO_NONPRIV_ACCESS_RD |
+					  RING_FORCE_TO_NONPRIV_RANGE_4);
+
+		break;
+
+	default:
+		break;
+	}
+}
+
 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
@@ -1639,7 +1947,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 
 	wa_init_start(w, "whitelist", engine->name);
 
-	if (IS_DG1(i915))
+	if (IS_DG2(i915))
+		dg2_whitelist_build(engine);
+	else if (IS_XEHPSDV(i915))
+		xehpsdv_whitelist_build(engine);
+	else if (IS_DG1(i915))
 		dg1_whitelist_build(engine);
 	else if (GRAPHICS_VER(i915) == 12)
 		tgl_whitelist_build(engine);
@@ -1713,13 +2025,119 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 				    CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
 	}
 }
+
+static bool needs_wa_1308578152(struct intel_engine_cs *engine)
+{
+	u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0);
+
+	return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0;
+}
+
 static void
 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
-	if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
-	    IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+		/* Wa_14013392000:dg2_g11 */
+		wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
+
+		/* Wa_16011620976:dg2_g11 */
+		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+	}
+
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
+	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+		/* Wa_14012419201:dg2 */
+		wa_masked_en(wal, GEN9_ROW_CHICKEN4,
+			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+	}
+
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
+	    IS_DG2_G11(engine->i915)) {
+		/*
+		 * Wa_22012826095:dg2
+		 * Wa_22013059131:dg2
+		 */
+		wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+				 MAXREQS_PER_BANK,
+				 REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+
+		/* Wa_22013059131:dg2 */
+		wa_write_or(wal, LSC_CHICKEN_BIT_0,
+			    FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+	}
+
+	/* Wa_1308578152:dg2_g10 when first gslice is fused off */
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) &&
+	    needs_wa_1308578152(engine)) {
+		wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
+			      GEN12_REPLAY_MODE_GRANULARITY);
+	}
+
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
+	    IS_DG2_G11(engine->i915)) {
+		/* Wa_22013037850:dg2 */
+		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+			    DISABLE_128B_EVICTION_COMMAND_UDW);
+
+		/* Wa_22012856258:dg2 */
+		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
+			     GEN12_DISABLE_READ_SUPPRESSION);
+
+		/*
+		 * Wa_22010960976:dg2
+		 * Wa_14013347512:dg2
+		 */
+		wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
+			      LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+	}
+
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+		/*
+		 * Wa_1608949956:dg2_g10
+		 * Wa_14010198302:dg2_g10
+		 */
+		wa_masked_en(wal, GEN8_ROW_CHICKEN,
+			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+
+		/*
+		 * Wa_14010918519:dg2_g10
+		 *
+		 * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
+		 * so ignoring verification.
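+		 *
+		 * (Hedged note on the wa_add() signature as used in this
+		 * file, wa_add(wal, reg, clear, set, read_mask, masked_reg):
+		 * a zero read_mask, as in the call below, tells the
+		 * workaround verification pass that no bit of this register
+		 * can be checked by reading it back, so the write is
+		 * applied blind.)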
+ */ + wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, + FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, + 0, false); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_22010430635:dg2 */ + wa_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); + + /* Wa_14010648519:dg2 */ + wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_G11(engine->i915)) { + /* Wa_22012654132:dg2 */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } + + /* Wa_14013202645:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) + wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); + + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || + IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1607138336:tgl[a0],dg1[a0] * Wa_1607063988:tgl[a0],dg1[a0] @@ -1729,7 +2147,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); } - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) @@ -1764,7 +2182,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, @@ -1777,8 +2195,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } - - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1607030317:tgl @@ -2131,7 +2548,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) { + if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 8b89215afe46..c0637bf799a3 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -17,7 +17,7 @@ static int mock_timeline_pin(struct intel_timeline *tl) { int err; - if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj))) + if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj, NULL))) return -EBUSY; err = intel_timeline_pin_map(tl); @@ -35,9 +35,31 @@ static void mock_timeline_unpin(struct intel_timeline *tl) atomic_dec(&tl->pin_count); } +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +{ + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { - const unsigned long sz = PAGE_SIZE / 2; + const 
unsigned long sz = PAGE_SIZE; struct intel_ring *ring; ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); @@ -50,15 +72,11 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->vaddr = (void *)(ring + 1); atomic_set(&ring->pin_count, 1); - ring->vma = i915_vma_alloc(); - if (!ring->vma) { + ring->vma = create_ring_vma(engine->gt->ggtt, PAGE_SIZE); + if (IS_ERR(ring->vma)) { kfree(ring); return NULL; } - i915_active_init(&ring->vma->active, NULL, NULL, 0); - __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma)); - __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags); - ring->vma->node.size = sz; intel_ring_update_space(ring); @@ -67,8 +85,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) static void mock_ring_free(struct intel_ring *ring) { - i915_active_fini(&ring->vma->active); - i915_vma_free(ring->vma); + i915_vma_put(ring->vma); kfree(ring); } @@ -125,6 +142,7 @@ static void mock_context_unpin(struct intel_context *ce) static void mock_context_post_unpin(struct intel_context *ce) { + i915_vma_unpin(ce->ring->vma); } static void mock_context_destroy(struct kref *ref) @@ -169,7 +187,7 @@ static int mock_context_alloc(struct intel_context *ce) static int mock_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **unused) { - return 0; + return i915_vma_pin_ww(ce->ring->vma, ww, 0, 0, PIN_GLOBAL | PIN_HIGH); } static int mock_context_pin(struct intel_context *ce, void *unused) @@ -327,7 +345,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct mock_engine *engine; GEM_BUG_ON(id >= I915_NUM_ENGINES); - GEM_BUG_ON(!i915->gt.uncore); + GEM_BUG_ON(!to_gt(i915)->uncore); engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) @@ -335,8 +353,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; - engine->base.gt = &i915->gt; - engine->base.uncore = i915->gt.uncore; + engine->base.gt = to_gt(i915); + engine->base.uncore = to_gt(i915)->uncore; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); @@ -359,8 +377,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.release = mock_engine_release; - i915->gt.engine[id] = &engine->base; - i915->gt.engine_class[0][id] = &engine->base; + to_gt(i915)->engine[id] = &engine->base; + to_gt(i915)->engine_class[0][id] = &engine->base; /* fake hw queue */ spin_lock_init(&engine->hw_lock); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index fa7b99a671dd..76fbae358072 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -442,7 +442,7 @@ int intel_context_live_selftests(struct drm_i915_private *i915) SUBTEST(live_active_context), SUBTEST(live_remote_context), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.c b/drivers/gpu/drm/i915/gt/selftest_engine.c index 262764f6d90a..57fea9ea1705 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine.c @@ -12,7 +12,7 @@ int intel_engine_live_selftests(struct drm_i915_private *i915) live_engine_pm_selftests, NULL, }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); typeof(*tests) *fn; for (fn = tests; *fn; fn++) { diff --git 
a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 64abf5feabfa..1b75f478d1b8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -361,10 +361,10 @@ int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
 		SUBTEST(perf_mi_noop),
 	};
 
-	if (intel_gt_is_wedged(&i915->gt))
+	if (intel_gt_is_wedged(to_gt(i915)))
 		return 0;
 
-	return intel_gt_live_subtests(tests, &i915->gt);
+	return intel_gt_live_subtests(tests, to_gt(i915));
 }
 
 static int intel_mmio_bases_check(void *arg)
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 6e6e4d747cca..273d440a53e3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -378,13 +378,13 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
 	int saved_hangcheck;
 	int err;
 
-	if (intel_gt_is_wedged(&i915->gt))
+	if (intel_gt_is_wedged(to_gt(i915)))
 		return 0;
 
 	saved_hangcheck = i915->params.enable_hangcheck;
 	i915->params.enable_hangcheck = INT_MAX;
 
-	err = intel_gt_live_subtests(tests, &i915->gt);
+	err = intel_gt_live_subtests(tests, to_gt(i915));
 
 	i915->params.enable_hangcheck = saved_hangcheck;
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 75569666105d..8af261831470 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -214,6 +214,31 @@ static int live_engine_timestamps(void *arg)
 	return 0;
 }
 
+static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
+{
+	ktime_t start, unused, dt;
+
+	if (!intel_engine_uses_guc(engine))
+		return 0;
+
+	/*
+	 * In GuC mode of submission, the busyness stats may get updated after
+	 * the batch starts running. Poll for a change in busyness and time
+	 * out after 10 ms.
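+	 *
+	 * (Unit check: dt below comes from ktime_get() and is therefore in
+	 * nanoseconds, so the bound used by the loop is
+	 *
+	 *	10,000,000 ns == 10,000 us == 10 ms
+	 *
+	 * per polling attempt.)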
+ */ + start = ktime_get(); + while (intel_engine_get_busy_time(engine, &unused) == busyness) { + dt = ktime_get() - start; + if (dt > 10000000) { + pr_err("active wait timed out %lld\n", dt); + ENGINE_TRACE(engine, "active wait time out %lld\n", dt); + return -ETIME; + } + } + + return 0; +} + static int live_engine_busy_stats(void *arg) { struct intel_gt *gt = arg; @@ -232,6 +257,7 @@ static int live_engine_busy_stats(void *arg) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); for_each_engine(engine, gt, id) { struct i915_request *rq; + ktime_t busyness, dummy; ktime_t de, dt; ktime_t t[2]; @@ -274,16 +300,23 @@ static int live_engine_busy_stats(void *arg) } i915_request_add(rq); + busyness = intel_engine_get_busy_time(engine, &dummy); if (!igt_wait_for_spinner(&spin, rq)) { intel_gt_set_wedged(engine->gt); err = -ETIME; goto end; } + err = __spin_until_busier(engine, busyness); + if (err) { + GEM_TRACE_DUMP(); + goto end; + } + ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); de = intel_engine_get_busy_time(engine, &t[0]); - udelay(100); + mdelay(10); de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); dt = ktime_sub(t[1], t[0]); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index b367ecfa42de..e10da897e07a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -4502,11 +4502,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_reset), }; - if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP) + if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP) return 0; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index b9441217ca3d..8bf62a5826cc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -43,7 +43,7 @@ static void measure_clocks(struct intel_engine_cs *engine, int i; for (i = 0; i < 5; i++) { - preempt_disable(); + local_irq_disable(); cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP); dt[i] = ktime_get(); @@ -51,7 +51,7 @@ static void measure_clocks(struct intel_engine_cs *engine, dt[i] = ktime_sub(ktime_get(), dt[i]); cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP); - preempt_enable(); + local_irq_enable(); } /* Use the median of both cycle/dt; close enough */ @@ -193,10 +193,10 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915) SUBTEST(live_gt_resume), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } int intel_gt_pm_late_selftests(struct drm_i915_private *i915) @@ -210,8 +210,8 @@ int intel_gt_pm_late_selftests(struct drm_i915_private *i915) SUBTEST(live_rc6_ctx_wa), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7e2d99dd012d..15d63435ec4d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -471,7 +471,8 @@ static int 
igt_reset_nop_engine(void *arg)
 		count = 0;
 
 		st_engine_heartbeat_disable(engine);
-		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+		GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+					    &gt->reset.flags));
 		do {
 			int i;
@@ -528,7 +529,7 @@ static int igt_reset_nop_engine(void *arg)
 				break;
 			}
 		} while (time_before(jiffies, end_time));
-		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+		clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 		st_engine_heartbeat_enable(engine);
 
 		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
@@ -582,7 +583,8 @@ static int igt_reset_fail_engine(void *arg)
 	}
 
 	st_engine_heartbeat_disable(engine);
-	set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+	GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+				    &gt->reset.flags));
 
 	force_reset_timeout(engine);
 	err = intel_engine_reset(engine, NULL);
@@ -679,7 +681,7 @@ static int igt_reset_fail_engine(void *arg)
 out:
 	pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
 skip:
-	clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+	clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 	st_engine_heartbeat_enable(engine);
 	intel_context_put(ce);
@@ -734,7 +736,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		reset_engine_count = i915_reset_engine_count(global, engine);
 
 		st_engine_heartbeat_disable(engine);
-		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+		GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+					    &gt->reset.flags));
 		count = 0;
 		do {
 			struct i915_request *rq = NULL;
@@ -824,7 +827,7 @@ restore:
 			if (err)
 				break;
 		} while (time_before(jiffies, end_time));
-		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+		clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 		st_engine_heartbeat_enable(engine);
 
 		pr_info("%s: Completed %lu %s resets\n",
			engine->name, count, active ?
"active" : "idle"); @@ -1042,7 +1045,8 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ st_engine_heartbeat_disable_no_pm(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); do { struct i915_request *rq = NULL; struct intel_selftest_saved_policy saved; @@ -1165,7 +1169,7 @@ restore: if (err) break; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable_no_pm(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", @@ -2014,7 +2018,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_evict_fence), SUBTEST(igt_handle_error), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); intel_wakeref_t wakeref; int err; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b0977a3b699b..618c905daa19 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1847,5 +1847,5 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) if (!HAS_LOGICAL_RING_CONTEXTS(i915)) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 12ef2837c89b..fa4293d2944f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -49,6 +49,7 @@ static int copy(struct intel_migrate *migrate, if (IS_ERR(src)) return 0; + sz = src->base.size; dst = i915_gem_object_create_internal(i915, sz); if (IS_ERR(dst)) goto err_free_src; @@ -441,7 +442,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915) SUBTEST(thread_global_copy), SUBTEST(thread_global_clear), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (!gt->migrate.context) return 0; @@ -464,7 +465,7 @@ create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem) return obj; } - i915_gem_object_trylock(obj); + i915_gem_object_trylock(obj, NULL); err = i915_gem_object_pin_pages(obj); if (err) { i915_gem_object_unlock(obj); @@ -657,7 +658,7 @@ int intel_migrate_perf_selftests(struct drm_i915_private *i915) SUBTEST(perf_clear_blt), SUBTEST(perf_copy_blt), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 13d25bf2a94a..c1d861333c44 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -451,5 +451,5 @@ int intel_mocs_live_selftests(struct drm_i915_private *i915) if (!get_mocs_settings(i915, &table)) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 7a50c9f4071b..8a873f6bda7f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -376,7 +376,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_atomic_reset), SUBTEST(igt_atomic_engine_reset), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (!intel_has_gpu_reset(gt)) return 0; diff 
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 041954408d0f..70f9ac1ec2c7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -291,8 +291,8 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_ctx_switch_wa),
 	};
 
-	if (i915->gt.submission_method > INTEL_SUBMISSION_RING)
+	if (to_gt(i915)->submission_method > INTEL_SUBMISSION_RING)
 		return 0;
 
-	return intel_gt_live_subtests(tests, &i915->gt);
+	return intel_gt_live_subtests(tests, to_gt(i915));
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 9334bad131a2..b768cea5943d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -39,7 +39,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
 static int live_slpc_clamp_min(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct intel_gt *gt = &i915->gt;
+	struct intel_gt *gt = to_gt(i915);
 	struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
 	struct intel_rps *rps = &gt->rps;
 	struct intel_engine_cs *engine;
@@ -166,7 +166,7 @@ static int live_slpc_clamp_min(void *arg)
 static int live_slpc_clamp_max(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct intel_gt *gt = &i915->gt;
+	struct intel_gt *gt = to_gt(i915);
 	struct intel_guc_slpc *slpc;
 	struct intel_rps *rps;
 	struct intel_engine_cs *engine;
@@ -304,7 +304,7 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_slpc_clamp_min),
 	};
 
-	if (intel_gt_is_wedged(&i915->gt))
+	if (intel_gt_is_wedged(to_gt(i915)))
 		return 0;
 
 	return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index d0b6a3afcf44..e2eb686a9763 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -159,7 +159,7 @@ static int mock_hwsp_freelist(void *arg)
 	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
 	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
 
-	state.gt = &i915->gt;
+	state.gt = to_gt(i915);
 
 	/*
 	 * Create a bunch of timelines and check that their HWSP do not overlap.
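	 *
	 * (A note on the &i915->gt to to_gt(i915) conversions across these
	 * selftest hunks: to_gt() is assumed to be a trivial accessor added
	 * elsewhere in this series, roughly
	 *
	 *	static inline struct intel_gt *to_gt(struct drm_i915_private *i915)
	 *	{
	 *		return &i915->gt;
	 *	}
	 *
	 * so callers stop dereferencing drm_i915_private directly and keep
	 * working if the primary GT is later moved or replicated.)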
@@ -1416,8 +1416,8 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) SUBTEST(live_hwsp_rollover_user), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 962e91ba3be4..0287c2573c51 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -1387,8 +1387,8 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) SUBTEST(live_engine_reset_workarounds), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index ba10bd374cee..fe5d7d261797 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -144,6 +144,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, + INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 31cf9fb48c7e..f9240d4baa69 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -95,6 +95,11 @@ struct intel_guc { */ struct ida guc_ids; /** + * @num_guc_ids: Number of guc_ids, selftest feature to be able + * to reduce this number while testing. + */ + int num_guc_ids; + /** * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ unsigned long *guc_ids_bitmap; @@ -138,6 +143,8 @@ struct intel_guc { u32 ads_regset_size; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_engine_usage_size: size of engine usage in the ADS */ + u32 ads_engine_usage_size; /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ struct i915_vma *lrc_desc_pool; @@ -172,6 +179,41 @@ struct intel_guc { /** @send_mutex: used to serialize the intel_guc_send actions */ struct mutex send_mutex; + + /** + * @timestamp: GT timestamp object that stores a copy of the timestamp + * and adjusts it for overflow using a worker. + */ + struct { + /** + * @lock: Lock protecting the below fields and the engine stats. + */ + spinlock_t lock; + + /** + * @gt_stamp: 64 bit extended value of the GT timestamp. + */ + u64 gt_stamp; + + /** + * @ping_delay: Period for polling the GT timestamp for + * overflow. + */ + unsigned long ping_delay; + + /** + * @work: Periodic work to adjust GT timestamp, engine and + * context usage for overflows. 
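+		 *
+		 * (Illustrative sketch of the wrap handling such a worker
+		 * needs; the register name here is hypothetical:
+		 *
+		 *	u32 now = intel_uncore_read(uncore, GT_TIMESTAMP_REG);
+		 *
+		 *	if (now < lower_32_bits(guc->timestamp.gt_stamp))
+		 *		guc->timestamp.gt_stamp += (u64)1 << 32;
+		 *	guc->timestamp.gt_stamp =
+		 *		(guc->timestamp.gt_stamp & ~(u64)U32_MAX) | now;
+		 *
+		 * i.e. the 32b hardware stamp forms the low word, and the
+		 * high word is bumped whenever the raw value goes backwards,
+		 * provided the worker runs at least once per wrap period.)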
+		 */
+		struct delayed_work work;
+	} timestamp;
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+	/**
+	 * @number_guc_id_stolen: The number of guc_ids that have been stolen
+	 */
+	int number_guc_id_stolen;
+#endif
 };
 
 static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 621c893a009f..1a1edae67e4e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -26,6 +26,8 @@
 *      | guc_policies                          |
 *      +---------------------------------------+
 *      | guc_gt_system_info                    |
+ *      +---------------------------------------+
+ *      | guc_engine_usage                      |
 *      +---------------------------------------+ <== static
 *      | guc_mmio_reg[countA] (engine 0.0)     |
 *      | guc_mmio_reg[countB] (engine 0.1)     |
@@ -47,6 +49,7 @@ struct __guc_ads_blob {
 	struct guc_ads ads;
 	struct guc_policies policies;
 	struct guc_gt_system_info system_info;
+	struct guc_engine_usage engine_usage;
 	/* From here on, location is dynamic! Refer to above diagram. */
 	struct guc_mmio_reg regset[0];
 } __packed;
@@ -628,3 +631,21 @@ void intel_guc_ads_reset(struct intel_guc *guc)
 
 	guc_ads_private_data_reset(guc);
 }
+
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
+{
+	struct __guc_ads_blob *blob = guc->ads_blob;
+	u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
+	u32 offset = base + ptr_offset(blob, engine_usage);
+
+	return offset;
+}
+
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine)
+{
+	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct __guc_ads_blob *blob = guc->ads_blob;
+	u8 guc_class = engine_class_to_guc_class(engine->class);
+
+	return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)];
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
index 3d85051d57e4..e74c110facff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
@@ -6,8 +6,11 @@
 #ifndef _INTEL_GUC_ADS_H_
 #define _INTEL_GUC_ADS_H_
 
+#include <linux/types.h>
+
 struct intel_guc;
 struct drm_printer;
+struct intel_engine_cs;
 
 int intel_guc_ads_create(struct intel_guc *guc);
 void intel_guc_ads_destroy(struct intel_guc *guc);
@@ -15,5 +18,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
 void intel_guc_ads_reset(struct intel_guc *guc);
 void intel_guc_ads_print_policy_info(struct intel_guc *guc, struct drm_printer *p);
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine);
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a0cc34be7b56..aa6dd6415202 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -523,6 +523,15 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
 	CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
 		 ktime_ms_delta(ktime_get(), ct->stall_time),
 		 send->status, recv->status);
+	CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
+		 atomic_read(&ct->ctbs.send.space) * 4);
+	CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
+	CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
+	CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
+		 atomic_read(&ct->ctbs.recv.space) * 4);
+	CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.recv.desc->head);
+	CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.recv.desc->tail);
+
 	ct->ctbs.send.broken =
true; } @@ -582,12 +591,19 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw) { + bool h2g = h2g_has_room(ct, h2g_dw); + bool g2h = g2h_has_room(ct, g2h_dw); + lockdep_assert_held(&ct->ctbs.send.lock); - if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) { + if (unlikely(!h2g || !g2h)) { if (ct->stall_time == KTIME_MAX) ct->stall_time = ktime_get(); + /* Be paranoid and kick G2H tasklet to free credits */ + if (!g2h) + tasklet_hi_schedule(&ct->receive_tasklet); + if (unlikely(ct_deadlocked(ct))) return -EPIPE; else diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 196424be0998..31420ce1ce6b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -40,9 +40,8 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) } } -/* Copy RSA signature from the fw image to HW for verification */ -static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, - struct intel_uncore *uncore) +static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) { u32 rsa[UOS_RSA_SCRATCH_COUNT]; size_t copied; @@ -58,6 +57,27 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, return 0; } +static int guc_xfer_rsa_vma(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) +{ + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); + + intel_uncore_write(uncore, UOS_RSA_SCRATCH(0), + intel_guc_ggtt_offset(guc, guc_fw->rsa_data)); + + return 0; +} + +/* Copy RSA signature from the fw image to HW for verification */ +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) +{ + if (guc_fw->rsa_data) + return guc_xfer_rsa_vma(guc_fw, uncore); + else + return guc_xfer_rsa_mmio(guc_fw, uncore); +} + /* * Read the GuC status register (GUC_STATUS) and store it in the * specified location; then return a boolean indicating whether @@ -142,7 +162,10 @@ int intel_guc_fw_upload(struct intel_guc *guc) /* * Note that GuC needs the CSS header plus uKernel code to be copied * by the DMA engine in one operation, whereas the RSA signature is - * loaded via MMIO. + * loaded separately, either by copying it to the UOS_RSA_SCRATCH + * register (if key size <= 256) or through a ggtt-pinned vma (if key + * size > 256). The RSA size and therefore the way we provide it to the + * HW is fixed for each platform and hard-coded in the bootrom. 
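The 256-byte cutoff above follows from the size of the register bank: UOS_RSA_SCRATCH is 64 32-bit scratch registers, so a 2048-bit (256-byte) modulus is the largest key the MMIO path can hold, and anything larger has to be staged in the GGTT-pinned vma instead. A minimal user-space sketch of the dispatch rule (illustrative only, not part of the patch):

#include <stdbool.h>
#include <stdio.h>

#define UOS_RSA_SCRATCH_COUNT	64	/* 64 x 32-bit scratch registers */

static bool rsa_fits_in_mmio(unsigned int rsa_size_bytes)
{
	/* the MMIO path holds at most 64 dwords == 256 bytes of signature */
	return rsa_size_bytes <= UOS_RSA_SCRATCH_COUNT * 4u;
}

int main(void)
{
	printf("2048-bit key (256 B) -> %s\n",
	       rsa_fits_in_mmio(256) ? "UOS_RSA_SCRATCH regs" : "GGTT-pinned vma");
	printf("4096-bit key (512 B) -> %s\n",
	       rsa_fits_in_mmio(512) ? "UOS_RSA_SCRATCH regs" : "GGTT-pinned vma");
	return 0;
}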
*/ ret = guc_xfer_rsa(&guc->fw, uncore); if (ret) @@ -164,6 +187,6 @@ int intel_guc_fw_upload(struct intel_guc *guc) return 0; out: - intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 722933e26347..7072e30e99f4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -294,6 +294,19 @@ struct guc_ads { u32 reserved[15]; } __packed; +/* Engine usage stats */ +struct guc_engine_usage_record { + u32 current_context_index; + u32 last_switch_in_stamp; + u32 reserved0; + u32 total_runtime; + u32 reserved1[4]; +} __packed; + +struct guc_engine_usage { + struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +} __packed; + /* GuC logging structures */ enum guc_log_buffer_type { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index ac1ee1d5ce10..fe6ab7550a14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -15,9 +15,12 @@ struct intel_guc; -#ifdef CONFIG_DRM_I915_DEBUG_GUC +#if defined(CONFIG_DRM_I915_DEBUG_GUC) #define CRASH_BUFFER_SIZE SZ_2M #define DEBUG_BUFFER_SIZE SZ_16M +#elif defined(CONFIG_DRM_I915_DEBUG_GEM) +#define CRASH_BUFFER_SIZE SZ_1M +#define DEBUG_BUFFER_SIZE SZ_2M #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c index 46026c2c1722..ddfbe334689f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c @@ -10,28 +10,80 @@ #include "intel_guc.h" #include "intel_guc_log.h" #include "intel_guc_log_debugfs.h" +#include "intel_uc.h" + +static u32 obj_to_guc_log_dump_size(struct drm_i915_gem_object *obj) +{ + u32 size; + + if (!obj) + return PAGE_SIZE; + + /* "0x%08x 0x%08x 0x%08x 0x%08x\n" => 16 bytes -> 44 chars => x2.75 */ + size = ((obj->base.size * 11) + 3) / 4; + + /* Add padding for final blank line, any extra header info, etc. 
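For a sense of scale: the estimate above multiplies the log object size by 11/4 (each 16 raw bytes become one 44-character dump line, hence the 2.75x) and then pads by a page. A standalone sketch of the same arithmetic, assuming a 4 KiB page:

#include <stdio.h>

#define PAGE_SIZE	4096UL	/* assumed 4 KiB pages */
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* 16 log bytes -> one 44-char "0x%08x 0x%08x 0x%08x 0x%08x\n" line */
static unsigned long dump_size_estimate(unsigned long obj_size)
{
	unsigned long size = (obj_size * 11 + 3) / 4;	/* x2.75 */

	return PAGE_ALIGN(size + PAGE_SIZE);	/* plus a page of slack */
}

int main(void)
{
	/* e.g. the 2 MiB CRASH_BUFFER_SIZE used with CONFIG_DRM_I915_DEBUG_GUC */
	printf("2 MiB log -> %lu byte seq_file buffer\n",
	       dump_size_estimate(2UL << 20));
	return 0;
}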
*/ + size = PAGE_ALIGN(size + PAGE_SIZE); + + return size; +} + +static u32 guc_log_dump_size(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + + if (!intel_guc_is_supported(guc)) + return PAGE_SIZE; + + if (!log->vma) + return PAGE_SIZE; + + return obj_to_guc_log_dump_size(log->vma->obj); +} static int guc_log_dump_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); + int ret; - return intel_guc_log_dump(m->private, &p, false); + ret = intel_guc_log_dump(m->private, &p, false); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m)) + pr_warn_once("preallocated size:%zx for %s exceeded\n", + m->size, __func__); + + return ret; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_log_dump, guc_log_dump_size); + +static u32 guc_load_err_dump_size(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); + + if (!intel_guc_is_supported(guc)) + return PAGE_SIZE; + + return obj_to_guc_log_dump_size(uc->load_err_log); } -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump); static int guc_load_err_log_dump_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m)) + pr_warn_once("preallocated size:%zx for %s exceeded\n", + m->size, __func__); + return intel_guc_log_dump(m->private, &p, true); } -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_load_err_log_dump, guc_load_err_dump_size); static int guc_log_level_get(void *data, u64 *val) { struct intel_guc_log *log = data; - if (!intel_guc_is_used(log_to_guc(log))) + if (!log->vma) return -ENODEV; *val = intel_guc_log_get_level(log); @@ -43,7 +95,7 @@ static int guc_log_level_set(void *data, u64 val) { struct intel_guc_log *log = data; - if (!intel_guc_is_used(log_to_guc(log))) + if (!log->vma) return -ENODEV; return intel_guc_log_set_level(log, val); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 65a3e7fdb2b2..13b27b8ff74e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -79,29 +79,6 @@ static void slpc_mem_set_disabled(struct slpc_shared_data *data, slpc_mem_set_param(data, enable_id, 0); } -int intel_guc_slpc_init(struct intel_guc_slpc *slpc) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - struct drm_i915_private *i915 = slpc_to_i915(slpc); - u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); - int err; - - GEM_BUG_ON(slpc->vma); - - err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); - if (unlikely(err)) { - drm_err(&i915->drm, - "Failed to allocate SLPC struct (err=%pe)\n", - ERR_PTR(err)); - return err; - } - - slpc->max_freq_softlimit = 0; - slpc->min_freq_softlimit = 0; - - return err; -} - static u32 slpc_get_state(struct intel_guc_slpc *slpc) { struct slpc_shared_data *data; @@ -203,6 +180,86 @@ static int slpc_unset_param(struct intel_guc_slpc *slpc, return guc_action_slpc_unset_param(guc, id); } +static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct intel_guc *guc = slpc_to_guc(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + lockdep_assert_held(&slpc->lock); + + if (!intel_guc_is_ready(guc)) + return -ENODEV; + + /* + * This function is a little different as compared to + * 
intel_guc_slpc_set_min_freq(). Softlimit will not be updated + * here since this is used to temporarily change min freq, + * for example, during a waitboost. Caller is responsible for + * checking bounds. + */ + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); + if (ret) + drm_err(&i915->drm, "Unable to force min freq to %u: %d", + freq, ret); + } + + return ret; +} + +static void slpc_boost_work(struct work_struct *work) +{ + struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + + /* + * Raise min freq to boost. It's possible that + * this is greater than current max. But it will + * certainly be limited by RP0. An error setting + * the min param is not fatal. + */ + mutex_lock(&slpc->lock); + if (atomic_read(&slpc->num_waiters)) { + slpc_force_min_freq(slpc, slpc->boost_freq); + slpc->num_boosts++; + } + mutex_unlock(&slpc->lock); +} + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + GEM_BUG_ON(slpc->vma); + + err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); + if (unlikely(err)) { + drm_err(&i915->drm, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); + return err; + } + + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + + slpc->boost_freq = 0; + atomic_set(&slpc->num_waiters, 0); + slpc->num_boosts = 0; + + mutex_init(&slpc->lock); + INIT_WORK(&slpc->boost_work, slpc_boost_work); + + return err; +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { switch (state) { @@ -393,7 +450,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) val > slpc->max_freq_softlimit) return -EINVAL; + /* Need a lock now since waitboost can be modifying min as well */ + mutex_lock(&slpc->lock); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, val); @@ -406,6 +467,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) if (!ret) slpc->min_freq_softlimit = val; + mutex_unlock(&slpc->lock); + return ret; } @@ -522,6 +585,9 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER; slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) * GT_FREQUENCY_MULTIPLIER; + + if (!slpc->boost_freq) + slpc->boost_freq = slpc->rp0_freq; } /* @@ -557,7 +623,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) if (unlikely(ret < 0)) return ret; - intel_guc_pm_intrmsk_enable(&i915->gt); + intel_guc_pm_intrmsk_enable(to_gt(i915)); slpc_get_rp_values(slpc); @@ -588,6 +654,47 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return 0; } +int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val) +{ + int ret = 0; + + if (val < slpc->min_freq || val > slpc->rp0_freq) + return -EINVAL; + + mutex_lock(&slpc->lock); + + if (slpc->boost_freq != val) { + /* Apply only if there are active waiters */ + if (atomic_read(&slpc->num_waiters)) { + ret = slpc_force_min_freq(slpc, val); + if (ret) { + ret = -EIO; + goto done; + } + } + + slpc->boost_freq = val; + } + +done: + mutex_unlock(&slpc->lock); + return ret; +} + +void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) +{ + /* + * Return min back to the softlimit. 
+ * This is called during request retire, + * so we don't need to fail that if the + * set_param fails. + */ + mutex_lock(&slpc->lock); + if (atomic_dec_and_test(&slpc->num_waiters)) + slpc_force_min_freq(slpc, slpc->min_freq_softlimit); + mutex_unlock(&slpc->lock); +} + int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -611,6 +718,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p slpc_decode_max_freq(slpc)); drm_printf(p, "\tMin freq: %u MHz\n", slpc_decode_min_freq(slpc)); + drm_printf(p, "\twaitboosts: %u\n", + slpc->num_boosts); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index e45054d5b9b4..0caa8fee3c04 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -34,9 +34,12 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); +void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); +void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 41d13527666f..bf5b9a563c09 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -6,6 +6,9 @@ #ifndef _INTEL_GUC_SLPC_TYPES_H_ #define _INTEL_GUC_SLPC_TYPES_H_ +#include <linux/atomic.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> #include <linux/types.h> #define SLPC_RESET_TIMEOUT_MS 5 @@ -20,10 +23,20 @@ struct intel_guc_slpc { u32 min_freq; u32 rp0_freq; u32 rp1_freq; + u32 boost_freq; /* frequency softlimits */ u32 min_freq_softlimit; u32 max_freq_softlimit; + + /* Protects set/reset of boost freq + * and value of num_waiters + */ + struct mutex lock; + + struct work_struct boost_work; + atomic_t num_waiters; + u32 num_boosts; }; #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 302e9ff0602c..e7517206af82 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -13,6 +13,7 @@ #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" @@ -21,6 +22,7 @@ #include "gt/intel_mocs.h" #include "gt/intel_ring.h" +#include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -143,7 +145,8 @@ guc_create_parallel(struct intel_engine_cs **engines, * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for * multi-lrc. 
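With the default pool (num_guc_ids starts at GUC_MAX_LRC_DESCRIPTORS, 65535 in this driver), the 1/16 split gives 4095 ids reserved for multi-lrc at the bottom of the space and the remainder to single-lrc contexts. A quick sketch of the resulting ranges; the constant's value is an assumption taken from the fwif headers:

#include <stdio.h>

#define GUC_MAX_LRC_DESCRIPTORS	65535	/* assumed from guc_fwif.h */

int main(void)
{
	unsigned int num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;	/* default */
	unsigned int multi = num_guc_ids / 16;	/* NUMBER_MULTI_LRC_GUC_ID() */

	/* multi-lrc ids come from a bitmap over the bottom of the space,
	 * single-lrc ids from an ida over the remainder */
	printf("multi-lrc ids:  [0, %u)\n", multi);
	printf("single-lrc ids: [%u, %u)\n", multi, num_guc_ids);
	return 0;
}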
*/ -#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16) +#define NUMBER_MULTI_LRC_GUC_ID(guc) \ + ((guc)->submission_state.num_guc_ids / 16) /* * Below is a set of functions which control the GuC scheduling state which @@ -1038,8 +1041,6 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) spin_unlock(&ce->guc_state.lock); - GEM_BUG_ON(!do_put && !destroyed); - if (pending_enable || destroyed || deregister) { decr_outstanding_submission_g2h(guc); if (deregister) @@ -1077,6 +1078,271 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) xa_unlock_irqrestore(&guc->context_lookup, flags); } +/* + * GuC stores busyness stats for each engine at context in/out boundaries. A + * context 'in' logs execution start time, 'out' adds in -> out delta to total. + * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with + * GuC. + * + * __i915_pmu_event_read samples engine busyness. When sampling, if context id + * is valid (!= ~0) and start is non-zero, the engine is considered to be + * active. For an active engine total busyness = total + (now - start), where + * 'now' is the time at which the busyness is sampled. For inactive engine, + * total busyness = total. + * + * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. + * + * The start and total values provided by GuC are 32 bits and wrap around in a + * few minutes. Since perf pmu provides busyness as 64 bit monotonically + * increasing ns values, there is a need for this implementation to account for + * overflows and extend the GuC provided values to 64 bits before returning + * busyness to the user. In order to do that, a worker runs periodically at + * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in + * 27 seconds for a gt clock frequency of 19.2 MHz). 
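The quoted 27 seconds can be checked directly: POLL_TIME_CLKS is U32_MAX >> 3 = 536870911 gt clocks, and intel_guc_submission_init() below turns that into a delay of (POLL_TIME_CLKS / gt->clock_frequency + 1) seconds' worth of jiffies, i.e. the driver rounds the ~27.9 s up by one second. A user-space check of the numbers:

#include <stdint.h>
#include <stdio.h>

#define WRAP_TIME_CLKS	UINT32_MAX
#define POLL_TIME_CLKS	(WRAP_TIME_CLKS >> 3)

int main(void)
{
	uint32_t freq = 19200000;	/* 19.2 MHz gt clock, as in the comment */
	/* same formula intel_guc_submission_init() feeds to the worker */
	unsigned int ping_s = POLL_TIME_CLKS / freq + 1;

	printf("32-bit stamp wraps every ~%u s\n",
	       (unsigned int)(WRAP_TIME_CLKS / freq));	/* ~223 s */
	printf("worker pings every %u s\n", ping_s);	/* 28 s */
	return 0;
}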
+ */ + +#define WRAP_TIME_CLKS U32_MAX +#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3) + +static void +__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) +{ + u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); + u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp); + + if (new_start == lower_32_bits(*prev_start)) + return; + + if (new_start < gt_stamp_last && + (new_start - gt_stamp_last) <= POLL_TIME_CLKS) + gt_stamp_hi++; + + if (new_start > gt_stamp_last && + (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi) + gt_stamp_hi--; + + *prev_start = ((u64)gt_stamp_hi << 32) | new_start; +} + +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +{ + struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + struct intel_engine_guc_stats *stats = &engine->stats.guc; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 last_switch = rec->last_switch_in_stamp; + u32 ctx_id = rec->current_context_index; + u32 total = rec->total_runtime; + + lockdep_assert_held(&guc->timestamp.lock); + + stats->running = ctx_id != ~0U && last_switch; + if (stats->running) + __extend_last_switch(guc, &stats->start_gt_clk, last_switch); + + /* + * Instead of adjusting the total for overflow, just add the + * difference from previous sample stats->total_gt_clks + */ + if (total && total != ~0U) { + stats->total_gt_clks += (u32)(total - stats->prev_total); + stats->prev_total = total; + } +} + +static void guc_update_pm_timestamp(struct intel_guc *guc, + struct intel_engine_cs *engine, + ktime_t *now) +{ + u32 gt_stamp_now, gt_stamp_hi; + + lockdep_assert_held(&guc->timestamp.lock); + + gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); + gt_stamp_now = intel_uncore_read(engine->uncore, + RING_TIMESTAMP(engine->mmio_base)); + *now = ktime_get(); + + if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp)) + gt_stamp_hi++; + + guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now; +} + +/* + * Unlike the execlist mode of submission total and active times are in terms of + * gt clocks. The *now parameter is retained to return the cpu time at which the + * busyness was sampled. + */ +static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) +{ + struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; + struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; + struct intel_gt *gt = engine->gt; + struct intel_guc *guc = >->uc.guc; + u64 total, gt_stamp_saved; + unsigned long flags; + u32 reset_count; + bool in_reset; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + + /* + * If a reset happened, we risk reading partially updated engine + * busyness from GuC, so we just use the driver stored copy of busyness. + * Synchronize with gt reset using reset_count and the + * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count + * after I915_RESET_BACKOFF flag, so ensure that the reset_count is + * usable by checking the flag afterwards. + */ + reset_count = i915_reset_count(gpu_error); + in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); + + *now = ktime_get(); + + /* + * The active busyness depends on start_gt_clk and gt_stamp. + * gt_stamp is updated by i915 only when gt is awake and the + * start_gt_clk is derived from GuC state. To get a consistent + * view of activity, we query the GuC state only if gt is awake. 
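The two wrap adjustments above rely on every switch-in stamp lying within POLL_TIME_CLKS of the already-extended gt_stamp, with u32 subtraction wrapping to give the forward distance across zero. They can be exercised outside the driver; a sketch restating the same logic in plain C (not the kernel code itself), shown handling a stamp captured just before a 32-bit rollover:

#include <stdint.h>
#include <stdio.h>

#define POLL_TIME_CLKS	(UINT32_MAX >> 3)

static void extend_last_switch(uint64_t gt_stamp, uint64_t *prev, uint32_t new_start)
{
	uint32_t hi = (uint32_t)(gt_stamp >> 32);
	uint32_t last = (uint32_t)gt_stamp;

	if (new_start == (uint32_t)*prev)
		return;

	/* u32 subtraction wraps, giving the forward distance across zero */
	if (new_start < last && (uint32_t)(new_start - last) <= POLL_TIME_CLKS)
		hi++;	/* new_start is just past a rollover gt_stamp missed */
	if (new_start > last && (uint32_t)(last - new_start) <= POLL_TIME_CLKS && hi)
		hi--;	/* gt_stamp rolled over, new_start predates the wrap */

	*prev = ((uint64_t)hi << 32) | new_start;
}

int main(void)
{
	/* extended gt_stamp just after a wrap; switch-in stamp from before it */
	uint64_t gt_stamp = (1ULL << 32) | 100, start = 0;

	extend_last_switch(gt_stamp, &start, 4294967000u);
	printf("extended start = 0x%llx\n",
	       (unsigned long long)start);	/* lands in epoch 0 */
	return 0;
}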
+ */ + if (!in_reset && intel_gt_pm_get_if_awake(gt)) { + stats_saved = *stats; + gt_stamp_saved = guc->timestamp.gt_stamp; + guc_update_engine_gt_clks(engine); + guc_update_pm_timestamp(guc, engine, now); + intel_gt_pm_put_async(gt); + if (i915_reset_count(gpu_error) != reset_count) { + *stats = stats_saved; + guc->timestamp.gt_stamp = gt_stamp_saved; + } + } + + total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); + if (stats->running) { + u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; + + total += intel_gt_clock_interval_to_ns(gt, clk); + } + + spin_unlock_irqrestore(&guc->timestamp.lock, flags); + + return ns_to_ktime(total); +} + +static void __reset_guc_busyness_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + ktime_t unused; + + cancel_delayed_work_sync(&guc->timestamp.work); + + spin_lock_irqsave(&guc->timestamp.lock, flags); + + for_each_engine(engine, gt, id) { + guc_update_pm_timestamp(guc, engine, &unused); + guc_update_engine_gt_clks(engine); + engine->stats.guc.prev_total = 0; + } + + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + +static void __update_guc_busyness_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + ktime_t unused; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + for_each_engine(engine, gt, id) { + guc_update_pm_timestamp(guc, engine, &unused); + guc_update_engine_gt_clks(engine); + } + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + +static void guc_timestamp_ping(struct work_struct *wrk) +{ + struct intel_guc *guc = container_of(wrk, typeof(*guc), + timestamp.work.work); + struct intel_uc *uc = container_of(guc, typeof(*uc), guc); + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int srcu, ret; + + /* + * Synchronize with gt reset to make sure the worker does not + * corrupt the engine/guc stats. 
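Tying the pieces together, the value handed back to the PMU is total_gt_clks converted to nanoseconds, plus the in-flight delta for a still-running context. A sketch with made-up numbers, approximating intel_gt_clock_interval_to_ns() as clks * NSEC_PER_SEC / frequency:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL

static uint64_t clks_to_ns(uint64_t clks, uint32_t freq)
{
	return clks * NSEC_PER_SEC / freq;	/* rough clock-interval-to-ns */
}

int main(void)
{
	uint32_t freq = 19200000;		/* 19.2 MHz gt clock */
	uint64_t total_gt_clks = 96000000;	/* 5 s accumulated by GuC */
	uint64_t gt_stamp = 200000000, start_gt_clk = 190400000;
	int running = 1;
	uint64_t busy = clks_to_ns(total_gt_clks, freq);

	if (running)	/* active context: add the interval since switch-in */
		busy += clks_to_ns(gt_stamp - start_gt_clk, freq);

	printf("busyness = %llu ns\n", (unsigned long long)busy);	/* 5.5e9 */
	return 0;
}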
+ */ + ret = intel_gt_reset_trylock(gt, &srcu); + if (ret) + return; + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + __update_guc_busyness_stats(guc); + + intel_gt_reset_unlock(gt, srcu); + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); +} + +static int guc_action_enable_usage_stats(struct intel_guc *guc) +{ + u32 offset = intel_guc_engine_usage_offset(guc); + u32 action[] = { + INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, + offset, + 0, + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static void guc_init_engine_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { + int ret = guc_action_enable_usage_stats(guc); + + if (ret) + drm_err(>->i915->drm, + "Failed to enable usage stats: %d!\n", ret); + } +} + +void intel_guc_busyness_park(struct intel_gt *gt) +{ + struct intel_guc *guc = >->uc.guc; + + if (!guc_submission_initialized(guc)) + return; + + cancel_delayed_work(&guc->timestamp.work); + __update_guc_busyness_stats(guc); +} + +void intel_guc_busyness_unpark(struct intel_gt *gt) +{ + struct intel_guc *guc = >->uc.guc; + + if (!guc_submission_initialized(guc)) + return; + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); +} + static inline bool submission_disabled(struct intel_guc *guc) { @@ -1138,6 +1404,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) intel_gt_park_heartbeats(guc_to_gt(guc)); disable_submission(guc); guc->interrupts.disable(guc); + __reset_guc_busyness_stats(guc); /* Flush IRQ handler */ spin_lock_irq(&guc_to_gt(guc)->irq_lock); @@ -1484,6 +1751,7 @@ static void destroyed_worker_func(struct work_struct *w); */ int intel_guc_submission_init(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); int ret; if (guc->lrc_desc_pool) @@ -1508,10 +1776,14 @@ int intel_guc_submission_init(struct intel_guc *guc) destroyed_worker_func); guc->submission_state.guc_ids_bitmap = - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL); + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); if (!guc->submission_state.guc_ids_bitmap) return -ENOMEM; + spin_lock_init(&guc->timestamp.lock); + INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); + guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; + return 0; } @@ -1598,13 +1870,13 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) if (intel_context_is_parent(ce)) ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, - NUMBER_MULTI_LRC_GUC_ID, + NUMBER_MULTI_LRC_GUC_ID(guc), order_base_2(ce->parallel.number_children + 1)); else ret = ida_simple_get(&guc->submission_state.guc_ids, - NUMBER_MULTI_LRC_GUC_ID, - GUC_MAX_LRC_DESCRIPTORS, + NUMBER_MULTI_LRC_GUC_ID(guc), + guc->submission_state.num_guc_ids, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(ret < 0)) @@ -1670,6 +1942,10 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) set_context_guc_id_invalid(cn); +#ifdef CONFIG_DRM_I915_SELFTEST + guc->number_guc_id_stolen++; +#endif + return 0; } else { return -EAGAIN; @@ -2373,7 +2649,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) unsigned long flags; bool disabled; - lockdep_assert_held(&guc->submission_state.lock); GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 
GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); @@ -2389,7 +2664,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) } spin_unlock_irqrestore(&ce->guc_state.lock, flags); if (unlikely(disabled)) { - __release_guc_id(guc, ce); + release_guc_id(guc, ce); __guc_context_destroy(ce); return; } @@ -2423,36 +2698,48 @@ static void __guc_context_destroy(struct intel_context *ce) static void guc_flush_destroyed_contexts(struct intel_guc *guc) { - struct intel_context *ce, *cn; + struct intel_context *ce; unsigned long flags; GEM_BUG_ON(!submission_disabled(guc) && guc_submission_initialized(guc)); - spin_lock_irqsave(&guc->submission_state.lock, flags); - list_for_each_entry_safe(ce, cn, - &guc->submission_state.destroyed_contexts, - destroyed_link) { - list_del_init(&ce->destroyed_link); - __release_guc_id(guc, ce); + while (!list_empty(&guc->submission_state.destroyed_contexts)) { + spin_lock_irqsave(&guc->submission_state.lock, flags); + ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, + struct intel_context, + destroyed_link); + if (ce) + list_del_init(&ce->destroyed_link); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + + if (!ce) + break; + + release_guc_id(guc, ce); __guc_context_destroy(ce); } - spin_unlock_irqrestore(&guc->submission_state.lock, flags); } static void deregister_destroyed_contexts(struct intel_guc *guc) { - struct intel_context *ce, *cn; + struct intel_context *ce; unsigned long flags; - spin_lock_irqsave(&guc->submission_state.lock, flags); - list_for_each_entry_safe(ce, cn, - &guc->submission_state.destroyed_contexts, - destroyed_link) { - list_del_init(&ce->destroyed_link); + while (!list_empty(&guc->submission_state.destroyed_contexts)) { + spin_lock_irqsave(&guc->submission_state.lock, flags); + ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, + struct intel_context, + destroyed_link); + if (ce) + list_del_init(&ce->destroyed_link); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + + if (!ce) + break; + guc_lrc_desc_unpin(ce); } - spin_unlock_irqrestore(&guc->submission_state.lock, flags); } static void destroyed_worker_func(struct work_struct *w) @@ -3369,7 +3656,9 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen12_emit_flush_xcs; } engine->set_default_submission = guc_set_default_submission; + engine->busyness = guc_engine_busyness; + engine->flags |= I915_ENGINE_SUPPORTS_STATS; engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->flags |= I915_ENGINE_HAS_TIMESLICES; @@ -3468,6 +3757,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { guc_init_lrc_mapping(guc); + guc_init_engine_stats(guc); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -3494,6 +3784,7 @@ static bool __guc_submission_selected(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { + guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS; guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } @@ -3695,6 +3986,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { struct intel_context *ce; + unsigned long flags; int desc_idx; if (unlikely(len != 1)) { @@ -3703,11 +3995,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, } desc_idx = msg[0]; + + /* + * The context 
lookup uses the xarray but lookups only require an RCU lock + * not the full spinlock. So take the lock explicitly and keep it until the + * context has been reference count locked to ensure it can't be destroyed + * asynchronously until the reset is done. + */ + xa_lock_irqsave(&guc->context_lookup, flags); ce = g2h_context_lookup(guc, desc_idx); + if (ce) + intel_context_get(ce); + xa_unlock_irqrestore(&guc->context_lookup, flags); + if (unlikely(!ce)) return -EPROTO; guc_handle_context_reset(guc, ce); + intel_context_put(ce); return 0; } @@ -3728,11 +4033,12 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { struct intel_engine_cs *engine; + struct intel_gt *gt = guc_to_gt(guc); u8 guc_class, instance; u32 reason; if (unlikely(len != 3)) { - drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + drm_err(>->i915->drm, "Invalid length %u", len); return -EPROTO; } @@ -3742,12 +4048,19 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, engine = guc_lookup_engine(guc, guc_class, instance); if (unlikely(!engine)) { - drm_err(&guc_to_gt(guc)->i915->drm, + drm_err(>->i915->drm, "Invalid engine %d:%d", guc_class, instance); return -EPROTO; } - intel_gt_handle_error(guc_to_gt(guc), engine->mask, + /* + * This is an unexpected failure of a hardware feature. So, log a real + * error message not just the informational that comes with the reset. + */ + drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", + guc_class, instance, engine->name, reason); + + intel_gt_handle_error(gt, engine->mask, I915_ERROR_CAPTURE, "GuC failed to reset %s (reason=0x%08x)\n", engine->name, reason); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index c7ef44fa0c36..5a95a9f0a8e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -28,6 +28,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, void intel_guc_dump_active_requests(struct intel_engine_cs *engine, struct i915_request *hung_rq, struct drm_printer *m); +void intel_guc_busyness_park(struct intel_gt *gt); +void intel_guc_busyness_unpark(struct intel_gt *gt); bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index ff4b6869b80b..d10b227ac4aa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -54,65 +54,6 @@ void intel_huc_init_early(struct intel_huc *huc) } } -static int intel_huc_rsa_data_create(struct intel_huc *huc) -{ - struct intel_gt *gt = huc_to_gt(huc); - struct intel_guc *guc = >->uc.guc; - struct i915_vma *vma; - size_t copied; - void *vaddr; - int err; - - err = i915_inject_probe_error(gt->i915, -ENXIO); - if (err) - return err; - - /* - * HuC firmware will sit above GUC_GGTT_TOP and will not map - * through GTT. Unfortunately, this means GuC cannot perform - * the HuC auth. as the rsa offset now falls within the GuC - * inaccessible range. We resort to perma-pinning an additional - * vma within the accessible range that only contains the rsa - * signature. The GuC can use this extra pinning to perform - * the authentication since its GGTT offset will be GuC - * accessible. 
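The accessibility rule behind this (soon-to-be-relocated) comment: the GuC can only address GGTT offsets below GUC_GGTT_TOP, while firmware images are deliberately mapped above it, so the signature gets its own perma-pinned vma below the boundary. A trivial sketch; the GUC_GGTT_TOP value here is quoted from the driver's firmware interface headers and should be treated as an assumption:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GUC_GGTT_TOP	0xFEE00000u	/* assumed: end of GuC-addressable GGTT */

static bool guc_can_address(uint32_t ggtt_offset)
{
	return ggtt_offset < GUC_GGTT_TOP;
}

int main(void)
{
	printf("fw image @ 0xFF000000 -> %s\n",
	       guc_can_address(0xFF000000u) ? "GuC-visible" : "not GuC-visible");
	printf("rsa vma  @ 0x00100000 -> %s\n",
	       guc_can_address(0x00100000u) ? "GuC-visible" : "not GuC-visible");
	return 0;
}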
- */ - GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE); - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map_unlocked(vma->obj, - i915_coherent_map_type(gt->i915, - vma->obj, true)); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - err = PTR_ERR(vaddr); - goto unpin_out; - } - - copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size); - i915_gem_object_unpin_map(vma->obj); - - if (copied < huc->fw.rsa_size) { - err = -ENOMEM; - goto unpin_out; - } - - huc->rsa_data = vma; - - return 0; - -unpin_out: - i915_vma_unpin_and_release(&vma, 0); - return err; -} - -static void intel_huc_rsa_data_destroy(struct intel_huc *huc) -{ - i915_vma_unpin_and_release(&huc->rsa_data, 0); -} - int intel_huc_init(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; @@ -122,21 +63,10 @@ int intel_huc_init(struct intel_huc *huc) if (err) goto out; - /* - * HuC firmware image is outside GuC accessible range. - * Copy the RSA signature out of the image into - * a perma-pinned region set aside for it - */ - err = intel_huc_rsa_data_create(huc); - if (err) - goto out_fini; - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE); return 0; -out_fini: - intel_uc_fw_fini(&huc->fw); out: i915_probe_error(i915, "failed with %d\n", err); return err; @@ -147,7 +77,6 @@ void intel_huc_fini(struct intel_huc *huc) if (!intel_uc_fw_is_loadable(&huc->fw)) return; - intel_huc_rsa_data_destroy(huc); intel_uc_fw_fini(&huc->fw); } @@ -177,7 +106,7 @@ int intel_huc_auth(struct intel_huc *huc) goto fail; ret = intel_guc_auth_huc(guc, - intel_guc_ggtt_offset(guc, huc->rsa_data)); + intel_guc_ggtt_offset(guc, huc->fw.rsa_data)); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); goto fail; @@ -199,7 +128,7 @@ int intel_huc_auth(struct intel_huc *huc) fail: i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index daee43b661d4..ae8c8a6c8cc8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -15,8 +15,6 @@ struct intel_huc { struct intel_uc_fw fw; /* HuC-specific additions */ - struct i915_vma *rsa_data; - struct { i915_reg_t reg; u32 mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 2fef3b0bbe95..09ed29df67bc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -8,6 +8,7 @@ #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" +#include "gt/intel_rps.h" #include "intel_uc.h" #include "i915_drv.h" @@ -35,7 +36,7 @@ static void uc_expand_default_options(struct intel_uc *uc) } /* Intermediate platforms are HuC authentication only */ - if (IS_ALDERLAKE_S(i915)) { + if (IS_ALDERLAKE_S(i915) && !IS_ADLS_RPLS(i915)) { i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; return; } @@ -462,6 +463,8 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + intel_rps_raise_unslice(&uc_to_gt(uc)->rps); + while (attempts--) { /* * Always reset the GuC just before (re)loading, so @@ -499,6 +502,9 @@ static int __uc_init_hw(struct intel_uc *uc) ret = intel_guc_slpc_enable(&guc->slpc); if (ret) goto err_submission; + } else { + /* Restore GT back to RPn for non-SLPC path */ + 
intel_rps_lower_unslice(&uc_to_gt(uc)->rps); } drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", @@ -529,6 +535,9 @@ err_submission: err_log_capture: __uc_capture_load_err_log(uc); err_out: + /* Return GT back to RPn */ + intel_rps_lower_unslice(&uc_to_gt(uc)->rps); + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 3aa87be4f2e4..a5af05bde6f2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -48,22 +48,39 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same * firmware as TGL. */ -#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ - fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0)) +#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \ + fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0)) + +#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ + fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \ + fw_def(ROCKETLAKE, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(JASPERLAKE, 0, huc_def(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, huc_def(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, huc_def(kbl, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, huc_def(skl, 2, 0, 0)) #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ @@ -79,11 +96,11 @@ void intel_uc_fw_change_status(struct intel_uc_fw 
*uc_fw, __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) /* All blobs need to be declared via MODULE_FIRMWARE() */ -#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \ - MODULE_FIRMWARE(guc_); \ - MODULE_FIRMWARE(huc_); +#define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \ + MODULE_FIRMWARE(uc_); -INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) +INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) /* The below structs and macros are used to iterate across the list of blobs */ struct __packed uc_fw_blob { @@ -106,31 +123,47 @@ struct __packed uc_fw_blob { struct __packed uc_fw_platform_requirement { enum intel_platform p; u8 rev; /* first platform rev using this FW */ - const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES]; + const struct uc_fw_blob blob; }; -#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \ +#define MAKE_FW_LIST(platform_, revid_, uc_) \ { \ .p = INTEL_##platform_, \ .rev = revid_, \ - .blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \ - .blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \ + .blob = uc_, \ }, +struct fw_blobs_by_type { + const struct uc_fw_platform_requirement *blobs; + u32 count; +}; + static void __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { - static const struct uc_fw_platform_requirement fw_blobs[] = { - INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB) + static const struct uc_fw_platform_requirement blobs_guc[] = { + INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) + }; + static const struct uc_fw_platform_requirement blobs_huc[] = { + INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) }; + static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { + [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, + [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + }; + static const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; + u32 fw_count; u8 rev = INTEL_REVID(i915); int i; - for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) { + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); + fw_blobs = blobs_all[uc_fw->type].blobs; + fw_count = blobs_all[uc_fw->type].count; + + for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { - const struct uc_fw_blob *blob = - &fw_blobs[i].blobs[uc_fw->type]; + const struct uc_fw_blob *blob = &fw_blobs[i].blob; uc_fw->path = blob->path; uc_fw->major_ver_wanted = blob->major; uc_fw->minor_ver_wanted = blob->minor; @@ -140,7 +173,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) /* make sure the list is ordered as expected */ if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { - for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) { + for (i = 1; i < fw_count; i++) { if (fw_blobs[i].p < fw_blobs[i - 1].p) continue; @@ -322,13 +355,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); /* now RSA */ - if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) { - drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, - css->key_size_dw, UOS_RSA_SCRATCH_COUNT); - err = -EPROTO; - goto fail; - } uc_fw->rsa_size = css->key_size_dw * sizeof(u32); /* At least, it should have header, uCode and RSA. Size of all three. 
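Although the single table is now split per uC type, the selection walk is unchanged: entries are ordered newest platform first (and, within a platform, by descending required revision), and the first entry matching the platform with rev >= the entry's floor wins. A reduced sketch of that walk; the platform ids and paths below are made up for illustration:

#include <stdio.h>

/* made-up platform ids, newest first, mirroring enum intel_platform order */
enum { PLAT_KBL = 7, PLAT_CML = 9, PLAT_TGL = 12 };

struct fw_req {
	int p;			/* platform */
	unsigned char rev;	/* first revid using this blob */
	const char *path;
};

static const struct fw_req blobs_guc[] = {
	{ PLAT_TGL, 0, "i915/tgl_guc_62.0.0.bin" },
	{ PLAT_CML, 5, "i915/cml_guc_62.0.0.bin" },
	{ PLAT_CML, 0, "i915/kbl_guc_62.0.0.bin" },
};

static const char *select_blob(int p, unsigned char rev)
{
	unsigned int i;

	/* stop once the remaining entries are for older platforms */
	for (i = 0; i < sizeof(blobs_guc) / sizeof(blobs_guc[0]) &&
		    p <= blobs_guc[i].p; i++)
		if (p == blobs_guc[i].p && rev >= blobs_guc[i].rev)
			return blobs_guc[i].path;

	return NULL;
}

int main(void)
{
	printf("CML rev 5 -> %s\n", select_blob(PLAT_CML, 5));	/* cml blob */
	printf("CML rev 0 -> %s\n", select_blob(PLAT_CML, 0));	/* kbl blob */
	return 0;
}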
*/ @@ -540,10 +566,79 @@ fail: i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return err; } +static inline bool uc_fw_need_rsa_in_memory(struct intel_uc_fw *uc_fw) +{ + /* + * The HW reads the GuC RSA from memory if the key size is > 256 bytes, + * while it reads it from the 64 RSA registers if it is smaller. + * The HuC RSA is always read from memory. + */ + return uc_fw->type == INTEL_UC_FW_TYPE_HUC || uc_fw->rsa_size > 256; +} + +static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct i915_vma *vma; + size_t copied; + void *vaddr; + int err; + + err = i915_inject_probe_error(gt->i915, -ENXIO); + if (err) + return err; + + if (!uc_fw_need_rsa_in_memory(uc_fw)) + return 0; + + /* + * uC firmwares will sit above GUC_GGTT_TOP and will not map through + * GGTT. Unfortunately, this means that the GuC HW cannot perform the uC + * authentication from memory, as the RSA offset now falls within the + * GuC inaccessible range. We resort to perma-pinning an additional vma + * within the accessible range that only contains the RSA signature. + * The GuC HW can use this extra pinning to perform the authentication + * since its GGTT offset will be GuC accessible. + */ + GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE); + vma = intel_guc_allocate_vma(>->uc.guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map_unlocked(vma->obj, + i915_coherent_map_type(gt->i915, vma->obj, true)); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + err = PTR_ERR(vaddr); + goto unpin_out; + } + + copied = intel_uc_fw_copy_rsa(uc_fw, vaddr, vma->size); + i915_gem_object_unpin_map(vma->obj); + + if (copied < uc_fw->rsa_size) { + err = -ENOMEM; + goto unpin_out; + } + + uc_fw->rsa_data = vma; + + return 0; + +unpin_out: + i915_vma_unpin_and_release(&vma, 0); + return err; +} + +static void uc_fw_rsa_data_destroy(struct intel_uc_fw *uc_fw) +{ + i915_vma_unpin_and_release(&uc_fw->rsa_data, 0); +} + int intel_uc_fw_init(struct intel_uc_fw *uc_fw) { int err; @@ -558,14 +653,29 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) if (err) { DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", intel_uc_fw_type_repr(uc_fw->type), err); - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + goto out; } + err = uc_fw_rsa_data_create(uc_fw); + if (err) { + DRM_DEBUG_DRIVER("%s fw rsa data creation failed, err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto out_unpin; + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(uc_fw->obj); +out: + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL); return err; } void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) { + uc_fw_rsa_data_destroy(uc_fw); + if (i915_gem_object_has_pinned_pages(uc_fw->obj)) i915_gem_object_unpin_pages(uc_fw->obj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 1e00bf65639e..d9d1dc0b4cbb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -32,11 +32,12 @@ struct intel_gt; * | | MISSING <--/ | \--> ERROR | * | fetch | V | * | | AVAILABLE | - * +------------+- | -+ + * +------------+- | \ -+ + * | | | \--> INIT FAIL | * | init | V | * | | /------> LOADABLE <----<-----------\ | * +------------+- \ / \ \ \ -+ - * | | FAIL <--< \--> 
TRANSFERRED \ | + * | | LOAD FAIL <--< \--> TRANSFERRED \ | * | upload | \ / \ / | * | | \---------/ \--> RUNNING | * +------------+---------------------------------------------------+ @@ -50,8 +51,9 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */ INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */ INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ - INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ + INTEL_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ }; @@ -84,6 +86,7 @@ struct intel_uc_fw { * or during a GT reset (mutex guarantees single threaded). */ struct i915_vma dummy; + struct i915_vma *rsa_data; /* * The firmware build process will generate a version header file with major and @@ -130,10 +133,12 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) return "ERROR"; case INTEL_UC_FIRMWARE_AVAILABLE: return "AVAILABLE"; + case INTEL_UC_FIRMWARE_INIT_FAIL: + return "INIT FAIL"; case INTEL_UC_FIRMWARE_LOADABLE: return "LOADABLE"; - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; + case INTEL_UC_FIRMWARE_LOAD_FAIL: + return "LOAD FAIL"; case INTEL_UC_FIRMWARE_TRANSFERRED: return "TRANSFERRED"; case INTEL_UC_FIRMWARE_RUNNING: @@ -155,7 +160,8 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status) return -ENOENT; case INTEL_UC_FIRMWARE_ERROR: return -ENOEXEC; - case INTEL_UC_FIRMWARE_FAIL: + case INTEL_UC_FIRMWARE_INIT_FAIL: + case INTEL_UC_FIRMWARE_LOAD_FAIL: return -EIO; case INTEL_UC_FIRMWARE_SELECTED: return -ESTALE; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index fb0e4a7bd8ca..d3327b802b76 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -3,8 +3,21 @@ * Copyright © 2021 Intel Corporation */ +#include "selftests/igt_spinner.h" #include "selftests/intel_scheduler_helpers.h" +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIMEDOUT; + + return err; +} + static struct i915_request *nop_user_request(struct intel_context *ce, struct i915_request *from) { @@ -110,12 +123,172 @@ err: return ret; } +/* + * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one + * + * This test creates a spinner which is used to block all subsequent submissions + * until it completes. Next, a loop creates a context and a NOP request each + * iteration until the guc_ids are exhausted (request creation returns -EAGAIN). + * The spinner is ended, unblocking all requests created in the loop. At this + * point all guc_ids are exhausted but are available to steal. Try to create + * another request which should successfully steal a guc_id. Wait on last + * request to complete, idle GPU, verify a guc_id was stolen via a counter, and + * exit the test. Test also artificially reduces the number of guc_ids so the + * test runs in a timely manner.
+ */ +static int intel_guc_steal_guc_ids(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_guc *guc = >->uc.guc; + int ret, sv, context_index = 0; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine; + struct intel_context **ce; + struct igt_spinner spin; + struct i915_request *spin_rq = NULL, *rq, *last = NULL; + int number_guc_id_stolen = guc->number_guc_id_stolen; + + ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL); + if (!ce) { + pr_err("Context array allocation failed\n"); + return -ENOMEM; + } + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + engine = intel_selftest_find_any_engine(gt); + sv = guc->submission_state.num_guc_ids; + guc->submission_state.num_guc_ids = 4096; + + /* Create spinner to block requests in below loop */ + ce[context_index] = intel_context_create(engine); + if (IS_ERR(ce[context_index])) { + ret = PTR_ERR(ce[context_index]); + ce[context_index] = NULL; + pr_err("Failed to create context: %d\n", ret); + goto err_wakeref; + } + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + pr_err("Failed to create spinner: %d\n", ret); + goto err_contexts; + } + spin_rq = igt_spinner_create_request(&spin, ce[context_index], + MI_ARB_CHECK); + if (IS_ERR(spin_rq)) { + ret = PTR_ERR(spin_rq); + pr_err("Failed to create spinner request: %d\n", ret); + goto err_contexts; + } + ret = request_add_spin(spin_rq, &spin); + if (ret) { + pr_err("Failed to add Spinner request: %d\n", ret); + goto err_spin_rq; + } + + /* Use all guc_ids */ + while (ret != -EAGAIN) { + ce[++context_index] = intel_context_create(engine); + if (IS_ERR(ce[context_index])) { + ret = PTR_ERR(ce[context_index--]); + ce[context_index] = NULL; + pr_err("Failed to create context: %d\n", ret); + goto err_spin_rq; + } + + rq = nop_user_request(ce[context_index], spin_rq); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + rq = NULL; + if (ret != -EAGAIN) { + pr_err("Failed to create request, %d: %d\n", + context_index, ret); + goto err_spin_rq; + } + } else { + if (last) + i915_request_put(last); + last = rq; + } + } + + /* Release blocked requests */ + igt_spinner_end(&spin); + ret = intel_selftest_wait_for_rq(spin_rq); + if (ret) { + pr_err("Spin request failed to complete: %d\n", ret); + i915_request_put(last); + goto err_spin_rq; + } + i915_request_put(spin_rq); + igt_spinner_fini(&spin); + spin_rq = NULL; + + /* Wait for last request */ + ret = i915_request_wait(last, 0, HZ * 30); + i915_request_put(last); + if (ret < 0) { + pr_err("Last request failed to complete: %d\n", ret); + goto err_spin_rq; + } + + /* Try to steal guc_id */ + rq = nop_user_request(ce[context_index], NULL); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret); + goto err_spin_rq; + } + + /* Wait for request with stolen guc_id */ + ret = i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + if (ret < 0) { + pr_err("Request with stolen guc_id failed to complete: %d\n", + ret); + goto err_spin_rq; + } + + /* Wait for idle */ + ret = intel_gt_wait_for_idle(gt, HZ * 30); + if (ret < 0) { + pr_err("GT failed to idle: %d\n", ret); + goto err_spin_rq; + } + + /* Verify a guc_id was stolen */ + if (guc->number_guc_id_stolen == number_guc_id_stolen) { + pr_err("No guc_id was stolen"); + ret = -EINVAL; + } else { + ret = 0; + } + +err_spin_rq: + if (spin_rq) { + igt_spinner_end(&spin); + intel_selftest_wait_for_rq(spin_rq); + i915_request_put(spin_rq); + igt_spinner_fini(&spin); + intel_gt_wait_for_idle(gt, HZ * 30); + } +err_contexts: + for (; context_index 
>= 0 && ce[context_index]; --context_index) + intel_context_put(ce[context_index]); +err_wakeref: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + kfree(ce); + guc->submission_state.num_guc_ids = sv; + + return ret; +} + int intel_guc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(intel_guc_scrub_ctbs), + SUBTEST(intel_guc_steal_guc_ids), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c index 50953c8e8b53..1297ddbf7f88 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -167,7 +167,7 @@ int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(intel_guc_multi_lrc_basic), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0;
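One recurring mechanical change in this series, visible in all three selftest epilogues above, is replacing direct &i915->gt dereferences with the to_gt(i915) accessor so call sites stop hard-coding where the root GT lives inside drm_i915_private. A stripped-down illustration of the pattern (structure layout hypothetical):

#include <stdio.h>

struct intel_gt { int awake; };

struct drm_i915_private {
	struct intel_gt gt0;	/* hypothetical field name */
};

/* call sites say to_gt(i915) instead of &i915->gt */
static struct intel_gt *to_gt(struct drm_i915_private *i915)
{
	return &i915->gt0;	/* single point of truth for the root gt */
}

int main(void)
{
	struct drm_i915_private i915 = { .gt0 = { .awake = 1 } };

	printf("gt awake = %d\n", to_gt(&i915)->awake);
	return 0;
}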