Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r-- drivers/gpu/drm/i915/gt/gen2_engine_cs.c | 25
-rw-r--r-- drivers/gpu/drm/i915/gt/gen2_engine_cs.h | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 15
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 9
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 15
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_ggtt.c | 75
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 7
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt.c | 87
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c | 10
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 26
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 7
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 12
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 144
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 47
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gtt.c | 1
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gtt.h | 10
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_lrc.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_migrate.c | 4
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_mocs.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_rc6.c | 21
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_reset.c | 53
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_reset.h | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_reset_types.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_ring.c | 24
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_ring.h | 1
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 74
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_rps.c | 30
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_rps_types.h | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_sa_media.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_sseu.c | 58
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_tlb.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_wopcm.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 36
-rw-r--r-- drivers/gpu/drm/i915/gt/mock_engine.c | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 4
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 11
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_migrate.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_rc6.c | 59
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_rps.c | 16
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_slpc.c | 19
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_tlb.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/shaders/README | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/shmem_utils.c | 10
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 5
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h | 4
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 50
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 10
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 13
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 4
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 104
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 125
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 42
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_huc.h | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c | 2
70 files changed, 883 insertions, 500 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
index 8fe0499308ff..8116fd5987e2 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
@@ -169,7 +169,7 @@ static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
return cs;
}
-u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}
@@ -179,7 +179,7 @@ u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}
-/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
@@ -248,7 +248,7 @@ int i830_emit_bb_start(struct i915_request *rq,
return 0;
}
-int gen3_emit_bb_start(struct i915_request *rq,
+int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags)
{
@@ -292,29 +292,12 @@ int gen4_emit_bb_start(struct i915_request *rq,
void gen2_irq_enable(struct intel_engine_cs *engine)
{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask &= ~engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
- ENGINE_POSTING_READ16(engine, RING_IMR);
-}
-
-void gen2_irq_disable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask |= engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
-}
-
-void gen3_irq_enable(struct intel_engine_cs *engine)
-{
engine->i915->irq_mask &= ~engine->irq_enable_mask;
intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}
-void gen3_irq_disable(struct intel_engine_cs *engine)
+void gen2_irq_disable(struct intel_engine_cs *engine)
{
engine->i915->irq_mask |= engine->irq_enable_mask;
intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
index a5cd64a65c9e..7b37560fc356 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
@@ -15,13 +15,13 @@ int gen2_emit_flush(struct i915_request *rq, u32 mode);
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode);
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode);
-u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs);
+u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs);
u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs);
int i830_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
-int gen3_emit_bb_start(struct i915_request *rq,
+int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
int gen4_emit_bb_start(struct i915_request *rq,
@@ -30,8 +30,6 @@ int gen4_emit_bb_start(struct i915_request *rq,
void gen2_irq_enable(struct intel_engine_cs *engine);
void gen2_irq_disable(struct intel_engine_cs *engine);
-void gen3_irq_enable(struct intel_engine_cs *engine);
-void gen3_irq_disable(struct intel_engine_cs *engine);
void gen5_irq_enable(struct intel_engine_cs *engine);
void gen5_irq_disable(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index d38b914d1206..6e89112f68ae 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -399,7 +399,8 @@ static void emit_batch(struct i915_vma * const vma,
batch_add(&cmds, MI_LOAD_REGISTER_IMM(2));
batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7));
batch_add(&cmds, 0xffff0000 |
- ((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ?
+ (((IS_IVYBRIDGE(i915) && INTEL_INFO(i915)->gt == 1) ||
+ IS_VALLEYVIEW(i915)) ?
HIZ_RAW_STALL_OPT_DISABLE :
0));
batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1));
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 20b9b04ec1e0..cc866773ba6f 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -70,7 +70,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
if (!--b->irq_enabled)
b->irq_disable(b);
- WRITE_ONCE(b->irq_armed, 0);
+ WRITE_ONCE(b->irq_armed, NULL);
intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 40269e4c1e31..325da0414d94 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -126,9 +126,6 @@ execlists_active(const struct intel_engine_execlists *execlists)
return active;
}
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
-
static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4d30a86016f2..b721bbd23356 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -308,7 +308,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
/*
* There is a discrepancy here between the size reported
* by the register and the size of the context layout
- * in the docs. Both are described as authorative!
+ * in the docs. Both are described as authoritative!
*
* The discrepancy is on the order of a few cachelines,
* but the total is under one page (4k), which is our
@@ -677,7 +677,7 @@ void intel_engines_release(struct intel_gt *gt)
* in case we aborted before completely initialising the engines.
*/
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
intel_gt_reset_all_engines(gt);
/* Decouple the backend; but keep the layout for late GPU resets */
@@ -769,9 +769,8 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
if (MEDIA_VER_FULL(i915) < IP_VER(12, 55))
media_fuse = ~media_fuse;
- vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
- vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
- GEN11_GT_VEBOX_DISABLE_SHIFT;
+ vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse);
+ vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse);
if (MEDIA_VER_FULL(i915) >= IP_VER(12, 55)) {
fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
@@ -845,7 +844,7 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
* Note that we have a catch-22 situation where we need to be able to access
* the blitter forcewake domain to read the engine fuses, but at the same time
* we need to know which engines are available on the system to know which
- * forcewake domains are present. We solve this by intializing the forcewake
+ * forcewake domains are present. We solve this by initializing the forcewake
* domains based on the full engine mask in the platform capabilities before
* calling this function and pruning the domains for fused-off engines
* afterwards.
@@ -1411,7 +1410,7 @@ create_ggtt_bind_context(struct intel_engine_cs *engine)
/*
* MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple
- * bind requets at a time so get a bigger ring.
+ * bind requests at a time so get a bigger ring.
*/
return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K,
I915_GEM_HWS_GGTT_BIND_ADDR,
@@ -1533,7 +1532,7 @@ int intel_engines_init(struct intel_gt *gt)
/**
* intel_engine_cleanup_common - cleans up the engine state created by
- * the common initiailizers.
+ * the common initializers.
* @engine: Engine to cleanup.
*
* This cleans up everything created by the common helpers.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index a8eac59e3779..1c4784cb296c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -15,6 +15,7 @@
#define HEAD_WRAP_COUNT 0xFFE00000
#define HEAD_WRAP_ONE 0x00200000
#define HEAD_ADDR 0x001FFFFC
+#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_START(base) _MMIO((base) + 0x38)
#define RING_CTL(base) _MMIO((base) + 0x3c)
#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */
@@ -26,7 +27,6 @@
#define RING_VALID_MASK 0x00000001
#define RING_VALID 0x00000001
#define RING_INVALID 0x00000000
-#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */
#define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */
#define RING_SYNC_0(base) _MMIO((base) + 0x40)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ba55c059063d..155b6255a63e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -237,7 +237,7 @@ struct intel_engine_execlists {
*/
struct i915_request * const *active;
/**
- * @inflight: the set of contexts submitted and acknowleged by HW
+ * @inflight: the set of contexts submitted and acknowledged by HW
*
* The set of inflight contexts is managed by reading CS events
* from the HW. On a context-switch event (not preemption), we
@@ -260,7 +260,7 @@ struct intel_engine_execlists {
unsigned int port_mask;
/**
- * @virtual: Queue of requets on a virtual engine, sorted by priority.
+ * @virtual: Queue of requests on a virtual engine, sorted by priority.
* Each RB entry is a struct i915_priolist containing a list of requests
* of the same priority.
*/
@@ -343,6 +343,11 @@ struct intel_engine_guc_stats {
* @start_gt_clk: GT clock time of last idle to active transition.
*/
u64 start_gt_clk;
+
+ /**
+ * @total: The last value of total returned
+ */
+ u64 total;
};
union intel_engine_tlb_inv_reg {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 72090f52fb85..03baa7fa0a27 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -405,15 +405,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
return active;
}
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
-{
- struct intel_engine_cs *engine =
- container_of(execlists, typeof(*engine), execlists);
-
- return __unwind_incomplete_requests(engine);
-}
-
static void
execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
@@ -2511,7 +2502,7 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir)
ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
ENGINE_TRACE(engine, "semaphore yield: %08x\n",
engine->execlists.yield);
- if (del_timer(&engine->execlists.timer))
+ if (timer_delete(&engine->execlists.timer))
tasklet = true;
}
@@ -3379,8 +3370,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
static void execlists_shutdown(struct intel_engine_cs *engine)
{
/* Synchronise with residual timers and any softirq they raise */
- del_timer_sync(&engine->execlists.timer);
- del_timer_sync(&engine->execlists.preempt);
+ timer_delete_sync(&engine->execlists.timer);
+ timer_delete_sync(&engine->execlists.preempt);
tasklet_kill(&engine->sched_engine->tasklet);
}
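
Note on the timer calls above: del_timer()/del_timer_sync() are converted to timer_delete()/timer_delete_sync(), the current names for the same operations in the kernel timer API. A minimal sketch of the two semantics, assuming only <linux/timer.h>:

    #include <linux/timer.h>

    static void example_stop(struct timer_list *t)
    {
        /* Deactivate @t if it is pending; does not wait for a
         * handler that is already running on another CPU.
         */
        timer_delete(t);
    }

    static void example_shutdown(struct timer_list *t)
    {
        /* As above, but also waits for a concurrently running
         * handler to finish; this is what teardown paths such as
         * execlists_shutdown() need.
         */
        timer_delete_sync(t);
    }

Both variants return nonzero when the timer was still pending, which is why execlists_irq_handler() can use the timer_delete() return value to decide whether to kick the tasklet.
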
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index d60a6ca0cae5..46a5aa4ab9c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -107,11 +107,12 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
/**
* i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
* @vm: The VM to suspend the mappings for
+ * @evict_all: Evict all VMAs
*
* Suspend the memory mappings for all objects mapped to HW via the GGTT or a
* DPT page table.
*/
-void i915_ggtt_suspend_vm(struct i915_address_space *vm)
+void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all)
{
struct i915_vma *vma, *vn;
int save_skip_rewrite;
@@ -157,7 +158,7 @@ retry:
goto retry;
}
- if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+ if (evict_all || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
i915_vma_wait_for_bind(vma);
__i915_vma_evict(vma, false);
@@ -172,13 +173,15 @@ retry:
vm->skip_pte_rewrite = save_skip_rewrite;
mutex_unlock(&vm->mutex);
+
+ drm_WARN_ON(&vm->i915->drm, evict_all && !list_empty(&vm->bound_list));
}
void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
struct intel_gt *gt;
- i915_ggtt_suspend_vm(&ggtt->vm);
+ i915_ggtt_suspend_vm(&ggtt->vm, false);
ggtt->invalidate(ggtt);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
@@ -286,6 +289,14 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
return pte;
}
+static dma_addr_t gen8_ggtt_pte_decode(u64 pte, bool *is_present, bool *is_local)
+{
+ *is_present = pte & GEN8_PAGE_PRESENT;
+ *is_local = pte & GEN12_GGTT_PTE_LM;
+
+ return pte & GEN12_GGTT_PTE_ADDR_MASK;
+}
+
static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
{
struct intel_gt *gt = ggtt->vm.gt;
@@ -432,6 +443,11 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
writeq(pte, addr);
}
+static gen8_pte_t gen8_get_pte(void __iomem *addr)
+{
+ return readq(addr);
+}
+
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
@@ -447,6 +463,16 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
ggtt->invalidate(ggtt);
}
+static dma_addr_t gen8_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t __iomem *pte =
+ (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ return ggtt->vm.pte_decode(gen8_get_pte(pte), is_present, is_local);
+}
+
static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
dma_addr_t addr, u64 offset,
unsigned int pat_index, u32 flags)
@@ -602,6 +628,17 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
ggtt->invalidate(ggtt);
}
+static dma_addr_t gen6_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset,
+ bool *is_present, bool *is_local)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *pte =
+ (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ return vm->pte_decode(ioread32(pte), is_present, is_local);
+}
+
/*
* Binds an object into the global gtt with the specified cache level.
* The object will be accessible to the GPU via commands whose operands
@@ -766,6 +803,14 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
+dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+ return ggtt->vm.read_entry(vm, offset, is_present, is_local);
+}
+
/*
* Reserve the top of the GuC address space for firmware images. Addresses
* beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
@@ -1242,6 +1287,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.scratch_range = gen8_ggtt_clear_range;
ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+ ggtt->vm.read_entry = gen8_ggtt_read_entry;
/*
* Serialize GTT updates with aperture access on BXT if VT-d is on,
@@ -1288,6 +1334,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
else
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+ ggtt->vm.pte_decode = gen8_ggtt_pte_decode;
+
return ggtt_probe_common(ggtt, size);
}
@@ -1387,6 +1435,14 @@ static u64 iris_pte_encode(dma_addr_t addr,
return pte;
}
+static dma_addr_t gen6_pte_decode(u64 pte, bool *is_present, bool *is_local)
+{
+ *is_present = pte & GEN6_PTE_VALID;
+ *is_local = false;
+
+ return ((pte & 0xff0) << 28) | (pte & ~0xfff);
+}
+
static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
@@ -1425,6 +1481,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.scratch_range = gen6_ggtt_clear_range;
ggtt->vm.insert_page = gen6_ggtt_insert_page;
ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+ ggtt->vm.read_entry = gen6_ggtt_read_entry;
ggtt->vm.cleanup = gen6_gmch_remove;
ggtt->invalidate = gen6_ggtt_invalidate;
@@ -1440,6 +1497,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
else
ggtt->vm.pte_encode = snb_pte_encode;
+ ggtt->vm.pte_decode = gen6_pte_decode;
+
ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
@@ -1545,6 +1604,7 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915)
/**
* i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
* @vm: The VM to restore the mappings for
+ * @all_evicted: Were all VMAs expected to be evicted on suspend?
*
* Restore the memory mappings for all objects mapped to HW via the GGTT or a
* DPT page table.
@@ -1552,13 +1612,18 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915)
* Returns %true if restoring the mapping for any object that was in a write
* domain before suspend.
*/
-bool i915_ggtt_resume_vm(struct i915_address_space *vm)
+bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted)
{
struct i915_vma *vma;
bool write_domain_objs = false;
drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
+ if (all_evicted) {
+ drm_WARN_ON(&vm->i915->drm, !list_empty(&vm->bound_list));
+ return false;
+ }
+
/* First fill our portion of the GTT with scratch pages */
vm->clear_range(vm, 0, vm->total);
@@ -1598,7 +1663,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
intel_gt_check_and_clear_faults(gt);
- flush = i915_ggtt_resume_vm(&ggtt->vm);
+ flush = i915_ggtt_resume_vm(&ggtt->vm, false);
if (drm_mm_node_allocated(&ggtt->error_capture))
ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
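
The read_entry()/pte_decode() hooks added above are the inverse of the existing pte_encode(): they read a PTE back out of the GSM and unpack the DMA address plus the present/local status bits. A minimal userspace sketch of the gen8 decode, with illustrative mask values standing in for the driver's GEN8_PAGE_PRESENT, GEN12_GGTT_PTE_LM and GEN12_GGTT_PTE_ADDR_MASK definitions (assumed bit positions, not the real layout):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-ins for the i915 PTE bit definitions. */
    #define PTE_PRESENT   (1ull << 0)
    #define PTE_LM        (1ull << 1)                /* local memory */
    #define PTE_ADDR_MASK 0x0000fffffffff000ull      /* bits 47:12 */

    static uint64_t pte_decode(uint64_t pte, bool *is_present, bool *is_local)
    {
        *is_present = pte & PTE_PRESENT;
        *is_local = pte & PTE_LM;

        return pte & PTE_ADDR_MASK;  /* page-aligned DMA address */
    }

    int main(void)
    {
        bool present, local;
        uint64_t addr = pte_decode(0x12345003ull, &present, &local);

        /* prints addr=0x12345000 present=1 local=1 */
        printf("addr=%#llx present=%d local=%d\n",
               (unsigned long long)addr, present, local);
        return 0;
    }

Each GGTT flavour (gen6, gen8, gmch) supplies its own decoder through ggtt->vm.read_entry, with intel_ggtt_read_entry() (declared in intel_gtt.h further below) as the common entry point.
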
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 0ffba50981e3..0c723e7c71a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -328,6 +328,7 @@ static bool fence_is_active(const struct i915_fence_reg *fence)
static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
{
+ struct intel_display *display = &ggtt->vm.i915->display;
struct i915_fence_reg *active = NULL;
struct i915_fence_reg *fence, *fn;
@@ -353,7 +354,7 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
}
/* Wait for completion of pending flips which consume fences */
- if (intel_has_pending_fb_unpin(ggtt->vm.i915))
+ if (intel_has_pending_fb_unpin(display))
return ERR_PTR(-EAGAIN);
return ERR_PTR(-ENOBUFS);
@@ -749,7 +750,7 @@ static void swizzle_page(struct page *page)
char *vaddr;
int i;
- vaddr = kmap(page);
+ vaddr = kmap_local_page(page);
for (i = 0; i < PAGE_SIZE; i += 128) {
memcpy(temp, &vaddr[i], 64);
@@ -757,7 +758,7 @@ static void swizzle_page(struct page *page)
memcpy(&vaddr[i + 64], temp, 64);
}
- kunmap(page);
+ kunmap_local(vaddr);
}
/**
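
swizzle_page() above moves from kmap()/kunmap() to kmap_local_page()/kunmap_local(). The local variant creates a cheap, thread-local mapping that is only valid in the acquiring context and merely has to be unmapped before leaving it; no global kmap table or lock is involved. The general pattern, as a sketch:

    #include <linux/highmem.h>
    #include <linux/string.h>

    static void copy_from_page(struct page *page, void *dst, size_t len)
    {
        /* Mapping is thread-local: valid only in this context and
         * must not be handed to other threads while mapped.
         */
        char *vaddr = kmap_local_page(page);

        memcpy(dst, vaddr, len);
        kunmap_local(vaddr);
    }
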
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
index 59eed0a0ce90..c5f5f0bdfb2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
@@ -27,6 +27,13 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm,
intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}
+static dma_addr_t gmch_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local)
+{
+ return intel_gmch_gtt_read_entry(offset >> PAGE_SHIFT,
+ is_present, is_local);
+}
+
static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
unsigned int pat_index,
@@ -103,6 +110,7 @@ int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.insert_entries = gmch_ggtt_insert_entries;
ggtt->vm.clear_range = gmch_ggtt_clear_range;
ggtt->vm.scratch_range = gmch_ggtt_clear_range;
+ ggtt->vm.read_entry = gmch_ggtt_read_entry;
ggtt->vm.cleanup = gmch_ggtt_remove;
ggtt->invalidate = gmch_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index a6c69a706fd7..3d3b1ba76e2b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -185,7 +185,7 @@ int intel_gt_init_hw(struct intel_gt *gt)
if (IS_HASWELL(i915))
intel_uncore_write(uncore,
HSW_MI_PREDICATE_RESULT_2,
- IS_HASWELL_GT3(i915) ?
+ INTEL_INFO(i915)->gt == 3 ?
LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
/* Apply the GT workarounds... */
@@ -302,25 +302,48 @@ static void gen6_check_faults(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 fault;
for_each_engine(engine, gt, id) {
+ u32 fault;
+
fault = GEN6_RING_FAULT_REG_READ(engine);
+
if (fault & RING_FAULT_VALID) {
gt_dbg(gt, "Unexpected fault\n"
- "\tAddr: 0x%08lx\n"
+ "\tAddr: 0x%08x\n"
"\tAddress space: %s\n"
"\tSource ID: %d\n"
"\tType: %d\n",
- fault & PAGE_MASK,
+ fault & RING_FAULT_VADDR_MASK,
fault & RING_FAULT_GTTSEL_MASK ?
"GGTT" : "PPGTT",
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
+ REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault),
+ REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault));
}
}
}
+static void gen8_report_fault(struct intel_gt *gt, u32 fault,
+ u32 fault_data0, u32 fault_data1)
+{
+ u64 fault_addr;
+
+ fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
+ ((u64)fault_data0 << 12);
+
+ gt_dbg(gt, "Unexpected fault\n"
+ "\tAddr: 0x%08x_%08x\n"
+ "\tAddress space: %s\n"
+ "\tEngine ID: %d\n"
+ "\tSource ID: %d\n"
+ "\tType: %d\n",
+ upper_32_bits(fault_addr), lower_32_bits(fault_addr),
+ fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+ REG_FIELD_GET(RING_FAULT_ENGINE_ID_MASK, fault),
+ REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault),
+ REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault));
+}
+
static void xehp_check_faults(struct intel_gt *gt)
{
u32 fault;
@@ -333,28 +356,10 @@ static void xehp_check_faults(struct intel_gt *gt)
* toward the primary instance.
*/
fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
- if (fault & RING_FAULT_VALID) {
- u32 fault_data0, fault_data1;
- u64 fault_addr;
-
- fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
- fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
-
- fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
- ((u64)fault_data0 << 12);
-
- gt_dbg(gt, "Unexpected fault\n"
- "\tAddr: 0x%08x_%08x\n"
- "\tAddress space: %s\n"
- "\tEngine ID: %d\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- upper_32_bits(fault_addr), lower_32_bits(fault_addr),
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
- GEN8_RING_FAULT_ENGINE_ID(fault),
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
+ if (fault & RING_FAULT_VALID)
+ gen8_report_fault(gt, fault,
+ intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0),
+ intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1));
}
static void gen8_check_faults(struct intel_gt *gt)
@@ -374,28 +379,10 @@ static void gen8_check_faults(struct intel_gt *gt)
}
fault = intel_uncore_read(uncore, fault_reg);
- if (fault & RING_FAULT_VALID) {
- u32 fault_data0, fault_data1;
- u64 fault_addr;
-
- fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
- fault_data1 = intel_uncore_read(uncore, fault_data1_reg);
-
- fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
- ((u64)fault_data0 << 12);
-
- gt_dbg(gt, "Unexpected fault\n"
- "\tAddr: 0x%08x_%08x\n"
- "\tAddress space: %s\n"
- "\tEngine ID: %d\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- upper_32_bits(fault_addr), lower_32_bits(fault_addr),
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
- GEN8_RING_FAULT_ENGINE_ID(fault),
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
+ if (fault & RING_FAULT_VALID)
+ gen8_report_fault(gt, fault,
+ intel_uncore_read(uncore, fault_data0_reg),
+ intel_uncore_read(uncore, fault_data1_reg));
}
void intel_gt_check_and_clear_faults(struct intel_gt *gt)
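
The duplicated fault decoding in xehp_check_faults() and gen8_check_faults() is consolidated into gen8_report_fault() above. The faulting virtual address is split across the two FAULT_TLB_DATA registers: DATA0 carries VA bits 43:12, and the low nibble of DATA1 (FAULT_VA_HIGH_BITS, REG_GENMASK(3, 0) per the register definitions below) carries VA bits 47:44. A runnable sketch of the reconstruction with made-up register values:

    #include <stdint.h>
    #include <stdio.h>

    #define FAULT_VA_HIGH_BITS 0xfu  /* DATA1 bits 3:0 = VA bits 47:44 */

    int main(void)
    {
        uint32_t fault_data0 = 0x00abcdef;  /* VA bits 43:12 */
        uint32_t fault_data1 = 0x00000013;  /* bit 4 GTT sel, bits 3:0 VA high */
        uint64_t fault_addr;

        fault_addr = ((uint64_t)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
                     ((uint64_t)fault_data0 << 12);

        printf("%#llx\n", (unsigned long long)fault_addr);  /* 0x30000abcdef000 */
        return 0;
    }
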
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 6e63505fe478..6c499692d61e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -35,9 +35,7 @@ static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore,
u32 f24_mhz = 24000000;
u32 f25_mhz = 25000000;
u32 f38_4_mhz = 38400000;
- u32 crystal_clock =
- (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
- GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+ u32 crystal_clock = rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK;
switch (crystal_clock) {
case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
@@ -80,8 +78,7 @@ static u32 gen11_read_clock_frequency(struct intel_uncore *uncore)
* register increments from this frequency (it might
* increment only every few clock cycle).
*/
- freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
- GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+ freq >>= 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
}
return freq;
@@ -102,8 +99,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore)
* register increments from this frequency (it might
* increment only every few clock cycle).
*/
- freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >>
- CTC_SHIFT_PARAMETER_SHIFT);
+ freq >>= 3 - REG_FIELD_GET(CTC_SHIFT_PARAMETER_MASK, ctc_reg);
}
return freq;
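
The conversions in this file (and in intel_engine_cs.c and intel_gt_mcr.c above) replace open-coded mask-and-shift pairs with REG_FIELD_GET() over a single mask, so the shift is derived from the mask instead of being tracked in a separate _SHIFT define. Simplified userspace equivalents of the helpers (assumptions; the driver's versions in i915_reg_defs.h add compile-time checking):

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for REG_GENMASK()/REG_FIELD_GET()/REG_FIELD_PREP(). */
    #define GENMASK32(h, l)         ((~0u << (l)) & (~0u >> (31 - (h))))
    #define FIELD_GET32(mask, val)  (((val) & (mask)) >> __builtin_ctz(mask))
    #define FIELD_PREP32(mask, val) (((val) << __builtin_ctz(mask)) & (mask))

    #define CTC_SHIFT_PARAMETER_MASK GENMASK32(2, 1)

    int main(void)
    {
        uint32_t ctc_reg = 0x4;  /* field value 2 sitting in bits 2:1 */

        /* prints 2: the shift comes from the mask itself */
        printf("%u\n", FIELD_GET32(CTC_SHIFT_PARAMETER_MASK, ctc_reg));

        /* FIELD_PREP32 is the inverse; prints 0x4 */
        printf("%#x\n", FIELD_PREP32(CTC_SHIFT_PARAMETER_MASK, 2));
        return 0;
    }

The same idea drives the mass conversion of the intel_gt_regs.h defines below to REG_GENMASK()/REG_BIT()/REG_FIELD_PREP().
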
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index ad4c51f18d3a..75e802e10be2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -452,10 +452,10 @@ void gen8_gt_irq_reset(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
- GEN8_IRQ_RESET_NDX(uncore, GT, 0);
- GEN8_IRQ_RESET_NDX(uncore, GT, 1);
- GEN8_IRQ_RESET_NDX(uncore, GT, 2);
- GEN8_IRQ_RESET_NDX(uncore, GT, 3);
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(0));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(1));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(2));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(3));
}
void gen8_gt_irq_postinstall(struct intel_gt *gt)
@@ -476,14 +476,14 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
gt->pm_ier = 0x0;
gt->pm_imr = ~gt->pm_ier;
- GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
- GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(0), ~gt_interrupts[0], gt_interrupts[0]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(1), ~gt_interrupts[1], gt_interrupts[1]);
/*
* RPS interrupts will get enabled/disabled on demand when RPS itself
- * is enabled/disabled. Same wil be the case for GuC interrupts.
+ * is enabled/disabled. Same will be the case for GuC interrupts.
*/
- GEN8_IRQ_INIT_NDX(uncore, GT, 2, gt->pm_imr, gt->pm_ier);
- GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(2), gt->pm_imr, gt->pm_ier);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(3), ~gt_interrupts[3], gt_interrupts[3]);
}
static void gen5_gt_update_irq(struct intel_gt *gt,
@@ -514,9 +514,9 @@ void gen5_gt_irq_reset(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
- GEN3_IRQ_RESET(uncore, GT);
+ gen2_irq_reset(uncore, GT_IRQ_REGS);
if (GRAPHICS_VER(gt->i915) >= 6)
- GEN3_IRQ_RESET(uncore, GEN6_PM);
+ gen2_irq_reset(uncore, GEN6_PM_IRQ_REGS);
}
void gen5_gt_irq_postinstall(struct intel_gt *gt)
@@ -538,7 +538,7 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt)
else
gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
- GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs);
+ gen2_irq_init(uncore, GT_IRQ_REGS, gt->gt_imr, gt_irqs);
if (GRAPHICS_VER(gt->i915) >= 6) {
/*
@@ -551,6 +551,6 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt)
}
gt->pm_imr = 0xffffffff;
- GEN3_IRQ_INIT(uncore, GEN6_PM, gt->pm_imr, pm_irqs);
+ gen2_irq_init(uncore, GEN6_PM_IRQ_REGS, gt->pm_imr, pm_irqs);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index b8912bd6c08e..a60822e2b5d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -121,9 +121,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->info.mslice_mask =
intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask,
GEN_DSS_PER_MSLICE);
- gt->info.mslice_mask |=
- (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
- GEN12_MEML3_EN_MASK);
+ gt->info.mslice_mask |= REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+ intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3));
if (!gt->info.mslice_mask) /* should be impossible! */
gt_warn(gt, "mslice mask all zero!\n");
@@ -239,7 +238,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt,
* to remain in multicast mode for reads. There's no real
* downside to this, so we'll just go ahead and do so on all
* platforms; we'll only clear the multicast bit from the mask
- * when exlicitly doing a write operation.
+ * when explicitly doing a write operation.
*/
if (rw_flag == FW_REG_WRITE)
mcr_mask |= GEN11_MCR_MULTICAST;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index c08fdb65cc69..3182f19b9837 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -70,6 +70,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
{
struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
struct drm_i915_private *i915 = gt->i915;
+ struct intel_display *display = &i915->display;
GT_TRACE(gt, "\n");
@@ -84,7 +85,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
* Work around it by grabbing a GT IRQ power domain whilst there is any
* GT activity, preventing any DC state transitions.
*/
- gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+ gt->awake = intel_display_power_get(display, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
intel_rc6_unpark(&gt->rc6);
@@ -103,6 +104,7 @@ static int __gt_park(struct intel_wakeref *wf)
struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
struct drm_i915_private *i915 = gt->i915;
+ struct intel_display *display = &i915->display;
GT_TRACE(gt, "\n");
@@ -120,7 +122,7 @@ static int __gt_park(struct intel_wakeref *wf)
/* Defer dropping the display power well for 100ms, it's slow! */
GEM_BUG_ON(!wakeref);
- intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+ intel_display_power_put_async(display, POWER_DOMAIN_GT_IRQ, wakeref);
return 0;
}
@@ -156,7 +158,7 @@ void intel_gt_pm_init(struct intel_gt *gt)
static bool reset_engines(struct intel_gt *gt)
{
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ if (intel_gt_gpu_reset_clobbers_display(gt))
return false;
return intel_gt_reset_all_engines(gt) == 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 911fd0160221..6f25c747bc29 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -35,7 +35,7 @@ static inline void __intel_gt_pm_get(struct intel_gt *gt)
static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
{
if (!intel_wakeref_get_if_active(&gt->wakeref))
- return 0;
+ return NULL;
return intel_wakeref_track(&gt->wakeref);
}
@@ -73,7 +73,7 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t ha
}
#define with_intel_gt_pm(gt, wf) \
- for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0)
+ for ((wf) = intel_gt_pm_get(gt); (wf); intel_gt_pm_put((gt), (wf)), (wf) = NULL)
/**
* with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -84,7 +84,7 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t ha
* @wf: pointer to a temporary wakeref.
*/
#define with_intel_gt_pm_if_awake(gt, wf) \
- for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0)
+ for ((wf) = intel_gt_pm_get_if_awake(gt); (wf); intel_gt_pm_put_async((gt), (wf)), (wf) = NULL)
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
@@ -105,9 +105,13 @@ int intel_gt_runtime_resume(struct intel_gt *gt);
ktime_t intel_gt_get_awake_time(const struct intel_gt *gt);
+#define INTEL_WAKEREF_MOCK_GT ERR_PTR(-ENODEV)
+
static inline bool is_mock_gt(const struct intel_gt *gt)
{
- return I915_SELFTEST_ONLY(gt->awake == -ENODEV);
+ BUILD_BUG_ON(INTEL_WAKEREF_DEF == INTEL_WAKEREF_MOCK_GT);
+
+ return I915_SELFTEST_ONLY(gt->awake == INTEL_WAKEREF_MOCK_GT);
}
#endif /* INTEL_GT_PM_H */
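
The with_intel_gt_pm() helpers now fully parenthesize their macro arguments and use NULL instead of 0, following intel_wakeref_t becoming a pointer type (see also the INTEL_WAKEREF_MOCK_GT change above). Call sites are unchanged; a sketch of the intended usage, where do_work() is a hypothetical callee:

    intel_wakeref_t wf;

    with_intel_gt_pm(gt, wf) {
        /* GT is held awake (wf != NULL) for the whole block; the
         * wakeref is dropped again when the block falls through.
         */
        do_work(gt);
    }
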
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 8d08b38874ef..b635aa2820d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -431,7 +431,7 @@ static int llc_show(struct seq_file *m, void *data)
max_gpu_freq /= GEN9_FREQ_SCALER;
}
- seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+ seq_puts(m, "GPU freq (MHz)\tEffective GPU freq (MHz)\tEffective Ring freq (MHz)\n");
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 57a3c83d3655..7421ed18d8d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -30,18 +30,15 @@
/* RPM unit config (Gen8+) */
#define RPM_CONFIG0 _MMIO(0xd00)
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3
-#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1
-#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 2)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 3)
+#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_BIT(3)
+#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0)
+#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1)
+#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1)
#define RPM_CONFIG1 _MMIO(0xd04)
#define GEN10_GT_NOA_ENABLE (1 << 9)
@@ -326,6 +323,12 @@
_RING_FAULT_REG_VCS, \
_RING_FAULT_REG_VECS, \
_RING_FAULT_REG_BCS))
+#define RING_FAULT_VADDR_MASK REG_GENMASK(31, 12) /* pre-bdw */
+#define RING_FAULT_ENGINE_ID_MASK REG_GENMASK(16, 12) /* bdw+ */
+#define RING_FAULT_GTTSEL_MASK REG_BIT(11) /* pre-bdw */
+#define RING_FAULT_SRCID_MASK REG_GENMASK(10, 3)
+#define RING_FAULT_FAULT_TYPE_MASK REG_GENMASK(2, 1) /* ivb+ */
+#define RING_FAULT_VALID REG_BIT(0)
#define ERROR_GEN6 _MMIO(0x40a0)
@@ -385,6 +388,8 @@
#define GEN8_FAULT_TLB_DATA0 _MMIO(0x4b10)
#define GEN8_FAULT_TLB_DATA1 _MMIO(0x4b14)
+#define FAULT_GTT_SEL REG_BIT(4)
+#define FAULT_VA_HIGH_BITS REG_GENMASK(3, 0)
#define GEN11_GACB_PERF_CTRL _MMIO(0x4b80)
#define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0)
@@ -409,6 +414,9 @@
#define GEN7_SO_PRIM_STORAGE_NEEDED(n) _MMIO(0x5240 + (n) * 8)
#define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n) _MMIO(0x5240 + (n) * 8 + 4)
+#define GEN8_WM_CHICKEN2 MCR_REG(0x5584)
+#define WAIT_ON_DEPTH_STALL_DONE_DISABLE REG_BIT(5)
+
#define GEN9_WM_CHICKEN3 _MMIO(0x5588)
#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9)
@@ -432,6 +440,7 @@
#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c)
#define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */
+#define DISABLE_REPACKING_FOR_COMPRESSION REG_BIT(15) /* jsl+ */
#define RC_OP_FLUSH_ENABLE (1 << 0)
#define HIZ_RAW_STALL_OPT_DISABLE (1 << 2)
#define CACHE_MODE_1 _MMIO(0x7004) /* IVB+ */
@@ -503,11 +512,12 @@
#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
#define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4)
+#define GEN9_PGCTL_SS_ACK(subslice) REG_BIT(2 + (subslice) * 2)
+#define GEN9_PGCTL_SLICE_ACK REG_BIT(0)
+
#define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \
((slice) % 3) * 0x4)
-#define GEN9_PGCTL_SLICE_ACK (1 << 0)
-#define GEN9_PGCTL_SS_ACK(subslice) (1 << (2 + (subslice) * 2))
-#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? 0x7F : 0x1F)
+#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? REG_GENMASK(6, 0) : REG_GENMASK(4, 0))
#define GEN9_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + (slice) * 0x8)
#define GEN10_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + ((slice) / 3) * 0x30 + \
@@ -515,14 +525,14 @@
#define GEN9_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + (slice) * 0x8)
#define GEN10_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + ((slice) / 3) * 0x30 + \
((slice) % 3) * 0x8)
-#define GEN9_PGCTL_SSA_EU08_ACK (1 << 0)
-#define GEN9_PGCTL_SSA_EU19_ACK (1 << 2)
-#define GEN9_PGCTL_SSA_EU210_ACK (1 << 4)
-#define GEN9_PGCTL_SSA_EU311_ACK (1 << 6)
-#define GEN9_PGCTL_SSB_EU08_ACK (1 << 8)
-#define GEN9_PGCTL_SSB_EU19_ACK (1 << 10)
-#define GEN9_PGCTL_SSB_EU210_ACK (1 << 12)
-#define GEN9_PGCTL_SSB_EU311_ACK (1 << 14)
+#define GEN9_PGCTL_SSB_EU311_ACK REG_BIT(14)
+#define GEN9_PGCTL_SSB_EU210_ACK REG_BIT(12)
+#define GEN9_PGCTL_SSB_EU19_ACK REG_BIT(10)
+#define GEN9_PGCTL_SSB_EU08_ACK REG_BIT(8)
+#define GEN9_PGCTL_SSA_EU311_ACK REG_BIT(6)
+#define GEN9_PGCTL_SSA_EU210_ACK REG_BIT(4)
+#define GEN9_PGCTL_SSA_EU19_ACK REG_BIT(2)
+#define GEN9_PGCTL_SSA_EU08_ACK REG_BIT(0)
#define VF_PREEMPTION _MMIO(0x83a4)
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
@@ -579,7 +589,7 @@
#define GEN10_L3BANK_MASK 0x0F
/* on Xe_HP the same fuses indicates mslices instead of L3 banks */
#define GEN12_MAX_MSLICES 4
-#define GEN12_MEML3_EN_MASK 0x0F
+#define GEN12_MEML3_EN_MASK REG_GENMASK(3, 0)
#define HSW_PAVP_FUSE1 _MMIO(0x911c)
#define XEHP_SFC_ENABLE_MASK REG_GENMASK(27, 24)
@@ -589,37 +599,30 @@
#define HSW_F1_EU_DIS_6EUS 2
#define GEN8_FUSE2 _MMIO(0x9120)
-#define GEN8_F2_SS_DIS_SHIFT 21
-#define GEN8_F2_SS_DIS_MASK (0x7 << GEN8_F2_SS_DIS_SHIFT)
-#define GEN8_F2_S_ENA_SHIFT 25
-#define GEN8_F2_S_ENA_MASK (0x7 << GEN8_F2_S_ENA_SHIFT)
-#define GEN9_F2_SS_DIS_SHIFT 20
-#define GEN9_F2_SS_DIS_MASK (0xf << GEN9_F2_SS_DIS_SHIFT)
-#define GEN10_F2_S_ENA_SHIFT 22
-#define GEN10_F2_S_ENA_MASK (0x3f << GEN10_F2_S_ENA_SHIFT)
-#define GEN10_F2_SS_DIS_SHIFT 18
-#define GEN10_F2_SS_DIS_MASK (0xf << GEN10_F2_SS_DIS_SHIFT)
+#define GEN10_F2_S_ENA_MASK REG_GENMASK(27, 22)
+#define GEN10_F2_SS_DIS_MASK REG_GENMASK(21, 18)
+#define GEN8_F2_S_ENA_MASK REG_GENMASK(27, 25)
+#define GEN9_F2_SS_DIS_MASK REG_GENMASK(23, 20)
+#define GEN8_F2_SS_DIS_MASK REG_GENMASK(23, 21)
#define GEN8_EU_DISABLE0 _MMIO(0x9134)
#define GEN9_EU_DISABLE(slice) _MMIO(0x9134 + (slice) * 0x4)
#define GEN11_EU_DISABLE _MMIO(0x9134)
-#define GEN8_EU_DIS0_S0_MASK 0xffffff
-#define GEN8_EU_DIS0_S1_SHIFT 24
-#define GEN8_EU_DIS0_S1_MASK (0xff << GEN8_EU_DIS0_S1_SHIFT)
-#define GEN11_EU_DIS_MASK 0xFF
+#define GEN8_EU_DIS0_S1_MASK REG_GENMASK(31, 24)
+#define GEN8_EU_DIS0_S0_MASK REG_GENMASK(23, 0)
+#define GEN11_EU_DIS_MASK REG_GENMASK(7, 0)
#define XEHP_EU_ENABLE _MMIO(0x9134)
-#define XEHP_EU_ENA_MASK 0xFF
+#define XEHP_EU_ENA_MASK REG_GENMASK(7, 0)
#define GEN8_EU_DISABLE1 _MMIO(0x9138)
-#define GEN8_EU_DIS1_S1_MASK 0xffff
-#define GEN8_EU_DIS1_S2_SHIFT 16
-#define GEN8_EU_DIS1_S2_MASK (0xffff << GEN8_EU_DIS1_S2_SHIFT)
+#define GEN8_EU_DIS1_S2_MASK REG_GENMASK(31, 16)
+#define GEN8_EU_DIS1_S1_MASK REG_GENMASK(15, 0)
#define GEN11_GT_SLICE_ENABLE _MMIO(0x9138)
-#define GEN11_GT_S_ENA_MASK 0xFF
+#define GEN11_GT_S_ENA_MASK REG_GENMASK(7, 0)
#define GEN8_EU_DISABLE2 _MMIO(0x913c)
-#define GEN8_EU_DIS2_S2_MASK 0xff
+#define GEN8_EU_DIS2_S2_MASK REG_GENMASK(7, 0)
#define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913c)
#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913c)
@@ -627,9 +630,8 @@
#define GEN10_EU_DISABLE3 _MMIO(0x9140)
#define GEN10_EU_DIS_SS_MASK 0xff
#define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140)
-#define GEN11_GT_VDBOX_DISABLE_MASK 0xff
-#define GEN11_GT_VEBOX_DISABLE_SHIFT 16
-#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
+#define GEN11_GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16)
+#define GEN11_GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0)
#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144)
#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148)
@@ -877,11 +879,10 @@
/* GPM unit config (Gen9+) */
#define CTC_MODE _MMIO(0xa26c)
-#define CTC_SOURCE_PARAMETER_MASK 1
-#define CTC_SOURCE_CRYSTAL_CLOCK 0
-#define CTC_SOURCE_DIVIDE_LOGIC 1
-#define CTC_SHIFT_PARAMETER_SHIFT 1
-#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT)
+#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1)
+#define CTC_SOURCE_PARAMETER_MASK REG_BIT(0)
+#define CTC_SOURCE_CRYSTAL_CLOCK REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 0)
+#define CTC_SOURCE_DIVIDE_LOGIC REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 1)
/* GPM MSG_IDLE */
#define MSG_IDLE_CS _MMIO(0x8000)
@@ -925,12 +926,12 @@
#define CHV_POWER_SS0_SIG1 _MMIO(0xa720)
#define CHV_POWER_SS0_SIG2 _MMIO(0xa724)
#define CHV_POWER_SS1_SIG1 _MMIO(0xa728)
-#define CHV_SS_PG_ENABLE (1 << 1)
-#define CHV_EU08_PG_ENABLE (1 << 9)
-#define CHV_EU19_PG_ENABLE (1 << 17)
-#define CHV_EU210_PG_ENABLE (1 << 25)
+#define CHV_EU210_PG_ENABLE REG_BIT(25)
+#define CHV_EU19_PG_ENABLE REG_BIT(17)
+#define CHV_EU08_PG_ENABLE REG_BIT(9)
+#define CHV_SS_PG_ENABLE REG_BIT(1)
#define CHV_POWER_SS1_SIG2 _MMIO(0xa72c)
-#define CHV_EU311_PG_ENABLE (1 << 1)
+#define CHV_EU311_PG_ENABLE REG_BIT(1)
#define GEN7_SARCHKMD _MMIO(0xb000)
#define GEN7_DISABLE_DEMAND_PREFETCH (1 << 31)
@@ -1034,17 +1035,12 @@
#define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8)
#define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc)
#define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc)
-#define FAULT_VA_HIGH_BITS (0xf << 0)
-#define FAULT_GTT_SEL (1 << 4)
+/* see GEN8_FAULT_TLB_DATA0/1 */
#define GEN12_RING_FAULT_REG _MMIO(0xcec4)
#define XEHP_RING_FAULT_REG MCR_REG(0xcec4)
#define XELPMP_RING_FAULT_REG _MMIO(0xcec4)
-#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7)
-#define RING_FAULT_GTTSEL_MASK (1 << 11)
-#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff)
-#define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3)
-#define RING_FAULT_VALID (1 << 0)
+/* see GEN8_RING_FAULT_REG */
#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
#define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8)
@@ -1433,16 +1429,12 @@
#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
-#define CHV_FGT_DISABLE_SS0 (1 << 10)
-#define CHV_FGT_DISABLE_SS1 (1 << 11)
-#define CHV_FGT_EU_DIS_SS0_R0_SHIFT 16
-#define CHV_FGT_EU_DIS_SS0_R0_MASK (0xf << CHV_FGT_EU_DIS_SS0_R0_SHIFT)
-#define CHV_FGT_EU_DIS_SS0_R1_SHIFT 20
-#define CHV_FGT_EU_DIS_SS0_R1_MASK (0xf << CHV_FGT_EU_DIS_SS0_R1_SHIFT)
-#define CHV_FGT_EU_DIS_SS1_R0_SHIFT 24
-#define CHV_FGT_EU_DIS_SS1_R0_MASK (0xf << CHV_FGT_EU_DIS_SS1_R0_SHIFT)
-#define CHV_FGT_EU_DIS_SS1_R1_SHIFT 28
-#define CHV_FGT_EU_DIS_SS1_R1_MASK (0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
+#define CHV_FGT_EU_DIS_SS1_R1_MASK REG_GENMASK(31, 28)
+#define CHV_FGT_EU_DIS_SS1_R0_MASK REG_GENMASK(27, 24)
+#define CHV_FGT_EU_DIS_SS0_R1_MASK REG_GENMASK(23, 20)
+#define CHV_FGT_EU_DIS_SS0_R0_MASK REG_GENMASK(19, 16)
+#define CHV_FGT_DISABLE_SS1 REG_BIT(11)
+#define CHV_FGT_DISABLE_SS0 REG_BIT(10)
#define BCS_SWCTRL _MMIO(0x22200)
#define BCS_SRC_Y REG_BIT(0)
@@ -1472,6 +1464,10 @@
GEN6_PM_RP_DOWN_THRESHOLD | \
GEN6_PM_RP_DOWN_TIMEOUT)
+#define GEN6_PM_IRQ_REGS I915_IRQ_REGS(GEN6_PMIMR, \
+ GEN6_PMIER, \
+ GEN6_PMIIR)
+
#define GEN7_GT_SCRATCH(i) _MMIO(0x4f100 + (i) * 4)
#define GEN7_GT_SCRATCH_REG_NUM 8
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index d7784650e4d9..1154cd2b7c34 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -464,6 +464,45 @@ static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj,
return err ?: count;
}
+static ssize_t slpc_power_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buff)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+
+ switch (slpc->power_profile) {
+ case SLPC_POWER_PROFILES_BASE:
+ return sysfs_emit(buff, "[%s] %s\n", "base", "power_saving");
+ case SLPC_POWER_PROFILES_POWER_SAVING:
+ return sysfs_emit(buff, "%s [%s]\n", "base", "power_saving");
+ }
+
+ return sysfs_emit(buff, "%u\n", slpc->power_profile);
+}
+
+static ssize_t slpc_power_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ char power_saving[] = "power_saving";
+ char base[] = "base";
+ int err;
+ u32 val;
+
+ if (!strncmp(buff, power_saving, sizeof(power_saving) - 1))
+ val = SLPC_POWER_PROFILES_POWER_SAVING;
+ else if (!strncmp(buff, base, sizeof(base) - 1))
+ val = SLPC_POWER_PROFILES_BASE;
+ else
+ return -EINVAL;
+
+ err = intel_guc_slpc_set_power_profile(slpc, val);
+ return err ?: count;
+}
+
struct intel_gt_bool_throttle_attr {
struct attribute attr;
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
@@ -668,6 +707,7 @@ INTEL_GT_ATTR_RO(media_RP0_freq_mhz);
INTEL_GT_ATTR_RO(media_RPn_freq_mhz);
INTEL_GT_ATTR_RW(slpc_ignore_eff_freq);
+INTEL_GT_ATTR_RW(slpc_power_profile);
static const struct attribute *media_perf_power_attrs[] = {
&attr_media_freq_factor.attr,
@@ -864,6 +904,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
gt_warn(gt, "failed to create ignore_eff_freq sysfs (%pe)", ERR_PTR(ret));
}
+ if (intel_uc_uses_guc_slpc(&gt->uc)) {
+ ret = sysfs_create_file(kobj, &attr_slpc_power_profile.attr);
+ if (ret)
+ gt_warn(gt, "failed to create slpc_power_profile sysfs (%pe)",
+ ERR_PTR(ret));
+ }
+
if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
ret = sysfs_create_files(kobj, throttle_reason_attrs);
if (ret)
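
slpc_power_profile_store() above matches its input with strncmp() and sizeof(literal) - 1, i.e. a prefix comparison over the literal without its NUL terminator. That tolerates the trailing newline that `echo power_saving > slpc_power_profile` leaves in the sysfs buffer. A runnable illustration:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char power_saving[] = "power_saving";
        const char *buff = "power_saving\n";  /* what echo(1) hands to sysfs */

        /* prints "match": only sizeof - 1 = 12 characters are compared */
        if (!strncmp(buff, power_saving, sizeof(power_saving) - 1))
            puts("match");
        return 0;
    }

The trade-off is that any longer string sharing the prefix (e.g. "power_savingX") is accepted as well.
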
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 30b128b1fde7..afbc5c769308 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -176,7 +176,6 @@ static void clear_vm_list(struct list_head *list)
i915_vma_destroy_locked(vma);
i915_gem_object_put(obj);
}
-
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 6b85222ee3ea..9d3a3ad567a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -312,6 +312,7 @@ struct i915_address_space {
u64 (*pte_encode)(dma_addr_t addr,
unsigned int pat_index,
u32 flags); /* Create a valid PTE */
+ dma_addr_t (*pte_decode)(u64 pte, bool *is_present, bool *is_local);
#define PTE_READ_ONLY BIT(0)
#define PTE_LM BIT(1)
@@ -340,6 +341,8 @@ struct i915_address_space {
struct i915_vma_resource *vma_res,
unsigned int pat_index,
u32 flags);
+ dma_addr_t (*read_entry)(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local);
void (*cleanup)(struct i915_address_space *vm);
void (*foreach)(struct i915_address_space *vm,
@@ -590,6 +593,9 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm,
void intel_ggtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res);
+dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local);
+
int i915_ggtt_probe_hw(struct drm_i915_private *i915);
int i915_ggtt_init_hw(struct drm_i915_private *i915);
int i915_ggtt_enable_hw(struct drm_i915_private *i915);
@@ -608,8 +614,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt);
struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
unsigned long lmem_pt_obj_flags);
-void i915_ggtt_suspend_vm(struct i915_address_space *vm);
-bool i915_ggtt_resume_vm(struct i915_address_space *vm);
+void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all);
+bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7bd5d2c29056..c481b56fa67d 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -751,7 +751,6 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
-
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
/*
* Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
@@ -820,8 +819,10 @@ static bool ctx_needs_runalone(const struct intel_context *ce)
bool ctx_is_protected = false;
/*
- * On MTL and newer platforms, protected contexts require setting
- * the LRC run-alone bit or else the encryption will not happen.
+ * Wa_14019159160 - Case 2.
+ * On some platforms, protected contexts require setting
+ * the LRC run-alone bit or else the encryption/decryption will not happen.
+ * NOTE: Case 2 only applies to PXP use-case of said workaround.
*/
if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
(ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
@@ -850,6 +851,7 @@ static void init_common_regs(u32 * const regs,
if (GRAPHICS_VER(engine->i915) < 11)
ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
+ /* Wa_14019159160 - Case 2.*/
if (ctx_needs_runalone(ce))
ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
regs[CTX_CONTEXT_CONTROL] = ctl;
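
CTX_CONTEXT_CONTROL is a masked register: the upper 16 bits of each write select which of the lower 16 bits actually change, which is why the run-alone bit is applied with _MASKED_BIT_ENABLE() rather than a plain OR. A simplified sketch of the helpers (assumed to mirror i915's _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE(); the RUNALONE bit position is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define MASKED_BIT_ENABLE(a)  (((a) << 16) | (a))  /* set bit + its write mask */
    #define MASKED_BIT_DISABLE(a) ((a) << 16)          /* clear bit + its write mask */

    #define RUNALONE_MODE         (1u << 7)            /* illustrative position */

    int main(void)
    {
        /* Only bit 7 is touched by the write; every other bit keeps
         * its value because its mask bit in 31:16 stays zero.
         */
        uint32_t ctl = MASKED_BIT_ENABLE(RUNALONE_MODE);

        printf("%#x\n", ctl);  /* prints 0x800080 */
        return 0;
    }
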
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 6f7af4077135..aff5aca591e6 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -304,7 +304,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m)
struct intel_context *ce;
/*
- * We randomly distribute contexts across the engines upon constrction,
+ * We randomly distribute contexts across the engines upon construction,
* as they all share the same pinned vm, and so in order to allow
* multiple blits to run in parallel, we must construct each blit
* to use a different range of the vm for its GTT. This has to be
@@ -646,7 +646,7 @@ calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
* When CHUNK_SZ is passed all the pages upto CHUNK_SZ
* will be taken for the blt. in Flat-ccs supported
* platform Smem obj will have more pages than required
- * for main meory hence limit it to the required size
+ * for main memory hence limit it to the required size
* for main memory
*/
return min_t(u64, bytes_to_cpy, CHUNK_SZ);
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index d791d63d49b4..5dd8121f4b15 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -314,7 +314,6 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
};
static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
-
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
/* WB - L3 */
@@ -675,7 +674,7 @@ void intel_mocs_init(struct intel_gt *gt)
__init_mocs_table(gt->uncore, &table, global_mocs_offset());
/*
- * Initialize the L3CC table as part of mocs initalization to make
+ * Initialize the L3CC table as part of mocs initialization to make
* sure the LNCFCMOCSx registers are programmed for the subsequent
* memory transactions including guc transactions
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index c864d101faf9..9ca42589da4d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -117,23 +117,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
GEN6_RC_CTL_EI_MODE(1);
- /*
- * BSpec 52698 - Render powergating must be off.
- * FIXME BSpec is outdated, disabling powergating for MTL is just
- * temporary wa and should be removed after fixing real cause
- * of forcewake timeouts.
- */
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
- pg_enable =
- GEN9_MEDIA_PG_ENABLE |
- GEN11_MEDIA_SAMPLER_PG_ENABLE;
- else
- pg_enable =
- GEN9_RENDER_PG_ENABLE |
- GEN9_MEDIA_PG_ENABLE |
- GEN11_MEDIA_SAMPLER_PG_ENABLE;
+ pg_enable =
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE;
- if (GRAPHICS_VER(gt->i915) >= 12) {
+ if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) {
for (i = 0; i < I915_MAX_VCS; i++)
if (HAS_ENGINE(gt, _VCS(i)))
pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
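For reference, the simplified hunk composes the powergate mask in two steps: unconditional render/media/sampler bits, then per-engine bits for each video engine actually present. A sketch under assumed (illustrative) bit layouts, with vcs_present_mask standing in for HAS_ENGINE():

        #include <stdint.h>

        #define RENDER_PG_ENABLE        (1u << 0)       /* illustrative positions */
        #define MEDIA_PG_ENABLE         (1u << 1)
        #define MEDIA_SAMPLER_PG_ENABLE (1u << 2)
        #define VDN_HCP_POWERGATE(n)    (1u << (3 + 2 * (n)))
        #define VDN_MFX_POWERGATE(n)    (1u << (4 + 2 * (n)))
        #define MAX_VCS                 8

        static uint32_t build_pg_enable(uint32_t vcs_present_mask)
        {
                uint32_t pg = RENDER_PG_ENABLE |
                              MEDIA_PG_ENABLE |
                              MEDIA_SAMPLER_PG_ENABLE;
                int i;

                for (i = 0; i < MAX_VCS; i++)
                        if (vcs_present_mask & (1u << i))
                                pg |= VDN_HCP_POWERGATE(i) | VDN_MFX_POWERGATE(i);

                return pg;
        }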
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 8f1ea95471ef..dbdcfe130ad4 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -986,7 +986,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
awake = reset_prepare(gt);
/* Even if the GPU reset fails, it should still stop the engines */
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
intel_gt_reset_all_engines(gt);
for_each_engine(engine, gt, id)
@@ -1098,7 +1098,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
dma_fence_put(fence);
- /* Restart iteration after droping lock */
+ /* Restart iteration after dropping lock */
spin_lock(&timelines->lock);
tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
@@ -1106,7 +1106,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
ok = intel_gt_reset_all_engines(gt) == 0;
if (!ok) {
/*
@@ -1177,6 +1177,13 @@ static int resume(struct intel_gt *gt)
return 0;
}
+bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ return INTEL_INFO(i915)->gpu_reset_clobbers_display;
+}
+
/**
* intel_gt_reset - reset chip after a hang
* @gt: #intel_gt to reset
@@ -1198,6 +1205,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t stalled_mask,
const char *reason)
{
+ struct intel_display *display = &gt->i915->display;
intel_engine_mask_t awake;
int ret;
@@ -1232,18 +1240,18 @@ void intel_gt_reset(struct intel_gt *gt,
goto error;
}
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_disable_interrupts(gt->i915);
+ if (intel_gt_gpu_reset_clobbers_display(gt))
+ intel_irq_suspend(gt->i915);
if (do_reset(gt, stalled_mask)) {
gt_err(gt, "Failed to reset chip\n");
goto taint;
}
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_enable_interrupts(gt->i915);
+ if (intel_gt_gpu_reset_clobbers_display(gt))
+ intel_irq_resume(gt->i915);
- intel_overlay_reset(gt->i915);
+ intel_overlay_reset(display);
/* sanitize uC after engine reset */
if (!intel_uc_uses_guc_submission(&gt->uc))
@@ -1392,6 +1400,11 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
return err;
}
+static void display_reset_modeset_stuck(void *gt)
+{
+ intel_gt_set_wedged(gt);
+}
+
static void intel_gt_reset_global(struct intel_gt *gt,
u32 engine_mask,
const char *reason)
@@ -1409,15 +1422,33 @@ static void intel_gt_reset_global(struct intel_gt *gt,
/* Use a watchdog to ensure that our reset completes */
intel_wedge_on_timeout(&w, gt, 60 * HZ) {
- intel_display_reset_prepare(gt->i915);
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_display *display = &i915->display;
+ bool need_display_reset;
+ bool reset_display;
+
+ need_display_reset = intel_gt_gpu_reset_clobbers_display(gt) &&
+ intel_has_gpu_reset(gt);
+
+ reset_display = intel_display_reset_test(display) ||
+ need_display_reset;
+
+ if (reset_display)
+ reset_display = intel_display_reset_prepare(display,
+ display_reset_modeset_stuck,
+ gt);
intel_gt_reset(gt, engine_mask, reason);
- intel_display_reset_finish(gt->i915);
+ if (reset_display)
+ intel_display_reset_finish(display, !need_display_reset);
}
if (!test_bit(I915_WEDGED, &gt->reset.flags))
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
+ else
+ drm_dev_wedged_event(&gt->i915->drm,
+ DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET);
}
/**
@@ -1478,7 +1509,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
local_bh_disable();
for_each_engine_masked(engine, gt, engine_mask, tmp) {
- BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+ BUILD_BUG_ON(I915_RESET_BACKOFF >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
continue;
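The updated BUILD_BUG_ON documents a layout invariant rather than behaviour: per-engine reset bits live at I915_RESET_ENGINE + engine->id, so every control bit must sort strictly below that base or an engine bit would alias it. A compile-time sketch of the same invariant, with a hypothetical engine count:

        #include <assert.h>

        enum {
                RESET_BACKOFF = 0,      /* global control bit */
                RESET_ENGINE  = 1,      /* base of one bit per engine */
        };
        #define NUM_ENGINES 8

        /* Any control bit must sit strictly below the per-engine bit range. */
        static_assert(RESET_BACKOFF < RESET_ENGINE, "control bit aliases engine bit");

        static unsigned long engine_reset_bit(unsigned int id)
        {
                assert(id < NUM_ENGINES);
                return 1ul << (RESET_ENGINE + id);
        }

With I915_RESET_MODESET gone (see the intel_reset_types.h hunk below), the engine bits shift down by one, which is why the guard now names I915_RESET_BACKOFF.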
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index c00de353075c..724ea6d64f33 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -28,6 +28,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
const char *fmt, ...);
#define I915_ERROR_CAPTURE BIT(0)
+bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt);
+
void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t stalled_mask,
const char *reason);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
index 80351f0a856c..4f5fd393af6f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -41,8 +41,7 @@ struct intel_reset {
*/
unsigned long flags;
#define I915_RESET_BACKOFF 0
-#define I915_RESET_MODESET 1
-#define I915_RESET_ENGINE 2
+#define I915_RESET_ENGINE 1
#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 3)
#define I915_WEDGED_ON_FINI (BITS_PER_LONG - 2)
#define I915_WEDGED (BITS_PER_LONG - 1)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 59da4b7bd262..b74d9205c0f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -308,30 +308,6 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
return cs;
}
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
- int num_dwords;
- void *cs;
-
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
- if (num_dwords == 0)
- return 0;
-
- num_dwords = CACHELINE_DWORDS - num_dwords;
- GEM_BUG_ON(num_dwords & 1);
-
- cs = intel_ring_begin(rq, num_dwords);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
- intel_ring_advance(rq, cs + num_dwords);
-
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
- return 0;
-}
-
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index 1b32dadfb8c3..64b322e25f36 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -16,7 +16,6 @@ struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
-int intel_ring_cacheline_align(struct i915_request *rq);
unsigned int intel_ring_update_space(struct intel_ring *ring);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 72277bc8322e..a876a34455f1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -26,6 +26,7 @@
#include "shmem_utils.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
+#include "intel_gt_print.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -192,6 +193,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
static int xcs_resume(struct intel_engine_cs *engine)
{
struct intel_ring *ring = engine->legacy.ring;
+ ktime_t kt;
ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
ring->head, ring->tail);
@@ -229,10 +231,33 @@ static int xcs_resume(struct intel_engine_cs *engine)
set_pp_dir(engine);
- /* First wake the ring up to an empty/idle ring */
- ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+ /*
+ * First wake the ring up to an empty/idle ring.
+ * Allow up to 50ms for the write to take effect on all
+ * platforms; testing with a range of delays showed 50ms
+ * to be the most reliable.
+ */
+ for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
+ ktime_before(ktime_get(), (kt)); cpu_relax()) {
+ /*
+ * Resets can fail when the engine resumes from an
+ * incorrect RING_HEAD, as the GPU may then be fed
+ * invalid instructions, leading to an unrecoverable
+ * hang. So if the first write doesn't stick, try again.
+ */
+ ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+ if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
+ break;
+ }
+
ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
- ENGINE_POSTING_READ(engine, RING_TAIL);
+ if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
+ ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
+ ENGINE_READ_FW(engine, RING_HEAD),
+ ENGINE_READ_FW(engine, RING_TAIL),
+ ring->head);
+ goto err;
+ }
ENGINE_WRITE_FW(engine, RING_CTL,
RING_CTL_SIZE(ring->size) | RING_VALID);
@@ -241,12 +266,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
if (__intel_wait_for_register_fw(engine->uncore,
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
- 5000, 0, NULL))
+ 5000, 0, NULL)) {
+ ENGINE_TRACE(engine, "failed to restart\n");
goto err;
+ }
- if (GRAPHICS_VER(engine->i915) > 2)
+ if (GRAPHICS_VER(engine->i915) > 2) {
ENGINE_WRITE_FW(engine,
RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+ ENGINE_POSTING_READ(engine, RING_MI_MODE);
+ }
/* Now awake, let it get started */
if (ring->tail != ring->head) {
@@ -259,16 +288,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
return 0;
err:
- drm_err(&engine->i915->drm,
- "%s initialization failed; "
- "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_CTL) & RING_VALID,
- ENGINE_READ(engine, RING_HEAD), ring->head,
- ENGINE_READ(engine, RING_TAIL), ring->tail,
- ENGINE_READ(engine, RING_START),
- i915_ggtt_offset(ring->vma));
+ gt_err(engine->gt, "%s initialization failed\n", engine->name);
+ ENGINE_TRACE(engine,
+ "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_CTL) & RING_VALID,
+ ENGINE_READ(engine, RING_HEAD), ring->head,
+ ENGINE_READ(engine, RING_TAIL), ring->tail,
+ ENGINE_READ(engine, RING_START),
+ i915_ggtt_offset(ring->vma));
+ GEM_TRACE_DUMP();
return -EIO;
}
@@ -336,7 +365,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
ENGINE_READ_FW(engine, RING_HEAD),
ENGINE_READ_FW(engine, RING_TAIL),
ENGINE_READ_FW(engine, RING_START));
- if (!stop_ring(engine)) {
+ /*
+ * Sometimes the engine head fails to reset to zero even after it
+ * is written. Poll with wait_for_atomic() for up to 20ms so the
+ * engine resumes from the correct RING_HEAD; testing with a range
+ * of timeouts showed 20ms to be the most reliable.
+ */
+ if (wait_for_atomic((!stop_ring(engine) == 0), 20)) {
drm_err(&engine->i915->drm,
"failed to set %s head to zero "
"ctl %08x head %08x tail %08x start %08x\n",
@@ -1090,9 +1125,6 @@ static void setup_irq(struct intel_engine_cs *engine)
} else if (GRAPHICS_VER(i915) >= 5) {
engine->irq_enable = gen5_irq_enable;
engine->irq_disable = gen5_irq_disable;
- } else if (GRAPHICS_VER(i915) >= 3) {
- engine->irq_enable = gen3_irq_enable;
- engine->irq_disable = gen3_irq_disable;
} else {
engine->irq_enable = gen2_irq_enable;
engine->irq_disable = gen2_irq_disable;
@@ -1146,7 +1178,7 @@ static void setup_common(struct intel_engine_cs *engine)
* equivalent to our next initial breadcrumb so we can elide
* engine->emit_init_breadcrumb().
*/
- engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
if (GRAPHICS_VER(i915) == 5)
engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
@@ -1159,7 +1191,7 @@ static void setup_common(struct intel_engine_cs *engine)
else if (IS_I830(i915) || IS_I845G(i915))
engine->emit_bb_start = i830_emit_bb_start;
else
- engine->emit_bb_start = gen3_emit_bb_start;
+ engine->emit_bb_start = gen2_emit_bb_start;
}
static void setup_rcs(struct intel_engine_cs *engine)
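The new xcs_resume() RING_HEAD handling above is a bounded write-and-verify loop: rewrite the register until a readback matches or the time budget expires, then fail loudly instead of running from a bogus head. A standalone sketch of the pattern, assuming caller-supplied read/write callbacks and a monotonic clock:

        #include <stdbool.h>
        #include <stdint.h>
        #include <time.h>

        static uint64_t now_ns(void)
        {
                struct timespec ts;

                clock_gettime(CLOCK_MONOTONIC, &ts);
                return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
        }

        /* Rewrite the register until the readback matches or the budget expires. */
        static bool write_verify(void (*wr)(uint32_t), uint32_t (*rd)(void),
                                 uint32_t val, uint64_t budget_ns)
        {
                uint64_t deadline = now_ns() + budget_ns;

                do {
                        wr(val);
                        if (rd() == val)
                                return true;
                } while (now_ns() < deadline);

                return rd() == val;     /* one last check after the deadline */
        }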
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index fa304ea088e4..eb89948cc112 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -8,7 +8,7 @@
#include <drm/intel/i915_drm.h>
#include "display/intel_display.h"
-#include "display/intel_display_irq.h"
+#include "display/intel_display_rps.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_reg.h"
@@ -74,7 +74,7 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
static void rps_timer(struct timer_list *t)
{
- struct intel_rps *rps = from_timer(rps, t, timer);
+ struct intel_rps *rps = timer_container_of(rps, t, timer);
struct intel_gt *gt = rps_to_gt(rps);
struct intel_engine_cs *engine;
ktime_t dt, last, timestamp;
@@ -161,7 +161,7 @@ static void rps_start_timer(struct intel_rps *rps)
static void rps_stop_timer(struct intel_rps *rps)
{
- del_timer_sync(&rps->timer);
+ timer_delete_sync(&rps->timer);
rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
cancel_work_sync(&rps->work);
}
@@ -550,6 +550,7 @@ static unsigned int init_emon(struct intel_uncore *uncore)
static bool gen5_rps_enable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_display *display = &i915->display;
struct intel_uncore *uncore = rps_to_uncore(rps);
u8 fstart, vstart;
u32 rgvmodectl;
@@ -607,9 +608,7 @@ static bool gen5_rps_enable(struct intel_rps *rps)
rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
rps->ips.last_time2 = ktime_get_raw_ns();
- spin_lock(&i915->irq_lock);
- ilk_enable_display_irq(i915, DE_PCU_EVENT);
- spin_unlock(&i915->irq_lock);
+ ilk_display_rps_enable(display);
spin_unlock_irq(&mchdev_lock);
@@ -621,14 +620,13 @@ static bool gen5_rps_enable(struct intel_rps *rps)
static void gen5_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_display *display = &i915->display;
struct intel_uncore *uncore = rps_to_uncore(rps);
u16 rgvswctl;
spin_lock_irq(&mchdev_lock);
- spin_lock(&i915->irq_lock);
- ilk_disable_display_irq(i915, DE_PCU_EVENT);
- spin_unlock(&i915->irq_lock);
+ ilk_display_rps_disable(display);
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
@@ -1001,6 +999,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
+ /* Don't decrement num_waiters for requests where the increment was skipped */
+ if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
+ return;
+
intel_guc_slpc_dec_waiters(slpc);
} else {
atomic_dec(&rps->num_waiters);
@@ -1025,11 +1027,19 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
- if (slpc->min_freq_softlimit >= slpc->boost_freq)
+ /* Waitboost should not be done with the power-saving profile */
+ if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
return;
/* Return if old value is non zero */
if (!atomic_fetch_inc(&slpc->num_waiters)) {
+ /*
+ * Skip queuing boost work if frequency is already boosted,
+ * but still increment num_waiters.
+ */
+ if (slpc->min_freq_softlimit >= slpc->boost_freq)
+ return;
+
GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
rq->fence.context, rq->fence.seqno);
queue_work(rps_to_gt(rps)->i915->unordered_wq,
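The waitboost path hinges on fetch-and-increment returning the old value: every waiter bumps num_waiters (so the matching decrement stays balanced), but only the one that saw zero queues the boost work. A C11-atomics sketch of that first-waiter pattern, with queue_boost() as a hypothetical stand-in for queue_work():

        #include <stdatomic.h>
        #include <stdio.h>

        static atomic_int num_waiters;

        static void queue_boost(void)
        {
                printf("boost queued\n");       /* stand-in for queue_work() */
        }

        static void boost(void)
        {
                /* Old value 0 means we are the first waiter. */
                if (atomic_fetch_add(&num_waiters, 1) == 0)
                        queue_boost();
        }

        static void unboost(void)
        {
                atomic_fetch_sub(&num_waiters, 1);
        }

The new early returns slot naturally into this shape: skip both the increment and the boost for the power-saving profile, and skip only the queueing when the frequency is already pinned at or above the boost point.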
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
index 6507fa3f6d1e..ece445109305 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -40,7 +40,7 @@ enum {
/**
* struct intel_rps_freq_caps - rps freq capabilities
* @rp0_freq: non-overclocked max frequency
- * @rp1_freq: "less than" RP0 power/freqency
+ * @rp1_freq: "less than" RP0 power/frequency
* @min_freq: aka RPn, minimum frequency
*
* Freq caps exposed by HW, values are in "hw units" and intel_gpu_freq()
@@ -57,7 +57,7 @@ struct intel_rps {
/*
* work, interrupts_enabled and pm_iir are protected by
- * i915->irq_lock
+ * gt->irq_lock
*/
struct timer_list timer;
struct work_struct work;
@@ -90,7 +90,7 @@ struct intel_rps {
u8 boost_freq; /* Frequency to request when wait boosting */
u8 idle_freq; /* Frequency to request when we are idle */
u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
- u8 rp1_freq; /* "less than" RP0 power/freqency */
+ u8 rp1_freq; /* "less than" RP0 power/frequency */
u8 rp0_freq; /* Non-overclocked max frequency. */
u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c
index 8c1dbcbcbc4f..2945526d52d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_sa_media.c
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
@@ -27,7 +27,7 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr,
/*
* Standalone media shares the general MMIO space with the primary
- * GT. We'll re-use the primary GT's mapping.
+ * GT. We'll reuse the primary GT's mapping.
*/
uncore->regs = intel_uncore_regs(&i915->uncore);
if (drm_WARN_ON(&i915->drm, uncore->regs == NULL))
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index c8fadf58d836..9501d323d0d3 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -236,7 +236,8 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
GEN12_GT_COMPUTE_DSS_ENABLE,
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
- eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
+ eu_en_fuse = REG_FIELD_GET(XEHP_EU_ENA_MASK,
+ intel_uncore_read(uncore, XEHP_EU_ENABLE));
if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
eu_en = eu_en_fuse;
@@ -269,15 +270,15 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
* Although gen12 architecture supported multiple slices, TGL, RKL,
* DG1, and ADL only had a single slice.
*/
- s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
- GEN11_GT_S_ENA_MASK;
+ s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
+ intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
/* one bit per pair of EUs */
- eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
- GEN11_EU_DIS_MASK);
+ eu_en_fuse = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
+ intel_uncore_read(uncore, GEN11_EU_DISABLE));
for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
if (eu_en_fuse & BIT(eu))
@@ -306,14 +307,14 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
* Although gen11 architecture supported multiple slices, ICL and
* EHL/JSL only had a single slice in practice.
*/
- s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
- GEN11_GT_S_ENA_MASK;
+ s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
+ intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);
- eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
- GEN11_EU_DIS_MASK);
+ eu_en = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
+ intel_uncore_read(uncore, GEN11_EU_DISABLE));
gen11_compute_sseu_info(sseu, ss_en, eu_en);
@@ -335,10 +336,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
if (!(fuse & CHV_FGT_DISABLE_SS0)) {
u8 disabled_mask =
- ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
- CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
- (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
- CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R0_MASK, fuse) |
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS0_R0_MASK);
sseu->subslice_mask.hsw[0] |= BIT(0);
sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF);
@@ -346,10 +345,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
if (!(fuse & CHV_FGT_DISABLE_SS1)) {
u8 disabled_mask =
- ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
- CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
- (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
- CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R0_MASK, fuse) |
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS1_R0_MASK);
sseu->subslice_mask.hsw[0] |= BIT(1);
sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF);
@@ -385,7 +382,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
int s, ss;
fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
- sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+ sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);
/* BXT has a single slice and at most 3 subslices. */
intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
@@ -396,8 +393,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
* to each of the enabled slices.
*/
subslice_mask = (1 << sseu->max_subslices) - 1;
- subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
- GEN9_F2_SS_DIS_SHIFT);
+ subslice_mask &= ~REG_FIELD_GET(GEN9_F2_SS_DIS_MASK, fuse2);
/*
* Iterate through enabled slices and subslices to
@@ -490,7 +486,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
u32 eu_disable0, eu_disable1, eu_disable2;
fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
- sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+ sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);
intel_sseu_set_info(sseu, 3, 3, 8);
/*
@@ -498,18 +494,18 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
* to each of the enabled slices.
*/
subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
- subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
- GEN8_F2_SS_DIS_SHIFT);
+ subslice_mask &= ~REG_FIELD_GET(GEN8_F2_SS_DIS_MASK, fuse2);
eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
- eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
- eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
- ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
- (32 - GEN8_EU_DIS0_S1_SHIFT));
- eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
- ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
- (32 - GEN8_EU_DIS1_S2_SHIFT));
+ eu_disable[0] =
+ REG_FIELD_GET(GEN8_EU_DIS0_S0_MASK, eu_disable0);
+ eu_disable[1] =
+ REG_FIELD_GET(GEN8_EU_DIS0_S1_MASK, eu_disable0) |
+ REG_FIELD_GET(GEN8_EU_DIS1_S1_MASK, eu_disable1) << hweight32(GEN8_EU_DIS0_S1_MASK);
+ eu_disable[2] =
+ REG_FIELD_GET(GEN8_EU_DIS1_S2_MASK, eu_disable1) |
+ REG_FIELD_GET(GEN8_EU_DIS2_S2_MASK, eu_disable2) << hweight32(GEN8_EU_DIS1_S2_MASK);
/*
* Iterate through enabled slices and subslices to
@@ -687,7 +683,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
* According to documentation software must consider the configuration
* as 2x4x8 and hardware will translate this to 1x8x8.
*
- * Furthemore, even though SScount is three bits, maximum documented
+ * Furthermore, even though SScount is three bits, maximum documented
* value for it is four. From this some rules/restrictions follow:
*
* 1.
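All of these sseu conversions follow one recipe: REG_FIELD_GET extracts a contiguous field by its mask, and when a logical field is split across two masks, the shift for the upper part is the population count of the lower mask rather than a hand-maintained constant. A sketch of both helpers, assuming non-zero contiguous masks and GCC/Clang builtins:

        #include <stdint.h>

        /* (val & mask) >> (bit position of the mask's lowest set bit) */
        static uint32_t field_get(uint32_t mask, uint32_t val)
        {
                return (val & mask) >> __builtin_ctz(mask);
        }

        /* Stitch a split field back together; the low field's width is
         * derived from its mask, so no separate shift constant to maintain. */
        static uint32_t combine_fields(uint32_t fuse,
                                       uint32_t lo_mask, uint32_t hi_mask)
        {
                return field_get(lo_mask, fuse) |
                       (field_get(hi_mask, fuse) << __builtin_popcount(lo_mask));
        }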
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 756e9ebbc725..2487768bc230 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -122,7 +122,7 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
intel_wakeref_t wakeref;
- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ if (is_mock_gt(gt))
return;
if (intel_gt_is_wedged(gt))
diff --git a/drivers/gpu/drm/i915/gt/intel_wopcm.h b/drivers/gpu/drm/i915/gt/intel_wopcm.h
index 17d6aa86008a..d2038b6de5e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_wopcm.h
+++ b/drivers/gpu/drm/i915/gt/intel_wopcm.h
@@ -1,6 +1,5 @@
+/* SPDX-License-Identifier: MIT */
/*
- * SPDX-License-Identifier: MIT
- *
* Copyright © 2017-2018 Intel Corporation
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index e539a656cfc3..b37e400f74e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -156,7 +156,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
struct i915_wa *list;
- list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
+ list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*list),
GFP_KERNEL);
if (!list) {
drm_err(&i915->drm, "No space for workaround init!\n");
@@ -418,7 +418,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
/* WaForceContextSaveRestoreNonCoherent:bdw */
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
- (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+ (INTEL_INFO(i915)->gt == 3 ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}
static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -691,16 +691,17 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
struct drm_i915_private *i915 = engine->i915;
/*
- * Wa_1409142259:tgl,dg1,adl-p
+ * Wa_1409142259:tgl,dg1,adl-p,adl-n
* Wa_1409347922:tgl,dg1,adl-p
* Wa_1409252684:tgl,dg1,adl-p
* Wa_1409217633:tgl,dg1,adl-p
* Wa_1409207793:tgl,dg1,adl-p
- * Wa_1409178076:tgl,dg1,adl-p
- * Wa_1408979724:tgl,dg1,adl-p
- * Wa_14010443199:tgl,rkl,dg1,adl-p
- * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
- * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
+ * Wa_1409178076:tgl,dg1,adl-p,adl-n
+ * Wa_1408979724:tgl,dg1,adl-p,adl-n
+ * Wa_14010443199:tgl,rkl,dg1,adl-p,adl-n
+ * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p,adl-n
+ * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p,adl-n
+ * Wa_22010465259:tgl,rkl,dg1,adl-s,adl-p,adl-n
*/
wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
@@ -741,6 +742,12 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_1606376872 */
wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC);
}
+
+ /*
+ * This bit must be set to enable a performance optimization for fast
+ * clears.
+ */
+ wa_mcr_write_or(wal, GEN8_WM_CHICKEN2, WAIT_ON_DEPTH_STALL_DONE_DISABLE);
}
static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -1318,7 +1325,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
* We'll do our default/implicit steering based on GSLICE (in the
* sliceid field) and DSS (in the subsliceid field). If we can
* find overlap between the valid MSLICE and/or LNCF values with
- * a suitable GSLICE, then we can just re-use the default value and
+ * a suitable GSLICE, then we can just reuse the default value and
* skip and explicit steering at runtime.
*
* We only need to look for overlap between GSLICE/MSLICE/LNCF to find
@@ -2299,6 +2306,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
}
+ if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) {
+ /*
+ * "Disable Repacking for Compression (masked R/W access)
+ * before rendering compressed surfaces for display."
+ */
+ wa_masked_en(wal, CACHE_MODE_0_GEN7,
+ DISABLE_REPACKING_FOR_COMPRESSION);
+ }
+
if (GRAPHICS_VER(i915) == 11) {
/* This is not a Wa. Enable for better image quality */
wa_masked_en(wal,
@@ -2537,7 +2553,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_DS_SCHED_HW);
/* WaDisablePSDDualDispatchEnable:ivb */
- if (IS_IVB_GT1(i915))
+ if (INTEL_INFO(i915)->gt == 1)
wa_masked_en(wal,
GEN7_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
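The sizeof(*wa) to sizeof(*list) change above is cosmetic (both are struct i915_wa), but it makes the allocation read against the pointer it fills. The surrounding _wa_add() logic is a grow-by-chunks append: when the count hits a chunk boundary the array is reallocated up to the next multiple, so most appends allocate nothing. A userspace sketch with realloc standing in for the kmalloc/copy/kfree sequence:

        #include <stdlib.h>

        #define GROW 4  /* chunk size; i915 aligns capacity the same way */

        struct item { unsigned int reg, val; };

        struct wa_list {
                struct item *items;
                size_t count;
        };

        static int wa_list_add(struct wa_list *l, const struct item *it)
        {
                /* Either uninitialized or full: round capacity up one chunk. */
                if (l->count % GROW == 0) {
                        size_t cap = l->count + GROW;
                        struct item *p = realloc(l->items, cap * sizeof(*p));

                        if (!p)
                                return -1;
                        l->items = p;
                }

                l->items[l->count++] = *it;
                return 0;
        }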
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index c0637bf799a3..79741f043f03 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -108,7 +108,7 @@ static void advance(struct i915_request *request)
static void hw_delay_complete(struct timer_list *t)
{
- struct mock_engine *engine = from_timer(engine, t, hw_delay);
+ struct mock_engine *engine = timer_container_of(engine, t, hw_delay);
struct i915_request *request;
unsigned long flags;
@@ -297,7 +297,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
struct i915_request *rq;
unsigned long flags;
- del_timer_sync(&mock->hw_delay);
+ timer_delete_sync(&mock->hw_delay);
spin_lock_irqsave(&engine->sched_engine->lock, flags);
@@ -432,7 +432,7 @@ void mock_engine_flush(struct intel_engine_cs *engine)
container_of(engine, typeof(*mock), base);
struct i915_request *request, *rn;
- del_timer_sync(&mock->hw_delay);
+ timer_delete_sync(&mock->hw_delay);
spin_lock_irq(&mock->hw_lock);
list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link)
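from_timer() was renamed timer_container_of(), and del_timer_sync() became timer_delete_sync(); both recover or act on the structure that embeds the timer_list. The recovery itself is plain container_of() pointer arithmetic, sketched here in userspace with a dummy timer type:

        #include <stddef.h>
        #include <stdio.h>

        struct timer_list { long expires; };    /* dummy stand-in */

        #define container_of(ptr, type, member) \
                ((type *)((char *)(ptr) - offsetof(type, member)))

        struct mock_engine {
                int id;
                struct timer_list hw_delay;
        };

        /* The callback only gets the embedded timer; recover the engine. */
        static void hw_delay_complete(struct timer_list *t)
        {
                struct mock_engine *engine =
                        container_of(t, struct mock_engine, hw_delay);

                printf("timer fired for engine %d\n", engine->id);
        }

        int main(void)
        {
                struct mock_engine e = { .id = 3 };

                hw_delay_complete(&e.hw_delay);
                return 0;
        }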
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 222ca7c44951..0454eb1814bb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -53,7 +53,7 @@ static int wait_for_submit(struct intel_engine_cs *engine,
if (i915_request_completed(rq)) /* that was quick! */
return 0;
- /* Wait until the HW has acknowleged the submission (or err) */
+ /* Wait until the HW has acknowledged the submission (or err) */
intel_engine_flush_submission(engine);
if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
return 0;
@@ -1198,7 +1198,7 @@ static int live_timeslice_rewind(void *arg)
ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
/* Wait for the timeslice to kick in */
- del_timer(&engine->execlists.timer);
+ timer_delete(&engine->execlists.timer);
tasklet_hi_schedule(&engine->sched_engine->tasklet);
intel_engine_flush_submission(engine);
}
@@ -2357,7 +2357,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
/* force preempt reset [failure] */
while (!engine->execlists.pending[0])
intel_engine_flush_submission(engine);
- del_timer_sync(&engine->execlists.preempt);
+ timer_delete_sync(&engine->execlists.preempt);
intel_engine_flush_submission(engine);
cancel_reset_timeout(engine);
@@ -3574,7 +3574,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
arg[id].batch = NULL;
arg[id].count = 0;
- worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
+ worker[id] = kthread_run_worker(0, "igt/smoke:%d", id);
if (IS_ERR(worker[id])) {
err = PTR_ERR(worker[id]);
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9ce8ff1c04fe..f057c16410e7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -548,7 +548,7 @@ static int igt_reset_fail_engine(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- /* Check that we can recover from engine-reset failues */
+ /* Check that we can recover from engine-reset failures */
if (!intel_has_reset_engine(gt))
return 0;
@@ -1025,7 +1025,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].engine = other;
threads[tmp].flags = flags;
- worker = kthread_create_worker(0, "igt/%s",
+ worker = kthread_run_worker(0, "igt/%s",
other->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index e17b8777d21d..23f04f6f8fba 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
#include "gem/i915_gem_internal.h"
+#include "i915_drv.h"
#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
@@ -63,7 +64,7 @@ static int wait_for_submit(struct intel_engine_cs *engine,
if (i915_request_completed(rq)) /* that was quick! */
return 0;
- /* Wait until the HW has acknowleged the submission (or err) */
+ /* Wait until the HW has acknowledged the submission (or err) */
intel_engine_flush_submission(engine);
if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
return 0;
@@ -859,6 +860,14 @@ static int live_lrc_timestamp(void *arg)
};
/*
+ * This test was designed to isolate a hardware bug.
+ * The bug was found and fixed in later generations, but
+ * the test now pollutes our CI on the earlier generation.
+ */
+ if (GRAPHICS_VER(gt->i915) == 12)
+ return 0;
+
+ /*
* We want to verify that the timestamp is saved and restored across
* context switches and is monotonic.
*
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index ca460cee4f8b..54bc447efce0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -262,7 +262,7 @@ static int clear(struct intel_migrate *migrate,
{
struct drm_i915_private *i915 = migrate->context->engine->i915;
struct drm_i915_gem_object *obj;
- struct i915_request *rq;
+ struct i915_request *rq = NULL;
struct i915_gem_ww_ctx ww;
u32 *vaddr, val = 0;
bool ccs_cap = false;
@@ -537,7 +537,7 @@ struct spinner_timer {
static void spinner_kill(struct timer_list *timer)
{
- struct spinner_timer *st = from_timer(st, timer, timer);
+ struct spinner_timer *st = timer_container_of(st, timer, timer);
igt_spinner_end(&st->spin);
pr_info("%s\n", __func__);
@@ -660,8 +660,8 @@ static int live_emit_pte_full_ring(void *arg)
out_rq:
i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
- del_timer_sync(&st.timer);
- destroy_timer_on_stack(&st.timer);
+ timer_delete_sync(&st.timer);
+ timer_destroy_on_stack(&st.timer);
out_unpin:
intel_context_unpin(ce);
out_put:
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 1aa1446c8fb0..41716ed454b7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -8,6 +8,7 @@
#include "intel_gpu_commands.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
+#include "intel_rps.h"
#include "selftest_rc6.h"
#include "selftests/i915_random.h"
@@ -32,12 +33,22 @@ int live_rc6_manual(void *arg)
{
struct intel_gt *gt = arg;
struct intel_rc6 *rc6 = &gt->rc6;
- u64 rc0_power, rc6_power;
+ struct intel_rps *rps = &gt->rps;
intel_wakeref_t wakeref;
+ u64 rc0_sample_energy[2];
+ u64 rc6_sample_energy[2];
+ u64 sleep_time = 1000;
+ u32 rc0_freq = 0;
+ u32 rc6_freq = 0;
+ u64 rc0_power;
+ u64 rc6_power;
bool has_power;
+ u64 threshold;
ktime_t dt;
u64 res[2];
int err = 0;
+ u64 diff;
+
/*
* Our claim is that we can "encourage" the GPU to enter rc6 at will.
@@ -56,16 +67,18 @@ int live_rc6_manual(void *arg)
/* Force RC6 off for starters */
__intel_rc6_disable(rc6);
- msleep(1); /* wakeup is not immediate, takes about 100us on icl */
+ /* wakeup is not immediate, takes about 100us on icl */
+ usleep_range(1000, 2000);
res[0] = rc6_residency(rc6);
dt = ktime_get();
- rc0_power = librapl_energy_uJ();
- msleep(1000);
- rc0_power = librapl_energy_uJ() - rc0_power;
+ rc0_sample_energy[0] = librapl_energy_uJ();
+ msleep(sleep_time);
+ rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0];
dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
+ rc0_freq = intel_rps_read_actual_frequency_fw(rps);
if ((res[1] - res[0]) >> 10) {
pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n",
(res[1] - res[0]) >> 10);
@@ -74,10 +87,15 @@ int live_rc6_manual(void *arg)
}
if (has_power) {
- rc0_power = div64_u64(NSEC_PER_SEC * rc0_power,
+ rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1],
ktime_to_ns(dt));
+
if (!rc0_power) {
- pr_err("No power measured while in RC0\n");
+ if (rc0_freq)
+ pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n",
+ rc0_freq);
+ else
+ pr_err("No power and freq measured while in RC0\n");
err = -EINVAL;
goto out_unlock;
}
@@ -89,9 +107,10 @@ int live_rc6_manual(void *arg)
res[0] = rc6_residency(rc6);
intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
dt = ktime_get();
- rc6_power = librapl_energy_uJ();
- msleep(100);
- rc6_power = librapl_energy_uJ() - rc6_power;
+ rc6_sample_energy[0] = librapl_energy_uJ();
+ msleep(sleep_time);
+ rc6_freq = intel_rps_read_actual_frequency_fw(rps);
+ rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0];
dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
if (res[1] == res[0]) {
@@ -103,12 +122,24 @@ int live_rc6_manual(void *arg)
}
if (has_power) {
- rc6_power = div64_u64(NSEC_PER_SEC * rc6_power,
+ rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1],
ktime_to_ns(dt));
- pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
+ pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n",
rc0_power, rc6_power);
+
if (2 * rc6_power > rc0_power) {
- pr_err("GPU leaked energy while in RC6!\n");
+ pr_err("GPU leaked energy while in RC6!\n"
+ "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n"
+ "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n"
+ "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n",
+ rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1],
+ rc6_sample_energy[0], rc6_sample_energy[1]);
+
+ diff = res[1] - res[0];
+ threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10;
+ if (diff < threshold)
+ pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n",
+ res[0], res[1]);
err = -EINVAL;
goto out_unlock;
}
@@ -211,7 +242,7 @@ int live_rc6_ctx_wa(void *arg)
i915_reset_engine_count(error, engine);
const u32 *res;
- /* Use a sacrifical context */
+ /* Use a sacrificial context */
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
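The added diagnostics in live_rc6_manual treat RC6 as genuinely entered only when residency grew by at least 90% of the sleep window; sleep_time is in milliseconds while residency counters are in nanoseconds, hence the NSEC_PER_MSEC scaling. A sketch of that check (for a 1000ms sleep the threshold works out to 900,000,000ns):

        #include <stdbool.h>
        #include <stdint.h>

        #define NSEC_PER_MSEC 1000000ull

        /* Residency delta must cover >= 90% of the sleep window (ms -> ns). */
        static bool entered_rc6(uint64_t res_start_ns, uint64_t res_end_ns,
                                uint64_t sleep_ms)
        {
                uint64_t threshold = 9 * NSEC_PER_MSEC * sleep_ms / 10;

                return res_end_ns - res_start_ns >= threshold;
        }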
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index dcef8d498919..73bc91c6ea07 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -22,7 +22,7 @@
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"
-/* Try to isolate the impact of cstates from determing frequency response */
+/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
static void dummy_rps_work(struct work_struct *wrk)
@@ -477,12 +477,13 @@ int live_rps_control(void *arg)
limit, intel_gpu_freq(rps, limit),
min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
- if (limit == rps->min_freq) {
- pr_err("%s: GPU throttled to minimum!\n",
- engine->name);
+ if (limit != rps->max_freq) {
+ u32 throttle = intel_uncore_read(gt->uncore,
+ intel_gt_perf_limit_reasons_reg(gt));
+
+ pr_warn("%s: GPU throttled with reasons 0x%08x\n",
+ engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK);
show_pstate_limits(rps);
- err = -ENODEV;
- break;
}
if (igt_flush_test(gt->i915)) {
@@ -1115,7 +1116,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq)
for (i = 0; i < 5; i++)
x[i] = __measure_power(5);
- *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
+ *freq = (*freq + read_cagf(rps)) / 2;
/* A simple triangle filter for better result stability */
sort(x, 5, sizeof(*x), cmp_u64, NULL);
@@ -1125,6 +1126,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq)
static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
*freq = rps_set_check(rps, *freq);
+ msleep(100);
return measure_power(rps, freq);
}
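measure_power()'s "simple triangle filter" takes five samples, sorts them, discards the minimum and maximum, and weights the median twice: (x[1] + 2*x[2] + x[3]) / 4. That rejects single-sample outliers while still smoothing the middle of the distribution. A standalone sketch:

        #include <stdint.h>
        #include <stdlib.h>

        static int cmp_u64(const void *a, const void *b)
        {
                const uint64_t *x = a, *y = b;

                return (*x > *y) - (*x < *y);
        }

        /* Sort five samples, drop the extremes, weight the median twice. */
        static uint64_t triangle_filter(uint64_t x[5])
        {
                qsort(x, 5, sizeof(*x), cmp_u64);
                return (x[1] + 2 * x[2] + x[3]) / 4;
        }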
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 4ecc4ae74a54..e61bb0bad12c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -95,6 +95,21 @@ static int slpc_restore_freq(struct intel_guc_slpc *slpc, u32 min, u32 max)
return 0;
}
+static u64 slpc_measure_power(struct intel_rps *rps, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_power(5);
+
+ *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
{
int err = 0;
@@ -103,7 +118,7 @@ static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
if (err)
return err;
*freq = intel_rps_read_actual_frequency(&gt->rps);
- *power = measure_power(&gt->rps, freq);
+ *power = slpc_measure_power(&gt->rps, freq);
return err;
}
@@ -489,7 +504,7 @@ static int live_slpc_tile_interaction(void *arg)
return -ENOMEM;
for_each_gt(gt, i915, i) {
- threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id);
+ threads[i].worker = kthread_run_worker(0, "igt/slpc_parallel:%d", gt->info.id);
if (IS_ERR(threads[i].worker)) {
ret = PTR_ERR(threads[i].worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 3941f2d6fa47..69ed946a39e5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -143,7 +143,7 @@ pte_tlbinv(struct intel_context *ce,
if (ce->engine->class == OTHER_CLASS)
msleep(200);
else
- msleep(10);
+ usleep_range(10000, 20000);
if (va == vb) {
if (!i915_request_completed(rq)) {
diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README
index e7e96d7073c7..22f8dabed434 100644
--- a/drivers/gpu/drm/i915/gt/shaders/README
+++ b/drivers/gpu/drm/i915/gt/shaders/README
@@ -10,7 +10,7 @@ i915/gt/shaders/clear_kernel directory.
The generated .c files should never be modified directly. Instead, any modification
needs to be done on their respective ASM files and the build instructions below
-needes to be followed.
+need to be followed.
Building
========
@@ -24,7 +24,7 @@ on building.
Please make sure your Mesa tool is compiled with "-Dtools=intel" and
"-Ddri-drivers=i965", and run this script from IGT source root directory"
-The instructions bellow assume:
+The instructions below assume:
* IGT gpu tools source code is located on your home directory (~) as ~/igt
* Mesa source code is located on your home directory (~) as ~/mesa
and built under the ~/mesa/build directory
@@ -43,4 +43,4 @@ igt $ ./scripts/generate_clear_kernel.sh -g ivb \
~/igt/lib/i915/shaders/clear_kernel/hsw.asm
~ $ cd ~/igt
igt $ ./scripts/generate_clear_kernel.sh -g hsw \
- -m ~/mesa/build/src/intel/tools/i965_asm \ No newline at end of file
+ -m ~/mesa/build/src/intel/tools/i965_asm
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
index 5fdf384bb621..6c0c89daf96c 100644
--- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
@@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
* DW 1.4 - Rsvd (intended for context ID)
* DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
* DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
- * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (intended for Total Thread Count)
*
* Binding Table
*
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
index 97c7ac9e3854..27c28e63d6cc 100644
--- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
@@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
* DW 1.4 - Rsvd (intended for context ID)
* DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
* DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
- * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (intended for Total Thread Count)
*
* Binding Table
*
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 1fb6ff77fd89..365c4b8b04f4 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -40,7 +40,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
if (i915_gem_object_is_shmem(obj)) {
file = obj->base.filp;
- atomic_long_inc(&file->f_count);
+ get_file(file);
return file;
}
@@ -108,7 +108,7 @@ static int __shmem_rw(struct file *file, loff_t off,
if (IS_ERR(page))
return PTR_ERR(page);
- vaddr = kmap(page);
+ vaddr = kmap_local_page(page);
if (write) {
memcpy(vaddr + offset_in_page(off), ptr, this);
set_page_dirty(page);
@@ -116,7 +116,7 @@ static int __shmem_rw(struct file *file, loff_t off,
memcpy(ptr, vaddr + offset_in_page(off), this);
}
mark_page_accessed(page);
- kunmap(page);
+ kunmap_local(vaddr);
put_page(page);
len -= this;
@@ -143,11 +143,11 @@ int shmem_read_to_iosys_map(struct file *file, loff_t off,
if (IS_ERR(page))
return PTR_ERR(page);
- vaddr = kmap(page);
+ vaddr = kmap_local_page(page);
iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off),
this);
mark_page_accessed(page);
- kunmap(page);
+ kunmap_local(vaddr);
put_page(page);
len -= this;
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
index c34674e797c6..6de87ae5669e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -228,6 +228,11 @@ struct slpc_optimized_strategies {
#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0)
+enum slpc_power_profiles {
+ SLPC_POWER_PROFILES_BASE = 0x0,
+ SLPC_POWER_PROFILES_POWER_SAVING = 0x1
+};
+
/**
* DOC: SLPC H2G MESSAGE FORMAT
*
diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
index 1fc0c17b1230..803c0379d97d 100644
--- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
@@ -81,7 +81,7 @@ struct guc_debug_capture_list {
*
* intel_guc_capture module uses these structures to maintain static
* tables (per unique platform) that consists of lists of registers
- * (offsets, names, flags,...) that are used at the ADS regisration
+ * (offsets, names, flags,...) that are used at the ADS registration
* time as well as during runtime processing and reporting of error-
* capture states generated by GuC just prior to engine reset events.
*/
@@ -200,7 +200,7 @@ struct intel_guc_state_capture {
* dynamically allocate new nodes when receiving the G2H notification
* because the event handlers for all G2H event-processing is called
* by the ct processing worker queue and when that queue is being
- * processed, there is no absoluate guarantee that we are not in the
+ * processed, there is no absolute guarantee that we are not in the
* midst of a GT reset operation (which doesn't allow allocations).
*/
struct list_head cachelist;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index 551b0d7974ff..d550eb6edfb8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -80,6 +80,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s
const struct intel_gsc_cpd_header_v2 *cpd_header = NULL;
const struct intel_gsc_cpd_entry *cpd_entry = NULL;
const struct intel_gsc_manifest_header *manifest;
+ struct intel_uc_fw_ver min_ver = { 0 };
size_t min_size = sizeof(*layout);
int i;
@@ -212,33 +213,46 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s
}
}
- if (IS_ARROWLAKE(gt->i915)) {
+ /*
+ * ARL SKUs require newer firmware, but the blob is actually common
+ * across all MTL and ARL SKUs, so we need to do an explicit version check
+ * here rather than using a separate table entry. If the firmware found
+ * is too old, just don't use the GSC rather than aborting the driver load.
+ * Note that the major number in the GSC FW version is used to indicate
+ * the platform, so we expect it to always be 102 for MTL/ARL binaries.
+ */
+ if (IS_ARROWLAKE_S(gt->i915))
+ min_ver = (struct intel_uc_fw_ver){ 102, 0, 10, 1878 };
+ else if (IS_ARROWLAKE_H(gt->i915) || IS_ARROWLAKE_U(gt->i915))
+ min_ver = (struct intel_uc_fw_ver){ 102, 1, 15, 1926 };
+
+ if (IS_METEORLAKE(gt->i915) && gsc->release.major != 102) {
+ gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build);
+ return -EINVAL;
+ }
+
+ if (min_ver.major) {
bool too_old = false;
- /*
- * ARL requires a newer firmware than MTL did (102.0.10.1878) but the
- * firmware is actually common. So, need to do an explicit version check
- * here rather than using a separate table entry. And if the older
- * MTL-only version is found, then just don't use GSC rather than aborting
- * the driver load.
- */
- if (gsc->release.major < 102) {
+ if (gsc->release.minor < min_ver.minor) {
too_old = true;
- } else if (gsc->release.major == 102) {
- if (gsc->release.minor == 0) {
- if (gsc->release.patch < 10) {
+ } else if (gsc->release.minor == min_ver.minor) {
+ if (gsc->release.patch < min_ver.patch) {
+ too_old = true;
+ } else if (gsc->release.patch == min_ver.patch) {
+ if (gsc->release.build < min_ver.build)
too_old = true;
- } else if (gsc->release.patch == 10) {
- if (gsc->release.build < 1878)
- too_old = true;
- }
}
}
if (too_old) {
- gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878",
+ gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least %d.%d.%d.%d",
gsc->release.major, gsc->release.minor,
- gsc->release.patch, gsc->release.build);
+ gsc->release.patch, gsc->release.build,
+ min_ver.major, min_ver.minor,
+ min_ver.patch, min_ver.build);
return -EINVAL;
}
}
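The restructured check is a lexicographic version compare: once the platform-specific major number has been validated, (minor, patch, build) are compared most-significant first, stopping at the first difference. A general sketch; with a helper like this the nested too_old chain collapses to a single fw_ver_cmp(&got, &min) < 0 test:

        struct fw_ver { int major, minor, patch, build; };

        /* <0 if a is older than b, 0 if equal, >0 if newer. */
        static int fw_ver_cmp(const struct fw_ver *a, const struct fw_ver *b)
        {
                if (a->major != b->major)
                        return a->major - b->major;
                if (a->minor != b->minor)
                        return a->minor - b->minor;
                if (a->patch != b->patch)
                        return a->patch - b->patch;
                return a->build - b->build;
        }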
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 097fc6bd1285..9df80c325fc1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -239,8 +239,16 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc)
static u32 guc_ctl_feature_flags(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
u32 flags = 0;
+ /*
+ * Enable PXP GuC autoteardown flow.
+ * NB: MTL does things differently.
+ */
+ if (HAS_PXP(gt->i915) && !IS_METEORLAKE(gt->i915))
+ flags |= GUC_CTL_ENABLE_GUC_PXP_CTL;
+
if (!intel_guc_submission_is_used(guc))
flags |= GUC_CTL_DISABLE_SCHEDULER;
@@ -682,7 +690,7 @@ int intel_guc_suspend(struct intel_guc *guc)
* H2G MMIO command completes.
*
* Don't abort on a failure code from the GuC. Keep going and do the
- * clean up in santize() and re-initialisation on resume and hopefully
+ * clean up in sanitize() and re-initialisation on resume and hopefully
* the error here won't be problematic.
*/
ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 57b903132776..053780f562c1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -295,7 +295,7 @@ struct intel_guc {
*/
struct work_struct dead_guc_worker;
/**
- * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance
+ * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrence
* used to prevent a fundamentally broken system from continuously
* reloading the GuC.
*/
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 23f54c84cbab..e7ccfa520df3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -145,7 +145,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool
* an end user should hit the timeout is in case of extreme thermal throttling.
* And a system that is that hot during boot is probably dead anyway!
*/
-#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define GUC_LOAD_RETRY_LIMIT 20
#else
#define GUC_LOAD_RETRY_LIMIT 3
@@ -259,13 +259,14 @@ static int guc_wait_ucode(struct intel_guc *guc)
} else if (delta_ms > 200) {
guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
delta_ms, status, count, ret);
- guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
- intel_rps_read_actual_frequency(&gt->rps), before_freq,
+ guc_warn(guc, "excessive init time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n",
+ before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps),
intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
} else {
- guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
- delta_ms, intel_rps_read_actual_frequency(&gt->rps),
- before_freq, status, count, ret);
+ guc_dbg(guc, "init took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps), status, count, ret);
}
return ret;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 263c9c3f6a03..eded00f0c7e1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -105,6 +105,7 @@
#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22)
#define GUC_CTL_FEATURE 2
+#define GUC_CTL_ENABLE_GUC_PXP_CTL BIT(1)
#define GUC_CTL_ENABLE_SLPC BIT(2)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
@@ -407,7 +408,7 @@ enum guc_capture_type {
GUC_CAPTURE_LIST_TYPE_MAX,
};
-/* Class indecies for capture_class and capture_instance arrays */
+/* Class indices for capture_class and capture_instance arrays */
enum {
GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index b67a15f74276..868195c33f5b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -7,6 +7,7 @@
#include "gt/intel_hwconfig.h"
#include "i915_drv.h"
#include "i915_memcpy.h"
+#include "intel_guc_print.h"
/*
* GuC has a blob containing hardware configuration information (HWConfig).
@@ -42,6 +43,8 @@ static int __guc_action_get_hwconfig(struct intel_guc *guc,
};
int ret;
+ guc_dbg(guc, "Querying HW config table: size = %d, offset = 0x%08X\n",
+ ggtt_size, ggtt_offset);
ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
if (ret == -ENXIO)
return -ENOENT;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index bf16351c9349..e8a04e476c57 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -14,11 +14,11 @@
#include "intel_guc_log.h"
#include "intel_guc_print.h"
-#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
-#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
+#elif IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 706fffca698b..d5ee6e5e1443 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -15,6 +15,34 @@
#include "gt/intel_gt_regs.h"
#include "gt/intel_rps.h"
+/**
+ * DOC: SLPC - Dynamic Frequency Management
+ *
+ * Single Loop Power Control (SLPC) is a GuC algorithm that manages
+ * GT frequency based on busyness and on how the KMD initializes it.
+ * After initialization, SLPC is almost completely in control, except
+ * for the few scenarios described below.
+ *
+ * The KMD uses the concept of waitboost to ramp frequency to RP0 when
+ * there are pending submissions for a context. It achieves this by
+ * sending GuC a request to update the min frequency to RP0. Waitboost
+ * is disabled when the request retires.
+ *
+ * Another form of frequency control happens through per-context hints.
+ * A context can be marked as low latency during creation, which ensures
+ * that SLPC uses an aggressive frequency ramp whenever that context is
+ * active.
+ *
+ * Power profiles add another level of control to these mechanisms.
+ * When the power-saving profile is chosen, SLPC uses conservative
+ * thresholds to ramp frequency, thus saving power. The KMD disables
+ * waitboosts as well, which achieves further power savings. The base
+ * profile is the default and ensures balanced performance for any
+ * workload.
+ *
+ * Lastly, users have some level of control through sysfs, where the
+ * min/max frequency values can be altered and the use of the efficient
+ * frequency can be toggled.
+ */
+
static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc)
{
return container_of(slpc, struct intel_guc, slpc);
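[Editor's note: a rough sketch of the waitboost mechanism described in the DOC block above, compressed into one illustrative function. slpc_set_param(), SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ and the boost_freq/min_freq_softlimit fields are real names from this file and struct intel_guc_slpc; the function itself and its boost-then-restore flow are an assumption for illustration, not the actual boost path:

        static int ex_slpc_waitboost(struct intel_guc_slpc *slpc)
        {
                /* Ramp the min frequency to the boost point (RP0 by default)... */
                int ret = slpc_set_param(slpc,
                                         SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
                                         slpc->boost_freq);

                /* ...and restore the soft limit once the request retires. */
                if (!ret)
                        ret = slpc_set_param(slpc,
                                             SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
                                             slpc->min_freq_softlimit);
                return ret;
        }
]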
@@ -265,6 +293,8 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
slpc->num_boosts = 0;
slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL;
+ slpc->power_profile = SLPC_POWER_PROFILES_BASE;
+
mutex_init(&slpc->lock);
INIT_WORK(&slpc->boost_work, slpc_boost_work);
@@ -357,21 +387,29 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc)
GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER);
}
-static void slpc_shared_data_reset(struct slpc_shared_data *data)
+static void slpc_shared_data_reset(struct intel_guc_slpc *slpc)
{
- memset(data, 0, sizeof(struct slpc_shared_data));
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ struct slpc_shared_data *data = slpc->vaddr;
+ memset(data, 0, sizeof(struct slpc_shared_data));
data->header.size = sizeof(struct slpc_shared_data);
/* Enable only GTPERF task, disable others */
slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF,
SLPC_PARAM_TASK_DISABLE_GTPERF);
- slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER,
- SLPC_PARAM_TASK_DISABLE_BALANCER);
+ /*
+ * Don't allow balancer-related algorithms on platforms before
+ * Xe_LPG, where GuC started to restrict them to TDP-limited
+ * scenarios.
+ */
+ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) {
+ slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER,
+ SLPC_PARAM_TASK_DISABLE_BALANCER);
- slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC,
- SLPC_PARAM_TASK_DISABLE_DCC);
+ slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC,
+ SLPC_PARAM_TASK_DISABLE_DCC);
+ }
}
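[Editor's note on the version gate above: GRAPHICS_VER_FULL() is compared against IP_VER(12, 70), i.e. Xe_LPG. i915 packs the full IP version as (major << 8 | release), so full versions compare as plain integers. Illustrative sketch with renamed macros; the threshold logic mirrors the hunk:

        #define EX_IP_VER(ver, rel)     ((ver) << 8 | (rel))   /* mirrors i915's IP_VER() */

        static bool ex_host_must_disable_balancer(int ver, int rel)
        {
                /* Before Xe_LPG (12.70), the KMD disables the balancer itself. */
                return EX_IP_VER(ver, rel) < EX_IP_VER(12, 70);
        }
]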
/**
@@ -567,6 +605,34 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
return ret;
}
+int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val)
+{
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ intel_wakeref_t wakeref;
+ int ret = 0;
+
+ if (val > SLPC_POWER_PROFILES_POWER_SAVING)
+ return -EINVAL;
+
+ mutex_lock(&slpc->lock);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_POWER_PROFILE,
+ val);
+ if (ret)
+ guc_err(slpc_to_guc(slpc),
+ "Failed to set power profile to %d: %pe\n",
+ val, ERR_PTR(ret));
+ else
+ slpc->power_profile = val;
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mutex_unlock(&slpc->lock);
+
+ return ret;
+}
+
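[Editor's note: a hypothetical caller of the new setter, sketching the sysfs-store shape (per the diffstat, the real attribute hook lives in intel_gt_sysfs_pm.c; all names below are invented). Values above SLPC_POWER_PROFILES_POWER_SAVING are rejected with -EINVAL by the setter itself:

        static ssize_t ex_power_profile_store(struct intel_guc_slpc *slpc,
                                              const char *buf, size_t count)
        {
                u32 val;
                int err = kstrtou32(buf, 0, &val);

                if (err)
                        return err;

                err = intel_guc_slpc_set_power_profile(slpc, val);
                return err ?: count;    /* sysfs convention: bytes consumed on success */
        }
]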
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)
{
u32 pm_intrmsk_mbz = 0;
@@ -686,7 +752,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
GEM_BUG_ON(!slpc->vma);
- slpc_shared_data_reset(slpc->vaddr);
+ slpc_shared_data_reset(slpc);
ret = slpc_reset(slpc);
if (unlikely(ret < 0)) {
@@ -728,6 +794,13 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Enable SLPC Optimized Strategy for compute */
intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+ /* Set cached value of power_profile */
+ ret = intel_guc_slpc_set_power_profile(slpc, slpc->power_profile);
+ if (unlikely(ret)) {
+ guc_probe_error(guc, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
+ return ret;
+ }
+
return 0;
}
@@ -791,6 +864,23 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p
drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc));
drm_printf(p, "\tGTPERF task active: %s\n",
str_yes_no(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED));
+ drm_printf(p, "\tDCC enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_DCC_TASK_ENABLED));
+ drm_printf(p, "\tDCC in: %s\n",
+ str_yes_no(slpc_tasks->status & SLPC_IN_DCC));
+ drm_printf(p, "\tBalancer enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_BALANCER_ENABLED));
+ drm_printf(p, "\tIBC enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_IBC_TASK_ENABLED));
+ drm_printf(p, "\tBalancer IA LMT enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_BALANCER_IA_LMT_ENABLED));
+ drm_printf(p, "\tBalancer IA LMT active: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_BALANCER_IA_LMT_ACTIVE));
drm_printf(p, "\tMax freq: %u MHz\n",
slpc_decode_max_freq(slpc));
drm_printf(p, "\tMin freq: %u MHz\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 1cb5fd44f05c..fc9f761b4372 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -46,5 +46,6 @@ void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index a88651331497..83673b10ac4e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -33,6 +33,9 @@ struct intel_guc_slpc {
u32 max_freq_softlimit;
bool ignore_eff_freq;
+ /* Cached power profile: base (default) or power saving */
+ u32 power_profile;
+
/* cached media ratio mode */
u32 media_ratio_mode;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ed979847187f..127316d2c8aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
atomic_inc(&guc->outstanding_submission_g2h);
ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
- if (ret)
+ if (ret && g2h_len_dw)
atomic_dec(&guc->outstanding_submission_g2h);
return ret;
@@ -1223,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
* determine validity of these values. Instead we read the values multiple times
* until they are consistent. In test runs, 3 attempts results in consistent
* values. The upper bound is set to 6 attempts and may need to be tuned as per
- * any new occurences.
+ * any new occurrences.
*/
static void __get_engine_usage_record(struct intel_engine_cs *engine,
u32 *last_in, u32 *id, u32 *total)
@@ -1243,6 +1243,21 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
} while (++i < 6);
}
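[Editor's note: the comment above describes a retry-until-stable read of a record the GuC updates without a seqlock. A generic sketch of that pattern, with invented names; the bounded 6-try cap mirrors the driver's:

        static void ex_read_stable(const u32 *a, const u32 *b, u32 *out_a, u32 *out_b)
        {
                u32 pa = 0, pb = 0;
                int i = 0;

                do {
                        u32 ca = READ_ONCE(*a);
                        u32 cb = READ_ONCE(*b);

                        if (i > 0 && ca == pa && cb == pb)
                                break;          /* two consecutive samples agreed */
                        pa = ca;
                        pb = cb;
                } while (++i < 6);              /* bounded retries, as in the driver */

                *out_a = pa;
                *out_b = pb;
        }
]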
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+ u32 last_in, u32 id, u32 total)
+{
+ struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+ iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+ record_write(&rec_map, last_switch_in_stamp, last_in);
+ record_write(&rec_map, current_context_index, id);
+ record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
@@ -1270,15 +1285,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
- u32 reg, shift;
+ u32 reg;
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
- shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
- GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
-
- return 3 - shift;
+ return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
}
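[Editor's note: the conversion above drops the hand-rolled mask-and-shift in favor of REG_FIELD_GET(), i915's typed wrapper around FIELD_GET(). For a contiguous, non-zero mask the extraction reduces to the sketch below (illustrative helper name):

        #include <linux/bitops.h>

        static u32 ex_field_get(u32 mask, u32 reg)
        {
                /* Shift by the mask's lowest set bit, as FIELD_GET() does. */
                return (reg & mask) >> __ffs(mask);     /* mask must be non-zero */
        }
]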
static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
@@ -1339,7 +1351,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
- wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+ wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
if (wakeref) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
@@ -1363,9 +1375,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
total += intel_gt_clock_interval_to_ns(gt, clk);
}
+ if (total > stats->total)
+ stats->total = total;
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
- return ns_to_ktime(total);
+ return ns_to_ktime(stats->total);
}
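[Editor's note: the two added lines above make the reported busyness monotonic, since the raw sample can transiently go backwards across a reset. Reduced to its essence, with an invented helper:

        static u64 ex_monotonic_busyness(u64 *cached_total, u64 sample)
        {
                if (sample > *cached_total)
                        *cached_total = sample;
                return *cached_total;   /* never decreases between calls */
        }
]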
static void guc_enable_busyness_worker(struct intel_guc *guc)
@@ -1431,13 +1446,39 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
+ struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
guc_update_engine_gt_clks(engine);
- engine->stats.guc.prev_total = 0;
+
+ /*
+ * If resetting a running context, accumulate the active
+ * time as well since there will be no context switch.
+ */
+ if (stats->running) {
+ u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+ stats->total_gt_clks += clk;
+ }
+ stats->prev_total = 0;
+ stats->running = 0;
}
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ for_each_engine(engine, gt, id)
+ engine->stats.guc.running = false;
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
static void __update_guc_busyness_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -1543,6 +1584,9 @@ err_trylock:
static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
u32 offset = intel_guc_engine_usage_offset(guc);
u32 action[] = {
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1550,6 +1594,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
0,
};
+ for_each_engine(engine, gt, id)
+ __set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
@@ -1582,6 +1629,9 @@ void intel_guc_busyness_park(struct intel_gt *gt)
if (!guc_submission_initialized(guc))
return;
+ /* Assume no engines are running and set running state to false */
+ __update_guc_busyness_running_state(guc);
+
/*
* There is a race with suspend flow where the worker runs after suspend
* and causes an unclaimed register access warning. Cancel the worker
@@ -1688,6 +1738,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
spin_lock_irq(guc_to_gt(guc)->irq_lock);
spin_unlock_irq(guc_to_gt(guc)->irq_lock);
+ /* Flush tasklet */
+ tasklet_disable(&guc->ct.receive_tasklet);
+ tasklet_enable(&guc->ct.receive_tasklet);
+
guc_flush_submissions(guc);
guc_flush_destroyed_contexts(guc);
flush_work(&guc->ct.requests.worker);
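[Editor's note: the tasklet_disable()/tasklet_enable() pair added above is a standard flush idiom from <linux/interrupt.h>: tasklet_disable() waits until any in-flight run of the callback has finished, and tasklet_enable() re-arms it. Generic form:

        #include <linux/interrupt.h>

        static void ex_tasklet_flush(struct tasklet_struct *t)
        {
                tasklet_disable(t);     /* waits for a running callback to complete */
                tasklet_enable(t);      /* allow it to be scheduled again */
        }
]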
@@ -2005,6 +2059,8 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
+ int outstanding;
+
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
!intel_guc_is_fw_running(guc) ||
@@ -2018,8 +2074,10 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
* see in CI if this happens frequently / a precursor to taking down the
* machine.
*/
- if (atomic_read(&guc->outstanding_submission_g2h))
- guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n");
+ outstanding = atomic_read(&guc->outstanding_submission_g2h);
+ if (outstanding)
+ guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n",
+ outstanding);
atomic_set(&guc->outstanding_submission_g2h, 0);
intel_guc_global_policies_update(guc);
@@ -2950,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce,
/*
* GuC context gets pinned in guc_request_alloc. See that function for
- * explaination of why.
+ * explanation of why.
*/
return lrc_pin(ce, engine, vaddr);
@@ -3385,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce)
* GuC is active, lets destroy this context, but at this point we can still be racing
* with suspend, so we undo everything if the H2G fails in deregister_context so
* that GuC reset will find this context during clean up.
+ *
+ * There is a race condition where the reset code could have altered
+ * this context's state and done a wakeref put before we try to
+ * deregister it here. So check if the context is still set to be
+ * destroyed before undoing earlier changes, to avoid two wakeref puts
+ * on the same context.
*/
ret = deregister_context(ce, ce->guc_id.id);
if (ret) {
- spin_lock(&ce->guc_state.lock);
- set_context_registered(ce);
- clr_context_destroyed(ce);
- spin_unlock(&ce->guc_state.lock);
+ bool pending_destroyed;
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ pending_destroyed = context_destroyed(ce);
+ if (pending_destroyed) {
+ set_context_registered(ce);
+ clr_context_destroyed(ce);
+ }
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
/*
* As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
* the wakeref immediately but per function spec usage call this after unlock.
*/
- intel_wakeref_put_async(&gt->wakeref);
+ if (pending_destroyed)
+ intel_wakeref_put_async(&gt->wakeref);
}
return ret;
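[Editor's note: the essence of the race fix above, with invented names: re-check ownership of the destroy transition under the same lock the reset path takes, and only then undo state and drop the reference, so the two paths can never both put the wakeref. A minimal sketch:

        struct ex_ctx {
                spinlock_t lock;
                bool pending_destroy;
        };

        static void ex_rollback_once(struct ex_ctx *ctx, struct intel_wakeref *wf)
        {
                bool owned;

                spin_lock(&ctx->lock);
                owned = ctx->pending_destroy;   /* did the reset path beat us? */
                if (owned)
                        ctx->pending_destroy = false;
                spin_unlock(&ctx->lock);

                if (owned)
                        intel_wakeref_put_async(wf);    /* exactly one put overall */
        }
]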
@@ -5474,12 +5543,20 @@ static inline void guc_log_context(struct drm_printer *p,
{
drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
- drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
- ce->ring->head,
- ce->lrc_reg_state[CTX_RING_HEAD]);
- drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
- ce->ring->tail,
- ce->lrc_reg_state[CTX_RING_TAIL]);
+ if (intel_context_pin_if_active(ce)) {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+ ce->ring->head,
+ ce->lrc_reg_state[CTX_RING_HEAD]);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+ ce->ring->tail,
+ ce->lrc_reg_state[CTX_RING_TAIL]);
+ intel_context_unpin(ce);
+ } else {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+ ce->ring->head);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+ ce->ring->tail);
+ }
drm_printf(p, "\t\tContext Pin Count: %u\n",
atomic_read(&ce->pin_count));
drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
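[Editor's note: the hunk above introduces a try-pin pattern: state that is only valid while pinned (the in-memory LRC image) is dereferenced solely inside a successful intel_context_pin_if_active()/intel_context_unpin() window, with the always-valid software state used as the fallback. A sketch under that assumption, helper name invented:

        static u32 ex_read_lrc_head(struct intel_context *ce, bool *valid)
        {
                u32 head = 0;

                *valid = intel_context_pin_if_active(ce);
                if (*valid) {
                        head = ce->lrc_reg_state[CTX_RING_HEAD];  /* mapping held */
                        intel_context_unpin(ce);
                }
                /* ce->ring->head remains readable either way */
                return head;
        }
]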
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 2d9152eb7282..456d3372eef8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -231,8 +231,8 @@ static void delayed_huc_load_init(struct intel_huc *huc)
sw_fence_dummy_notify);
i915_sw_fence_commit(&huc->delayed_load.fence);
- hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- huc->delayed_load.timer.function = huc_delayed_load_timer_callback;
+ hrtimer_setup(&huc->delayed_load.timer, huc_delayed_load_timer_callback, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
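[Editor's note: hrtimer_setup(), used in the hunk above, initializes the timer and installs its callback in one call, replacing the older two-step hrtimer_init() plus .function assignment. A minimal usage sketch with invented names:

        #include <linux/hrtimer.h>

        static enum hrtimer_restart ex_timer_cb(struct hrtimer *t)
        {
                return HRTIMER_NORESTART;       /* one-shot */
        }

        static void ex_timer_init(struct hrtimer *t)
        {
                hrtimer_setup(t, ex_timer_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                hrtimer_start(t, ms_to_ktime(10), HRTIMER_MODE_REL);
        }
]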
static void delayed_huc_load_fini(struct intel_huc *huc)
@@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc)
}
}
+void intel_huc_fini_late(struct intel_huc *huc)
+{
+ delayed_huc_load_fini(huc);
+}
+
#define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
static int check_huc_loading_mode(struct intel_huc *huc)
{
@@ -414,12 +419,6 @@ out:
void intel_huc_fini(struct intel_huc *huc)
{
- /*
- * the fence is initialized in init_early, so we need to clean it up
- * even if HuC loading is off.
- */
- delayed_huc_load_fini(huc);
-
if (huc->heci_pkt)
i915_vma_unpin_and_release(&huc->heci_pkt, 0);
@@ -427,19 +426,6 @@ void intel_huc_fini(struct intel_huc *huc)
intel_uc_fw_fini(&huc->fw);
}
-void intel_huc_suspend(struct intel_huc *huc)
-{
- if (!intel_uc_fw_is_loadable(&huc->fw))
- return;
-
- /*
- * in the unlikely case that we're suspending before the GSC has
- * completed its loading sequence, just stop waiting. We'll restart
- * on resume.
- */
- delayed_huc_load_complete(huc);
-}
-
static const char *auth_mode_string(struct intel_huc *huc,
enum intel_huc_authentication_type type)
{
@@ -455,7 +441,7 @@ static const char *auth_mode_string(struct intel_huc *huc,
* an end user should hit the timeout is in case of extreme thermal throttling.
* And a system that is that hot during boot is probably dead anyway!
*/
-#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define HUC_LOAD_RETRY_LIMIT 20
#else
#define HUC_LOAD_RETRY_LIMIT 3
@@ -502,13 +488,15 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
if (delta_ms > 50) {
huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
delta_ms, huc->status[type].reg.reg, count, ret);
- huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
- intel_rps_read_actual_frequency(&gt->rps), before_freq,
+ huc_warn(huc, "excessive auth time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n",
+ before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps),
intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
} else {
- huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
- delta_ms, intel_rps_read_actual_frequency(&gt->rps),
- before_freq, huc->status[type].reg.reg, count, ret);
+ huc_dbg(huc, "auth took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps),
+ huc->status[type].reg.reg, count, ret);
}
/* mark the load process as complete even if the wait failed */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index ba5cb08e9e7b..921ad4b1687f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -55,9 +55,9 @@ struct intel_huc {
int intel_huc_sanitize(struct intel_huc *huc);
void intel_huc_init_early(struct intel_huc *huc);
+void intel_huc_fini_late(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
-void intel_huc_suspend(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type);
int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
enum intel_huc_authentication_type type);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 5b8080ec5315..4a3493e8d433 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc)
void intel_uc_driver_late_release(struct intel_uc *uc)
{
+ intel_huc_fini_late(&uc->huc);
}
/**
@@ -512,7 +513,7 @@ static int __uc_init_hw(struct intel_uc *uc)
ERR_PTR(ret), attempts);
}
- /* Did we succeded or run out of retries? */
+ /* Did we succeed or run out of retries? */
if (ret)
goto err_log_capture;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
index 26fdc392fce6..83801c992488 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -64,7 +64,7 @@ static int intel_hang_guc(void *arg)
old_beat = engine->props.heartbeat_interval_ms;
ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL);
if (ret) {
- gt_err(gt, "Failed to boost heatbeat interval: %pe\n", ERR_PTR(ret));
+ gt_err(gt, "Failed to boost heartbeat interval: %pe\n", ERR_PTR(ret));
goto err;
}