Diffstat (limited to 'drivers/gpu/drm/i915/gt')
59 files changed, 625 insertions, 343 deletions
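Much of this series is a mechanical conversion from open-coded shift-and-mask pairs to the REG_GENMASK()/REG_FIELD_GET() helpers. Below is a minimal sketch of the idiom, written against the generic <linux/bitfield.h> helpers that the i915 REG_* macros wrap; the mask mirrors GEN11_GT_VDBOX/VEBOX_DISABLE_MASK from the hunks that follow, and the function name is illustrative, not from the patch.

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Illustrative stand-in for GEN11_GT_VEBOX_DISABLE_MASK, bits 19:16 */
#define EXAMPLE_VEBOX_DISABLE_MASK GENMASK(19, 16)

static u32 example_vebox_mask(u32 media_fuse)
{
	/* before: (media_fuse & MASK) >> SHIFT; after: a single field extract */
	return FIELD_GET(EXAMPLE_VEBOX_DISABLE_MASK, media_fuse);
}

REG_FIELD_GET() behaves like FIELD_GET() with extra compile-time checking of the mask, which is what lets the register headers in this series drop every companion *_SHIFT definition.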
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 4904d0f4162c..8116fd5987e2 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -179,7 +179,7 @@ u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) return __gen2_emit_breadcrumb(rq, cs, 8, 8); } -/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */ #define I830_BATCH_LIMIT SZ_256K #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4d30a86016f2..b721bbd23356 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -308,7 +308,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) /* * There is a discrepancy here between the size reported * by the register and the size of the context layout - * in the docs. Both are described as authorative! + * in the docs. Both are described as authoritative! * * The discrepancy is on the order of a few cachelines, * but the total is under one page (4k), which is our @@ -677,7 +677,7 @@ void intel_engines_release(struct intel_gt *gt) * in case we aborted before completely initialising the engines. */ GEM_BUG_ON(intel_gt_pm_is_awake(gt)); - if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + if (!intel_gt_gpu_reset_clobbers_display(gt)) intel_gt_reset_all_engines(gt); /* Decouple the backend; but keep the layout for late GPU resets */ @@ -769,9 +769,8 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt) if (MEDIA_VER_FULL(i915) < IP_VER(12, 55)) media_fuse = ~media_fuse; - vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; - vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> - GEN11_GT_VEBOX_DISABLE_SHIFT; + vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse); + vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse); if (MEDIA_VER_FULL(i915) >= IP_VER(12, 55)) { fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1); @@ -845,7 +844,7 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) * Note that we have a catch-22 situation where we need to be able to access * the blitter forcewake domain to read the engine fuses, but at the same time * we need to know which engines are available on the system to know which - * forcewake domains are present. We solve this by intializing the forcewake + * forcewake domains are present. We solve this by initializing the forcewake * domains based on the full engine mask in the platform capabilities before * calling this function and pruning the domains for fused-off engines * afterwards. @@ -1411,7 +1410,7 @@ create_ggtt_bind_context(struct intel_engine_cs *engine) /* * MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple - * bind requets at a time so get a bigger ring. + * bind requests at a time so get a bigger ring. */ return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K, I915_GEM_HWS_GGTT_BIND_ADDR, @@ -1533,7 +1532,7 @@ int intel_engines_init(struct intel_gt *gt) /** * intel_engine_cleanup_common - cleans up the engine state created by - * the common initiailizers. + * the common initializers. * @engine: Engine to cleanup. * * This cleans up everything created by the common helpers. 
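Several hunks below (intel_execlists_submission.c, intel_rps.c, mock_engine.c and the selftests) are mechanical timer API renames with unchanged semantics: del_timer() becomes timer_delete(), del_timer_sync() becomes timer_delete_sync(), and from_timer() becomes timer_container_of(). A minimal sketch of the replacements, assuming an already set-up struct timer_list; the struct and function names here are illustrative only.

#include <linux/timer.h>

struct example_ctx {
	struct timer_list timer;
	int state;
};

static bool example_try_cancel(struct timer_list *t)
{
	/* timer_delete() returns true if the timer was still pending */
	return timer_delete(t);
}

static void example_shutdown(struct timer_list *t)
{
	/* timer_delete_sync() additionally waits for a running callback */
	timer_delete_sync(t);
}

static void example_callback(struct timer_list *t)
{
	/* timer_container_of() replaces from_timer(); container_of() semantics */
	struct example_ctx *ctx = timer_container_of(ctx, t, timer);

	ctx->state = 1; /* handler body elided */
}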
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index fe1f85e5dda3..155b6255a63e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -237,7 +237,7 @@ struct intel_engine_execlists { */ struct i915_request * const *active; /** - * @inflight: the set of contexts submitted and acknowleged by HW + * @inflight: the set of contexts submitted and acknowledged by HW * * The set of inflight contexts is managed by reading CS events * from the HW. On a context-switch event (not preemption), we @@ -260,7 +260,7 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @virtual: Queue of requets on a virtual engine, sorted by priority. + * @virtual: Queue of requests on a virtual engine, sorted by priority. * Each RB entry is a struct i915_priolist containing a list of requests * of the same priority. */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 4a80ffa1b962..03baa7fa0a27 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2502,7 +2502,7 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); ENGINE_TRACE(engine, "semaphore yield: %08x\n", engine->execlists.yield); - if (del_timer(&engine->execlists.timer)) + if (timer_delete(&engine->execlists.timer)) tasklet = true; } @@ -3370,8 +3370,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) static void execlists_shutdown(struct intel_engine_cs *engine) { /* Synchronise with residual timers and any softirq they raise */ - del_timer_sync(&engine->execlists.timer); - del_timer_sync(&engine->execlists.preempt); + timer_delete_sync(&engine->execlists.timer); + timer_delete_sync(&engine->execlists.preempt); tasklet_kill(&engine->sched_engine->tasklet); } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index f6c59f20832f..46a5aa4ab9c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -289,6 +289,14 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, return pte; } +static dma_addr_t gen8_ggtt_pte_decode(u64 pte, bool *is_present, bool *is_local) +{ + *is_present = pte & GEN8_PAGE_PRESENT; + *is_local = pte & GEN12_GGTT_PTE_LM; + + return pte & GEN12_GGTT_PTE_ADDR_MASK; +} + static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) { struct intel_gt *gt = ggtt->vm.gt; @@ -435,6 +443,11 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) writeq(pte, addr); } +static gen8_pte_t gen8_get_pte(void __iomem *addr) +{ + return readq(addr); +} + static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, @@ -450,6 +463,16 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, ggtt->invalidate(ggtt); } +static dma_addr_t gen8_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + return ggtt->vm.pte_decode(gen8_get_pte(pte), is_present, is_local); +} + static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm, dma_addr_t addr, u64 offset, unsigned int pat_index, u32 flags) @@ -605,6 +628,17 @@ static void gen6_ggtt_insert_page(struct i915_address_space 
*vm, ggtt->invalidate(ggtt); } +static dma_addr_t gen6_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, + bool *is_present, bool *is_local) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + return vm->pte_decode(ioread32(pte), is_present, is_local); +} + /* * Binds an object into the global gtt with the specified cache level. * The object will be accessible to the GPU via commands whose operands @@ -769,6 +803,14 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + return ggtt->vm.read_entry(vm, offset, is_present, is_local); +} + /* * Reserve the top of the GuC address space for firmware images. Addresses * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, @@ -1245,6 +1287,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.scratch_range = gen8_ggtt_clear_range; ggtt->vm.insert_entries = gen8_ggtt_insert_entries; + ggtt->vm.read_entry = gen8_ggtt_read_entry; /* * Serialize GTT updates with aperture access on BXT if VT-d is on, @@ -1291,6 +1334,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) else ggtt->vm.pte_encode = gen8_ggtt_pte_encode; + ggtt->vm.pte_decode = gen8_ggtt_pte_decode; + return ggtt_probe_common(ggtt, size); } @@ -1390,6 +1435,14 @@ static u64 iris_pte_encode(dma_addr_t addr, return pte; } +static dma_addr_t gen6_pte_decode(u64 pte, bool *is_present, bool *is_local) +{ + *is_present = pte & GEN6_PTE_VALID; + *is_local = false; + + return ((pte & 0xff0) << 28) | (pte & ~0xfff); +} + static int gen6_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; @@ -1428,6 +1481,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.scratch_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.read_entry = gen6_ggtt_read_entry; ggtt->vm.cleanup = gen6_gmch_remove; ggtt->invalidate = gen6_ggtt_invalidate; @@ -1443,6 +1497,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) else ggtt->vm.pte_encode = snb_pte_encode; + ggtt->vm.pte_decode = gen6_pte_decode; + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 0ffba50981e3..0c723e7c71a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -328,6 +328,7 @@ static bool fence_is_active(const struct i915_fence_reg *fence) static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) { + struct intel_display *display = &ggtt->vm.i915->display; struct i915_fence_reg *active = NULL; struct i915_fence_reg *fence, *fn; @@ -353,7 +354,7 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) } /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(ggtt->vm.i915)) + if (intel_has_pending_fb_unpin(display)) return ERR_PTR(-EAGAIN); return ERR_PTR(-ENOBUFS); @@ -749,7 +750,7 @@ static void swizzle_page(struct page *page) char *vaddr; int i; - vaddr = kmap(page); + vaddr = kmap_local_page(page); for (i = 0; i < PAGE_SIZE; i += 128) { memcpy(temp, &vaddr[i], 64); @@ -757,7 
+758,7 @@ static void swizzle_page(struct page *page) memcpy(&vaddr[i + 64], temp, 64); } - kunmap(page); + kunmap_local(vaddr); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c index 59eed0a0ce90..c5f5f0bdfb2c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c @@ -27,6 +27,13 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm, intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); } +static dma_addr_t gmch_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local) +{ + return intel_gmch_gtt_read_entry(offset >> PAGE_SHIFT, + is_present, is_local); +} + static void gmch_ggtt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, unsigned int pat_index, @@ -103,6 +110,7 @@ int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_entries = gmch_ggtt_insert_entries; ggtt->vm.clear_range = gmch_ggtt_clear_range; ggtt->vm.scratch_range = gmch_ggtt_clear_range; + ggtt->vm.read_entry = gmch_ggtt_read_entry; ggtt->vm.cleanup = gmch_ggtt_remove; ggtt->invalidate = gmch_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index c4a351ebf395..3d3b1ba76e2b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -302,25 +302,48 @@ static void gen6_check_faults(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - unsigned long fault; for_each_engine(engine, gt, id) { + u32 fault; + fault = GEN6_RING_FAULT_REG_READ(engine); + if (fault & RING_FAULT_VALID) { gt_dbg(gt, "Unexpected fault\n" - "\tAddr: 0x%08lx\n" + "\tAddr: 0x%08x\n" "\tAddress space: %s\n" - "\tSource ID: %ld\n" - "\tType: %ld\n", - fault & PAGE_MASK, + "\tSource ID: %d\n" + "\tType: %d\n", + fault & RING_FAULT_VADDR_MASK, fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); + REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault), + REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault)); } } } +static void gen8_report_fault(struct intel_gt *gt, u32 fault, + u32 fault_data0, u32 fault_data1) +{ + u64 fault_addr; + + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + + gt_dbg(gt, "Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + REG_FIELD_GET(RING_FAULT_ENGINE_ID_MASK, fault), + REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault), + REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault)); +} + static void xehp_check_faults(struct intel_gt *gt) { u32 fault; @@ -333,28 +356,10 @@ static void xehp_check_faults(struct intel_gt *gt) * toward the primary instance. */ fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); - if (fault & RING_FAULT_VALID) { - u32 fault_data0, fault_data1; - u64 fault_addr; - - fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0); - fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1); - - fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | - ((u64)fault_data0 << 12); - - gt_dbg(gt, "Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? 
"GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } + if (fault & RING_FAULT_VALID) + gen8_report_fault(gt, fault, + intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0), + intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1)); } static void gen8_check_faults(struct intel_gt *gt) @@ -374,28 +379,10 @@ static void gen8_check_faults(struct intel_gt *gt) } fault = intel_uncore_read(uncore, fault_reg); - if (fault & RING_FAULT_VALID) { - u32 fault_data0, fault_data1; - u64 fault_addr; - - fault_data0 = intel_uncore_read(uncore, fault_data0_reg); - fault_data1 = intel_uncore_read(uncore, fault_data1_reg); - - fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | - ((u64)fault_data0 << 12); - - gt_dbg(gt, "Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } + if (fault & RING_FAULT_VALID) + gen8_report_fault(gt, fault, + intel_uncore_read(uncore, fault_data0_reg), + intel_uncore_read(uncore, fault_data1_reg)); } void intel_gt_check_and_clear_faults(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 6e63505fe478..6c499692d61e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -35,9 +35,7 @@ static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore, u32 f24_mhz = 24000000; u32 f25_mhz = 25000000; u32 f38_4_mhz = 38400000; - u32 crystal_clock = - (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + u32 crystal_clock = rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK; switch (crystal_clock) { case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: @@ -80,8 +78,7 @@ static u32 gen11_read_clock_frequency(struct intel_uncore *uncore) * register increments from this frequency (it might * increment only every few clock cycle). */ - freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + freq >>= 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0); } return freq; @@ -102,8 +99,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore) * register increments from this frequency (it might * increment only every few clock cycle). */ - freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> - CTC_SHIFT_PARAMETER_SHIFT); + freq >>= 3 - REG_FIELD_GET(CTC_SHIFT_PARAMETER_MASK, ctc_reg); } return freq; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 1240d44eeb85..75e802e10be2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -480,7 +480,7 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt) gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(1), ~gt_interrupts[1], gt_interrupts[1]); /* * RPS interrupts will get enabled/disabled on demand when RPS itself - * is enabled/disabled. Same wil be the case for GuC interrupts. + * is enabled/disabled. Same will be the case for GuC interrupts. 
*/ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(2), gt->pm_imr, gt->pm_ier); gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(3), ~gt_interrupts[3], gt_interrupts[3]); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index b8912bd6c08e..a60822e2b5d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -121,9 +121,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->info.mslice_mask = intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask, GEN_DSS_PER_MSLICE); - gt->info.mslice_mask |= - (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & - GEN12_MEML3_EN_MASK); + gt->info.mslice_mask |= REG_FIELD_GET(GEN12_MEML3_EN_MASK, + intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3)); if (!gt->info.mslice_mask) /* should be impossible! */ gt_warn(gt, "mslice mask all zero!\n"); @@ -239,7 +238,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, * to remain in multicast mode for reads. There's no real * downside to this, so we'll just go ahead and do so on all * platforms; we'll only clear the multicast bit from the mask - * when exlicitly doing a write operation. + * when explicitly doing a write operation. */ if (rw_flag == FW_REG_WRITE) mcr_mask |= GEN11_MCR_MULTICAST; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index c08fdb65cc69..3182f19b9837 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -70,6 +70,7 @@ static int __gt_unpark(struct intel_wakeref *wf) { struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = &i915->display; GT_TRACE(gt, "\n"); @@ -84,7 +85,7 @@ static int __gt_unpark(struct intel_wakeref *wf) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. */ - gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + gt->awake = intel_display_power_get(display, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); intel_rc6_unpark(>->rc6); @@ -103,6 +104,7 @@ static int __gt_park(struct intel_wakeref *wf) struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); intel_wakeref_t wakeref = fetch_and_zero(>->awake); struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = &i915->display; GT_TRACE(gt, "\n"); @@ -120,7 +122,7 @@ static int __gt_park(struct intel_wakeref *wf) /* Defer dropping the display power well for 100ms, it's slow! 
*/ GEM_BUG_ON(!wakeref); - intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); + intel_display_power_put_async(display, POWER_DOMAIN_GT_IRQ, wakeref); return 0; } @@ -156,7 +158,7 @@ void intel_gt_pm_init(struct intel_gt *gt) static bool reset_engines(struct intel_gt *gt) { - if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + if (intel_gt_gpu_reset_clobbers_display(gt)) return false; return intel_gt_reset_all_engines(gt) == 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 6dba65e54cdb..7421ed18d8d1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -30,18 +30,15 @@ /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0 -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 -#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 -#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 2) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 3) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_BIT(3) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1) +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) #define RPM_CONFIG1 _MMIO(0xd04) #define GEN10_GT_NOA_ENABLE (1 << 9) @@ -326,6 +323,12 @@ _RING_FAULT_REG_VCS, \ _RING_FAULT_REG_VECS, \ _RING_FAULT_REG_BCS)) +#define RING_FAULT_VADDR_MASK REG_GENMASK(31, 12) /* pre-bdw */ +#define RING_FAULT_ENGINE_ID_MASK REG_GENMASK(16, 12) /* bdw+ */ +#define RING_FAULT_GTTSEL_MASK REG_BIT(11) /* pre-bdw */ +#define RING_FAULT_SRCID_MASK REG_GENMASK(10, 3) +#define RING_FAULT_FAULT_TYPE_MASK REG_GENMASK(2, 1) /* ivb+ */ +#define RING_FAULT_VALID REG_BIT(0) #define ERROR_GEN6 _MMIO(0x40a0) @@ -385,6 +388,8 @@ #define GEN8_FAULT_TLB_DATA0 _MMIO(0x4b10) #define GEN8_FAULT_TLB_DATA1 _MMIO(0x4b14) +#define FAULT_GTT_SEL REG_BIT(4) +#define FAULT_VA_HIGH_BITS REG_GENMASK(3, 0) #define GEN11_GACB_PERF_CTRL _MMIO(0x4b80) #define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0) @@ -409,6 +414,9 @@ #define GEN7_SO_PRIM_STORAGE_NEEDED(n) _MMIO(0x5240 + (n) * 8) #define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n) _MMIO(0x5240 + (n) * 8 + 4) +#define GEN8_WM_CHICKEN2 MCR_REG(0x5584) +#define WAIT_ON_DEPTH_STALL_DONE_DISABLE REG_BIT(5) + #define GEN9_WM_CHICKEN3 _MMIO(0x5588) 
#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) @@ -504,11 +512,12 @@ #define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) #define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4) +#define GEN9_PGCTL_SS_ACK(subslice) REG_BIT(2 + (subslice) * 2) +#define GEN9_PGCTL_SLICE_ACK REG_BIT(0) + #define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \ ((slice) % 3) * 0x4) -#define GEN9_PGCTL_SLICE_ACK (1 << 0) -#define GEN9_PGCTL_SS_ACK(subslice) (1 << (2 + (subslice) * 2)) -#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? 0x7F : 0x1F) +#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? REG_GENMASK(6, 0) : REG_GENMASK(4, 0)) #define GEN9_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + (slice) * 0x8) #define GEN10_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + ((slice) / 3) * 0x30 + \ @@ -516,14 +525,14 @@ #define GEN9_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + (slice) * 0x8) #define GEN10_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + ((slice) / 3) * 0x30 + \ ((slice) % 3) * 0x8) -#define GEN9_PGCTL_SSA_EU08_ACK (1 << 0) -#define GEN9_PGCTL_SSA_EU19_ACK (1 << 2) -#define GEN9_PGCTL_SSA_EU210_ACK (1 << 4) -#define GEN9_PGCTL_SSA_EU311_ACK (1 << 6) -#define GEN9_PGCTL_SSB_EU08_ACK (1 << 8) -#define GEN9_PGCTL_SSB_EU19_ACK (1 << 10) -#define GEN9_PGCTL_SSB_EU210_ACK (1 << 12) -#define GEN9_PGCTL_SSB_EU311_ACK (1 << 14) +#define GEN9_PGCTL_SSB_EU311_ACK REG_BIT(14) +#define GEN9_PGCTL_SSB_EU210_ACK REG_BIT(12) +#define GEN9_PGCTL_SSB_EU19_ACK REG_BIT(10) +#define GEN9_PGCTL_SSB_EU08_ACK REG_BIT(8) +#define GEN9_PGCTL_SSA_EU311_ACK REG_BIT(6) +#define GEN9_PGCTL_SSA_EU210_ACK REG_BIT(4) +#define GEN9_PGCTL_SSA_EU19_ACK REG_BIT(2) +#define GEN9_PGCTL_SSA_EU08_ACK REG_BIT(0) #define VF_PREEMPTION _MMIO(0x83a4) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) @@ -580,7 +589,7 @@ #define GEN10_L3BANK_MASK 0x0F /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ #define GEN12_MAX_MSLICES 4 -#define GEN12_MEML3_EN_MASK 0x0F +#define GEN12_MEML3_EN_MASK REG_GENMASK(3, 0) #define HSW_PAVP_FUSE1 _MMIO(0x911c) #define XEHP_SFC_ENABLE_MASK REG_GENMASK(27, 24) @@ -590,37 +599,30 @@ #define HSW_F1_EU_DIS_6EUS 2 #define GEN8_FUSE2 _MMIO(0x9120) -#define GEN8_F2_SS_DIS_SHIFT 21 -#define GEN8_F2_SS_DIS_MASK (0x7 << GEN8_F2_SS_DIS_SHIFT) -#define GEN8_F2_S_ENA_SHIFT 25 -#define GEN8_F2_S_ENA_MASK (0x7 << GEN8_F2_S_ENA_SHIFT) -#define GEN9_F2_SS_DIS_SHIFT 20 -#define GEN9_F2_SS_DIS_MASK (0xf << GEN9_F2_SS_DIS_SHIFT) -#define GEN10_F2_S_ENA_SHIFT 22 -#define GEN10_F2_S_ENA_MASK (0x3f << GEN10_F2_S_ENA_SHIFT) -#define GEN10_F2_SS_DIS_SHIFT 18 -#define GEN10_F2_SS_DIS_MASK (0xf << GEN10_F2_SS_DIS_SHIFT) +#define GEN10_F2_S_ENA_MASK REG_GENMASK(27, 22) +#define GEN10_F2_SS_DIS_MASK REG_GENMASK(21, 18) +#define GEN8_F2_S_ENA_MASK REG_GENMASK(27, 25) +#define GEN9_F2_SS_DIS_MASK REG_GENMASK(23, 20) +#define GEN8_F2_SS_DIS_MASK REG_GENMASK(23, 21) #define GEN8_EU_DISABLE0 _MMIO(0x9134) #define GEN9_EU_DISABLE(slice) _MMIO(0x9134 + (slice) * 0x4) #define GEN11_EU_DISABLE _MMIO(0x9134) -#define GEN8_EU_DIS0_S0_MASK 0xffffff -#define GEN8_EU_DIS0_S1_SHIFT 24 -#define GEN8_EU_DIS0_S1_MASK (0xff << GEN8_EU_DIS0_S1_SHIFT) -#define GEN11_EU_DIS_MASK 0xFF +#define GEN8_EU_DIS0_S1_MASK REG_GENMASK(31, 24) +#define GEN8_EU_DIS0_S0_MASK REG_GENMASK(23, 0) +#define GEN11_EU_DIS_MASK REG_GENMASK(7, 0) #define XEHP_EU_ENABLE _MMIO(0x9134) -#define XEHP_EU_ENA_MASK 0xFF +#define XEHP_EU_ENA_MASK REG_GENMASK(7, 0) #define GEN8_EU_DISABLE1 _MMIO(0x9138) -#define GEN8_EU_DIS1_S1_MASK 0xffff -#define 
GEN8_EU_DIS1_S2_SHIFT 16 -#define GEN8_EU_DIS1_S2_MASK (0xffff << GEN8_EU_DIS1_S2_SHIFT) +#define GEN8_EU_DIS1_S2_MASK REG_GENMASK(31, 16) +#define GEN8_EU_DIS1_S1_MASK REG_GENMASK(15, 0) #define GEN11_GT_SLICE_ENABLE _MMIO(0x9138) -#define GEN11_GT_S_ENA_MASK 0xFF +#define GEN11_GT_S_ENA_MASK REG_GENMASK(7, 0) #define GEN8_EU_DISABLE2 _MMIO(0x913c) -#define GEN8_EU_DIS2_S2_MASK 0xff +#define GEN8_EU_DIS2_S2_MASK REG_GENMASK(7, 0) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913c) #define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913c) @@ -628,9 +630,8 @@ #define GEN10_EU_DISABLE3 _MMIO(0x9140) #define GEN10_EU_DIS_SS_MASK 0xff #define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) -#define GEN11_GT_VDBOX_DISABLE_MASK 0xff -#define GEN11_GT_VEBOX_DISABLE_SHIFT 16 -#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) +#define GEN11_GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16) +#define GEN11_GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0) #define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148) @@ -878,11 +879,10 @@ /* GPM unit config (Gen9+) */ #define CTC_MODE _MMIO(0xa26c) -#define CTC_SOURCE_PARAMETER_MASK 1 -#define CTC_SOURCE_CRYSTAL_CLOCK 0 -#define CTC_SOURCE_DIVIDE_LOGIC 1 -#define CTC_SHIFT_PARAMETER_SHIFT 1 -#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) +#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) +#define CTC_SOURCE_PARAMETER_MASK REG_BIT(0) +#define CTC_SOURCE_CRYSTAL_CLOCK REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 0) +#define CTC_SOURCE_DIVIDE_LOGIC REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 1) /* GPM MSG_IDLE */ #define MSG_IDLE_CS _MMIO(0x8000) @@ -926,12 +926,12 @@ #define CHV_POWER_SS0_SIG1 _MMIO(0xa720) #define CHV_POWER_SS0_SIG2 _MMIO(0xa724) #define CHV_POWER_SS1_SIG1 _MMIO(0xa728) -#define CHV_SS_PG_ENABLE (1 << 1) -#define CHV_EU08_PG_ENABLE (1 << 9) -#define CHV_EU19_PG_ENABLE (1 << 17) -#define CHV_EU210_PG_ENABLE (1 << 25) +#define CHV_EU210_PG_ENABLE REG_BIT(25) +#define CHV_EU19_PG_ENABLE REG_BIT(17) +#define CHV_EU08_PG_ENABLE REG_BIT(9) +#define CHV_SS_PG_ENABLE REG_BIT(1) #define CHV_POWER_SS1_SIG2 _MMIO(0xa72c) -#define CHV_EU311_PG_ENABLE (1 << 1) +#define CHV_EU311_PG_ENABLE REG_BIT(1) #define GEN7_SARCHKMD _MMIO(0xb000) #define GEN7_DISABLE_DEMAND_PREFETCH (1 << 31) @@ -1035,17 +1035,12 @@ #define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8) #define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc) #define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc) -#define FAULT_VA_HIGH_BITS (0xf << 0) -#define FAULT_GTT_SEL (1 << 4) +/* see GEN8_FAULT_TLB_DATA0/1 */ #define GEN12_RING_FAULT_REG _MMIO(0xcec4) #define XEHP_RING_FAULT_REG MCR_REG(0xcec4) #define XELPMP_RING_FAULT_REG _MMIO(0xcec4) -#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) -#define RING_FAULT_GTTSEL_MASK (1 << 11) -#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) -#define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) -#define RING_FAULT_VALID (1 << 0) +/* see GEN8_RING_FAULT_REG */ #define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) #define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8) @@ -1434,16 +1429,12 @@ #define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) -#define CHV_FGT_DISABLE_SS0 (1 << 10) -#define CHV_FGT_DISABLE_SS1 (1 << 11) -#define CHV_FGT_EU_DIS_SS0_R0_SHIFT 16 -#define CHV_FGT_EU_DIS_SS0_R0_MASK (0xf << CHV_FGT_EU_DIS_SS0_R0_SHIFT) -#define CHV_FGT_EU_DIS_SS0_R1_SHIFT 20 -#define CHV_FGT_EU_DIS_SS0_R1_MASK (0xf << CHV_FGT_EU_DIS_SS0_R1_SHIFT) -#define 
CHV_FGT_EU_DIS_SS1_R0_SHIFT 24 -#define CHV_FGT_EU_DIS_SS1_R0_MASK (0xf << CHV_FGT_EU_DIS_SS1_R0_SHIFT) -#define CHV_FGT_EU_DIS_SS1_R1_SHIFT 28 -#define CHV_FGT_EU_DIS_SS1_R1_MASK (0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT) +#define CHV_FGT_EU_DIS_SS1_R1_MASK REG_GENMASK(31, 28) +#define CHV_FGT_EU_DIS_SS1_R0_MASK REG_GENMASK(27, 24) +#define CHV_FGT_EU_DIS_SS0_R1_MASK REG_GENMASK(23, 20) +#define CHV_FGT_EU_DIS_SS0_R0_MASK REG_GENMASK(19, 16) +#define CHV_FGT_DISABLE_SS1 REG_BIT(11) +#define CHV_FGT_DISABLE_SS0 REG_BIT(10) #define BCS_SWCTRL _MMIO(0x22200) #define BCS_SRC_Y REG_BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index d7784650e4d9..1154cd2b7c34 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -464,6 +464,45 @@ static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj, return err ?: count; } +static ssize_t slpc_power_profile_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + + switch (slpc->power_profile) { + case SLPC_POWER_PROFILES_BASE: + return sysfs_emit(buff, "[%s] %s\n", "base", "power_saving"); + case SLPC_POWER_PROFILES_POWER_SAVING: + return sysfs_emit(buff, "%s [%s]\n", "base", "power_saving"); + } + + return sysfs_emit(buff, "%u\n", slpc->power_profile); +} + +static ssize_t slpc_power_profile_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buff, size_t count) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + char power_saving[] = "power_saving"; + char base[] = "base"; + int err; + u32 val; + + if (!strncmp(buff, power_saving, sizeof(power_saving) - 1)) + val = SLPC_POWER_PROFILES_POWER_SAVING; + else if (!strncmp(buff, base, sizeof(base) - 1)) + val = SLPC_POWER_PROFILES_BASE; + else + return -EINVAL; + + err = intel_guc_slpc_set_power_profile(slpc, val); + return err ?: count; +} + struct intel_gt_bool_throttle_attr { struct attribute attr; ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, @@ -668,6 +707,7 @@ INTEL_GT_ATTR_RO(media_RP0_freq_mhz); INTEL_GT_ATTR_RO(media_RPn_freq_mhz); INTEL_GT_ATTR_RW(slpc_ignore_eff_freq); +INTEL_GT_ATTR_RW(slpc_power_profile); static const struct attribute *media_perf_power_attrs[] = { &attr_media_freq_factor.attr, @@ -864,6 +904,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) gt_warn(gt, "failed to create ignore_eff_freq sysfs (%pe)", ERR_PTR(ret)); } + if (intel_uc_uses_guc_slpc(>->uc)) { + ret = sysfs_create_file(kobj, &attr_slpc_power_profile.attr); + if (ret) + gt_warn(gt, "failed to create slpc_power_profile sysfs (%pe)", + ERR_PTR(ret)); + } + if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) { ret = sysfs_create_files(kobj, throttle_reason_attrs); if (ret) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 30b128b1fde7..afbc5c769308 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -176,7 +176,6 @@ static void clear_vm_list(struct list_head *list) i915_vma_destroy_locked(vma); i915_gem_object_put(obj); } - } } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 0a36ea751b63..9d3a3ad567a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ 
b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -312,6 +312,7 @@ struct i915_address_space { u64 (*pte_encode)(dma_addr_t addr, unsigned int pat_index, u32 flags); /* Create a valid PTE */ + dma_addr_t (*pte_decode)(u64 pte, bool *is_present, bool *is_local); #define PTE_READ_ONLY BIT(0) #define PTE_LM BIT(1) @@ -340,6 +341,8 @@ struct i915_address_space { struct i915_vma_resource *vma_res, unsigned int pat_index, u32 flags); + dma_addr_t (*read_entry)(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local); void (*cleanup)(struct i915_address_space *vm); void (*foreach)(struct i915_address_space *vm, @@ -590,6 +593,9 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm, void intel_ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res); +dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local); + int i915_ggtt_probe_hw(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); int i915_ggtt_enable_hw(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51847a846002..c481b56fa67d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -751,7 +751,6 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) /* * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 6f7af4077135..aff5aca591e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -304,7 +304,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m) struct intel_context *ce; /* - * We randomly distribute contexts across the engines upon constrction, + * We randomly distribute contexts across the engines upon construction, * as they all share the same pinned vm, and so in order to allow * multiple blits to run in parallel, we must construct each blit * to use a different range of the vm for its GTT. This has to be @@ -646,7 +646,7 @@ calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, * When CHUNK_SZ is passed all the pages upto CHUNK_SZ * will be taken for the blt. 
in Flat-ccs supported * platform Smem obj will have more pages than required - * for main meory hence limit it to the required size + * for main memory hence limit it to the required size * for main memory */ return min_t(u64, bytes_to_cpy, CHUNK_SZ); diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index d791d63d49b4..5dd8121f4b15 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -314,7 +314,6 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), /* WB - L3 */ @@ -675,7 +674,7 @@ void intel_mocs_init(struct intel_gt *gt) __init_mocs_table(gt->uncore, &table, global_mocs_offset()); /* - * Initialize the L3CC table as part of mocs initalization to make + * Initialize the L3CC table as part of mocs initialization to make * sure the LNCFCMOCSx registers are programmed for the subsequent * memory transactions including guc transactions */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9378d5901c49..9ca42589da4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,21 +117,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - /* - * BSpec 52698 - Render powergating must be off. - * FIXME BSpec is outdated, disabling powergating for MTL is just - * temporary wa and should be removed after fixing real cause - * of forcewake timeouts. - */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - pg_enable = - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; - else - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index aae5a081cb53..dbdcfe130ad4 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -986,7 +986,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) awake = reset_prepare(gt); /* Even if the GPU reset fails, it should still stop the engines */ - if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + if (!intel_gt_gpu_reset_clobbers_display(gt)) intel_gt_reset_all_engines(gt); for_each_engine(engine, gt, id) @@ -1098,7 +1098,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT); dma_fence_put(fence); - /* Restart iteration after droping lock */ + /* Restart iteration after dropping lock */ spin_lock(&timelines->lock); tl = list_entry(&timelines->active_list, typeof(*tl), link); } @@ -1106,14 +1106,13 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) /* We must reset pending GPU events before restoring our submission */ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ - if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + if (!intel_gt_gpu_reset_clobbers_display(gt)) ok = intel_gt_reset_all_engines(gt) == 0; if (!ok) { /* * Warn CI about the unrecoverable wedged condition. * Time for a reboot. 
*/ - gt_err(gt, "Unrecoverable wedged condition\n"); add_taint_for_CI(gt->i915, TAINT_WARN); return false; } @@ -1178,6 +1177,13 @@ static int resume(struct intel_gt *gt) return 0; } +bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + return INTEL_INFO(i915)->gpu_reset_clobbers_display; +} + /** * intel_gt_reset - reset chip after a hang * @gt: #intel_gt to reset @@ -1234,7 +1240,7 @@ void intel_gt_reset(struct intel_gt *gt, goto error; } - if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + if (intel_gt_gpu_reset_clobbers_display(gt)) intel_irq_suspend(gt->i915); if (do_reset(gt, stalled_mask)) { @@ -1242,7 +1248,7 @@ void intel_gt_reset(struct intel_gt *gt, goto taint; } - if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + if (intel_gt_gpu_reset_clobbers_display(gt)) intel_irq_resume(gt->i915); intel_overlay_reset(display); @@ -1265,10 +1271,8 @@ void intel_gt_reset(struct intel_gt *gt, } ret = resume(gt); - if (ret) { - gt_err(gt, "Failed to resume (%d)\n", ret); + if (ret) goto taint; - } finish: reset_finish(gt, awake); @@ -1396,6 +1400,11 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) return err; } +static void display_reset_modeset_stuck(void *gt) +{ + intel_gt_set_wedged(gt); +} + static void intel_gt_reset_global(struct intel_gt *gt, u32 engine_mask, const char *reason) @@ -1413,15 +1422,33 @@ static void intel_gt_reset_global(struct intel_gt *gt, /* Use a watchdog to ensure that our reset completes */ intel_wedge_on_timeout(&w, gt, 60 * HZ) { - intel_display_reset_prepare(gt->i915); + struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = &i915->display; + bool need_display_reset; + bool reset_display; + + need_display_reset = intel_gt_gpu_reset_clobbers_display(gt) && + intel_has_gpu_reset(gt); + + reset_display = intel_display_reset_test(display) || + need_display_reset; + + if (reset_display) + reset_display = intel_display_reset_prepare(display, + display_reset_modeset_stuck, + gt); intel_gt_reset(gt, engine_mask, reason); - intel_display_reset_finish(gt->i915); + if (reset_display) + intel_display_reset_finish(display, !need_display_reset); } if (!test_bit(I915_WEDGED, >->reset.flags)) kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); + else + drm_dev_wedged_event(>->i915->drm, + DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET); } /** @@ -1482,7 +1509,7 @@ void intel_gt_handle_error(struct intel_gt *gt, intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { - BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); + BUILD_BUG_ON(I915_RESET_BACKOFF >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) continue; @@ -1611,7 +1638,6 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) set_bit(I915_WEDGED_ON_INIT, >->reset.flags); /* Wedged on init is non-recoverable */ - gt_err(gt, "Non-recoverable wedged on init\n"); add_taint_for_CI(gt->i915, TAINT_WARN); } diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index c00de353075c..724ea6d64f33 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -28,6 +28,8 @@ void intel_gt_handle_error(struct intel_gt *gt, const char *fmt, ...); #define I915_ERROR_CAPTURE BIT(0) +bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt); + void intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t 
stalled_mask, const char *reason); diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 80351f0a856c..4f5fd393af6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -41,8 +41,7 @@ struct intel_reset { */ unsigned long flags; #define I915_RESET_BACKOFF 0 -#define I915_RESET_MODESET 1 -#define I915_RESET_ENGINE 2 +#define I915_RESET_ENGINE 1 #define I915_WEDGED_ON_INIT (BITS_PER_LONG - 3) #define I915_WEDGED_ON_FINI (BITS_PER_LONG - 2) #define I915_WEDGED (BITS_PER_LONG - 1) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 458e29d89978..a876a34455f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -242,7 +242,7 @@ static int xcs_resume(struct intel_engine_cs *engine) /* * In case of resets fails because engine resumes from * incorrect RING_HEAD and then GPU may be then fed - * to invalid instrcutions, which may lead to unrecoverable + * to invalid instructions, which may lead to unrecoverable * hang. So at first write doesn't succeed then try again. */ ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); @@ -365,7 +365,13 @@ static void reset_prepare(struct intel_engine_cs *engine) ENGINE_READ_FW(engine, RING_HEAD), ENGINE_READ_FW(engine, RING_TAIL), ENGINE_READ_FW(engine, RING_START)); - if (!stop_ring(engine)) { + /* + * Sometimes engine head failed to set to zero even after writing into it. + * Use wait_for_atomic() with 20ms delay to let engine resumes from + * correct RING_HEAD. Experimented different values and determined + * that 20ms works best based on testing. + */ + if (wait_for_atomic((!stop_ring(engine) == 0), 20)) { drm_err(&engine->i915->drm, "failed to set %s head to zero " "ctl %08x head %08x tail %08x start %08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fa304ea088e4..eb89948cc112 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -8,7 +8,7 @@ #include <drm/intel/i915_drm.h> #include "display/intel_display.h" -#include "display/intel_display_irq.h" +#include "display/intel_display_rps.h" #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -74,7 +74,7 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) static void rps_timer(struct timer_list *t) { - struct intel_rps *rps = from_timer(rps, t, timer); + struct intel_rps *rps = timer_container_of(rps, t, timer); struct intel_gt *gt = rps_to_gt(rps); struct intel_engine_cs *engine; ktime_t dt, last, timestamp; @@ -161,7 +161,7 @@ static void rps_start_timer(struct intel_rps *rps) static void rps_stop_timer(struct intel_rps *rps) { - del_timer_sync(&rps->timer); + timer_delete_sync(&rps->timer); rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); cancel_work_sync(&rps->work); } @@ -550,6 +550,7 @@ static unsigned int init_emon(struct intel_uncore *uncore) static bool gen5_rps_enable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_display *display = &i915->display; struct intel_uncore *uncore = rps_to_uncore(rps); u8 fstart, vstart; u32 rgvmodectl; @@ -607,9 +608,7 @@ static bool gen5_rps_enable(struct intel_rps *rps) rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); rps->ips.last_time2 = ktime_get_raw_ns(); - spin_lock(&i915->irq_lock); - ilk_enable_display_irq(i915, DE_PCU_EVENT); - 
spin_unlock(&i915->irq_lock); + ilk_display_rps_enable(display); spin_unlock_irq(&mchdev_lock); @@ -621,14 +620,13 @@ static bool gen5_rps_enable(struct intel_rps *rps) static void gen5_rps_disable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_display *display = &i915->display; struct intel_uncore *uncore = rps_to_uncore(rps); u16 rgvswctl; spin_lock_irq(&mchdev_lock); - spin_lock(&i915->irq_lock); - ilk_disable_display_irq(i915, DE_PCU_EVENT); - spin_unlock(&i915->irq_lock); + ilk_display_rps_disable(display); rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); @@ -1001,6 +999,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1025,11 +1027,19 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); - if (slpc->min_freq_softlimit >= slpc->boost_freq) + /* Waitboost should not be done with power saving profile */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if frequency is already boosted, + * but still increment num_waiters. + */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 6507fa3f6d1e..ece445109305 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -40,7 +40,7 @@ enum { /** * struct intel_rps_freq_caps - rps freq capabilities * @rp0_freq: non-overclocked max frequency - * @rp1_freq: "less than" RP0 power/freqency + * @rp1_freq: "less than" RP0 power/frequency * @min_freq: aka RPn, minimum frequency * * Freq caps exposed by HW, values are in "hw units" and intel_gpu_freq() @@ -57,7 +57,7 @@ struct intel_rps { /* * work, interrupts_enabled and pm_iir are protected by - * i915->irq_lock + * gt->irq_lock */ struct timer_list timer; struct work_struct work; @@ -90,7 +90,7 @@ struct intel_rps { u8 boost_freq; /* Frequency to request when wait boosting */ u8 idle_freq; /* Frequency to request when we are idle */ u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ - u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp1_freq; /* "less than" RP0 power/frequency */ u8 rp0_freq; /* Non-overclocked max frequency. */ u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c index 8c1dbcbcbc4f..2945526d52d1 100644 --- a/drivers/gpu/drm/i915/gt/intel_sa_media.c +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -27,7 +27,7 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, /* * Standalone media shares the general MMIO space with the primary - * GT. We'll re-use the primary GT's mapping. + * GT. We'll reuse the primary GT's mapping. 
*/ uncore->regs = intel_uncore_regs(&i915->uncore); if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index c8fadf58d836..9501d323d0d3 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -236,7 +236,8 @@ static void xehp_sseu_info_init(struct intel_gt *gt) GEN12_GT_COMPUTE_DSS_ENABLE, XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); - eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + eu_en_fuse = REG_FIELD_GET(XEHP_EU_ENA_MASK, + intel_uncore_read(uncore, XEHP_EU_ENABLE)); if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915)) eu_en = eu_en_fuse; @@ -269,15 +270,15 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Although gen12 architecture supported multiple slices, TGL, RKL, * DG1, and ADL only had a single slice. */ - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK, + intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE)); drm_WARN_ON(>->i915->drm, s_en != 0x1); g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); /* one bit per pair of EUs */ - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + eu_en_fuse = ~REG_FIELD_GET(GEN11_EU_DIS_MASK, + intel_uncore_read(uncore, GEN11_EU_DISABLE)); for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) @@ -306,14 +307,14 @@ static void gen11_sseu_info_init(struct intel_gt *gt) * Although gen11 architecture supported multiple slices, ICL and * EHL/JSL only had a single slice in practice. */ - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK, + intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE)); drm_WARN_ON(>->i915->drm, s_en != 0x1); ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE); - eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + eu_en = ~REG_FIELD_GET(GEN11_EU_DIS_MASK, + intel_uncore_read(uncore, GEN11_EU_DISABLE)); gen11_compute_sseu_info(sseu, ss_en, eu_en); @@ -335,10 +336,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) if (!(fuse & CHV_FGT_DISABLE_SS0)) { u8 disabled_mask = - ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> - CHV_FGT_EU_DIS_SS0_R0_SHIFT) | - (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> - CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); + REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R0_MASK, fuse) | + REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS0_R0_MASK); sseu->subslice_mask.hsw[0] |= BIT(0); sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF); @@ -346,10 +345,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) if (!(fuse & CHV_FGT_DISABLE_SS1)) { u8 disabled_mask = - ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> - CHV_FGT_EU_DIS_SS1_R0_SHIFT) | - (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> - CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); + REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R0_MASK, fuse) | + REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS1_R0_MASK); sseu->subslice_mask.hsw[0] |= BIT(1); sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF); @@ -385,7 +382,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt) int s, ss; fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); - sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2); /* BXT has a single slice and at most 3 subslices. */ intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 
1 : 3, @@ -396,8 +393,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt) * to each of the enabled slices. */ subslice_mask = (1 << sseu->max_subslices) - 1; - subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> - GEN9_F2_SS_DIS_SHIFT); + subslice_mask &= ~REG_FIELD_GET(GEN9_F2_SS_DIS_MASK, fuse2); /* * Iterate through enabled slices and subslices to @@ -490,7 +486,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt) u32 eu_disable0, eu_disable1, eu_disable2; fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); - sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2); intel_sseu_set_info(sseu, 3, 3, 8); /* @@ -498,18 +494,18 @@ static void bdw_sseu_info_init(struct intel_gt *gt) * to each of the enabled slices. */ subslice_mask = GENMASK(sseu->max_subslices - 1, 0); - subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> - GEN8_F2_SS_DIS_SHIFT); + subslice_mask &= ~REG_FIELD_GET(GEN8_F2_SS_DIS_MASK, fuse2); eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0); eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1); eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2); - eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK; - eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) | - ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) << - (32 - GEN8_EU_DIS0_S1_SHIFT)); - eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) | - ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) << - (32 - GEN8_EU_DIS1_S2_SHIFT)); + eu_disable[0] = + REG_FIELD_GET(GEN8_EU_DIS0_S0_MASK, eu_disable0); + eu_disable[1] = + REG_FIELD_GET(GEN8_EU_DIS0_S1_MASK, eu_disable0) | + REG_FIELD_GET(GEN8_EU_DIS1_S1_MASK, eu_disable1) << hweight32(GEN8_EU_DIS0_S1_MASK); + eu_disable[2] = + REG_FIELD_GET(GEN8_EU_DIS1_S2_MASK, eu_disable1) | + REG_FIELD_GET(GEN8_EU_DIS2_S2_MASK, eu_disable2) << hweight32(GEN8_EU_DIS1_S2_MASK); /* * Iterate through enabled slices and subslices to @@ -687,7 +683,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, * According to documentation software must consider the configuration * as 2x4x8 and hardware will translate this to 1x8x8. * - * Furthemore, even though SScount is three bits, maximum documented + * Furthermore, even though SScount is three bits, maximum documented * value for it is four. From this some rules/restrictions follow: * * 1. diff --git a/drivers/gpu/drm/i915/gt/intel_wopcm.h b/drivers/gpu/drm/i915/gt/intel_wopcm.h index 17d6aa86008a..d2038b6de5e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_wopcm.h +++ b/drivers/gpu/drm/i915/gt/intel_wopcm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017-2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 570c91878189..b37e400f74e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -156,7 +156,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. 
*/ struct i915_wa *list; - list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), + list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*list), GFP_KERNEL); if (!list) { drm_err(&i915->drm, "No space for workaround init!\n"); @@ -691,16 +691,17 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, struct drm_i915_private *i915 = engine->i915; /* - * Wa_1409142259:tgl,dg1,adl-p + * Wa_1409142259:tgl,dg1,adl-p,adl-n * Wa_1409347922:tgl,dg1,adl-p * Wa_1409252684:tgl,dg1,adl-p * Wa_1409217633:tgl,dg1,adl-p * Wa_1409207793:tgl,dg1,adl-p - * Wa_1409178076:tgl,dg1,adl-p - * Wa_1408979724:tgl,dg1,adl-p - * Wa_14010443199:tgl,rkl,dg1,adl-p - * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p - * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p + * Wa_1409178076:tgl,dg1,adl-p,adl-n + * Wa_1408979724:tgl,dg1,adl-p,adl-n + * Wa_14010443199:tgl,rkl,dg1,adl-p,adl-n + * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p,adl-n + * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p,adl-n + * Wa_22010465259:tgl,rkl,dg1,adl-s,adl-p,adl-n */ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); @@ -741,6 +742,12 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1606376872 */ wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC); } + + /* + * This bit must be set to enable performance optimization for fast + * clears. + */ + wa_mcr_write_or(wal, GEN8_WM_CHICKEN2, WAIT_ON_DEPTH_STALL_DONE_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -1318,7 +1325,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) * We'll do our default/implicit steering based on GSLICE (in the * sliceid field) and DSS (in the subsliceid field). If we can * find overlap between the valid MSLICE and/or LNCF values with - * a suitable GSLICE, then we can just re-use the default value and + * a suitable GSLICE, then we can just reuse the default value and * skip and explicit steering at runtime. * * We only need to look for overlap between GSLICE/MSLICE/LNCF to find diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index c0637bf799a3..79741f043f03 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -108,7 +108,7 @@ static void advance(struct i915_request *request) static void hw_delay_complete(struct timer_list *t) { - struct mock_engine *engine = from_timer(engine, t, hw_delay); + struct mock_engine *engine = timer_container_of(engine, t, hw_delay); struct i915_request *request; unsigned long flags; @@ -297,7 +297,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) struct i915_request *rq; unsigned long flags; - del_timer_sync(&mock->hw_delay); + timer_delete_sync(&mock->hw_delay); spin_lock_irqsave(&engine->sched_engine->lock, flags); @@ -432,7 +432,7 @@ void mock_engine_flush(struct intel_engine_cs *engine) container_of(engine, typeof(*mock), base); struct i915_request *request, *rn; - del_timer_sync(&mock->hw_delay); + timer_delete_sync(&mock->hw_delay); spin_lock_irq(&mock->hw_lock); list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 81c31396eceb..0454eb1814bb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -53,7 +53,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, if (i915_request_completed(rq)) /* that was quick! 
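
The mock_engine hunks above are part of a tree-wide rename of the kernel timer API: from_timer() became timer_container_of() and del_timer_sync() became timer_delete_sync(), with identical semantics. A kernel-style sketch of the pattern, with the struct and names assumed for illustration:

#include <linux/timer.h>

struct mock_hw {
	struct timer_list hw_delay;
};

static void hw_delay_expired(struct timer_list *t)
{
	/* recover the embedding struct from its timer_list member */
	struct mock_hw *hw = timer_container_of(hw, t, hw_delay);

	/* ... complete the delayed request ... */
}

static void mock_hw_stop(struct mock_hw *hw)
{
	/* wait for any running callback to finish before tearing down */
	timer_delete_sync(&hw->hw_delay);
}
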
*/ return 0; - /* Wait until the HW has acknowleged the submission (or err) */ + /* Wait until the HW has acknowledged the submission (or err) */ intel_engine_flush_submission(engine); if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) return 0; @@ -1198,7 +1198,7 @@ static int live_timeslice_rewind(void *arg) ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ /* Wait for the timeslice to kick in */ - del_timer(&engine->execlists.timer); + timer_delete(&engine->execlists.timer); tasklet_hi_schedule(&engine->sched_engine->tasklet); intel_engine_flush_submission(engine); } @@ -2357,7 +2357,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) /* force preempt reset [failure] */ while (!engine->execlists.pending[0]) intel_engine_flush_submission(engine); - del_timer_sync(&engine->execlists.preempt); + timer_delete_sync(&engine->execlists.preempt); intel_engine_flush_submission(engine); cancel_reset_timeout(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 9d3aeb237295..f057c16410e7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -548,7 +548,7 @@ static int igt_reset_fail_engine(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - /* Check that we can recover from engine-reset failues */ + /* Check that we can recover from engine-reset failures */ if (!intel_has_reset_engine(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e17b8777d21d..23f04f6f8fba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include "gem/i915_gem_internal.h" +#include "i915_drv.h" #include "i915_selftest.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" @@ -63,7 +64,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, if (i915_request_completed(rq)) /* that was quick! */ return 0; - /* Wait until the HW has acknowleged the submission (or err) */ + /* Wait until the HW has acknowledged the submission (or err) */ intel_engine_flush_submission(engine); if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) return 0; @@ -859,6 +860,14 @@ static int live_lrc_timestamp(void *arg) }; /* + * This test was designed to isolate a hardware bug. + * The bug was found and fixed in future generations but + * now the test pollutes our CI on previous generation. + */ + if (GRAPHICS_VER(gt->i915) == 12) + return 0; + + /* * We want to verify that the timestamp is saved and restore across * context switches and is monotonic. * diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 1bf7b88d9a9d..54bc447efce0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -537,7 +537,7 @@ struct spinner_timer { static void spinner_kill(struct timer_list *timer) { - struct spinner_timer *st = from_timer(st, timer, timer); + struct spinner_timer *st = timer_container_of(st, timer, timer); igt_spinner_end(&st->spin); pr_info("%s\n", __func__); @@ -660,8 +660,8 @@ static int live_emit_pte_full_ring(void *arg) out_rq: i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? 
*/ - del_timer_sync(&st.timer); - destroy_timer_on_stack(&st.timer); + timer_delete_sync(&st.timer); + timer_destroy_on_stack(&st.timer); out_unpin: intel_context_unpin(ce); out_put: diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 27b6d51ef145..41716ed454b7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -33,15 +33,22 @@ int live_rc6_manual(void *arg) { struct intel_gt *gt = arg; struct intel_rc6 *rc6 = &gt->rc6; - u64 rc0_power, rc6_power; + struct intel_rps *rps = &gt->rps; intel_wakeref_t wakeref; + u64 rc0_sample_energy[2]; + u64 rc6_sample_energy[2]; + u64 sleep_time = 1000; + u32 rc0_freq = 0; + u32 rc6_freq = 0; + u64 rc0_power; + u64 rc6_power; bool has_power; + u64 threshold; ktime_t dt; u64 res[2]; int err = 0; - u32 rc0_freq = 0; - u32 rc6_freq = 0; - struct intel_rps *rps = &gt->rps; + u64 diff; + /* * Our claim is that we can "encourage" the GPU to enter rc6 at will. @@ -60,14 +67,15 @@ int live_rc6_manual(void *arg) /* Force RC6 off for starters */ __intel_rc6_disable(rc6); - msleep(1); /* wakeup is not immediate, takes about 100us on icl */ + /* wakeup is not immediate, takes about 100us on icl */ + usleep_range(1000, 2000); res[0] = rc6_residency(rc6); dt = ktime_get(); - rc0_power = librapl_energy_uJ(); - msleep(1000); - rc0_power = librapl_energy_uJ() - rc0_power; + rc0_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); + rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); rc0_freq = intel_rps_read_actual_frequency_fw(rps); @@ -79,11 +87,12 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, + rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1], ktime_to_ns(dt)); + if (!rc0_power) { if (rc0_freq) - pr_debug("No power measured while in RC0! GPU Freq: %u in RC0\n", + pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n", rc0_freq); else pr_err("No power and freq measured while in RC0\n"); @@ -98,10 +107,10 @@ int live_rc6_manual(void *arg) res[0] = rc6_residency(rc6); intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); dt = ktime_get(); - rc6_power = librapl_energy_uJ(); - msleep(1000); + rc6_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); rc6_freq = intel_rps_read_actual_frequency_fw(rps); - rc6_power = librapl_energy_uJ() - rc6_power; + rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); if (res[1] == res[0]) { @@ -113,13 +122,24 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, + rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1], ktime_to_ns(dt)); - pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", + pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n", rc0_power, rc6_power); + if (2 * rc6_power > rc0_power) { - pr_err("GPU leaked energy while in RC6!
GPU Freq: %u in RC6 and %u in RC0\n", - rc6_freq, rc0_freq); + pr_err("GPU leaked energy while in RC6!\n" + "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n" + "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n" + "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n", + rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1], + rc6_sample_energy[0], rc6_sample_energy[1]); + + diff = res[1] - res[0]; + threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10; + if (diff < threshold) + pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n", + res[0], res[1]); err = -EINVAL; goto out_unlock; } @@ -222,7 +242,7 @@ int live_rc6_ctx_wa(void *arg) i915_reset_engine_count(error, engine); const u32 *res; - /* Use a sacrifical context */ + /* Use a sacrificial context */ ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index c207a4fb03bf..73bc91c6ea07 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -22,7 +22,7 @@ #include "selftests/igt_spinner.h" #include "selftests/librapl.h" -/* Try to isolate the impact of cstates from determing frequency response */ +/* Try to isolate the impact of cstates from determining frequency response */ #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ static void dummy_rps_work(struct work_struct *wrk) @@ -477,12 +477,13 @@ int live_rps_control(void *arg) limit, intel_gpu_freq(rps, limit), min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); - if (limit == rps->min_freq) { - pr_err("%s: GPU throttled to minimum!\n", - engine->name); + if (limit != rps->max_freq) { + u32 throttle = intel_uncore_read(gt->uncore, + intel_gt_perf_limit_reasons_reg(gt)); + + pr_warn("%s: GPU throttled with reasons 0x%08x\n", + engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK); show_pstate_limits(rps); - err = -ENODEV; - break; } if (igt_flush_test(gt->i915)) { @@ -1115,7 +1116,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq) for (i = 0; i < 5; i++) x[i] = __measure_power(5); - *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; + *freq = (*freq + read_cagf(rps)) / 2; /* A simple triangle filter for better result stability */ sort(x, 5, sizeof(*x), cmp_u64, NULL); diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index e218b229681f..e61bb0bad12c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -95,6 +95,21 @@ static int slpc_restore_freq(struct intel_guc_slpc *slpc, u32 min, u32 max) return 0; } +static u64 slpc_measure_power(struct intel_rps *rps, int *freq) +{ + u64 x[5]; + int i; + + for (i = 0; i < 5; i++) + x[i] = __measure_power(5); + + *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) { int err = 0; @@ -103,7 +118,7 @@ static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) if (err) return err; *freq = intel_rps_read_actual_frequency(&gt->rps); - *power = measure_power(&gt->rps, freq); + *power = slpc_measure_power(&gt->rps, freq); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 3941f2d6fa47..69ed946a39e5
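
The arithmetic in these reworked selftests is compact enough to restate: librapl_energy_uJ() returns microjoules and the ktime delta is in nanoseconds, so scaling by NSEC_PER_SEC turns uJ/ns into microwatts, and the five power samples are combined with a sorted 1:2:1 triangle filter. A standalone sketch of both computations, with the sample values assumed:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NSEC_PER_SEC 1000000000ULL

/* the div64_u64() expression in the test: uJ over ns, scaled to uW */
static uint64_t power_uW(uint64_t energy_uJ, uint64_t dt_ns)
{
	return NSEC_PER_SEC * energy_uJ / dt_ns;
}

static int cmp_u64(const void *a, const void *b)
{
	const uint64_t *x = a, *y = b;

	return (*x > *y) - (*x < *y);
}

/* the "simple triangle filter": weight the sorted middle samples 1:2:1 */
static uint64_t triangle_filter(uint64_t x[5])
{
	qsort(x, 5, sizeof(*x), cmp_u64);
	return (x[1] + 2 * x[2] + x[3]) / 4;
}

int main(void)
{
	uint64_t samples[5] = { 980, 1010, 1000, 1200, 990 }; /* assumed uW readings */

	printf("%llu uW\n", (unsigned long long)power_uW(1000, NSEC_PER_SEC));
	printf("filtered: %llu uW\n", (unsigned long long)triangle_filter(samples));
	return 0;
}
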
100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -143,7 +143,7 @@ pte_tlbinv(struct intel_context *ce, if (ce->engine->class == OTHER_CLASS) msleep(200); else - msleep(10); + usleep_range(10000, 20000); if (va == vb) { if (!i915_request_completed(rq)) { diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README index e7e96d7073c7..22f8dabed434 100644 --- a/drivers/gpu/drm/i915/gt/shaders/README +++ b/drivers/gpu/drm/i915/gt/shaders/README @@ -10,7 +10,7 @@ i915/gt/shaders/clear_kernel directory. The generated .c files should never be modified directly. Instead, any modification needs to be done on the on their respective ASM files and build instructions below -needes to be followed. +needs to be followed. Building ======== @@ -24,7 +24,7 @@ on building. Please make sure your Mesa tool is compiled with "-Dtools=intel" and "-Ddri-drivers=i965", and run this script from IGT source root directory" -The instructions bellow assume: +The instructions below assume: * IGT gpu tools source code is located on your home directory (~) as ~/igt * Mesa source code is located on your home directory (~) as ~/mesa and built under the ~/mesa/build directory @@ -43,4 +43,4 @@ igt $ ./scripts/generate_clear_kernel.sh -g ivb \ ~/igt/lib/i915/shaders/clear_kernel/hsw.asm ~ $ cd ~/igt igt $ ./scripts/generate_clear_kernel.sh -g hsw \ - -m ~/mesa/build/src/intel/tools/i965_asm
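
The msleep() to usleep_range() switch in selftest_tlb.c above follows the kernel's guidance for sub-20ms delays: msleep() rounds up to jiffies, so a nominal 10ms sleep can stretch to 20ms on HZ=100, whereas usleep_range() is hrtimer-backed and honours an explicit window. A kernel-style sketch of the idiom, with the helper name assumed:

#include <linux/delay.h>

static void settle_after_invalidate(void)
{
	/* 10-20ms window; hrtimer-backed, unlike jiffy-rounded msleep() */
	usleep_range(10000, 20000);
}
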
\ No newline at end of file + -m ~/mesa/build/src/intel/tools/i965_asm diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm index 5fdf384bb621..6c0c89daf96c 100644 --- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm @@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; * DW 1.4 - Rsvd (intended for context ID) * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) - * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * DW 1.7 - Rsvd MBZ (intended for Total Thread Count) * * Binding Table * diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm index 97c7ac9e3854..27c28e63d6cc 100644 --- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm @@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; * DW 1.4 - Rsvd (intended for context ID) * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) - * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * DW 1.7 - Rsvd MBZ (intended for Total Thread Count) * * Binding Table * diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index bb696b29ee2c..365c4b8b04f4 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -108,7 +108,7 @@ static int __shmem_rw(struct file *file, loff_t off, if (IS_ERR(page)) return PTR_ERR(page); - vaddr = kmap(page); + vaddr = kmap_local_page(page); if (write) { memcpy(vaddr + offset_in_page(off), ptr, this); set_page_dirty(page); @@ -116,7 +116,7 @@ static int __shmem_rw(struct file *file, loff_t off, memcpy(ptr, vaddr + offset_in_page(off), this); } mark_page_accessed(page); - kunmap(page); + kunmap_local(vaddr); put_page(page); len -= this; @@ -143,11 +143,11 @@ int shmem_read_to_iosys_map(struct file *file, loff_t off, if (IS_ERR(page)) return PTR_ERR(page); - vaddr = kmap(page); + vaddr = kmap_local_page(page); iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off), this); mark_page_accessed(page); - kunmap(page); + kunmap_local(vaddr); put_page(page); len -= this; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index c34674e797c6..6de87ae5669e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -228,6 +228,11 @@ struct slpc_optimized_strategies { #define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0) +enum slpc_power_profiles { + SLPC_POWER_PROFILES_BASE = 0x0, + SLPC_POWER_PROFILES_POWER_SAVING = 0x1 +}; + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h index 1fc0c17b1230..803c0379d97d 100644 --- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h @@ -81,7 +81,7 @@ struct guc_debug_capture_list { * * intel_guc_capture module uses these structures to maintain static * tables (per unique platform) that consists of lists of registers - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) 
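
The shmem_utils.c conversion above replaces kmap()/kunmap() with kmap_local_page()/kunmap_local(). The local mapping avoids the global kmap pool, but it is only valid on the current CPU and must be released in reverse order of acquisition, which a short bounce-copy helper satisfies naturally. A kernel-style sketch, with the helper name assumed:

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_from_page(void *dst, struct page *page,
			   size_t offset, size_t len)
{
	void *vaddr = kmap_local_page(page);

	memcpy(dst, vaddr + offset, len);
	kunmap_local(vaddr);
}
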
that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. */ @@ -200,7 +200,7 @@ struct intel_guc_state_capture { * dynamically allocate new nodes when receiving the G2H notification * because the event handlers for all G2H event-processing is called * by the ct processing worker queue and when that queue is being - * processed, there is no absoluate guarantee that we are not in the + * processed, there is no absolute guarantee that we are not in the * midst of a GT reset operation (which doesn't allow allocations). */ struct list_head cachelist; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 5dc0ccd07636..d550eb6edfb8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -230,7 +230,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x", gsc->release.major, gsc->release.minor, gsc->release.patch, gsc->release.build); - return -EINVAL; + return -EINVAL; } if (min_ver.major) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5949ff0b0161..9df80c325fc1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -690,7 +690,7 @@ int intel_guc_suspend(struct intel_guc *guc) * H2G MMIO command completes. * * Don't abort on a failure code from the GuC. Keep going and do the - * clean up in santize() and re-initialisation on resume and hopefully + * clean up in sanitize() and re-initialisation on resume and hopefully * the error here won't be problematic. */ ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 57b903132776..053780f562c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -295,7 +295,7 @@ struct intel_guc { */ struct work_struct dead_guc_worker; /** - * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance + * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrence * used to prevent a fundamentally broken system from continuously * reloading the GuC. */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index fe53e8eccf4b..e7ccfa520df3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -259,13 +259,14 @@ static int guc_wait_ucode(struct intel_guc *guc) } else if (delta_ms > 200) { guc_warn(guc, "excessive init time: %lldms! 
[status = 0x%08X, count = %d, ret = %d]\n", delta_ms, status, count, ret); - guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", - intel_rps_read_actual_frequency(&gt->rps), before_freq, + guc_warn(guc, "excessive init time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n", + before_freq, intel_rps_read_actual_frequency(&gt->rps), + intel_rps_get_requested_frequency(&gt->rps), intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { - guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", - delta_ms, intel_rps_read_actual_frequency(&gt->rps), - before_freq, status, count, ret); + guc_dbg(guc, "init took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps), + intel_rps_get_requested_frequency(&gt->rps), status, count, ret); } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 4ce6e2332a63..eded00f0c7e1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -408,7 +408,7 @@ enum guc_capture_type { GUC_CAPTURE_LIST_TYPE_MAX, }; -/* Class indecies for capture_class and capture_instance arrays */ +/* Class indices for capture_class and capture_instance arrays */ enum { GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0, GUC_CAPTURE_LIST_CLASS_VIDEO = 1, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index b67a15f74276..868195c33f5b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -7,6 +7,7 @@ #include "gt/intel_hwconfig.h" #include "i915_drv.h" #include "i915_memcpy.h" +#include "intel_guc_print.h" /* * GuC has a blob containing hardware configuration information (HWConfig). @@ -42,6 +43,8 @@ static int __guc_action_get_hwconfig(struct intel_guc *guc, }; int ret; + guc_dbg(guc, "Querying HW config table: size = %d, offset = 0x%08X\n", + ggtt_size, ggtt_offset); ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (ret == -ENXIO) return -ENOENT; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 706fffca698b..d5ee6e5e1443 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -15,6 +15,34 @@ #include "gt/intel_gt_regs.h" #include "gt/intel_rps.h" +/** + * DOC: SLPC - Dynamic Frequency management + * + * Single Loop Power Control (SLPC) is a GuC algorithm that manages + * GT frequency based on busyness and how KMD initializes it. SLPC is + * almost completely in control after initialization except for a few + * scenarios mentioned below. + * + * KMD uses the concept of waitboost to ramp frequency to RP0 when there + * are pending submissions for a context. It achieves this by sending GuC a + * request to update the min frequency to RP0. Waitboost is disabled + * when the request retires. + * + * Another form of frequency control happens through per-context hints. + * A context can be marked as low latency during creation. That will ensure + * that SLPC uses an aggressive frequency ramp when that context is active. + * + * Power profiles add another level of control to these mechanisms. + * When power saving profile is chosen, SLPC will use conservative + * thresholds to ramp frequency, thus saving power.
KMD will disable + * waitboosts as well, which achieves further power savings. Base profile + * is default and ensures balanced performance for any workload. + * + * Lastly, users have some level of control through sysfs, where min/max + * frequency values can be altered and the use of efficient freq + * can be toggled. + */ + static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) { return container_of(slpc, struct intel_guc, slpc); @@ -265,6 +293,8 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->num_boosts = 0; slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL; + slpc->power_profile = SLPC_POWER_PROFILES_BASE; + mutex_init(&slpc->lock); INIT_WORK(&slpc->boost_work, slpc_boost_work); @@ -357,21 +387,29 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); } -static void slpc_shared_data_reset(struct slpc_shared_data *data) +static void slpc_shared_data_reset(struct intel_guc_slpc *slpc) { - memset(data, 0, sizeof(struct slpc_shared_data)); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct slpc_shared_data *data = slpc->vaddr; + memset(data, 0, sizeof(struct slpc_shared_data)); data->header.size = sizeof(struct slpc_shared_data); /* Enable only GTPERF task, disable others */ slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF, SLPC_PARAM_TASK_DISABLE_GTPERF); - slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER, - SLPC_PARAM_TASK_DISABLE_BALANCER); + /* + * Don't allow balancer related algorithms on platforms before + * Xe_LPG, where GuC started to restrict it to TDP limited scenarios. + */ + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) { + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER, + SLPC_PARAM_TASK_DISABLE_BALANCER); - slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC, - SLPC_PARAM_TASK_DISABLE_DCC); + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC, + SLPC_PARAM_TASK_DISABLE_DCC); + } } /** @@ -567,6 +605,34 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val) return ret; } +int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + if (val > SLPC_POWER_PROFILES_POWER_SAVING) + return -EINVAL; + + mutex_lock(&slpc->lock); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + ret = slpc_set_param(slpc, + SLPC_PARAM_POWER_PROFILE, + val); + if (ret) + guc_err(slpc_to_guc(slpc), + "Failed to set power profile to %d: %pe\n", + val, ERR_PTR(ret)); + else + slpc->power_profile = val; + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&slpc->lock); + + return ret; +} + void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) { u32 pm_intrmsk_mbz = 0; @@ -686,7 +752,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) GEM_BUG_ON(!slpc->vma); - slpc_shared_data_reset(slpc->vaddr); + slpc_shared_data_reset(slpc); ret = slpc_reset(slpc); if (unlikely(ret < 0)) { @@ -728,6 +794,13 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Enable SLPC Optimized Strategy for compute */ intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + /* Set cached value of power_profile */ + ret = intel_guc_slpc_set_power_profile(slpc, slpc->power_profile); + if (unlikely(ret)) { + guc_probe_error(guc, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); + return ret; + } + return 0; } @@ -791,6 +864,23 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, 
struct drm_printer *p drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc)); drm_printf(p, "\tGTPERF task active: %s\n", str_yes_no(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tDCC enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_DCC_TASK_ENABLED)); + drm_printf(p, "\tDCC in: %s\n", + str_yes_no(slpc_tasks->status & SLPC_IN_DCC)); + drm_printf(p, "\tBalancer enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_BALANCER_ENABLED)); + drm_printf(p, "\tIBC enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_IBC_TASK_ENABLED)); + drm_printf(p, "\tBalancer IA LMT enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_BALANCER_IA_LMT_ENABLED)); + drm_printf(p, "\tBalancer IA LMT active: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_BALANCER_IA_LMT_ACTIVE)); drm_printf(p, "\tMax freq: %u MHz\n", slpc_decode_max_freq(slpc)); drm_printf(p, "\tMin freq: %u MHz\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 1cb5fd44f05c..fc9f761b4372 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -46,5 +46,6 @@ void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index a88651331497..83673b10ac4e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -33,6 +33,9 @@ struct intel_guc_slpc { u32 max_freq_softlimit; bool ignore_eff_freq; + /* Base or power saving */ + u32 power_profile; + /* cached media ratio mode */ u32 media_ratio_mode; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index cc05bd9e43b4..127316d2c8aa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, atomic_inc(&guc->outstanding_submission_g2h); ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - if (ret) + if (ret && g2h_len_dw) atomic_dec(&guc->outstanding_submission_g2h); return ret; @@ -1223,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) * determine validity of these values. Instead we read the values multiple times * until they are consistent. In test runs, 3 attempts results in consistent * values. The upper bound is set to 6 attempts and may need to be tuned as per - * any new occurences. + * any new occurrences. 
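
The power-profile plumbing added above caches the chosen profile in struct intel_guc_slpc and re-applies it from intel_guc_slpc_enable(). A sketch of a possible call site (the wrapper is assumed; the setter itself rejects values above SLPC_POWER_PROFILES_POWER_SAVING with -EINVAL):

static int slpc_toggle_power_saving(struct intel_guc_slpc *slpc, bool enable)
{
	/* power saving: conservative ramp thresholds, waitboost disabled */
	return intel_guc_slpc_set_power_profile(slpc,
			enable ? SLPC_POWER_PROFILES_POWER_SAVING
			       : SLPC_POWER_PROFILES_BASE);
}
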
*/ static void __get_engine_usage_record(struct intel_engine_cs *engine, u32 *last_in, u32 *id, u32 *total) @@ -1285,15 +1282,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) static u32 gpm_timestamp_shift(struct intel_gt *gt) { intel_wakeref_t wakeref; - u32 reg, shift; + u32 reg; with_intel_runtime_pm(gt->uncore->rpm, wakeref) reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); - shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; - - return 3 - shift; + return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); } static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) @@ -3011,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce, /* * GuC context gets pinned in guc_request_alloc. See that function for - * explaination of why. + * explanation of why. */ return lrc_pin(ce, engine, vaddr); @@ -3446,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) * GuC is active, lets destroy this context, but at this point we can still be racing * with suspend, so we undo everything if the H2G fails in deregister_context so * that GuC reset will find this context during clean up. + * + * There is a race condition where the reset code could have altered + * this context's state and done a wakeref put before we try to + * deregister it here. So check if the context is still set to be + * destroyed before undoing earlier changes, to avoid two wakeref puts + * on the same context. */ ret = deregister_context(ce, ce->guc_id.id); if (ret) { - spin_lock(&ce->guc_state.lock); - set_context_registered(ce); - clr_context_destroyed(ce); - spin_unlock(&ce->guc_state.lock); + bool pending_destroyed; + spin_lock_irqsave(&ce->guc_state.lock, flags); + pending_destroyed = context_destroyed(ce); + if (pending_destroyed) { + set_context_registered(ce); + clr_context_destroyed(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. */ - intel_wakeref_put_async(&gt->wakeref); + if (pending_destroyed) + intel_wakeref_put_async(&gt->wakeref); } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index b3cbf85c00cb..456d3372eef8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -231,8 +231,8 @@ static void delayed_huc_load_init(struct intel_huc *huc) sw_fence_dummy_notify); i915_sw_fence_commit(&huc->delayed_load.fence); - hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - huc->delayed_load.timer.function = huc_delayed_load_timer_callback; + hrtimer_setup(&huc->delayed_load.timer, huc_delayed_load_timer_callback, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); } static void delayed_huc_load_fini(struct intel_huc *huc) @@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc) } } +void intel_huc_fini_late(struct intel_huc *huc) +{ + delayed_huc_load_fini(huc); +} + #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") static int check_huc_loading_mode(struct intel_huc *huc) { @@ -414,12 +419,6 @@ out: void intel_huc_fini(struct intel_huc *huc) { - /* - * the fence is initialized in init_early, so we need to clean it up - * even if HuC loading is off.
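
The HuC hunk above also adopts hrtimer_setup(), which folds the old two-step hrtimer_init() plus manual .function assignment into a single call that takes the callback up front. A kernel-style sketch, with the names assumed:

#include <linux/hrtimer.h>

static enum hrtimer_restart load_timeout(struct hrtimer *t)
{
	/* ... give up on the delayed load ... */
	return HRTIMER_NORESTART;
}

static void delayed_load_timer_init(struct hrtimer *timer)
{
	/* one call instead of hrtimer_init() + timer->function = ... */
	hrtimer_setup(timer, load_timeout, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}
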
- */ - delayed_huc_load_fini(huc); - if (huc->heci_pkt) i915_vma_unpin_and_release(&huc->heci_pkt, 0); @@ -489,13 +488,15 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc, if (delta_ms > 50) { huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n", delta_ms, huc->status[type].reg.reg, count, ret); - huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", - intel_rps_read_actual_frequency(&gt->rps), before_freq, + huc_warn(huc, "excessive auth time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n", + before_freq, intel_rps_read_actual_frequency(&gt->rps), + intel_rps_get_requested_frequency(&gt->rps), intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { - huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", - delta_ms, intel_rps_read_actual_frequency(&gt->rps), - before_freq, huc->status[type].reg.reg, count, ret); + huc_dbg(huc, "auth took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps), + intel_rps_get_requested_frequency(&gt->rps), + huc->status[type].reg.reg, count, ret); } /* mark the load process as complete even if the wait failed */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index d5e441b9e08d..921ad4b1687f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -55,6 +55,7 @@ struct intel_huc { int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); +void intel_huc_fini_late(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 5b8080ec5315..4a3493e8d433 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc) void intel_uc_driver_late_release(struct intel_uc *uc) { + intel_huc_fini_late(&uc->huc); } /** @@ -512,7 +513,7 @@ static int __uc_init_hw(struct intel_uc *uc) ERR_PTR(ret), attempts); } - /* Did we succeded or run out of retries? */ + /* Did we succeed or run out of retries? */ if (ret) goto err_log_capture; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c index 26fdc392fce6..83801c992488 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c @@ -64,7 +64,7 @@ static int intel_hang_guc(void *arg) old_beat = engine->props.heartbeat_interval_ms; ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL); if (ret) { - gt_err(gt, "Failed to boost heatbeat interval: %pe\n", ERR_PTR(ret)); + gt_err(gt, "Failed to boost heartbeat interval: %pe\n", ERR_PTR(ret)); goto err; }
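
A closing note on the %pe format used in the corrected gt_err() message: it prints an ERR_PTR()-encoded error symbolically (for example -EIO) rather than as a bare integer, which is why these call sites wrap the plain errno in ERR_PTR(). A kernel-style sketch, with the helper name assumed:

#include <linux/err.h>
#include <linux/printk.h>

static void report_heartbeat_failure(int ret)
{
	if (ret)
		/* renders as e.g. "heartbeat failed: -EIO" */
		pr_err("heartbeat failed: %pe\n", ERR_PTR(ret));
}
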