diff options
author | Dave Airlie <airlied@redhat.com> | 2022-07-22 08:51:26 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-07-22 08:51:31 +0300 |
commit | 417c1c1963549e9a48b83ada59d90258e38c6594 (patch) | |
tree | 1b5c36833e1c7b2ea3756767f498bf615436d952 /drivers/gpu/drm/i915/gt | |
parent | cb6b81b21bd9cf09d72b7fe711be1b55001eb166 (diff) | |
parent | 17cd10a44a8962860ff4ba351b2a290e752dbbde (diff) | |
download | linux-417c1c1963549e9a48b83ada59d90258e38c6594.tar.xz |
Merge tag 'drm-intel-gt-next-2022-07-13' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver uAPI changes:
- All related to the Small BAR support: (and all by Matt Auld)
* add probed_cpu_visible_size
* expose the avail memory region tracking
* apply ALLOC_GPU only by default
* add NEEDS_CPU_ACCESS hint
* tweak error capture on recoverable contexts
Driver highlights:
- Add Small BAR support (Matt)
- Add MeteorLake support (RK)
- Add support for LMEM PCIe resizable BAR (Akeem)
Driver important fixes:
- ttm related fixes (Matt Auld)
- Fix a performance regression related to waitboost (Chris)
- Fix GT resets (Chris)
Driver others:
- Adding GuC SLPC selftest (Vinay)
- Fix ADL-N GuC load (Daniele)
- Add platform workaround (Gustavo, Matt Roper)
- DG2 and ATS-M device ID updates (Matt Roper)
- Add VM_BIND doc rfc with uAPI documentation (Niranjana)
- Fix user-after-free in vma destruction (Thomas)
- Async flush of GuC log regions (Alan)
- Fixes in selftests (Chris, Dan, Andrzej)
- Convert to drm_dbg (Umesh)
- Disable OA sseu config param for newer hardware (Umesh)
- Multi-cast register steering changes (Matt Roper)
- Add lmem_bar_size modparam (Priyanka)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Ys85pcMYLkqF/HtB@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_engine_cs.c | 34 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_engine_types.h | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 25 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 24 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt_regs.h | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_region_lmem.c | 109 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_reset.c | 37 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/selftest_lrc.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/selftest_slpc.c | 323 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 |
15 files changed, 396 insertions, 244 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 9dc9dccf7b09..ecc990ec1b95 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -399,7 +399,8 @@ static void insert_breadcrumb(struct i915_request *rq) * the request as it may have completed and raised the interrupt as * we were attaching it into the lists. */ - irq_work_queue(&b->irq_work); + if (!b->irq_armed || __i915_request_is_complete(rq)) + irq_work_queue(&b->irq_work); } bool i915_request_enable_breadcrumb(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 283870c65991..37fa813af766 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1517,7 +1517,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; - const struct sseu_dev_info *sseu = &engine->gt->info.sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -1542,32 +1541,19 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } - } else { - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } + for_each_ss_steering(iter, engine->gt, slice, subslice) { + instdone->sampler[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); + instdone->row[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + for_each_ss_steering(iter, engine->gt, slice, subslice) instdone->geom_svg[slice][subslice] = intel_gt_mcr_read(engine->gt, XEHPG_INSTDONE_GEOM_SVG, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 2286f96f5f87..633a7e5dba3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -647,26 +647,4 @@ intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; } -#define instdone_has_slice(dev_priv___, sseu___, slice___) \ - ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) - -#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ - (GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \ - intel_sseu_has_subslice(sseu__, 0, subslice__)) - -#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ - for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ - (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ - (slice_) += ((subslice_) == 0)) \ - for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ - (instdone_has_subslice(dev_priv_, sseu_, slice_, \ - subslice_))) - -#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ - for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ - (iter_) < GEN_SS_MASK_SIZE; \ - (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ - (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ - for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) - #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8da3314bb6bf..68c2b0d8f187 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 777025d5bd66..e79405a45312 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -495,3 +495,28 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, } } +/** + * intel_gt_mcr_get_ss_steering - returns the group/instance steering for a SS + * @gt: GT structure + * @dss: DSS ID to obtain steering for + * @group: pointer to storage for steering group ID + * @instance: pointer to storage for steering instance ID + * + * Returns the steering IDs (via the @group and @instance parameters) that + * correspond to a specific subslice/DSS ID. + */ +void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, + unsigned int *group, unsigned int *instance) +{ + if (IS_PONTEVECCHIO(gt->i915)) { + *group = dss / GEN_DSS_PER_CSLICE; + *instance = dss % GEN_DSS_PER_CSLICE; + } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) { + *group = dss / GEN_DSS_PER_GSLICE; + *instance = dss % GEN_DSS_PER_GSLICE; + } else { + *group = dss / GEN_MAX_SS_PER_HSW_SLICE; + *instance = dss % GEN_MAX_SS_PER_HSW_SLICE; + return; + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 506b0cbc8db3..77a8b11c287d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -31,4 +31,28 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, bool dump_table); +void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, + unsigned int *group, unsigned int *instance); + +/* + * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice + * presence is determined by using the group/instance as direct lookups in the + * slice/subslice topology. On Xe_HP and beyond, the steering is unrelated to + * the topology, so we lookup the DSS ID directly in "slice 0." + */ +#define _HAS_SS(ss_, gt_, group_, instance_) ( \ + GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 50) ? \ + intel_sseu_has_subslice(&(gt_)->info.sseu, 0, ss_) : \ + intel_sseu_has_subslice(&(gt_)->info.sseu, group_, instance_)) + +/* + * Loop over each subslice/DSS and determine the group and instance IDs that + * should be used to steer MCR accesses toward this DSS. + */ +#define for_each_ss_steering(ss_, gt_, group_, instance_) \ + for (ss_ = 0, intel_gt_mcr_get_ss_steering(gt_, 0, &group_, &instance_); \ + ss_ < I915_MAX_SS_FUSE_BITS; \ + ss_++, intel_gt_mcr_get_ss_steering(gt_, ss_, &group_, &instance_)) \ + for_each_if(_HAS_SS(ss_, gt_, group_, instance_)) + #endif /* __INTEL_GT_MCR__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 37c1095d8603..60d6eb5f245b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -371,6 +371,9 @@ #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) +#define CHICKEN_RASTER_1 _MMIO(0x6204) +#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) + #define VFLSKPD _MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -918,6 +921,10 @@ #define GEN7_L3CNTLREG1 _MMIO(0xb01c) #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C #define GEN7_L3AGDIS (1 << 19) + +#define XEHPC_LNCFMISCCFGREG0 _MMIO(0xb01c) +#define XEHPC_OVRLSCCC REG_BIT(0) + #define GEN7_L3CNTLREG2 _MMIO(0xb020) /* MOCS (Memory Object Control State) registers */ diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index d09b996a9759..6e90032e12e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -15,6 +15,103 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +static void _release_bars(struct pci_dev *pdev) +{ + int resno; + + for (resno = PCI_STD_RESOURCES; resno < PCI_STD_RESOURCE_END; resno++) { + if (pci_resource_len(pdev, resno)) + pci_release_resource(pdev, resno); + } +} + +static void +_resize_bar(struct drm_i915_private *i915, int resno, resource_size_t size) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + int bar_size = pci_rebar_bytes_to_size(size); + int ret; + + _release_bars(pdev); + + ret = pci_resize_resource(pdev, resno, bar_size); + if (ret) { + drm_info(&i915->drm, "Failed to resize BAR%d to %dM (%pe)\n", + resno, 1 << bar_size, ERR_PTR(ret)); + return; + } + + drm_info(&i915->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); +} + +#define LMEM_BAR_NUM 2 +static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_bus *root = pdev->bus; + struct resource *root_res; + resource_size_t rebar_size; + resource_size_t current_size; + u32 pci_cmd; + int i; + + current_size = roundup_pow_of_two(pci_resource_len(pdev, LMEM_BAR_NUM)); + + if (i915->params.lmem_bar_size) { + u32 bar_sizes; + + rebar_size = i915->params.lmem_bar_size * + (resource_size_t)SZ_1M; + bar_sizes = pci_rebar_get_possible_sizes(pdev, + LMEM_BAR_NUM); + + if (rebar_size == current_size) + return; + + if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || + rebar_size >= roundup_pow_of_two(lmem_size)) { + rebar_size = lmem_size; + + drm_info(&i915->drm, + "Given bar size is not within supported size, setting it to default: %llu\n", + (u64)lmem_size >> 20); + } + } else { + rebar_size = current_size; + + if (rebar_size != roundup_pow_of_two(lmem_size)) + rebar_size = lmem_size; + else + return; + } + + /* Find out if root bus contains 64bit memory addressing */ + while (root->parent) + root = root->parent; + + pci_bus_for_each_resource(root, root_res, i) { + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && + root_res->start > 0x100000000ull) + break; + } + + /* pci_resize_resource will fail anyways */ + if (!root_res) { + drm_info(&i915->drm, "Can't resize LMEM BAR - platform support is missing\n"); + return; + } + + /* First disable PCI memory decoding references */ + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, + pci_cmd & ~PCI_COMMAND_MEMORY); + + _resize_bar(i915, LMEM_BAR_NUM, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); +} + static int region_lmem_release(struct intel_memory_region *mem) { @@ -112,12 +209,6 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) flat_ccs_base = intel_gt_mcr_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K; - /* FIXME: Remove this when we have small-bar enabled */ - if (pci_resource_len(pdev, 2) < lmem_size) { - drm_err(&i915->drm, "System requires small-BAR support, which is currently unsupported on this kernel\n"); - return ERR_PTR(-EINVAL); - } - if (GEM_WARN_ON(lmem_size < flat_ccs_base)) return ERR_PTR(-EIO); @@ -134,6 +225,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE); } + i915_resize_lmem_bar(i915, lmem_size); + if (i915->params.lmem_size > 0) { lmem_size = min_t(resource_size_t, lmem_size, mul_u32_u32(i915->params.lmem_size, SZ_1M)); @@ -170,6 +263,10 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) drm_info(&i915->drm, "Local memory available: %pa\n", &lmem_size); + if (io_size < lmem_size) + drm_info(&i915->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' or similar, if available in the BIOS.\n", + (u64)io_size >> 20); + return mem; err_region_put: diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a5338c3fde7a..c68d36fb5bbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; u32 hw_mask; @@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. */ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3213c593a55f..e8111fce56d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -689,6 +689,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + + /* Wa_15010599737:dg2 */ + wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, @@ -2687,6 +2690,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * performance guide section. */ wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + + /* Wa_16016694945 */ + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); } if (IS_XEHPSDV(i915)) { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..1109088fe8f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index b768cea5943d..ac29691e0b1a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -8,6 +8,11 @@ #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 10000) #define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ GEN9_FREQ_SCALER) +enum test_type { + VARY_MIN, + VARY_MAX, + MAX_GRANTED +}; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) { @@ -36,147 +41,114 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) return ret; } -static int live_slpc_clamp_min(void *arg) +static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc = >->uc.guc.slpc; - struct intel_rps *rps = >->rps; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; - u32 slpc_min_freq, slpc_max_freq; + u32 step, max_freq, req_freq; + u32 act_freq; int err = 0; - if (!intel_uc_uses_guc_slpc(>->uc)) - return 0; + /* Go from max to min in 5 steps */ + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; + *max_act_freq = slpc->min_freq; + for (max_freq = slpc->rp0_freq; max_freq > slpc->min_freq; + max_freq -= step) { + err = slpc_set_max_freq(slpc, max_freq); + if (err) + break; - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; + req_freq = intel_rps_read_punit_req_frequency(rps); - if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { - pr_err("Could not get SLPC max freq\n"); - return -EIO; - } + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at most %d\n", req_freq, + max_freq + FREQUENCY_REQ_UNIT); + err = -EINVAL; + } - if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { - pr_err("Could not get SLPC min freq\n"); - return -EIO; - } + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > *max_act_freq) + *max_act_freq = act_freq; - if (slpc_min_freq == slpc_max_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; + if (err) + break; } - intel_gt_pm_wait_for_idle(gt); - intel_gt_pm_get(gt); - for_each_engine(engine, gt, id) { - struct i915_request *rq; - u32 step, min_freq, req_freq; - u32 act_freq, max_act_freq; + return err; +} - if (!intel_engine_can_store_dword(engine)) - continue; +static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) +{ + u32 step, min_freq, req_freq; + u32 act_freq; + int err = 0; - /* Go from min to max in 5 steps */ - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; - max_act_freq = slpc_min_freq; - for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; - min_freq += step) { - err = slpc_set_min_freq(slpc, min_freq); - if (err) - break; - - st_engine_heartbeat_disable(engine); - - rq = igt_spinner_create_request(&spin, - engine->kernel_context, - MI_NOOP); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - st_engine_heartbeat_enable(engine); - break; - } + /* Go from min to max in 5 steps */ + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; + *max_act_freq = slpc->min_freq; + for (min_freq = slpc->min_freq; min_freq < slpc->rp0_freq; + min_freq += step) { + err = slpc_set_min_freq(slpc, min_freq); + if (err) + break; - i915_request_add(rq); + req_freq = intel_rps_read_punit_req_frequency(rps); - if (!igt_wait_for_spinner(&spin, rq)) { - pr_err("%s: Spinner did not start\n", - engine->name); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - intel_gt_set_wedged(engine->gt); - err = -EIO; - break; - } + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at least %d\n", req_freq, + min_freq - FREQUENCY_REQ_UNIT); + err = -EINVAL; + } - /* Wait for GuC to detect business and raise - * requested frequency if necessary. - */ - delay_for_h2g(); + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > *max_act_freq) + *max_act_freq = act_freq; - req_freq = intel_rps_read_punit_req_frequency(rps); + if (err) + break; + } - /* GuC requests freq in multiples of 50/3 MHz */ - if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { - pr_err("SWReq is %d, should be at least %d\n", req_freq, - min_freq - FREQUENCY_REQ_UNIT); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - err = -EINVAL; - break; - } + return err; +} - act_freq = intel_rps_read_actual_frequency(rps); - if (act_freq > max_act_freq) - max_act_freq = act_freq; +static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) +{ + struct intel_gt *gt = rps_to_gt(rps); + u32 perf_limit_reasons; + int err = 0; - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - } + err = slpc_set_min_freq(slpc, slpc->rp0_freq); + if (err) + return err; - pr_info("Max actual frequency for %s was %d\n", - engine->name, max_act_freq); + *max_act_freq = intel_rps_read_actual_frequency(rps); + if (*max_act_freq != slpc->rp0_freq) { + /* Check if there was some throttling by pcode */ + perf_limit_reasons = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS); - /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { - pr_err("Actual freq did not rise above min\n"); + /* If not, this is an error */ + if (!(perf_limit_reasons & GT0_PERF_LIMIT_REASONS_MASK)) { + pr_err("Pcode did not grant max freq\n"); err = -EINVAL; + } else { + pr_info("Pcode throttled frequency 0x%x\n", perf_limit_reasons); } - - if (err) - break; } - /* Restore min/max frequencies */ - slpc_set_max_freq(slpc, slpc_max_freq); - slpc_set_min_freq(slpc, slpc_min_freq); - - if (igt_flush_test(gt->i915)) - err = -EIO; - - intel_gt_pm_put(gt); - igt_spinner_fini(&spin); - intel_gt_pm_wait_for_idle(gt); - return err; } -static int live_slpc_clamp_max(void *arg) +static int run_test(struct intel_gt *gt, int test_type) { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc; - struct intel_rps *rps; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_rps *rps = >->rps; struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; - int err = 0; u32 slpc_min_freq, slpc_max_freq; - - slpc = >->uc.guc.slpc; - rps = >->rps; + int err = 0; if (!intel_uc_uses_guc_slpc(>->uc)) return 0; @@ -194,7 +166,7 @@ static int live_slpc_clamp_max(void *arg) return -EIO; } - if (slpc_min_freq == slpc_max_freq) { + if (slpc->min_freq == slpc->rp0_freq) { pr_err("Min/Max are fused to the same value\n"); return -EINVAL; } @@ -203,93 +175,82 @@ static int live_slpc_clamp_max(void *arg) intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; - u32 max_freq, req_freq; - u32 act_freq, max_act_freq; - u32 step; + u32 max_act_freq; if (!intel_engine_can_store_dword(engine)) continue; - /* Go from max to min in 5 steps */ - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; - max_act_freq = slpc_min_freq; - for (max_freq = slpc_max_freq; max_freq > slpc_min_freq; - max_freq -= step) { - err = slpc_set_max_freq(slpc, max_freq); - if (err) - break; - - st_engine_heartbeat_disable(engine); - - rq = igt_spinner_create_request(&spin, - engine->kernel_context, - MI_NOOP); - if (IS_ERR(rq)) { - st_engine_heartbeat_enable(engine); - err = PTR_ERR(rq); - break; - } + st_engine_heartbeat_disable(engine); - i915_request_add(rq); + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } - if (!igt_wait_for_spinner(&spin, rq)) { - pr_err("%s: SLPC spinner did not start\n", - engine->name); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - intel_gt_set_wedged(engine->gt); - err = -EIO; - break; - } + i915_request_add(rq); - delay_for_h2g(); + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } - /* Verify that SWREQ indeed was set to specific value */ - req_freq = intel_rps_read_punit_req_frequency(rps); + switch (test_type) { + case VARY_MIN: + err = vary_min_freq(slpc, rps, &max_act_freq); + break; - /* GuC requests freq in multiples of 50/3 MHz */ - if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { - pr_err("SWReq is %d, should be at most %d\n", req_freq, - max_freq + FREQUENCY_REQ_UNIT); + case VARY_MAX: + err = vary_max_freq(slpc, rps, &max_act_freq); + break; + + case MAX_GRANTED: + /* Media engines have a different RP0 */ + if (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS) { igt_spinner_end(&spin); st_engine_heartbeat_enable(engine); - err = -EINVAL; - break; + err = 0; + continue; } - act_freq = intel_rps_read_actual_frequency(rps); - if (act_freq > max_act_freq) - max_act_freq = act_freq; - - st_engine_heartbeat_enable(engine); - igt_spinner_end(&spin); - - if (err) - break; + err = max_granted_freq(slpc, rps, &max_act_freq); + break; } pr_info("Max actual frequency for %s was %d\n", engine->name, max_act_freq); /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { + if (max_act_freq <= slpc_min_freq) { pr_err("Actual freq did not rise above min\n"); + pr_err("Perf Limit Reasons: 0x%x\n", + intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); err = -EINVAL; } - if (igt_flush_test(gt->i915)) { - err = -EIO; - break; - } + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); if (err) break; } - /* Restore min/max freq */ + /* Restore min/max frequencies */ slpc_set_max_freq(slpc, slpc_max_freq); slpc_set_min_freq(slpc, slpc_min_freq); + if (igt_flush_test(gt->i915)) + err = -EIO; + intel_gt_pm_put(gt); igt_spinner_fini(&spin); intel_gt_pm_wait_for_idle(gt); @@ -297,11 +258,37 @@ static int live_slpc_clamp_max(void *arg) return err; } +static int live_slpc_vary_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, VARY_MIN); +} + +static int live_slpc_vary_max(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, VARY_MAX); +} + +/* check if pcode can grant RP0 */ +static int live_slpc_max_granted(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, MAX_GRANTED); +} + int intel_slpc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(live_slpc_clamp_max), - SUBTEST(live_slpc_clamp_min), + SUBTEST(live_slpc_vary_max), + SUBTEST(live_slpc_vary_min), + SUBTEST(live_slpc_max_granted), }; if (intel_gt_is_wedged(to_gt(i915))) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 97a32e610c30..75257bd20ff0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -9,6 +9,7 @@ #include "gt/intel_engine_regs.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc.h" #include "guc_capture_fwif.h" @@ -281,8 +282,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, const struct __guc_mmio_reg_descr_group *lists) { struct intel_gt *gt = guc_to_gt(guc); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - int slice, subslice, i, num_steer_regs, num_tot_regs = 0; + int slice, subslice, iter, i, num_steer_regs, num_tot_regs = 0; const struct __guc_mmio_reg_descr_group *list; struct __guc_mmio_reg_descr_group *extlists; struct __guc_mmio_reg_descr *extarray; @@ -298,7 +298,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, num_steer_regs = ARRAY_SIZE(xe_extregs); sseu = >->info.sseu; - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) + for_each_ss_steering(iter, gt, slice, subslice) num_tot_regs += num_steer_regs; if (!num_tot_regs) @@ -315,7 +315,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, } extarray = extlists[0].extlist; - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) { for (i = 0; i < num_steer_regs; ++i) { __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); ++extarray; @@ -359,9 +359,8 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, num_steer_regs += ARRAY_SIZE(xehpg_extregs); sseu = >->info.sseu; - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) num_tot_regs += num_steer_regs; - } if (!num_tot_regs) return; @@ -377,7 +376,7 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, } extarray = extlists[0].extlist; - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) { for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); ++extarray; @@ -1261,7 +1260,8 @@ static int __guc_capture_flushlog_complete(struct intel_guc *guc) GUC_CAPTURE_LOG_BUFFER }; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); + } static void __guc_capture_process_output(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 02311ad90264..25b2d7ce6640 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -31,7 +31,7 @@ static int guc_action_flush_log_complete(struct intel_guc *guc) GUC_DEBUG_LOG_BUFFER }; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); } static int guc_action_flush_log(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index c06e83872c34..27363091e1af 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) u8 rev = INTEL_REVID(i915); int i; + /* + * The only difference between the ADL GuC FWs is the HWConfig support. + * ADL-N does not support HWConfig, so we should use the same binary as + * ADL-S, otherwise the GuC might attempt to fetch a config table that + * does not exist. + */ + if (IS_ADLP_N(i915)) + p = INTEL_ALDERLAKE_S; + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; |