Diffstat (limited to 'drivers/gpu/drm/i915')
27 files changed, 444 insertions, 65 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index a4c94dc2e216..cfd932514da2 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -101,6 +101,7 @@ config DRM_I915_USERPTR config DRM_I915_GVT bool "Enable Intel GVT-g graphics virtualization host support" depends on DRM_I915 + depends on X86 depends on 64BIT default n help diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bf7ce684dd8e..bb4a85445fc6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10673,6 +10673,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev, vlv_wm_sanitize(dev_priv); } else if (DISPLAY_VER(dev_priv) >= 9) { skl_wm_get_hw_state(dev_priv); + skl_wm_sanitize(dev_priv); } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_wm_get_hw_state(dev_priv); } diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index c1439fcb5a95..3ff149df4a77 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -405,6 +405,7 @@ intel_drrs_init(struct intel_connector *connector, struct drm_display_mode *fixed_mode) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_encoder *encoder = connector->encoder; struct drm_display_mode *downclock_mode = NULL; INIT_DELAYED_WORK(&dev_priv->drrs.work, intel_drrs_downclock_work); @@ -416,6 +417,13 @@ intel_drrs_init(struct intel_connector *connector, return NULL; } + if ((DISPLAY_VER(dev_priv) < 8 && !HAS_GMCH(dev_priv)) && + encoder->port != PORT_A) { + drm_dbg_kms(&dev_priv->drm, + "DRRS only supported on eDP port A\n"); + return NULL; + } + if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) { drm_dbg_kms(&dev_priv->drm, "VBT doesn't support DRRS\n"); return NULL; diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 160fd2bdafe5..957feeccff3f 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1115,7 +1115,8 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ if (DISPLAY_VER(i915) >= 11 && - (plane_state->view.color_plane[0].y + drm_rect_height(&plane_state->uapi.src)) & 3) { + (plane_state->view.color_plane[0].y + + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; return false; } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 0065111593a6..4a2662838cd8 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -360,6 +360,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, port++; } + /* + * The port numbering and mapping here is bizarre. The now-obsolete + * swsci spec supports ports numbered [0..4]. Port E is handled as a + * special case, but port F and beyond are not. The functionality is + * supposed to be obsolete for new platforms. Just bail out if the port + * number is out of bounds after mapping. 
+ */ + if (port > 4) { + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", + intel_encoder->base.base.id, intel_encoder->base.name, + port_name(intel_encoder->port), port); + return -EINVAL; + } + if (!enable) parm |= 4 << 8; diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 1a376e9a1ff3..d610e48cab94 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -959,6 +959,9 @@ static int check_overlay_dst(struct intel_overlay *overlay, const struct intel_crtc_state *pipe_config = overlay->crtc->config; + if (rec->dst_height == 0 || rec->dst_width == 0) + return -EINVAL; + if (rec->dst_x < pipe_config->pipe_src_w && rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w && rec->dst_y < pipe_config->pipe_src_h && diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 40faa18947c9..dbd7d0d83a14 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -345,10 +345,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; - val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx)); + val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port)); if (val == 0xffffffff) { drm_dbg_kms(&i915->drm, "Port %s: PHY in TCCOLD, assuming not complete\n", diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3a5b247be738..1736efa43339 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2505,9 +2505,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce, timeout) < 0) { i915_request_put(rq); - tl = intel_context_timeline_lock(ce); + /* + * Error path, cannot use intel_context_timeline_lock as + * that is user interruptable and this clean up step + * must be done. + */ + mutex_lock(&ce->timeline->mutex); intel_context_exit(ce); - intel_context_timeline_unlock(tl); + mutex_unlock(&ce->timeline->mutex); if (nonblock) return -EWOULDBLOCK; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 4b4829eb16c2..0dd107dcecc2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -311,6 +311,7 @@ struct drm_i915_gem_object { #define I915_BO_READONLY BIT(6) #define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! 
*/ #define I915_BO_PROTECTED BIT(8) +#define I915_BO_WAS_BOUND_BIT 9 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 9f429ed6e78a..a50f884973bc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -221,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(to_gt(i915)); + } + return pages; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index de3fe79b665a..1f880c8c66e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -842,11 +842,9 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) } else if (obj->mm.madv != I915_MADV_WILLNEED) { bo->priority = I915_TTM_PRIO_PURGE; } else if (!i915_gem_object_has_pages(obj)) { - if (bo->priority < I915_TTM_PRIO_HAS_PAGES) - bo->priority = I915_TTM_PRIO_HAS_PAGES; + bo->priority = I915_TTM_PRIO_NO_PAGES; } else { - if (bo->priority > I915_TTM_PRIO_NO_PAGES) - bo->priority = I915_TTM_PRIO_NO_PAGES; + bo->priority = I915_TTM_PRIO_HAS_PAGES; } ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index ee9612a3ee5e..e130c820ae4e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -427,11 +427,17 @@ __i915_ttm_move(struct ttm_buffer_object *bo, if (!IS_ERR(fence)) goto out; - } else if (move_deps) { - int err = i915_deps_sync(move_deps, ctx); + } else { + int err = PTR_ERR(fence); + + if (err == -EINTR || err == -ERESTARTSYS || err == -EAGAIN) + return fence; - if (err) - return ERR_PTR(err); + if (move_deps) { + err = i915_deps_sync(move_deps, ctx); + if (err) + return ERR_PTR(err); + } } /* Error intercept failed or no accelerated migration to start with */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f98f0fb21efb..35d0fcd3a86c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -29,6 +29,8 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { spin_lock_init(>->irq_lock); + mutex_init(>->tlb_invalidate_lock); + INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -912,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * 
engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (GRAPHICS_VER(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(>->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + /* + * Use delayed put since a) we mostly expect a flurry of TLB + * invalidations so it is good to avoid paying the forcewake cost and + * b) it works around a bug in Icelake which cannot cope with too rapid + * transitions. + */ + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(>->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 3ace129eb2af..a913fb6ffec3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -91,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 14216cc471b1..f20687796490 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -73,6 +73,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct i915_wa_list wa_list; struct intel_gt_timelines { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f9240d4baa69..3aabe164c329 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -206,6 +206,11 @@ struct intel_guc { * context usage for overflows. 
*/ struct delayed_work work; + + /** + * @shift: Right shift value for the gpm timestamp + */ + u32 shift; } timestamp; #ifdef CONFIG_DRM_I915_SELFTEST diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e7517206af82..154ad726e266 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) if (new_start == lower_32_bits(*prev_start)) return; + /* + * When gt is unparked, we update the gt timestamp and start the ping + * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt + * is unparked, all switched in contexts will have a start time that is + * within +/- POLL_TIME_CLKS of the most recent gt_stamp. + * + * If neither gt_stamp nor new_start has rolled over, then the + * gt_stamp_hi does not need to be adjusted, however if one of them has + * rolled over, we need to adjust gt_stamp_hi accordingly. + * + * The below conditions address the cases of new_start rollover and + * gt_stamp_last rollover respectively. + */ if (new_start < gt_stamp_last && (new_start - gt_stamp_last) <= POLL_TIME_CLKS) gt_stamp_hi++; @@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) *prev_start = ((u64)gt_stamp_hi << 32) | new_start; } -static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +/* + * GuC updates shared memory and KMD reads it. Since this is not synchronized, + * we run into a race where the value read is inconsistent. Sometimes the + * inconsistency is in reading the upper MSB bytes of the last_in value when + * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper + * 24 bits are zero. Since these are non-zero values, it is non-trivial to + * determine validity of these values. Instead we read the values multiple times + * until they are consistent. In test runs, 3 attempts results in consistent + * values. The upper bound is set to 6 attempts and may need to be tuned as per + * any new occurences. 
+ */ +static void __get_engine_usage_record(struct intel_engine_cs *engine, + u32 *last_in, u32 *id, u32 *total) { struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + int i = 0; + + do { + *last_in = READ_ONCE(rec->last_switch_in_stamp); + *id = READ_ONCE(rec->current_context_index); + *total = READ_ONCE(rec->total_runtime); + + if (READ_ONCE(rec->last_switch_in_stamp) == *last_in && + READ_ONCE(rec->current_context_index) == *id && + READ_ONCE(rec->total_runtime) == *total) + break; + } while (++i < 6); +} + +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +{ struct intel_engine_guc_stats *stats = &engine->stats.guc; struct intel_guc *guc = &engine->gt->uc.guc; - u32 last_switch = rec->last_switch_in_stamp; - u32 ctx_id = rec->current_context_index; - u32 total = rec->total_runtime; + u32 last_switch, ctx_id, total; lockdep_assert_held(&guc->timestamp.lock); + __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); + stats->running = ctx_id != ~0U && last_switch; if (stats->running) __extend_last_switch(guc, &stats->start_gt_clk, last_switch); @@ -1149,23 +1190,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) } } -static void guc_update_pm_timestamp(struct intel_guc *guc, - struct intel_engine_cs *engine, - ktime_t *now) +static u32 gpm_timestamp_shift(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + u32 reg, shift; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); + + shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; + + return 3 - shift; +} + +static u64 gpm_timestamp(struct intel_gt *gt) +{ + u32 lo, hi, old_hi, loop = 0; + + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + do { + lo = intel_uncore_read(gt->uncore, MISC_STATUS0); + old_hi = hi; + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + } while (old_hi != hi && loop++ < 2); + + return ((u64)hi << 32) | lo; +} + +static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) { - u32 gt_stamp_now, gt_stamp_hi; + struct intel_gt *gt = guc_to_gt(guc); + u32 gt_stamp_lo, gt_stamp_hi; + u64 gpm_ts; lockdep_assert_held(&guc->timestamp.lock); gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); - gt_stamp_now = intel_uncore_read(engine->uncore, - RING_TIMESTAMP(engine->mmio_base)); + gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift; + gt_stamp_lo = lower_32_bits(gpm_ts); *now = ktime_get(); - if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp)) + if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) gt_stamp_hi++; - guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now; + guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; } /* @@ -1208,8 +1277,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) if (!in_reset && intel_gt_pm_get_if_awake(gt)) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; + /* + * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - + * start_gt_clk' calculation below for active engines. 
+ */ guc_update_engine_gt_clks(engine); - guc_update_pm_timestamp(guc, engine, now); + guc_update_pm_timestamp(guc, now); intel_gt_pm_put_async(gt); if (i915_reset_count(gpu_error) != reset_count) { *stats = stats_saved; @@ -1241,8 +1314,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); guc_update_engine_gt_clks(engine); engine->stats.guc.prev_total = 0; } @@ -1259,10 +1332,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc) ktime_t unused; spin_lock_irqsave(&guc->timestamp.lock, flags); - for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); + + guc_update_pm_timestamp(guc, &unused); + for_each_engine(engine, gt, id) guc_update_engine_gt_clks(engine); - } + spin_unlock_irqrestore(&guc->timestamp.lock, flags); } @@ -1335,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt) void intel_guc_busyness_unpark(struct intel_gt *gt) { struct intel_guc *guc = >->uc.guc; + unsigned long flags; + ktime_t unused; if (!guc_submission_initialized(guc)) return; + spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); + spin_unlock_irqrestore(&guc->timestamp.lock, flags); mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); } @@ -1783,6 +1862,7 @@ int intel_guc_submission_init(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; + guc->timestamp.shift = gpm_timestamp_shift(gt); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 99d1781fa5f0..af79b39048f7 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1148,7 +1148,7 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, ops->set_pfn(se, s->shadow_page.mfn); } -/** +/* * Check if can do 2M page * @vgpu: target vgpu * @entry: target pfn's gtt entry @@ -2193,7 +2193,7 @@ static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, } /** - * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read + * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read * @vgpu: a vGPU * @off: register offset * @p_data: data will be returned to guest diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5ae812d60abe..0633888a411e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1522,7 +1522,7 @@ capture_engine(struct intel_engine_cs *engine, struct i915_request *rq = NULL; unsigned long flags; - ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); + ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL); if (!ee) return NULL; diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h index 76f1d53bdf34..3ad22bbe80eb 100644 --- a/drivers/gpu/drm/i915/i915_mm.h +++ b/drivers/gpu/drm/i915/i915_mm.h @@ -6,6 +6,7 @@ #ifndef __I915_MM_H__ #define __I915_MM_H__ +#include <linux/bug.h> #include <linux/types.h> struct vm_area_struct; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 971d601fe751..c2bb33febb68 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2684,7 +2684,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) 
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ -#define GUCPMTIMESTAMP _MMIO(0xC3E8) +#define MISC_STATUS0 _MMIO(0xA500) +#define MISC_STATUS1 _MMIO(0xA504) /* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ #define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) @@ -2721,6 +2722,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2819,6 +2826,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) + #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) #define FPGA_DBG _MMIO(0x42300) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 29a858c53bdd..c0d6d5526abe 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -457,6 +457,9 @@ int i915_vma_bind(struct i915_vma *vma, vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + atomic_or(bind_flags, &vma->flags); return 0; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 434b1f8b7fe3..32bc155f5dc0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4717,6 +4717,10 @@ static const struct dbuf_slice_conf_entry dg2_allowed_dbufs[] = { }; static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = { + /* + * Keep the join_mbus cases first so check_mbus_joined() + * will prefer them over the !join_mbus cases. 
+ */ { .active_pipes = BIT(PIPE_A), .dbuf_mask = { @@ -4732,6 +4736,20 @@ static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = { .join_mbus = true, }, { + .active_pipes = BIT(PIPE_A), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + }, + .join_mbus = false, + }, + { + .active_pipes = BIT(PIPE_B), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + .join_mbus = false, + }, + { .active_pipes = BIT(PIPE_A) | BIT(PIPE_B), .dbuf_mask = { [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), @@ -4835,7 +4853,7 @@ static bool check_mbus_joined(u8 active_pipes, { int i; - for (i = 0; i < dbuf_slices[i].active_pipes; i++) { + for (i = 0; dbuf_slices[i].active_pipes != 0; i++) { if (dbuf_slices[i].active_pipes == active_pipes) return dbuf_slices[i].join_mbus; } @@ -4847,13 +4865,14 @@ static bool adlp_check_mbus_joined(u8 active_pipes) return check_mbus_joined(active_pipes, adlp_allowed_dbufs); } -static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, +static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus, const struct dbuf_slice_conf_entry *dbuf_slices) { int i; - for (i = 0; i < dbuf_slices[i].active_pipes; i++) { - if (dbuf_slices[i].active_pipes == active_pipes) + for (i = 0; dbuf_slices[i].active_pipes != 0; i++) { + if (dbuf_slices[i].active_pipes == active_pipes && + dbuf_slices[i].join_mbus == join_mbus) return dbuf_slices[i].dbuf_mask[pipe]; } return 0; @@ -4864,7 +4883,7 @@ static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, * returns correspondent DBuf slice mask as stated in BSpec for particular * platform. */ -static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes) +static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { /* * FIXME: For ICL this is still a bit unclear as prev BSpec revision @@ -4878,37 +4897,41 @@ static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes) * still here - we will need it once those additional constraints * pop up. 
*/ - return compute_dbuf_slices(pipe, active_pipes, icl_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + icl_allowed_dbufs); } -static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes) +static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { - return compute_dbuf_slices(pipe, active_pipes, tgl_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + tgl_allowed_dbufs); } -static u32 adlp_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) +static u8 adlp_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { - return compute_dbuf_slices(pipe, active_pipes, adlp_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + adlp_allowed_dbufs); } -static u32 dg2_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) +static u8 dg2_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { - return compute_dbuf_slices(pipe, active_pipes, dg2_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + dg2_allowed_dbufs); } -static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes) +static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes, bool join_mbus) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; if (IS_DG2(dev_priv)) - return dg2_compute_dbuf_slices(pipe, active_pipes); + return dg2_compute_dbuf_slices(pipe, active_pipes, join_mbus); else if (IS_ALDERLAKE_P(dev_priv)) - return adlp_compute_dbuf_slices(pipe, active_pipes); + return adlp_compute_dbuf_slices(pipe, active_pipes, join_mbus); else if (DISPLAY_VER(dev_priv) == 12) - return tgl_compute_dbuf_slices(pipe, active_pipes); + return tgl_compute_dbuf_slices(pipe, active_pipes, join_mbus); else if (DISPLAY_VER(dev_priv) == 11) - return icl_compute_dbuf_slices(pipe, active_pipes); + return icl_compute_dbuf_slices(pipe, active_pipes, join_mbus); /* * For anything else just return one slice yet. * Should be extended for other platforms. 
@@ -6127,11 +6150,16 @@ skl_compute_ddb(struct intel_atomic_state *state) return ret; } + if (IS_ALDERLAKE_P(dev_priv)) + new_dbuf_state->joined_mbus = + adlp_check_mbus_joined(new_dbuf_state->active_pipes); + for_each_intel_crtc(&dev_priv->drm, crtc) { enum pipe pipe = crtc->pipe; new_dbuf_state->slices[pipe] = - skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes); + skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes, + new_dbuf_state->joined_mbus); if (old_dbuf_state->slices[pipe] == new_dbuf_state->slices[pipe]) continue; @@ -6143,9 +6171,6 @@ skl_compute_ddb(struct intel_atomic_state *state) new_dbuf_state->enabled_slices = intel_dbuf_enabled_slices(new_dbuf_state); - if (IS_ALDERLAKE_P(dev_priv)) - new_dbuf_state->joined_mbus = adlp_check_mbus_joined(new_dbuf_state->active_pipes); - if (old_dbuf_state->enabled_slices != new_dbuf_state->enabled_slices || old_dbuf_state->joined_mbus != new_dbuf_state->joined_mbus) { ret = intel_atomic_serialize_global_state(&new_dbuf_state->base); @@ -6626,6 +6651,7 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) enum pipe pipe = crtc->pipe; unsigned int mbus_offset; enum plane_id plane_id; + u8 slices; skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal); crtc_state->wm.skl.raw = crtc_state->wm.skl.optimal; @@ -6645,19 +6671,22 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb_uv); } - dbuf_state->slices[pipe] = - skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes); - dbuf_state->weight[pipe] = intel_crtc_ddb_weight(crtc_state); /* * Used for checking overlaps, so we need absolute * offsets instead of MBUS relative offsets. */ - mbus_offset = mbus_ddb_offset(dev_priv, dbuf_state->slices[pipe]); + slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes, + dbuf_state->joined_mbus); + mbus_offset = mbus_ddb_offset(dev_priv, slices); crtc_state->wm.skl.ddb.start = mbus_offset + dbuf_state->ddb[pipe].start; crtc_state->wm.skl.ddb.end = mbus_offset + dbuf_state->ddb[pipe].end; + /* The slices actually used by the planes on the pipe */ + dbuf_state->slices[pipe] = + skl_ddb_dbuf_slice_mask(dev_priv, &crtc_state->wm.skl.ddb); + drm_dbg_kms(&dev_priv->drm, "[CRTC:%d:%s] dbuf slices 0x%x, ddb (%d - %d), active pipes 0x%x, mbus joined: %s\n", crtc->base.base.id, crtc->base.name, @@ -6669,6 +6698,74 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) dbuf_state->enabled_slices = dev_priv->dbuf.enabled_slices; } +static bool skl_dbuf_is_misconfigured(struct drm_i915_private *i915) +{ + const struct intel_dbuf_state *dbuf_state = + to_intel_dbuf_state(i915->dbuf.obj.state); + struct skl_ddb_entry entries[I915_MAX_PIPES] = {}; + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + entries[crtc->pipe] = crtc_state->wm.skl.ddb; + } + + for_each_intel_crtc(&i915->drm, crtc) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + u8 slices; + + slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes, + dbuf_state->joined_mbus); + if (dbuf_state->slices[crtc->pipe] & ~slices) + return true; + + if (skl_ddb_allocation_overlaps(&crtc_state->wm.skl.ddb, entries, + I915_MAX_PIPES, crtc->pipe)) + return true; + } + + return false; +} + +void skl_wm_sanitize(struct drm_i915_private *i915) +{ + struct intel_crtc *crtc; + + /* + * On TGL/RKL (at least) the BIOS likes to assign the planes + * to the wrong DBUF 
slices. This will cause an infinite loop + * in skl_commit_modeset_enables() as it can't find a way to + * transition between the old bogus DBUF layout to the new + * proper DBUF layout without DBUF allocation overlaps between + * the planes (which cannot be allowed or else the hardware + * may hang). If we detect a bogus DBUF layout just turn off + * all the planes so that skl_commit_modeset_enables() can + * simply ignore them. + */ + if (!skl_dbuf_is_misconfigured(i915)) + return; + + drm_dbg_kms(&i915->drm, "BIOS has misprogrammed the DBUF, disabling all planes\n"); + + for_each_intel_crtc(&i915->drm, crtc) { + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + if (plane_state->uapi.visible) + intel_plane_disable_noatomic(crtc, plane); + + drm_WARN_ON(&i915->drm, crtc_state->active_planes != 0); + + memset(&crtc_state->wm.skl.ddb, 0, sizeof(crtc_state->wm.skl.ddb)); + } +} + static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 990cdcaf85ce..d2243653a893 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -47,6 +47,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void vlv_wm_sanitize(struct drm_i915_private *dev_priv); +void skl_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, const struct intel_bw_state *bw_state); void intel_sagv_pre_plane_update(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 53f1ccb78849..64c2708efc9e 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -68,9 +68,7 @@ static noinline depot_stack_handle_t __save_depot_stack(void) static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { spin_lock_init(&rpm->debug.lock); - - if (rpm->available) - stack_depot_init(); + stack_depot_init(); } static noinline depot_stack_handle_t diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index fc25ebf1a593..778da3179b3c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -724,7 +724,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore, } static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, - enum forcewake_domains fw_domains) + enum forcewake_domains fw_domains, + bool delayed) { struct intel_uncore_forcewake_domain *domain; unsigned int tmp; @@ -739,7 +740,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, continue; } - fw_domains_put(uncore, domain->mask); + if (delayed && + !(domain->uncore->fw_domains_timer & domain->mask)) + fw_domain_arm_timer(domain); + else + fw_domains_put(uncore, domain->mask); } } @@ -760,7 +765,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore, return; spin_lock_irqsave(&uncore->lock, irqflags); - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); + spin_unlock_irqrestore(&uncore->lock, irqflags); +} + +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum 
forcewake_domains fw_domains) +{ + unsigned long irqflags; + + if (!uncore->fw_get_funcs) + return; + + spin_lock_irqsave(&uncore->lock, irqflags); + __intel_uncore_forcewake_put(uncore, fw_domains, true); spin_unlock_irqrestore(&uncore->lock, irqflags); } @@ -802,7 +820,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore, if (!uncore->fw_get_funcs) return; - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); } void assert_forcewakes_inactive(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 210fe2a71612..2a15b2b2e2fc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -246,6 +246,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, enum forcewake_domains domains); void intel_uncore_forcewake_put(struct intel_uncore *uncore, enum forcewake_domains domains); +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains domains); void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains); |
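The gpm_timestamp() helper added in intel_guc_submission.c above reads a 64-bit GPM timestamp from a pair of 32-bit registers (MISC_STATUS0/MISC_STATUS1) and retries whenever the high word changes between reads, so a carry from the low half cannot produce a torn value. Below is a minimal standalone sketch of that read pattern; read_reg32() and the TS_LO/TS_HI offsets are hypothetical stand-ins for intel_uncore_read() and the MISC_STATUS registers, not part of the i915 API.

#include <stdint.h>

/* Hypothetical 32-bit MMIO read; stands in for intel_uncore_read(). */
extern uint32_t read_reg32(unsigned int offset);

#define TS_LO 0xA500	/* illustrative: MISC_STATUS0 in the patch */
#define TS_HI 0xA504	/* illustrative: MISC_STATUS1 in the patch */

/*
 * Read a 64-bit counter exposed as two 32-bit registers. If the high
 * word changes while the low word is being read, a carry occurred in
 * between, so re-read until both halves belong to the same value. The
 * loop is bounded, matching the "loop++ < 2" cap in gpm_timestamp().
 */
uint64_t read_counter64(void)
{
	uint32_t lo, hi, old_hi;
	unsigned int loop = 0;

	hi = read_reg32(TS_HI);
	do {
		lo = read_reg32(TS_LO);
		old_hi = hi;
		hi = read_reg32(TS_HI);
	} while (old_hi != hi && loop++ < 2);

	return ((uint64_t)hi << 32) | lo;
}

The same retry-until-consistent idea appears in __get_engine_usage_record() above, which re-reads the GuC shared-memory fields until two consecutive snapshots agree, bounding the loop at six attempts.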