Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc')
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h  |    5
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h          |    4
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c              |   50
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c                 |   10
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.h                 |    2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c              |   13
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h            |    3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c        |    3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c             |    4
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c            |  104
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h            |    1
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h      |    3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c      |  125
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.c                 |   42
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.h                 |    2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.c                  |    3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c    |    2
17 files changed, 284 insertions, 92 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
index c34674e797c6..6de87ae5669e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -228,6 +228,11 @@ struct slpc_optimized_strategies {
 
 #define SLPC_OPTIMIZED_STRATEGY_COMPUTE	REG_BIT(0)
 
+enum slpc_power_profiles {
+	SLPC_POWER_PROFILES_BASE = 0x0,
+	SLPC_POWER_PROFILES_POWER_SAVING = 0x1
+};
+
 /**
  * DOC: SLPC H2G MESSAGE FORMAT
  *
diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
index 1fc0c17b1230..803c0379d97d 100644
--- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
@@ -81,7 +81,7 @@ struct guc_debug_capture_list {
  *
  * intel_guc_capture module uses these structures to maintain static
  * tables (per unique platform) that consists of lists of registers
- * (offsets, names, flags,...) that are used at the ADS regisration
+ * (offsets, names, flags,...) that are used at the ADS registration
  * time as well as during runtime processing and reporting of error-
  * capture states generated by GuC just prior to engine reset events.
  */
@@ -200,7 +200,7 @@ struct intel_guc_state_capture {
	 * dynamically allocate new nodes when receiving the G2H notification
	 * because the event handlers for all G2H event-processing is called
	 * by the ct processing worker queue and when that queue is being
-	 * processed, there is no absoluate guarantee that we are not in the
+	 * processed, there is no absolute guarantee that we are not in the
	 * midst of a GT reset operation (which doesn't allow allocations).
	 */
	struct list_head cachelist;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index 551b0d7974ff..d550eb6edfb8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -80,6 +80,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s
	const struct intel_gsc_cpd_header_v2 *cpd_header = NULL;
	const struct intel_gsc_cpd_entry *cpd_entry = NULL;
	const struct intel_gsc_manifest_header *manifest;
+	struct intel_uc_fw_ver min_ver = { 0 };
	size_t min_size = sizeof(*layout);
	int i;
 
@@ -212,33 +213,46 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s
		}
	}
 
-	if (IS_ARROWLAKE(gt->i915)) {
+	/*
+	 * ARL SKUs require newer firmwares, but the blob is actually common
+	 * across all MTL and ARL SKUs, so we need to do an explicit version check
+	 * here rather than using a separate table entry. If a too old version
+	 * is found, then just don't use GSC rather than aborting the driver load.
+	 * Note that the major number in the GSC FW version is used to indicate
+	 * the platform, so we expect it to always be 102 for MTL/ARL binaries.
+	 */
+	if (IS_ARROWLAKE_S(gt->i915))
+		min_ver = (struct intel_uc_fw_ver){ 102, 0, 10, 1878 };
+	else if (IS_ARROWLAKE_H(gt->i915) || IS_ARROWLAKE_U(gt->i915))
+		min_ver = (struct intel_uc_fw_ver){ 102, 1, 15, 1926 };
+
+	if (IS_METEORLAKE(gt->i915) && gsc->release.major != 102) {
+		gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x",
+			gsc->release.major, gsc->release.minor,
+			gsc->release.patch, gsc->release.build);
+		return -EINVAL;
+	}
+
+	if (min_ver.major) {
		bool too_old = false;
 
-		/*
-		 * ARL requires a newer firmware than MTL did (102.0.10.1878) but the
-		 * firmware is actually common. So, need to do an explicit version check
-		 * here rather than using a separate table entry. And if the older
-		 * MTL-only version is found, then just don't use GSC rather than aborting
-		 * the driver load.
-		 */
-		if (gsc->release.major < 102) {
+		if (gsc->release.minor < min_ver.minor) {
			too_old = true;
-		} else if (gsc->release.major == 102) {
-			if (gsc->release.minor == 0) {
-				if (gsc->release.patch < 10) {
+		} else if (gsc->release.minor == min_ver.minor) {
+			if (gsc->release.patch < min_ver.patch) {
+				too_old = true;
+			} else if (gsc->release.patch == min_ver.patch) {
+				if (gsc->release.build < min_ver.build)
					too_old = true;
-				} else if (gsc->release.patch == 10) {
-					if (gsc->release.build < 1878)
-						too_old = true;
-				}
			}
		}
 
		if (too_old) {
-			gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878",
+			gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least %d.%d.%d.%d",
				gsc->release.major, gsc->release.minor,
-				gsc->release.patch, gsc->release.build);
+				gsc->release.patch, gsc->release.build,
+				min_ver.major, min_ver.minor,
+				min_ver.patch, min_ver.build);
			return -EINVAL;
		}
	}
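The net effect of the rewritten check is a lexicographic comparison of (minor, patch, build) against a per-SKU minimum, with the major number pinned to 102 as a platform marker rather than part of the ordering. A minimal standalone sketch of that comparison; struct fw_ver and fw_too_old() are illustrative stand-ins, not the kernel's intel_uc_fw_ver helpers:

#include <stdbool.h>
#include <stdio.h>

struct fw_ver { int major, minor, patch, build; };

/*
 * True when 'got' is older than 'min'. The major number is deliberately
 * not part of the ordering: it encodes the platform and is validated
 * separately (must be 102 for MTL/ARL blobs).
 */
static bool fw_too_old(struct fw_ver got, struct fw_ver min)
{
	if (got.minor != min.minor)
		return got.minor < min.minor;
	if (got.patch != min.patch)
		return got.patch < min.patch;
	return got.build < min.build;
}

int main(void)
{
	struct fw_ver min_arl_s = { 102, 0, 10, 1878 };	/* from the diff */
	struct fw_ver got = { 102, 0, 10, 1800 };

	printf("too old: %s\n", fw_too_old(got, min_arl_s) ? "yes" : "no");
	return 0;
}

Flattening the nested if/else chain this way is equivalent to the patch's version, which keeps the kernel's early-exit style instead.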
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 097fc6bd1285..9df80c325fc1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -239,8 +239,16 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc)
 
 static u32 guc_ctl_feature_flags(struct intel_guc *guc)
 {
+	struct intel_gt *gt = guc_to_gt(guc);
	u32 flags = 0;
 
+	/*
+	 * Enable PXP GuC autoteardown flow.
+	 * NB: MTL does things differently.
+	 */
+	if (HAS_PXP(gt->i915) && !IS_METEORLAKE(gt->i915))
+		flags |= GUC_CTL_ENABLE_GUC_PXP_CTL;
+
	if (!intel_guc_submission_is_used(guc))
		flags |= GUC_CTL_DISABLE_SCHEDULER;
 
@@ -682,7 +690,7 @@ int intel_guc_suspend(struct intel_guc *guc)
	 * H2G MMIO command completes.
	 *
	 * Don't abort on a failure code from the GuC. Keep going and do the
-	 * clean up in santize() and re-initialisation on resume and hopefully
+	 * clean up in sanitize() and re-initialisation on resume and hopefully
	 * the error here won't be problematic.
	 */
	ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 57b903132776..053780f562c1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -295,7 +295,7 @@ struct intel_guc {
	 */
	struct work_struct dead_guc_worker;
	/**
-	 * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance
+	 * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrence
	 * used to prevent a fundamentally broken system from continuously
	 * reloading the GuC.
	 */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 23f54c84cbab..e7ccfa520df3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -145,7 +145,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool
  * an end user should hit the timeout is in case of extreme thermal throttling.
  * And a system that is that hot during boot is probably dead anyway!
  */
-#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 #define GUC_LOAD_RETRY_LIMIT	20
 #else
 #define GUC_LOAD_RETRY_LIMIT	3
@@ -259,13 +259,14 @@ static int guc_wait_ucode(struct intel_guc *guc)
	} else if (delta_ms > 200) {
		guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
			 delta_ms, status, count, ret);
-		guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
-			 intel_rps_read_actual_frequency(&gt->rps), before_freq,
+		guc_warn(guc, "excessive init time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n",
+			 before_freq, intel_rps_read_actual_frequency(&gt->rps),
+			 intel_rps_get_requested_frequency(&gt->rps),
			 intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
	} else {
-		guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
-			delta_ms, intel_rps_read_actual_frequency(&gt->rps),
-			before_freq, status, count, ret);
+		guc_dbg(guc, "init took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+			delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps),
+			intel_rps_get_requested_frequency(&gt->rps), status, count, ret);
	}
 
	return ret;
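This hunk, and the matching ones later in intel_guc_log.c and intel_huc.c, switch from defined(CONFIG_...) to IS_ENABLED(CONFIG_...). The kernel macro (include/linux/kconfig.h) is preferred because it also works in ordinary C expressions and, in its full form, catches options set to 'm'. A simplified, self-contained re-implementation of the preprocessor trick, assuming the usual Kconfig convention of defining enabled options to 1 (the real macro additionally ORs in the CONFIG_FOO_MODULE variant):

#include <stdio.h>

/* Simplified version of the include/linux/kconfig.h machinery. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_DRM_I915_DEBUG_GEM 1	/* pretend Kconfig enabled this */

/* Same shape as the GUC_LOAD_RETRY_LIMIT selection in the hunk above. */
#define GUC_LOAD_RETRY_LIMIT \
	(IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) ? 20 : 3)

int main(void)
{
	/* defined-to-1 option evaluates to 1, undefined option to 0 */
	printf("debug: %d, retry limit: %d\n",
	       IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM), GUC_LOAD_RETRY_LIMIT);
	printf("undefined option: %d\n", IS_ENABLED(CONFIG_NOT_SET));
	return 0;
}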
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 263c9c3f6a03..eded00f0c7e1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -105,6 +105,7 @@
 #define   GUC_WA_ENABLE_TSC_CHECK_ON_RC6	BIT(22)
 
 #define GUC_CTL_FEATURE			2
+#define   GUC_CTL_ENABLE_GUC_PXP_CTL	BIT(1)
 #define   GUC_CTL_ENABLE_SLPC		BIT(2)
 #define   GUC_CTL_DISABLE_SCHEDULER	BIT(14)
 
@@ -407,7 +408,7 @@ enum guc_capture_type {
	GUC_CAPTURE_LIST_TYPE_MAX,
 };
 
-/* Class indecies for capture_class and capture_instance arrays */
+/* Class indices for capture_class and capture_instance arrays */
 enum {
	GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
	GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index b67a15f74276..868195c33f5b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -7,6 +7,7 @@
 #include "gt/intel_hwconfig.h"
 #include "i915_drv.h"
 #include "i915_memcpy.h"
+#include "intel_guc_print.h"
 
 /*
  * GuC has a blob containing hardware configuration information (HWConfig).
@@ -42,6 +43,8 @@ static int __guc_action_get_hwconfig(struct intel_guc *guc,
	};
	int ret;
 
+	guc_dbg(guc, "Querying HW config table: size = %d, offset = 0x%08X\n",
+		ggtt_size, ggtt_offset);
	ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
	if (ret == -ENXIO)
		return -ENOENT;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index bf16351c9349..e8a04e476c57 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -14,11 +14,11 @@
 #include "intel_guc_log.h"
 #include "intel_guc_print.h"
 
-#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
 #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE	SZ_2M
 #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE	SZ_16M
 #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE	SZ_1M
-#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
+#elif IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE	SZ_1M
 #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE	SZ_2M
 #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE	SZ_1M
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 706fffca698b..d5ee6e5e1443 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -15,6 +15,34 @@
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_rps.h"
 
+/**
+ * DOC: SLPC - Dynamic Frequency management
+ *
+ * Single Loop Power Control (SLPC) is a GuC algorithm that manages
+ * GT frequency based on busyness and how KMD initializes it. SLPC is
+ * almost completely in control after initialization except for a few
+ * scenarios mentioned below.
+ *
+ * KMD uses the concept of waitboost to ramp frequency to RP0 when there
+ * are pending submissions for a context. It achieves this by sending GuC a
+ * request to update the min frequency to RP0. Waitboost is disabled
+ * when the request retires.
+ *
+ * Another form of frequency control happens through per-context hints.
+ * A context can be marked as low latency during creation. That will ensure
+ * that SLPC uses an aggressive frequency ramp when that context is active.
+ *
+ * Power profiles add another level of control to these mechanisms.
+ * When power saving profile is chosen, SLPC will use conservative
+ * thresholds to ramp frequency, thus saving power. KMD will disable
+ * waitboosts as well, which achieves further power savings. Base profile
+ * is default and ensures balanced performance for any workload.
+ *
+ * Lastly, users have some level of control through sysfs, where min/max
+ * frequency values can be altered and the use of efficient freq
+ * can be toggled.
+ */
+
 static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc)
 {
	return container_of(slpc, struct intel_guc, slpc);
@@ -265,6 +293,8 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
	slpc->num_boosts = 0;
	slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL;
 
+	slpc->power_profile = SLPC_POWER_PROFILES_BASE;
+
	mutex_init(&slpc->lock);
	INIT_WORK(&slpc->boost_work, slpc_boost_work);
 
@@ -357,21 +387,29 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc)
			 GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER);
 }
 
-static void slpc_shared_data_reset(struct slpc_shared_data *data)
+static void slpc_shared_data_reset(struct intel_guc_slpc *slpc)
 {
-	memset(data, 0, sizeof(struct slpc_shared_data));
+	struct drm_i915_private *i915 = slpc_to_i915(slpc);
+	struct slpc_shared_data *data = slpc->vaddr;
 
+	memset(data, 0, sizeof(struct slpc_shared_data));
	data->header.size = sizeof(struct slpc_shared_data);
 
	/* Enable only GTPERF task, disable others */
	slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF,
			     SLPC_PARAM_TASK_DISABLE_GTPERF);
 
-	slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER,
-			      SLPC_PARAM_TASK_DISABLE_BALANCER);
+	/*
+	 * Don't allow balancer related algorithms on platforms before
+	 * Xe_LPG, where GuC started to restrict it to TDP limited scenarios.
+	 */
+	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) {
+		slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER,
+				      SLPC_PARAM_TASK_DISABLE_BALANCER);
 
-	slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC,
-			      SLPC_PARAM_TASK_DISABLE_DCC);
+		slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC,
+				      SLPC_PARAM_TASK_DISABLE_DCC);
+	}
 }
 
 /**
@@ -567,6 +605,34 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
	return ret;
 }
 
+int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val)
+{
+	struct drm_i915_private *i915 = slpc_to_i915(slpc);
+	intel_wakeref_t wakeref;
+	int ret = 0;
+
+	if (val > SLPC_POWER_PROFILES_POWER_SAVING)
+		return -EINVAL;
+
+	mutex_lock(&slpc->lock);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	ret = slpc_set_param(slpc,
+			     SLPC_PARAM_POWER_PROFILE,
+			     val);
+	if (ret)
+		guc_err(slpc_to_guc(slpc),
+			"Failed to set power profile to %d: %pe\n",
+			val, ERR_PTR(ret));
+	else
+		slpc->power_profile = val;
+
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&slpc->lock);
+
+	return ret;
+}
+
 void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)
 {
	u32 pm_intrmsk_mbz = 0;
@@ -686,7 +752,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
 
	GEM_BUG_ON(!slpc->vma);
 
-	slpc_shared_data_reset(slpc->vaddr);
+	slpc_shared_data_reset(slpc);
 
	ret = slpc_reset(slpc);
	if (unlikely(ret < 0)) {
@@ -728,6 +794,13 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
	/* Enable SLPC Optimized Strategy for compute */
	intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
 
+	/* Set cached value of power_profile */
+	ret = intel_guc_slpc_set_power_profile(slpc, slpc->power_profile);
+	if (unlikely(ret)) {
+		guc_probe_error(guc, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
+		return ret;
+	}
+
	return 0;
 }
 
@@ -791,6 +864,23 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p
		drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc));
		drm_printf(p, "\tGTPERF task active: %s\n",
			   str_yes_no(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED));
+		drm_printf(p, "\tDCC enabled: %s\n",
+			   str_yes_no(slpc_tasks->status &
+				      SLPC_DCC_TASK_ENABLED));
+		drm_printf(p, "\tDCC in: %s\n",
+			   str_yes_no(slpc_tasks->status & SLPC_IN_DCC));
+		drm_printf(p, "\tBalancer enabled: %s\n",
+			   str_yes_no(slpc_tasks->status &
+				      SLPC_BALANCER_ENABLED));
+		drm_printf(p, "\tIBC enabled: %s\n",
+			   str_yes_no(slpc_tasks->status &
+				      SLPC_IBC_TASK_ENABLED));
+		drm_printf(p, "\tBalancer IA LMT enabled: %s\n",
+			   str_yes_no(slpc_tasks->status &
+				      SLPC_BALANCER_IA_LMT_ENABLED));
+		drm_printf(p, "\tBalancer IA LMT active: %s\n",
+			   str_yes_no(slpc_tasks->status &
+				      SLPC_BALANCER_IA_LMT_ACTIVE));
		drm_printf(p, "\tMax freq: %u MHz\n",
			   slpc_decode_max_freq(slpc));
		drm_printf(p, "\tMin freq: %u MHz\n",
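The power-profile plumbing above follows a cache-and-replay pattern: validate the value, push it to the firmware under the lock, cache it only on success, and re-apply the cached value when SLPC is re-enabled after a reset (that is why intel_guc_slpc_enable() sets the "cached value of power_profile"). A hedged standalone model of the pattern; send_power_profile() and the error code are stand-ins for the real slpc_set_param() H2G call:

#include <stdio.h>

enum power_profile { PROFILE_BASE, PROFILE_POWER_SAVING };

struct slpc { unsigned int power_profile; /* cache, survives GuC resets */ };

/* Stand-in for the real slpc_set_param() H2G round trip. */
static int send_power_profile(unsigned int val)
{
	printf("H2G: SLPC power profile <- %u\n", val);
	return 0;	/* 0 on success, negative errno on failure */
}

static int set_power_profile(struct slpc *s, unsigned int val)
{
	int ret;

	if (val > PROFILE_POWER_SAVING)
		return -22;	/* -EINVAL: only two profiles exist */

	ret = send_power_profile(val);
	if (!ret)
		s->power_profile = val;	/* cache only on success */
	return ret;
}

/* Models intel_guc_slpc_enable(): firmware state is fresh, replay cache. */
static void slpc_enable(struct slpc *s)
{
	send_power_profile(s->power_profile);
}

int main(void)
{
	struct slpc s = { PROFILE_BASE };

	set_power_profile(&s, PROFILE_POWER_SAVING);
	slpc_enable(&s);	/* re-applies POWER_SAVING after a reset */
	return 0;
}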
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 1cb5fd44f05c..fc9f761b4372 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -46,5 +46,6 @@ void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
 void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
 int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
 int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index a88651331497..83673b10ac4e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -33,6 +33,9 @@ struct intel_guc_slpc {
	u32 max_freq_softlimit;
	bool ignore_eff_freq;
 
+	/* Base or power saving */
+	u32 power_profile;
+
	/* cached media ratio mode */
	u32 media_ratio_mode;
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ed979847187f..127316d2c8aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
		atomic_inc(&guc->outstanding_submission_g2h);
 
	ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
-	if (ret)
+	if (ret && g2h_len_dw)
		atomic_dec(&guc->outstanding_submission_g2h);
 
	return ret;
@@ -1223,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts results in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
- * any new occurences.
+ * any new occurrences.
 */
 static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
 {
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	} while (++i < 6);
 }
 
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+				      u32 last_in, u32 id, u32 total)
+{
+	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+	iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+	record_write(&rec_map, last_switch_in_stamp, last_in);
+	record_write(&rec_map, current_context_index, id);
+	record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
 {
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
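__get_engine_usage_record(), shown in context above, reads a record that GuC updates without any shared lock, so the driver simply re-reads until two consecutive snapshots agree, bounded at six attempts per the comment. A self-contained sketch of that read-until-consistent loop; the struct layout and helper names are illustrative, not the guc_engine_usage_record ABI:

#include <stdbool.h>
#include <stdio.h>

struct usage_record {
	unsigned int last_switch_in_stamp;
	unsigned int current_context_index;
	unsigned int total_runtime;
};

/* Updated by another agent (GuC) with no shared lock, hence volatile. */
static volatile struct usage_record live;

static void snapshot(struct usage_record *out)
{
	out->last_switch_in_stamp = live.last_switch_in_stamp;
	out->current_context_index = live.current_context_index;
	out->total_runtime = live.total_runtime;
}

/* Re-read until two consecutive snapshots agree, bounded like the patch. */
static bool read_consistent(struct usage_record *out)
{
	struct usage_record prev;
	int i = 0;

	snapshot(&prev);
	do {
		snapshot(out);
		if (out->last_switch_in_stamp == prev.last_switch_in_stamp &&
		    out->current_context_index == prev.current_context_index &&
		    out->total_runtime == prev.total_runtime)
			return true;	/* stable: writer was quiescent */
		prev = *out;
	} while (++i < 6);

	return false;	/* caller may still use the last snapshot */
}

int main(void)
{
	struct usage_record rec;

	printf("consistent: %s\n", read_consistent(&rec) ? "yes" : "no");
	return 0;
}

The new __set_engine_usage_record() is the write-side counterpart, used below to seed the records before GuC starts reporting into them.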
@@ -1270,15 +1285,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
 static u32 gpm_timestamp_shift(struct intel_gt *gt)
 {
	intel_wakeref_t wakeref;
-	u32 reg, shift;
+	u32 reg;
 
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
 
-	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
-		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
-
-	return 3 - shift;
+	return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
 }
 
 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
@@ -1339,7 +1351,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
-	wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+	wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
	if (wakeref) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
@@ -1363,9 +1375,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
		total += intel_gt_clock_interval_to_ns(gt, clk);
	}
 
+	if (total > stats->total)
+		stats->total = total;
+
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 
-	return ns_to_ktime(total);
+	return ns_to_ktime(stats->total);
 }
 
 static void guc_enable_busyness_worker(struct intel_guc *guc)
@@ -1431,13 +1446,39 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id) {
+		struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
		guc_update_engine_gt_clks(engine);
-		engine->stats.guc.prev_total = 0;
+
+		/*
+		 * If resetting a running context, accumulate the active
+		 * time as well since there will be no context switch.
+		 */
+		if (stats->running) {
+			u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+			stats->total_gt_clks += clk;
+		}
+		stats->prev_total = 0;
+		stats->running = 0;
	}
 
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 }
 
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&guc->timestamp.lock, flags);
+	for_each_engine(engine, gt, id)
+		engine->stats.guc.running = false;
+	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
 static void __update_guc_busyness_stats(struct intel_guc *guc)
 {
	struct intel_gt *gt = guc_to_gt(guc);
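The guc_engine_busyness() change caches the derived total in stats->total and only ever moves it forward, so reported busyness cannot appear to run backwards across parking or reset even when a freshly derived value is transiently smaller. A minimal model of that ratchet:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct busy_stats { uint64_t total; /* last value handed to userspace */ };

/* Only ever ratchet the cached total upwards, and report the cache. */
static uint64_t report_busyness(struct busy_stats *s, uint64_t derived)
{
	if (derived > s->total)
		s->total = derived;
	return s->total;
}

int main(void)
{
	struct busy_stats s = { 0 };

	printf("%" PRIu64 "\n", report_busyness(&s, 1000));	/* 1000 */
	printf("%" PRIu64 "\n", report_busyness(&s, 900));	/* still 1000 */
	return 0;
}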
@@ -1543,6 +1584,9 @@ err_trylock:
 
 static int guc_action_enable_usage_stats(struct intel_guc *guc)
 {
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
	u32 offset = intel_guc_engine_usage_offset(guc);
	u32 action[] = {
		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1550,6 +1594,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
		offset,
		0,
	};
 
+	for_each_engine(engine, gt, id)
+		__set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
@@ -1582,6 +1629,9 @@ void intel_guc_busyness_park(struct intel_gt *gt)
	if (!guc_submission_initialized(guc))
		return;
 
+	/* Assume no engines are running and set running state to false */
+	__update_guc_busyness_running_state(guc);
+
	/*
	 * There is a race with suspend flow where the worker runs after suspend
	 * and causes an unclaimed register access warning. Cancel the worker
@@ -1688,6 +1738,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
	spin_lock_irq(guc_to_gt(guc)->irq_lock);
	spin_unlock_irq(guc_to_gt(guc)->irq_lock);
 
+	/* Flush tasklet */
+	tasklet_disable(&guc->ct.receive_tasklet);
+	tasklet_enable(&guc->ct.receive_tasklet);
+
	guc_flush_submissions(guc);
	guc_flush_destroyed_contexts(guc);
	flush_work(&guc->ct.requests.worker);
@@ -2005,6 +2059,8 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
 
 void intel_guc_submission_reset_finish(struct intel_guc *guc)
 {
+	int outstanding;
+
	/* Reset called during driver load or during wedge? */
	if (unlikely(!guc_submission_initialized(guc) ||
		     !intel_guc_is_fw_running(guc) ||
@@ -2018,8 +2074,10 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
	 * see in CI if this happens frequently / a precursor to taking down the
	 * machine.
	 */
-	if (atomic_read(&guc->outstanding_submission_g2h))
-		guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n");
+	outstanding = atomic_read(&guc->outstanding_submission_g2h);
+	if (outstanding)
+		guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n",
+			outstanding);
	atomic_set(&guc->outstanding_submission_g2h, 0);
 
	intel_guc_global_policies_update(guc);
@@ -2950,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce,
 
	/*
	 * GuC context gets pinned in guc_request_alloc. See that function for
-	 * explaination of why.
+	 * explanation of why.
	 */
 
	return lrc_pin(ce, engine, vaddr);
@@ -3385,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce)
	 * GuC is active, lets destroy this context, but at this point we can still be racing
	 * with suspend, so we undo everything if the H2G fails in deregister_context so
	 * that GuC reset will find this context during clean up.
+	 *
+	 * There is a race condition where the reset code could have altered
+	 * this context's state and done a wakeref put before we try to
+	 * deregister it here. So check if the context is still set to be
+	 * destroyed before undoing earlier changes, to avoid two wakeref puts
+	 * on the same context.
	 */
	ret = deregister_context(ce, ce->guc_id.id);
	if (ret) {
-		spin_lock(&ce->guc_state.lock);
-		set_context_registered(ce);
-		clr_context_destroyed(ce);
-		spin_unlock(&ce->guc_state.lock);
+		bool pending_destroyed;
+
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
+		pending_destroyed = context_destroyed(ce);
+		if (pending_destroyed) {
+			set_context_registered(ce);
+			clr_context_destroyed(ce);
+		}
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
		/*
		 * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
		 * the wakeref immediately but per function spec usage call this after unlock.
		 */
-		intel_wakeref_put_async(&gt->wakeref);
+		if (pending_destroyed)
+			intel_wakeref_put_async(&gt->wakeref);
	}
 
	return ret;
@@ -5474,12 +5543,20 @@ static inline void guc_log_context(struct drm_printer *p,
 {
	drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
	drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
-	drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
-		   ce->ring->head,
-		   ce->lrc_reg_state[CTX_RING_HEAD]);
-	drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
-		   ce->ring->tail,
-		   ce->lrc_reg_state[CTX_RING_TAIL]);
+	if (intel_context_pin_if_active(ce)) {
+		drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+			   ce->ring->head,
+			   ce->lrc_reg_state[CTX_RING_HEAD]);
+		drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+			   ce->ring->tail,
+			   ce->lrc_reg_state[CTX_RING_TAIL]);
+		intel_context_unpin(ce);
+	} else {
+		drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+			   ce->ring->head);
+		drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+			   ce->ring->tail);
+	}
	drm_printf(p, "\t\tContext Pin Count: %u\n",
		   atomic_read(&ce->pin_count));
	drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
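The guc_lrc_desc_unpin() fix is a compensate-only-if-still-owner pattern: on H2G failure, re-check under the lock that the context is still marked destroyed before rolling state back, and drop the wakeref only when this path actually performed the rollback, so a concurrent reset cannot cause a double put. A hedged userspace model, with a pthread mutex standing in for the guc_state spinlock and the wakeref put stubbed out:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx {
	pthread_mutex_t lock;
	bool registered;
	bool destroyed;
};

static void wakeref_put(void)
{
	printf("wakeref put\n");	/* must happen at most once per failure */
}

/* Failure path of a deregistration attempt. */
static void deregister_failed(struct ctx *ce)
{
	bool pending_destroyed;

	pthread_mutex_lock(&ce->lock);
	pending_destroyed = ce->destroyed;
	if (pending_destroyed) {
		/* still marked: this path owns the rollback */
		ce->registered = true;
		ce->destroyed = false;
	}
	pthread_mutex_unlock(&ce->lock);

	/* outside the lock, and only if we actually rolled back */
	if (pending_destroyed)
		wakeref_put();
}

int main(void)
{
	struct ctx ce = { PTHREAD_MUTEX_INITIALIZER, true, true };

	deregister_failed(&ce);	/* puts once */
	deregister_failed(&ce);	/* no longer marked: no second put */
	return 0;
}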
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 2d9152eb7282..456d3372eef8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -231,8 +231,8 @@ static void delayed_huc_load_init(struct intel_huc *huc)
			      sw_fence_dummy_notify);
	i915_sw_fence_commit(&huc->delayed_load.fence);
 
-	hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	huc->delayed_load.timer.function = huc_delayed_load_timer_callback;
+	hrtimer_setup(&huc->delayed_load.timer, huc_delayed_load_timer_callback, CLOCK_MONOTONIC,
+		      HRTIMER_MODE_REL);
 }
 
 static void delayed_huc_load_fini(struct intel_huc *huc)
@@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc)
	}
 }
 
+void intel_huc_fini_late(struct intel_huc *huc)
+{
+	delayed_huc_load_fini(huc);
+}
+
 #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
 static int check_huc_loading_mode(struct intel_huc *huc)
 {
@@ -414,12 +419,6 @@ out:
 
 void intel_huc_fini(struct intel_huc *huc)
 {
-	/*
-	 * the fence is initialized in init_early, so we need to clean it up
-	 * even if HuC loading is off.
-	 */
-	delayed_huc_load_fini(huc);
-
	if (huc->heci_pkt)
		i915_vma_unpin_and_release(&huc->heci_pkt, 0);
 
@@ -427,19 +426,6 @@ void intel_huc_fini(struct intel_huc *huc)
		intel_uc_fw_fini(&huc->fw);
 }
 
-void intel_huc_suspend(struct intel_huc *huc)
-{
-	if (!intel_uc_fw_is_loadable(&huc->fw))
-		return;
-
-	/*
-	 * in the unlikely case that we're suspending before the GSC has
-	 * completed its loading sequence, just stop waiting. We'll restart
-	 * on resume.
-	 */
-	delayed_huc_load_complete(huc);
-}
-
 static const char *auth_mode_string(struct intel_huc *huc,
				    enum intel_huc_authentication_type type)
 {
@@ -455,7 +441,7 @@ static const char *auth_mode_string(struct intel_huc *huc,
  * an end user should hit the timeout is in case of extreme thermal throttling.
  * And a system that is that hot during boot is probably dead anyway!
  */
-#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 #define HUC_LOAD_RETRY_LIMIT	20
 #else
 #define HUC_LOAD_RETRY_LIMIT	3
@@ -502,13 +488,15 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
	if (delta_ms > 50) {
		huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
			 delta_ms, huc->status[type].reg.reg, count, ret);
-		huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
-			 intel_rps_read_actual_frequency(&gt->rps), before_freq,
+		huc_warn(huc, "excessive auth time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n",
+			 before_freq, intel_rps_read_actual_frequency(&gt->rps),
+			 intel_rps_get_requested_frequency(&gt->rps),
			 intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
	} else {
-		huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
-			delta_ms, intel_rps_read_actual_frequency(&gt->rps),
-			before_freq, huc->status[type].reg.reg, count, ret);
+		huc_dbg(huc, "auth took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+			delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps),
+			intel_rps_get_requested_frequency(&gt->rps),
+			huc->status[type].reg.reg, count, ret);
	}
 
	/* mark the load process as complete even if the wait failed */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index ba5cb08e9e7b..921ad4b1687f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -55,9 +55,9 @@ struct intel_huc {
 
 int intel_huc_sanitize(struct intel_huc *huc);
 void intel_huc_init_early(struct intel_huc *huc);
+void intel_huc_fini_late(struct intel_huc *huc);
 int intel_huc_init(struct intel_huc *huc);
 void intel_huc_fini(struct intel_huc *huc);
-void intel_huc_suspend(struct intel_huc *huc);
 int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type);
 int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
				     enum intel_huc_authentication_type type);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 5b8080ec5315..4a3493e8d433 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc)
 
 void intel_uc_driver_late_release(struct intel_uc *uc)
 {
+	intel_huc_fini_late(&uc->huc);
 }
 
 /**
@@ -512,7 +513,7 @@ static int __uc_init_hw(struct intel_uc *uc)
			 ERR_PTR(ret), attempts);
	}
 
-	/* Did we succeded or run out of retries? */
+	/* Did we succeed or run out of retries? */
	if (ret)
		goto err_log_capture;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
index 26fdc392fce6..83801c992488 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -64,7 +64,7 @@ static int intel_hang_guc(void *arg)
	old_beat = engine->props.heartbeat_interval_ms;
	ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL);
	if (ret) {
-		gt_err(gt, "Failed to boost heatbeat interval: %pe\n", ERR_PTR(ret));
+		gt_err(gt, "Failed to boost heartbeat interval: %pe\n", ERR_PTR(ret));
		goto err;
	}