diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
84 files changed, 8820 insertions, 3816 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c new file mode 100644 index 000000000000..6a5e9ab20b94 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <drm/drm_print.h> + +#include "debugfs_engines.h" +#include "debugfs_gt.h" +#include "i915_drv.h" /* for_each_engine! */ +#include "intel_engine.h" + +static int engines_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct drm_printer p; + + p = drm_seq_file_printer(m); + for_each_engine(engine, gt, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(engines); + +void debugfs_engines_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "engines", &engines_fops }, + }; + + debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h new file mode 100644 index 000000000000..f69257eaa1cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_ENGINES_H +#define DEBUGFS_ENGINES_H + +struct intel_gt; +struct dentry; + +void debugfs_engines_register(struct intel_gt *gt, struct dentry *root); + +#endif /* DEBUGFS_ENGINES_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c new file mode 100644 index 000000000000..75255aaacaed --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "debugfs_engines.h" +#include "debugfs_gt.h" +#include "debugfs_gt_pm.h" +#include "i915_drv.h" + +void debugfs_gt_register(struct intel_gt *gt) +{ + struct dentry *root; + + if (!gt->i915->drm.primary->debugfs_root) + return; + + root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); + if (IS_ERR(root)) + return; + + debugfs_engines_register(gt, root); + debugfs_gt_pm_register(gt, root); +} + +void debugfs_gt_register_files(struct intel_gt *gt, + struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count) +{ + while (count--) { + if (!files->eval || files->eval(gt)) + debugfs_create_file(files->name, + 0444, root, gt, + files->fops); + + files++; + } +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h new file mode 100644 index 000000000000..4ea0f06cda8f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_GT_H +#define DEBUGFS_GT_H + +#include <linux/file.h> + +struct intel_gt; + +#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \ + static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +void debugfs_gt_register(struct intel_gt *gt); + +struct debugfs_gt_file { + const char *name; + const struct file_operations *fops; + bool (*eval)(const struct intel_gt *gt); +}; + +void debugfs_gt_register_files(struct intel_gt *gt, + struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count); + +#endif /* DEBUGFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c new file mode 100644 index 000000000000..059c9e5c002e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/seq_file.h> + +#include "debugfs_gt.h" +#include "debugfs_gt_pm.h" +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_llc.h" +#include "intel_rc6.h" +#include "intel_rps.h" +#include "intel_runtime_pm.h" +#include "intel_sideband.h" +#include "intel_uncore.h" + +static int fw_domains_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + struct intel_uncore_forcewake_domain *fw_domain; + unsigned int tmp; + + seq_printf(m, "user.bypass_count = %u\n", + uncore->user_forcewake_count); + + for_each_fw_domain(fw_domain, uncore, tmp) + seq_printf(m, "%s.wake_count = %u\n", + intel_uncore_forcewake_domain_to_str(fw_domain->id), + READ_ONCE(fw_domain->wake_count)); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains); + +static void print_rc6_res(struct seq_file *m, + const char *title, + const i915_reg_t reg) +{ + struct intel_gt *gt = m->private; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(gt->uncore, reg), + intel_rc6_residency_us(>->rc6, reg)); +} + +static int vlv_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rcctl1, pw_status; + + pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS); + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_EI_MODE(1)))); + seq_printf(m, "Render Power Well: %s\n", + (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); + + print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6); + print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + + return fw_domains_show(m, NULL); +} + +static int gen6_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; + + gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + if (INTEL_GEN(i915) >= 9) { + gen9_powergate_enable = + intel_uncore_read(uncore, GEN9_PG_ENABLE); + gen9_powergate_status = + intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS); + } + + if (INTEL_GEN(i915) <= 7) + sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + + seq_printf(m, "RC1e Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (INTEL_GEN(i915) >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } + seq_printf(m, "Deep RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); + seq_printf(m, "Deepest RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE)); + seq_puts(m, "Current RC state: "); + switch (gt_core_status & GEN6_RCn_MASK) { + case GEN6_RC0: + if (gt_core_status & GEN6_CORE_CPD_STATE_MASK) + seq_puts(m, "Core Power Down\n"); + else + seq_puts(m, "on\n"); + break; + case GEN6_RC3: + seq_puts(m, "RC3\n"); + break; + case GEN6_RC6: + seq_puts(m, "RC6\n"); + break; + case GEN6_RC7: + seq_puts(m, "RC7\n"); + break; + default: + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Core Power Down: %s\n", + yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (INTEL_GEN(i915) >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + } + + /* Not exactly sure what this is */ + print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", + GEN6_GT_GFX_RC6_LOCKED); + print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); + print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); + print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + + if (INTEL_GEN(i915) <= 7) { + seq_printf(m, "RC6 voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); + seq_printf(m, "RC6+ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); + seq_printf(m, "RC6++ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); + } + + return fw_domains_show(m, NULL); +} + +static int ilk_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rgvmodectl, rstdbyctl; + u16 crstandvid; + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL); + crstandvid = intel_uncore_read16(uncore, CRSTANDVID); + + seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); + seq_printf(m, "Boost freq: %d\n", + (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >> + MEMMODE_BOOST_FREQ_SHIFT); + seq_printf(m, "HW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_HWIDLE_EN)); + seq_printf(m, "SW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_SWMODE_EN)); + seq_printf(m, "Gated voltage change: %s\n", + yesno(rgvmodectl & MEMMODE_RCLK_GATE)); + seq_printf(m, "Starting frequency: P%d\n", + (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT); + seq_printf(m, "Max P-state: P%d\n", + (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT); + seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK)); + seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f)); + seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f)); + seq_printf(m, "Render standby enabled: %s\n", + yesno(!(rstdbyctl & RCX_SW_EXIT))); + seq_puts(m, "Current RS state: "); + switch (rstdbyctl & RSX_STATUS_MASK) { + case RSX_STATUS_ON: + seq_puts(m, "on\n"); + break; + case RSX_STATUS_RC1: + seq_puts(m, "RC1\n"); + break; + case RSX_STATUS_RC1E: + seq_puts(m, "RC1E\n"); + break; + case RSX_STATUS_RS1: + seq_puts(m, "RS1\n"); + break; + case RSX_STATUS_RS2: + seq_puts(m, "RS2 (RC6)\n"); + break; + case RSX_STATUS_RS3: + seq_puts(m, "RC3 (RC6+)\n"); + break; + default: + seq_puts(m, "unknown\n"); + break; + } + + return 0; +} + +static int drpc_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + intel_wakeref_t wakeref; + int err = -ENODEV; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_drpc(m); + else if (INTEL_GEN(i915) >= 6) + err = gen6_drpc(m); + else + err = ilk_drpc(m); + } + + return err; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc); + +static int frequency_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_rps *rps = >->rps; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(uncore->rpm); + + if (IS_GEN(i915, 5)) { + u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); + + seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); + seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); + seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> + MEMSTAT_VID_SHIFT); + seq_printf(m, "Current P-state: %d\n", + (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + u32 rpmodectl, freq_sts; + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + vlv_punit_get(i915); + freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + + seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); + seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq); + + seq_printf(m, "actual GPU freq: %d MHz\n", + intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); + + seq_printf(m, "current GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + + seq_printf(m, "max GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "min GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + + seq_printf(m, "idle GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + + seq_printf(m, "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else if (INTEL_GEN(i915) >= 6) { + u32 rp_state_limits; + u32 gt_perf_status; + u32 rp_state_cap; + u32 rpmodectl, rpinclimit, rpdeclimit; + u32 rpstat, cagf, reqf; + u32 rpupei, rpcurup, rpprevup; + u32 rpdownei, rpcurdown, rpprevdown; + u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; + int max_freq; + + rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); + if (IS_GEN9_LP(i915)) { + rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); + } else { + rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); + } + + /* RPSTAT1 is in the GT power well */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); + if (INTEL_GEN(i915) >= 9) { + reqf >>= 23; + } else { + reqf &= ~GEN6_TURBO_DISABLE; + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + reqf >>= 24; + else + reqf >>= 25; + } + reqf = intel_gpu_freq(rps, reqf); + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; + rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; + rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; + rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + cagf = intel_rps_read_actual_frequency(rps); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + if (INTEL_GEN(i915) >= 11) { + pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); + pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); + /* + * The equivalent to the PM ISR & IIR cannot be read + * without affecting the current state of the system + */ + pm_isr = 0; + pm_iir = 0; + } else if (INTEL_GEN(i915) >= 8) { + pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); + pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); + pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); + pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); + } else { + pm_ier = intel_uncore_read(uncore, GEN6_PMIER); + pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); + pm_isr = intel_uncore_read(uncore, GEN6_PMISR); + pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); + } + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + pm_ier, pm_imr, pm_mask); + if (INTEL_GEN(i915) <= 10) + seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", + pm_isr, pm_iir); + seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + seq_printf(m, "Render p-state ratio: %d\n", + (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); + seq_printf(m, "Render p-state VID: %d\n", + gt_perf_status & 0xff); + seq_printf(m, "Render p-state limit: %d\n", + rp_state_limits & 0xff); + seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); + seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); + seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); + seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); + seq_printf(m, "CAGF: %dMHz\n", cagf); + seq_printf(m, "RP CUR UP EI: %d (%dus)\n", + rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei)); + seq_printf(m, "RP CUR UP: %d (%dus)\n", + rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup)); + seq_printf(m, "RP PREV UP: %d (%dus)\n", + rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup)); + seq_printf(m, "Up threshold: %d%%\n", + rps->power.up_threshold); + + seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", + rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei)); + seq_printf(m, "RP CUR DOWN: %d (%dus)\n", + rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown)); + seq_printf(m, "RP PREV DOWN: %d (%dus)\n", + rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown)); + seq_printf(m, "Down threshold: %d%%\n", + rps->power.down_threshold); + + max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : + rp_state_cap >> 16) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (rp_state_cap & 0xff00) >> 8; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : + rp_state_cap >> 0) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + seq_printf(m, "Max overclocked frequency: %dMHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "Current freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + seq_printf(m, "Actual freq: %d MHz\n", cagf); + seq_printf(m, "Idle freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + seq_printf(m, "Min freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(rps, rps->boost_freq)); + seq_printf(m, "Max freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else { + seq_puts(m, "no P-state info available\n"); + } + + seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); + seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); + seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); + + intel_runtime_pm_put(uncore->rpm, wakeref); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency); + +static int llc_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + const bool edram = INTEL_GEN(i915) > 8; + struct intel_rps *rps = >->rps; + unsigned int max_gpu_freq, min_gpu_freq; + intel_wakeref_t wakeref; + int gpu_freq, ia_freq; + + seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915))); + seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC", + i915->edram_size_mb); + + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Convert GT frequency to 50 HZ units */ + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; + } + + seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { + ia_freq = gpu_freq; + sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &ia_freq, NULL); + seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", + intel_gpu_freq(rps, + (gpu_freq * + (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? + GEN9_FREQ_SCALER : 1))), + ((ia_freq >> 0) & 0xff) * 100, + ((ia_freq >> 8) & 0xff) * 100); + } + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return 0; +} + +static bool llc_eval(const struct intel_gt *gt) +{ + return HAS_LLC(gt->i915); +} + +DEFINE_GT_DEBUGFS_ATTRIBUTE(llc); + +static const char *rps_power_to_str(unsigned int power) +{ + static const char * const strings[] = { + [LOW_POWER] = "low power", + [BETWEEN] = "mixed", + [HIGH_POWER] = "high power", + }; + + if (power >= ARRAY_SIZE(strings) || !strings[power]) + return "unknown"; + + return strings[power]; +} + +static int rps_boost_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_rps *rps = >->rps; + + seq_printf(m, "RPS enabled? %d\n", rps->enabled); + seq_printf(m, "GPU busy? %s\n", yesno(gt->awake)); + seq_printf(m, "Boosts outstanding? %d\n", + atomic_read(&rps->num_waiters)); + seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); + seq_printf(m, "Frequency requested %d, actual %d\n", + intel_gpu_freq(rps, rps->cur_freq), + intel_rps_read_actual_frequency(rps)); + seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->min_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", + intel_gpu_freq(rps, rps->idle_freq), + intel_gpu_freq(rps, rps->efficient_freq), + intel_gpu_freq(rps, rps->boost_freq)); + + seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); + + if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) { + struct intel_uncore *uncore = gt->uncore; + u32 rpup, rpupei; + u32 rpdown, rpdownei; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK; + rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK; + rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK; + rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK; + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", + rps_power_to_str(rps->power.mode)); + seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", + rpup && rpupei ? 100 * rpup / rpupei : 0, + rps->power.up_threshold); + seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", + rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, + rps->power.down_threshold); + } else { + seq_puts(m, "\nRPS Autotuning inactive\n"); + } + + return 0; +} + +static bool rps_eval(const struct intel_gt *gt) +{ + return HAS_RPS(gt->i915); +} + +DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost); + +void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "drpc", &drpc_fops, NULL }, + { "frequency", &frequency_fops, NULL }, + { "forcewake", &fw_domains_fops, NULL }, + { "llc", &llc_fops, llc_eval }, + { "rps_boost", &rps_boost_fops, rps_eval }, + }; + + debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h new file mode 100644 index 000000000000..4cf5f5c9da7d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_GT_PM_H +#define DEBUGFS_GT_PM_H + +struct intel_gt; +struct dentry; + +void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root); + +#endif /* DEBUGFS_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 55317081d48b..0ba524a414c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -28,6 +28,8 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" static void irq_enable(struct intel_engine_cs *engine) { @@ -53,15 +55,17 @@ static void irq_disable(struct intel_engine_cs *engine) static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { + struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + lockdep_assert_held(&b->irq_lock); GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(container_of(b, - struct intel_engine_cs, - breadcrumbs)); + irq_disable(engine); b->irq_armed = false; + intel_gt_pm_put_async(engine->gt); } void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) @@ -127,16 +131,23 @@ __dma_fence_signal__notify(struct dma_fence *fence, } } -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) +static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + intel_engine_add_retire(engine, tl); +} + +static void signal_irq_work(struct irq_work *work) +{ + struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; - unsigned long flags; LIST_HEAD(signal); - spin_lock_irqsave(&b->irq_lock, flags); + spin_lock(&b->irq_lock); if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); @@ -177,44 +188,41 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) if (!list_is_first(pos, &ce->signals)) { /* Advance the list to the first incomplete request */ __list_del_many(&ce->signals, pos); - if (&ce->signals == pos) /* now empty */ + if (&ce->signals == pos) { /* now empty */ list_del_init(&ce->signal_link); + add_retire(b, ce->timeline); + } } } - spin_unlock_irqrestore(&b->irq_lock, flags); + spin_unlock(&b->irq_lock); list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); struct list_head cb_list; - spin_lock_irqsave(&rq->lock, flags); + spin_lock(&rq->lock); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); __dma_fence_signal__notify(&rq->fence, &cb_list); - spin_unlock_irqrestore(&rq->lock, flags); + spin_unlock(&rq->lock); i915_request_put(rq); } } -static void signal_irq_work(struct irq_work *work) -{ - struct intel_engine_cs *engine = - container_of(work, typeof(*engine), breadcrumbs.irq_work); - - intel_engine_breadcrumbs_irq(engine); -} - -static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return; + return true; + + if (!intel_gt_pm_get_if_awake(engine->gt)) + return false; /* * The breadcrumb irq will be disarmed on the interrupt after the @@ -234,6 +242,8 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) if (!b->irq_enabled++) irq_enable(engine); + + return true; } void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -271,19 +281,20 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; struct list_head *pos; spin_lock(&b->irq_lock); GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - __intel_breadcrumbs_arm_irq(b); + if (!__intel_breadcrumbs_arm_irq(b)) + goto unlock; /* * We keep the seqno in retirement order, so we can break - * inside intel_engine_breadcrumbs_irq as soon as we've passed - * the last completed request (or seen a request that hasn't - * event started). We could iterate the timeline->requests list, + * inside intel_engine_signal_breadcrumbs as soon as we've + * passed the last completed request (or seen a request that + * hasn't event started). We could walk the timeline->requests, * but keeping a separate signalers_list has the advantage of * hopefully being much smaller than the full list and so * provides faster iteration and detection when there are no @@ -306,6 +317,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) GEM_BUG_ON(!check_signal_order(ce, rq)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); +unlock: spin_unlock(&b->irq_lock); } @@ -326,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) */ spin_lock(&b->irq_lock); if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; list_del(&rq->signal_link); if (list_empty(&ce->signals)) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 59c3083c1ec1..fbaa9df6f436 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -13,6 +13,7 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_ring.h" static struct i915_global_context { struct i915_global base; @@ -30,8 +31,7 @@ void intel_context_free(struct intel_context *ce) } struct intel_context * -intel_context_create(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +intel_context_create(struct intel_engine_cs *engine) { struct intel_context *ce; @@ -39,7 +39,7 @@ intel_context_create(struct i915_gem_context *ctx, if (!ce) return ERR_PTR(-ENOMEM); - intel_context_init(ce, ctx, engine); + intel_context_init(ce, engine); return ce; } @@ -67,11 +67,8 @@ int __intel_context_do_pin(struct intel_context *ce) if (err) goto err; - GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n", - ce->engine->name, ce->timeline->fence_context, - ce->ring->head, ce->ring->tail); - - i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ + CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", + ce->ring->head, ce->ring->tail); smp_mb__before_atomic(); /* flush pin before it is visible */ } @@ -97,12 +94,10 @@ void intel_context_unpin(struct intel_context *ce) mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); if (likely(atomic_dec_and_test(&ce->pin_count))) { - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); ce->ops->unpin(ce); - i915_gem_context_put(ce->gem_context); intel_context_active_release(ce); } @@ -112,13 +107,10 @@ void intel_context_unpin(struct intel_context *ce) static int __context_pin_state(struct i915_vma *vma) { - u64 flags; + unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; int err; - flags = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; - flags |= PIN_HIGH | PIN_GLOBAL; - - err = i915_vma_pin(vma, 0, 0, flags); + err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH); if (err) return err; @@ -143,9 +135,9 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); + set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) __context_unpin_state(ce->state); @@ -197,7 +189,7 @@ int intel_context_active_acquire(struct intel_context *ce) return err; /* Preallocate tracking nodes */ - if (!i915_gem_context_is_kernel(ce->gem_context)) { + if (!intel_context_is_barrier(ce)) { err = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine); if (err) { @@ -218,30 +210,19 @@ void intel_context_active_release(struct intel_context *ce) void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct i915_address_space *vm; - GEM_BUG_ON(!engine->cops); + GEM_BUG_ON(!engine->gt->vm); kref_init(&ce->ref); - ce->gem_context = ctx; - rcu_read_lock(); - vm = rcu_dereference(ctx->vm); - if (vm) - ce->vm = i915_vm_get(vm); - else - ce->vm = i915_vm_get(&engine->gt->ggtt->vm); - rcu_read_unlock(); - if (ctx->timeline) - ce->timeline = intel_timeline_get(ctx->timeline); - ce->engine = engine; ce->ops = engine->cops; ce->sseu = engine->sseu; - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring = __intel_context_ring_size(SZ_4K); + + ce->vm = i915_vm_get(engine->gt->vm); INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); @@ -306,17 +287,11 @@ int intel_context_prepare_remote_request(struct intel_context *ce, int err; /* Only suitable for use in remotely modifying this context */ - GEM_BUG_ON(rq->hw_context == ce); + GEM_BUG_ON(rq->context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - err = mutex_lock_interruptible_nested(&tl->mutex, - SINGLE_DEPTH_NESTING); - if (err) - return err; - /* Queue this switch after current activity by this context. */ err = i915_active_fence_set(&tl->last_request, rq); - mutex_unlock(&tl->mutex); if (err) return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index dd742ac2fbdb..1d4a1b1357cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -7,21 +7,29 @@ #ifndef __INTEL_CONTEXT_H__ #define __INTEL_CONTEXT_H__ +#include <linux/bitops.h> #include <linux/lockdep.h> +#include <linux/types.h> #include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_ring_types.h" #include "intel_timeline_types.h" +#define CE_TRACE(ce, fmt, ...) do { \ + const struct intel_context *ce__ = (ce); \ + ENGINE_TRACE(ce__->engine, "context:%llx" fmt, \ + ce__->timeline->fence_context, \ + ##__VA_ARGS__); \ +} while (0) + void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, struct intel_engine_cs *engine); void intel_context_fini(struct intel_context *ce); struct intel_context * -intel_context_create(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); +intel_context_create(struct intel_engine_cs *engine); void intel_context_free(struct intel_context *ce); @@ -152,4 +160,64 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz) return u64_to_ptr(struct intel_ring, sz); } +static inline bool intel_context_is_barrier(const struct intel_context *ce) +{ + return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); +} + +static inline bool intel_context_use_semaphores(const struct intel_context *ce) +{ + return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline void intel_context_set_use_semaphores(struct intel_context *ce) +{ + set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline void intel_context_clear_use_semaphores(struct intel_context *ce) +{ + clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline bool intel_context_is_banned(const struct intel_context *ce) +{ + return test_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool intel_context_set_banned(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool +intel_context_force_single_submission(const struct intel_context *ce) +{ + return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline void +intel_context_set_single_submission(struct intel_context *ce) +{ + __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline bool +intel_context_nopreempt(const struct intel_context *ce) +{ + return test_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_set_nopreempt(struct intel_context *ce) +{ + set_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_clear_nopreempt(struct intel_context *ce) +{ + clear_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 6959b05ae5f8..9527a659546c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -44,7 +44,7 @@ struct intel_context { #define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) struct i915_address_space *vm; - struct i915_gem_context *gem_context; + struct i915_gem_context __rcu *gem_context; struct list_head signal_link; struct list_head signals; @@ -54,7 +54,13 @@ struct intel_context { struct intel_timeline *timeline; unsigned long flags; -#define CONTEXT_ALLOC_BIT 0 +#define CONTEXT_BARRIER_BIT 0 +#define CONTEXT_ALLOC_BIT 1 +#define CONTEXT_VALID_BIT 2 +#define CONTEXT_USE_SEMAPHORES 3 +#define CONTEXT_BANNED 4 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 5 +#define CONTEXT_NOPREEMPT 6 u32 *lrc_reg_state; u64 lrc_desc; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 93ea367fe624..79ecac5ac0ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -19,6 +19,7 @@ #include "intel_workarounds.h" struct drm_printer; +struct intel_gt; /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -28,6 +29,13 @@ struct drm_printer; #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) +#define ENGINE_TRACE(e, fmt, ...) do { \ + const struct intel_engine_cs *e__ __maybe_unused = (e); \ + GEM_TRACE("%s %s: " fmt, \ + dev_name(e__->i915->drm.dev), e__->name, \ + ##__VA_ARGS__); \ +} while (0) + /* * The register defines to be used with the following macros need to accept a * base param, e.g: @@ -89,38 +97,6 @@ struct drm_printer; /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. */ -enum intel_engine_hangcheck_action { - ENGINE_IDLE = 0, - ENGINE_WAIT, - ENGINE_ACTIVE_SEQNO, - ENGINE_ACTIVE_HEAD, - ENGINE_ACTIVE_SUBUNITS, - ENGINE_WAIT_KICK, - ENGINE_DEAD, -}; - -static inline const char * -hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) -{ - switch (a) { - case ENGINE_IDLE: - return "idle"; - case ENGINE_WAIT: - return "wait"; - case ENGINE_ACTIVE_SEQNO: - return "active seqno"; - case ENGINE_ACTIVE_HEAD: - return "active head"; - case ENGINE_ACTIVE_SUBUNITS: - return "active subunits"; - case ENGINE_WAIT_KICK: - return "wait kick"; - case ENGINE_DEAD: - return "dead"; - } - - return "unknown"; -} static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) @@ -131,9 +107,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - GEM_BUG_ON(execlists->active - execlists->inflight > - execlists_num_ports(execlists)); - return READ_ONCE(*execlists->active); + return *READ_ONCE(execlists->active); } static inline void @@ -206,132 +180,19 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_HWS_CSB_WRITE_INDEX 0x1f #define CNL_HWS_CSB_WRITE_INDEX 0x2f -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring); -void intel_ring_reset(struct intel_ring *ring, u32 tail); -unsigned int intel_ring_update_space(struct intel_ring *ring); -void intel_ring_unpin(struct intel_ring *ring); -void intel_ring_free(struct kref *ref); - -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) -{ - kref_get(&ring->ref); - return ring; -} - -static inline void intel_ring_put(struct intel_ring *ring) -{ - kref_put(&ring->ref, intel_ring_free); -} - void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); -int __must_check intel_ring_cacheline_align(struct i915_request *rq); - -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); +int intel_engines_init_mmio(struct intel_gt *gt); +int intel_engines_init(struct intel_gt *gt); -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) -{ - /* Dummy function. - * - * This serves as a placeholder in the code so that the reader - * can compare against the preceding intel_ring_begin() and - * check that the number of dwords emitted matches the space - * reserved for the command packet (i.e. the value passed to - * intel_ring_begin()). - */ - GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); -} - -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) -{ - return pos & (ring->size - 1); -} - -static inline bool -intel_ring_offset_valid(const struct intel_ring *ring, - unsigned int pos) -{ - if (pos & -ring->size) /* must be strictly within the ring */ - return false; - - if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ - return false; - - return true; -} - -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) -{ - /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - rq->ring->vaddr; - GEM_BUG_ON(offset > rq->ring->size); - return intel_ring_wrap(rq->ring, offset); -} - -static inline void -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) -{ - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - - /* - * "Ring Buffer Use" - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 - * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 - * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - * - * We use ring->head as the last known location of the actual RING_HEAD, - * it may have advanced but in the worst case it is equally the same - * as ring->head and so we should never program RING_TAIL to advance - * into the same cacheline as ring->head. - */ -#define cacheline(a) round_down(a, CACHELINE_BYTES) - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && - tail < ring->head); -#undef cacheline -} - -static inline unsigned int -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) -{ - /* Whilst writes to the tail are strictly order, there is no - * serialisation between readers and the writers. The tail may be - * read by i915_request_retire() just as it is being updated - * by execlists, as although the breadcrumb is complete, the context - * switch hasn't been seen. - */ - assert_ring_tail_valid(ring, tail); - ring->tail = tail; - return tail; -} - -static inline unsigned int -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) -{ - /* - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - */ - GEM_BUG_ON(!is_power_of_2(size)); - return (head - tail - CACHELINE_BYTES) & (size - 1); -} - -int intel_engines_init_mmio(struct drm_i915_private *i915); -int intel_engines_setup(struct drm_i915_private *i915); -int intel_engines_init(struct drm_i915_private *i915); -void intel_engines_cleanup(struct drm_i915_private *i915); +void intel_engines_release(struct intel_gt *gt); +void intel_engines_free(struct intel_gt *gt); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_ring_submission_setup(struct intel_engine_cs *engine); -int intel_ring_submission_init(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); @@ -352,13 +213,11 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); static inline void -intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) +intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) { irq_work_queue(&engine->breadcrumbs.irq_work); } -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); - void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); @@ -416,14 +275,14 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { - if (engine->reset.reset) - engine->reset.reset(engine, stalled); + if (engine->reset.rewind) + engine->reset.rewind(engine, stalled); engine->serial++; /* contexts lost */ } bool intel_engines_are_idle(struct intel_gt *gt); bool intel_engine_is_idle(struct intel_engine_cs *engine); -void intel_engine_flush_submission(struct intel_engine_cs *engine); +bool intel_engine_flush_submission(struct intel_engine_cs *engine); void intel_engines_reset_default_submission(struct intel_gt *gt); @@ -434,61 +293,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -static inline void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static inline void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. - */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - int intel_enable_engine_stats(struct intel_engine_cs *engine); void intel_disable_engine_stats(struct intel_engine_cs *engine); @@ -497,7 +301,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine); -u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class); +u32 intel_engine_context_size(struct intel_gt *gt, u8 class); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) @@ -525,4 +329,22 @@ void intel_engine_init_active(struct intel_engine_cs *engine, #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 +static inline bool +intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return false; + + return intel_engine_has_preemption(engine); +} + +static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return intel_engine_has_semaphores(engine); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 051734c9b733..ddf9543b1261 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,15 +28,16 @@ #include "i915_drv.h" -#include "gt/intel_gt.h" - +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_engine_user.h" -#include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_reset.h" +#include "intel_ring.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full @@ -140,7 +141,7 @@ static const struct engine_info intel_engines[] = { /** * intel_engine_context_size() - return the size of the context for an engine - * @dev_priv: i915 device private + * @gt: the gt * @class: engine class * * Each engine class may require a different amount of space for a context @@ -152,17 +153,18 @@ static const struct engine_info intel_engines[] = { * in LRC mode, but does not include the "shared data page" used with * GuC submission. The caller should account for this if using the GuC. */ -u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +u32 intel_engine_context_size(struct intel_gt *gt, u8 class) { + struct intel_uncore *uncore = gt->uncore; u32 cxt_size; BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); switch (class) { case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { + switch (INTEL_GEN(gt->i915)) { default: - MISSING_CASE(INTEL_GEN(dev_priv)); + MISSING_CASE(INTEL_GEN(gt->i915)); return DEFAULT_LR_CONTEXT_RENDER_SIZE; case 12: case 11: @@ -174,14 +176,14 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) case 8: return GEN8_LR_CONTEXT_RENDER_SIZE; case 7: - if (IS_HASWELL(dev_priv)) + if (IS_HASWELL(gt->i915)) return HSW_CXT_TOTAL_SIZE; - cxt_size = I915_READ(GEN7_CXT_SIZE); + cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE); return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, PAGE_SIZE); case 6: - cxt_size = I915_READ(CXT_SIZE); + cxt_size = intel_uncore_read(uncore, CXT_SIZE); return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, PAGE_SIZE); case 5: @@ -196,9 +198,9 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) * minimum allocation anyway so it should all come * out in the wash. */ - cxt_size = I915_READ(CXT_SIZE) + 1; + cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1; DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n", - INTEL_GEN(dev_priv), + INTEL_GEN(gt->i915), cxt_size * 64, cxt_size - 1); return round_up(cxt_size * 64, PAGE_SIZE); @@ -215,7 +217,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: - if (INTEL_GEN(dev_priv) < 8) + if (INTEL_GEN(gt->i915) < 8) return 0; return GEN8_LR_CONTEXT_OTHER_SIZE; } @@ -308,14 +310,16 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); - /* - * To be overridden by the backend on setup. However to facilitate - * cleanup on error during setup, we always provide the destroy vfunc. - */ - engine->destroy = (typeof(engine->destroy))kfree; + engine->props.heartbeat_interval_ms = + CONFIG_DRM_I915_HEARTBEAT_INTERVAL; + engine->props.preempt_timeout_ms = + CONFIG_DRM_I915_PREEMPT_TIMEOUT; + engine->props.stop_timeout_ms = + CONFIG_DRM_I915_STOP_TIMEOUT; + engine->props.timeslice_duration_ms = + CONFIG_DRM_I915_TIMESLICE_DURATION; - engine->context_size = intel_engine_context_size(gt->i915, - engine->class); + engine->context_size = intel_engine_context_size(gt, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; if (engine->context_size) @@ -324,6 +328,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + ewma__engine_latency_init(&engine->latency); seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -334,7 +339,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) gt->engine_class[info->class][info->instance] = engine; gt->engine[id] = engine; - intel_engine_add_user(engine); gt->i915->engine[id] = engine; return 0; @@ -370,38 +374,58 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) } } -static void intel_setup_engine_capabilities(struct drm_i915_private *i915) +static void intel_setup_engine_capabilities(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) __setup_engine_capabilities(engine); } /** - * intel_engines_cleanup() - free the resources allocated for Command Streamers - * @i915: the i915 devic + * intel_engines_release() - free the resources allocated for Command Streamers + * @gt: pointer to struct intel_gt */ -void intel_engines_cleanup(struct drm_i915_private *i915) +void intel_engines_release(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { - engine->destroy(engine); - i915->engine[id] = NULL; + /* Decouple the backend; but keep the layout for late GPU resets */ + for_each_engine(engine, gt, id) { + if (!engine->release) + continue; + + engine->release(engine); + engine->release = NULL; + + memset(&engine->reset, 0, sizeof(engine->reset)); + + gt->i915->engine[id] = NULL; + } +} + +void intel_engines_free(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) { + kfree(engine); + gt->engine[id] = NULL; } } /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers - * @i915: the i915 device + * @gt: pointer to struct intel_gt * * Return: non-zero if the initialization failed. */ -int intel_engines_init_mmio(struct drm_i915_private *i915) +int intel_engines_init_mmio(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; struct intel_device_info *device_info = mkwrite_device_info(i915); const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask; unsigned int mask = 0; @@ -419,7 +443,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) if (!HAS_ENGINE(i915, i)) continue; - err = intel_engine_setup(&i915->gt, i); + err = intel_engine_setup(gt, i); if (err) goto cleanup; @@ -436,45 +460,14 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) RUNTIME_INFO(i915)->num_engines = hweight32(mask); - intel_gt_check_and_clear_faults(&i915->gt); + intel_gt_check_and_clear_faults(gt); - intel_setup_engine_capabilities(i915); + intel_setup_engine_capabilities(gt); return 0; cleanup: - intel_engines_cleanup(i915); - return err; -} - -/** - * intel_engines_init() - init the Engine Command Streamers - * @i915: i915 device private - * - * Return: non-zero if the initialization failed. - */ -int intel_engines_init(struct drm_i915_private *i915) -{ - int (*init)(struct intel_engine_cs *engine); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - if (HAS_EXECLISTS(i915)) - init = intel_execlists_submission_init; - else - init = intel_ring_submission_init; - - for_each_engine(engine, i915, id) { - err = init(engine); - if (err) - goto cleanup; - } - - return 0; - -cleanup: - intel_engines_cleanup(i915); + intel_engines_free(gt); return err; } @@ -518,7 +511,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, unsigned int flags; flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) + if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so * restrict its pinning to the low mappable arena. @@ -589,7 +582,7 @@ err: return ret; } -static int intel_engine_setup_common(struct intel_engine_cs *engine) +static int engine_setup_common(struct intel_engine_cs *engine) { int err; @@ -602,9 +595,9 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); - intel_engine_init_hangcheck(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); @@ -619,49 +612,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) return 0; } -/** - * intel_engines_setup- setup engine state not requiring hw access - * @i915: Device to setup. - * - * Initializes engine structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -int intel_engines_setup(struct drm_i915_private *i915) -{ - int (*setup)(struct intel_engine_cs *engine); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - if (HAS_EXECLISTS(i915)) - setup = intel_execlists_submission_setup; - else - setup = intel_ring_submission_setup; - - for_each_engine(engine, i915, id) { - err = intel_engine_setup_common(engine); - if (err) - goto cleanup; - - err = setup(engine); - if (err) - goto cleanup; - - /* We expect the backend to take control over its state */ - GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree); - - GEM_BUG_ON(!engine->cops); - } - - return 0; - -cleanup: - intel_engines_cleanup(i915); - return err; -} - struct measure_breadcrumb { struct i915_request rq; struct intel_timeline timeline; @@ -745,13 +695,13 @@ create_kernel_context(struct intel_engine_cs *engine) struct intel_context *ce; int err; - ce = intel_context_create(engine->i915->kernel_context, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return ce; - ce->ring = __intel_context_ring_size(SZ_4K); + __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); - err = intel_context_pin(ce); + err = intel_context_pin(ce); /* perma-pin so it is always available */ if (err) { intel_context_put(ce); return ERR_PTR(err); @@ -779,13 +729,19 @@ create_kernel_context(struct intel_engine_cs *engine) * * Returns zero on success or an error code on failure. */ -int intel_engine_init_common(struct intel_engine_cs *engine) +static int engine_init_common(struct intel_engine_cs *engine) { struct intel_context *ce; int ret; engine->set_default_submission(engine); + ret = measure_breadcrumb_dw(engine); + if (ret < 0) + return ret; + + engine->emit_fini_breadcrumb_dw = ret; + /* * We may need to do things with the shrinker which * require us to immediately switch back to the default @@ -800,18 +756,38 @@ int intel_engine_init_common(struct intel_engine_cs *engine) engine->kernel_context = ce; - ret = measure_breadcrumb_dw(engine); - if (ret < 0) - goto err_unpin; + return 0; +} - engine->emit_fini_breadcrumb_dw = ret; +int intel_engines_init(struct intel_gt *gt) +{ + int (*setup)(struct intel_engine_cs *engine); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - return 0; + if (HAS_EXECLISTS(gt->i915)) + setup = intel_execlists_submission_setup; + else + setup = intel_ring_submission_setup; -err_unpin: - intel_context_unpin(ce); - intel_context_put(ce); - return ret; + for_each_engine(engine, gt, id) { + err = engine_setup_common(engine); + if (err) + return err; + + err = setup(engine); + if (err) + return err; + + err = engine_init_common(engine); + if (err) + return err; + + intel_engine_add_user(engine); + } + + return 0; } /** @@ -824,9 +800,11 @@ err_unpin: void intel_engine_cleanup_common(struct intel_engine_cs *engine) { GEM_BUG_ON(!list_empty(&engine->active.requests)); + tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ cleanup_status_page(engine); + intel_engine_fini_retire(engine); intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); @@ -873,6 +851,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) return bbaddr; } +static unsigned long stop_timeout(const struct intel_engine_cs *engine) +{ + if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ + return 0; + + /* + * If we are doing a normal GPU reset, we can take our time and allow + * the engine to quiesce. We've stopped submission to the engine, and + * if we wait long enough an innocent context should complete and + * leave the engine idle. So they should not be caught unaware by + * the forthcoming GPU reset (which usually follows the stop_cs)! + */ + return READ_ONCE(engine->props.stop_timeout_ms); +} + int intel_engine_stop_cs(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; @@ -883,16 +876,16 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) < 3) return -ENODEV; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); err = 0; if (__intel_wait_for_register_fw(uncore, mode, MODE_IDLE, MODE_IDLE, - 1000, 0, + 1000, stop_timeout(engine), NULL)) { - GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); + ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n"); err = -ETIMEDOUT; } @@ -904,7 +897,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) { - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } @@ -1054,9 +1047,10 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } -void intel_engine_flush_submission(struct intel_engine_cs *engine) +bool intel_engine_flush_submission(struct intel_engine_cs *engine) { struct tasklet_struct *t = &engine->execlists.tasklet; + bool active = tasklet_is_locked(t); if (__tasklet_is_scheduled(t)) { local_bh_disable(); @@ -1067,10 +1061,13 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine) tasklet_unlock(t); } local_bh_enable(); + active = true; } /* Otherwise flush the tasklet if it was running on another cpu */ tasklet_unlock_wait(t); + + return active; } /** @@ -1318,10 +1315,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, unsigned int idx; u8 read, write; - drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n", + drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", yesno(test_bit(TASKLET_STATE_SCHED, &engine->execlists.tasklet.state)), enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + repr_timer(&engine->execlists.preempt), repr_timer(&engine->execlists.timer)); read = execlists->csb_head; @@ -1345,6 +1343,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } execlists_active_lock_bh(execlists); + rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1382,6 +1381,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (tl) intel_timeline_put(tl); } + rcu_read_unlock(); execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", @@ -1447,8 +1447,17 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); - drm_printf(m, "\tHangcheck: %d ms ago\n", - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + drm_printf(m, "\tBarriers?: %s\n", + yesno(!llist_empty(&engine->barrier_tasks))); + drm_printf(m, "\tLatency: %luus\n", + ewma__engine_latency_read(&engine->latency)); + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + drm_printf(m, "\tHeartbeat: %d ms ago\n", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + rcu_read_unlock(); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1481,9 +1490,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); - if (rq->hw_context->lrc_reg_state) { + if (rq->context->lrc_reg_state) { drm_printf(m, "Logical Ring Context:\n"); - hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE); + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); } } spin_unlock_irqrestore(&engine->active.lock, flags); @@ -1544,7 +1553,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) for (port = execlists->pending; (rq = *port); port++) { /* Exclude any contexts already counted in active */ - if (!intel_context_inflight_count(rq->hw_context)) + if (!intel_context_inflight_count(rq->context)) engine->stats.active++; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c new file mode 100644 index 000000000000..742628e40201 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -0,0 +1,242 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_request.h" + +#include "intel_context.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" +#include "intel_engine.h" +#include "intel_gt.h" +#include "intel_reset.h" + +/* + * While the engine is active, we send a periodic pulse along the engine + * to check on its health and to flush any idle-barriers. If that request + * is stuck, and we fail to preempt it, we declare the engine hung and + * issue a reset -- in the hope that restores progress. + */ + +static bool next_heartbeat(struct intel_engine_cs *engine) +{ + long delay; + + delay = READ_ONCE(engine->props.heartbeat_interval_ms); + if (!delay) + return false; + + delay = msecs_to_jiffies_timeout(delay); + if (delay >= HZ) + delay = round_jiffies_up_relative(delay); + schedule_delayed_work(&engine->heartbeat.work, delay); + + return true; +} + +static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) +{ + engine->wakeref_serial = READ_ONCE(engine->serial) + 1; + i915_request_add_active_barriers(rq); +} + +static void show_heartbeat(const struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct drm_printer p = drm_debug_printer("heartbeat"); + + intel_engine_dump(engine, &p, + "%s heartbeat {prio:%d} not ticking\n", + engine->name, + rq->sched.attr.priority); +} + +static void heartbeat(struct work_struct *wrk) +{ + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), + }; + struct intel_engine_cs *engine = + container_of(wrk, typeof(*engine), heartbeat.work.work); + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + + rq = engine->heartbeat.systole; + if (rq && i915_request_completed(rq)) { + i915_request_put(rq); + engine->heartbeat.systole = NULL; + } + + if (!intel_engine_pm_get_if_awake(engine)) + return; + + if (intel_gt_is_wedged(engine->gt)) + goto out; + + if (engine->heartbeat.systole) { + if (engine->schedule && + rq->sched.attr.priority < I915_PRIORITY_BARRIER) { + /* + * Gradually raise the priority of the heartbeat to + * give high priority work [which presumably desires + * low latency and no jitter] the chance to naturally + * complete before being preempted. + */ + attr.priority = I915_PRIORITY_MASK; + if (rq->sched.attr.priority >= attr.priority) + attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); + if (rq->sched.attr.priority >= attr.priority) + attr.priority = I915_PRIORITY_BARRIER; + + local_bh_disable(); + engine->schedule(rq, &attr); + local_bh_enable(); + } else { + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + show_heartbeat(rq, engine); + + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "stopped heartbeat on %s", + engine->name); + } + goto out; + } + + if (engine->wakeref_serial == engine->serial) + goto out; + + mutex_lock(&ce->timeline->mutex); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) + goto unlock; + + idle_pulse(engine, rq); + if (i915_modparams.enable_hangcheck) + engine->heartbeat.systole = i915_request_get(rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +unlock: + mutex_unlock(&ce->timeline->mutex); +out: + if (!next_heartbeat(engine)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + intel_engine_pm_put(engine); +} + +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + return; + + next_heartbeat(engine); +} + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine) +{ + if (cancel_delayed_work(&engine->heartbeat.work)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); +} + +void intel_engine_init_heartbeat(struct intel_engine_cs *engine) +{ + INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); +} + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay) +{ + int err; + + /* Send one last pulse before to cleanup persistent hogs */ + if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { + err = intel_engine_pulse(engine); + if (err) + return err; + } + + WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); + + if (intel_engine_pm_get_if_awake(engine)) { + if (delay) + intel_engine_unpark_heartbeat(engine); + else + intel_engine_park_heartbeat(engine); + intel_engine_pm_put(engine); + } + + return 0; +} + +int intel_engine_pulse(struct intel_engine_cs *engine) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + int err = 0; + + if (!intel_engine_has_preemption(engine)) + return -ENODEV; + + if (!intel_engine_pm_get_if_awake(engine)) + return 0; + + if (mutex_lock_interruptible(&ce->timeline->mutex)) + goto out_rpm; + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unlock; + } + + rq->flags |= I915_REQUEST_SENTINEL; + idle_pulse(engine, rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +out_unlock: + mutex_unlock(&ce->timeline->mutex); +out_rpm: + intel_engine_pm_put(engine); + return err; +} + +int intel_engine_flush_barriers(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + int err = 0; + + if (llist_empty(&engine->barrier_tasks)) + return 0; + + if (!intel_engine_pm_get_if_awake(engine)) + return 0; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_rpm; + } + + idle_pulse(engine, rq); + i915_request_add(rq); + +out_rpm: + intel_engine_pm_put(engine); + return err; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_engine_heartbeat.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h new file mode 100644 index 000000000000..a7b8c0f9e005 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_HEARTBEAT_H +#define INTEL_ENGINE_HEARTBEAT_H + +struct intel_engine_cs; + +void intel_engine_init_heartbeat(struct intel_engine_cs *engine); + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay); + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine); +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine); + +int intel_engine_pulse(struct intel_engine_cs *engine); +int intel_engine_flush_barriers(struct intel_engine_cs *engine); + +#endif /* INTEL_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 67eb6183648a..010620b78202 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -6,12 +6,15 @@ #include "i915_drv.h" +#include "intel_context.h" #include "intel_engine.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_rc6.h" +#include "intel_ring.h" static int __engine_unpark(struct intel_wakeref *wf) { @@ -19,7 +22,7 @@ static int __engine_unpark(struct intel_wakeref *wf) container_of(wf, typeof(*engine), wakeref); void *map; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); intel_gt_pm_get(engine->gt); @@ -34,7 +37,7 @@ static int __engine_unpark(struct intel_wakeref *wf) if (engine->unpark) engine->unpark(engine); - intel_engine_init_hangcheck(engine); + intel_engine_unpark_heartbeat(engine); return 0; } @@ -53,7 +56,7 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce) static inline void __timeline_mark_unlock(struct intel_context *ce, unsigned long flags) { - mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_); + mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); local_irq_restore(flags); } @@ -71,12 +74,57 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_request *rq = to_request(fence); + + ewma__engine_latency_add(&rq->engine->latency, + ktime_us_delta(rq->fence.timestamp, + rq->duration.emitted)); +} + +static void +__queue_and_release_pm(struct i915_request *rq, + struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + ENGINE_TRACE(engine, "\n"); + + /* + * We have to serialise all potential retirement paths with our + * submission, as we don't want to underflow either the + * engine->wakeref.counter or our timeline->active_count. + * + * Equally, we cannot allow a new submission to start until + * after we finish queueing, nor could we allow that submitter + * to retire us before we are ready! + */ + spin_lock(&timelines->lock); + + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + /* Hand the request over to HW and so engine_retire() */ + __i915_request_queue(rq, NULL); + + /* Let new submissions commence (and maybe retire this timeline) */ + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); +} + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; unsigned long flags; bool result = true; + GEM_BUG_ON(!intel_context_is_barrier(ce)); + /* Already inside the kernel context, safe to power down. */ if (engine->wakeref_serial == engine->serial) return true; @@ -96,31 +144,56 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. + * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (se intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an undeflow of the engine->wakeref. */ - flags = __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(i915_request_timeline(rq)); - /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); /* Install ourselves as a preemption barrier */ - rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE; - __i915_request_commit(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */ + /* + * Use an interrupt for precise measurement of duration, + * otherwise we rely on someone else retiring all the requests + * which may delay the signaling (i.e. we will likely wait + * until the background request retirement running every + * second or two). + */ + BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq)); + dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration); + rq->duration.emitted = ktime_get(); + } - /* Release our exclusive hold on the engine */ - __intel_wakeref_defer_park(&engine->wakeref); - __i915_request_queue(rq, NULL); + /* Expose ourselves to the world */ + __queue_and_release_pm(rq, ce->timeline, engine); result = false; out_unlock: - __timeline_mark_unlock(engine->kernel_context, flags); + __timeline_mark_unlock(ce, flags); return result; } @@ -133,7 +206,7 @@ static void call_idle_barriers(struct intel_engine_cs *engine) container_of((struct list_head *)node, typeof(*cb), node); - cb->func(NULL, cb); + cb->func(ERR_PTR(-EAGAIN), cb); } } @@ -154,10 +227,11 @@ static int __engine_park(struct intel_wakeref *wf) if (!switch_to_kernel_context(engine)) return -EBUSY; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); call_idle_barriers(engine); /* cleanup after wedging */ + intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); intel_engine_pool_park(&engine->pool); @@ -174,7 +248,8 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->gt); + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ + intel_gt_pm_put_async(engine->gt); return 0; } @@ -188,6 +263,7 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); + intel_engine_init_heartbeat(engine); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 739c50fefcef..e52c2b0cb245 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,6 +7,7 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "i915_request.h" #include "intel_engine_types.h" #include "intel_wakeref.h" @@ -31,6 +32,36 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine) intel_wakeref_put(&engine->wakeref); } +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) +{ + intel_wakeref_put_async(&engine->wakeref); +} + +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) +{ + intel_wakeref_unlock_wait(&engine->wakeref); +} + +static inline struct i915_request * +intel_engine_create_kernel_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + /* + * The engine->kernel_context is special as it is used inside + * the engine-pm barrier (see __engine_park()), circumventing + * the usual mutexes and relying on the engine-pm barrier + * instead. So whenever we use the engine->kernel_context + * outside of the barrier, we must manually handle the + * engine wakeref to serialise with the use inside. + */ + intel_engine_pm_get(engine); + rq = i915_request_create(engine->kernel_context); + intel_engine_pm_put(engine); + + return rq; +} + void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 3cdbd5f8b5be..397186818305 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -104,6 +104,8 @@ node_create(struct intel_engine_pool *pool, size_t sz) return ERR_CAST(obj); } + i915_gem_object_set_readonly(obj); + node->obj = obj; return node; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 3451be034caf..00287515e7af 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -7,6 +7,7 @@ #ifndef __INTEL_ENGINE_TYPES__ #define __INTEL_ENGINE_TYPES__ +#include <linux/average.h> #include <linux/hashtable.h> #include <linux/irq_work.h> #include <linux/kref.h> @@ -15,6 +16,7 @@ #include <linux/rbtree.h> #include <linux/timer.h> #include <linux/types.h> +#include <linux/workqueue.h> #include "i915_gem.h" #include "i915_pmu.h" @@ -58,6 +60,7 @@ struct i915_gem_context; struct i915_request; struct i915_sched_attr; struct intel_gt; +struct intel_ring; struct intel_uncore; typedef u8 intel_engine_mask_t; @@ -76,40 +79,6 @@ struct intel_instdone { u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; }; -struct intel_engine_hangcheck { - u64 acthd; - u32 last_ring; - u32 last_head; - unsigned long action_timestamp; - struct intel_instdone instdone; -}; - -struct intel_ring { - struct kref ref; - struct i915_vma *vma; - void *vaddr; - - /* - * As we have two types of rings, one global to the engine used - * by ringbuffer submission and those that are exclusive to a - * context used by execlists, we have to play safe and allow - * atomic updates to the pin_count. However, the actual pinning - * of the context is either done during initialisation for - * ringbuffer submission or serialised as part of the context - * pinning for execlists, and so we do not need a mutex ourselves - * to serialise intel_ring_pin/intel_ring_unpin. - */ - atomic_t pin_count; - - u32 head; - u32 tail; - u32 emit; - - u32 space; - u32 size; - u32 effective_size; -}; - /* * we use a single page to load ctx workarounds so all of these * values are referred in terms of dwords @@ -151,6 +120,9 @@ enum intel_engine_id { #define INVALID_ENGINE ((enum intel_engine_id)-1) }; +/* A simple estimator for the round-trip latency of an engine */ +DECLARE_EWMA(_engine_latency, 6, 4) + struct st_preempt_hang { struct completion completion; unsigned int count; @@ -175,6 +147,11 @@ struct intel_engine_execlists { struct timer_list timer; /** + * @preempt: reset the current context if it fails to give way + */ + struct timer_list preempt; + + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ struct i915_priolist default_priolist; @@ -326,6 +303,11 @@ struct intel_engine_cs { intel_engine_mask_t saturated; /* submitting semaphores too late? */ + struct { + struct delayed_work work; + struct i915_request *systole; + } heartbeat; + unsigned long serial; unsigned long wakeref_serial; @@ -338,6 +320,13 @@ struct intel_engine_cs { struct intel_timeline *timeline; } legacy; + /* + * We track the average duration of the idle pulse on parking the + * engine to keep an estimate of the how the fast the engine is + * under ideal conditions. + */ + struct ewma__engine_latency latency; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -411,7 +400,10 @@ struct intel_engine_cs { struct { void (*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, bool stalled); + + void (*rewind)(struct intel_engine_cs *engine, bool stalled); + void (*cancel)(struct intel_engine_cs *engine); + void (*finish)(struct intel_engine_cs *engine); } reset; @@ -461,30 +453,29 @@ struct intel_engine_cs { void (*schedule)(struct i915_request *request, const struct i915_sched_attr *attr); - /* - * Cancel all requests on the hardware, or queued for execution. - * This should only cancel the ready requests that have been - * submitted to the engine (via the engine->submit_request callback). - * This is called when marking the device as wedged. - */ - void (*cancel_requests)(struct intel_engine_cs *engine); - - void (*destroy)(struct intel_engine_cs *engine); + void (*release)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; + /* + * Keep track of completed timelines on this engine for early + * retirement with the goal of quickly enabling powersaving as + * soon as the engine is idle. + */ + struct intel_timeline *retire; + struct work_struct retire_work; + /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; - struct intel_engine_hangcheck hangcheck; - -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_USING_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) unsigned int flags; /* @@ -542,12 +533,25 @@ struct intel_engine_cs { */ ktime_t total; } stats; + + struct { + unsigned long heartbeat_interval_ms; + unsigned long preempt_timeout_ms; + unsigned long stop_timeout_ms; + unsigned long timeslice_duration_ms; + } props; }; static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_USING_CMD_PARSER; +} + +static inline bool +intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; + return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; } static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 4294f146f13c..51b8718513bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -7,6 +7,8 @@ #ifndef _INTEL_GPU_COMMANDS_H_ #define _INTEL_GPU_COMMANDS_H_ +#include <linux/bitops.h> + /* * Target address alignments required for GPU access e.g. * MI_STORE_DWORD_IMM. @@ -319,4 +321,31 @@ #define COLOR_BLT ((0x2<<29)|(0x40<<22)) #define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) +/* + * Used to convert any address to canonical form. + * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, + * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the + * addresses to be in a canonical form: + * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct + * canonical form [63:48] == [47]." + */ +#define GEN8_HIGH_ADDRESS_BIT 47 +static inline u64 gen8_canonical_addr(u64 address) +{ + return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); +} + +static inline u64 gen8_noncanonical_addr(u64 address) +{ + return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); +} + +static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags) +{ + *cs++ = MI_BATCH_BUFFER_START | flags; + *cs++ = addr; + + return cs; +} + #endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 1c4b6c9642ad..ec84b5e62fef 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -3,12 +3,16 @@ * Copyright © 2019 Intel Corporation */ +#include "debugfs_gt.h" #include "i915_drv.h" +#include "intel_context.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_mocs.h" #include "intel_rc6.h" +#include "intel_renderstate.h" +#include "intel_rps.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -22,19 +26,20 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); - intel_gt_init_hangcheck(gt); intel_gt_init_reset(gt); intel_gt_init_requests(gt); + intel_gt_init_timelines(gt); intel_gt_pm_init_early(gt); + + intel_rps_init_early(>->rps); intel_uc_init_early(>->uc); } -void intel_gt_init_hw_early(struct drm_i915_private *i915) +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) { - i915->gt.ggtt = &i915->ggtt; + gt->ggtt = ggtt; - /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_gt_pm_disable(&i915->gt); + intel_gt_sanitize(gt, false); } static void init_unused_ring(struct intel_gt *gt, u32 base) @@ -72,7 +77,6 @@ int intel_gt_init_hw(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; int ret; - BUG_ON(!i915->kernel_context); ret = intel_gt_terminally_wedged(gt); if (ret) return ret; @@ -302,7 +306,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) intel_gt_chipset_flush(gt); - with_intel_runtime_pm(uncore->rpm, wakeref) { + with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) { unsigned long flags; spin_lock_irqsave(&uncore->lock, flags); @@ -321,8 +325,9 @@ void intel_gt_chipset_flush(struct intel_gt *gt) void intel_gt_driver_register(struct intel_gt *gt) { - if (IS_GEN(gt->i915, 5)) - intel_gpu_ips_init(gt->i915); + intel_rps_driver_register(>->rps); + + debugfs_gt_register(gt); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) @@ -364,41 +369,297 @@ static void intel_gt_fini_scratch(struct intel_gt *gt) i915_vma_unpin_and_release(>->scratch, 0); } +static struct i915_address_space *kernel_vm(struct intel_gt *gt) +{ + if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) + return &i915_ppgtt_create(gt->i915)->vm; + else + return i915_vm_get(>->ggtt->vm); +} + +static int __intel_context_flush_retire(struct intel_context *ce) +{ + struct intel_timeline *tl; + + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + intel_context_timeline_unlock(tl); + return 0; +} + +static int __engines_record_defaults(struct intel_gt *gt) +{ + struct i915_request *requests[I915_NUM_ENGINES] = {}; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * As we reset the gpu during very early sanitisation, the current + * register state on the GPU should reflect its defaults values. + * We load a context onto the hw (with restore-inhibit), then switch + * over to a second context to save that default register state. We + * can then prime every new context with that state so they all start + * from the same default HW values. + */ + + for_each_engine(engine, gt, id) { + struct intel_renderstate so; + struct intel_context *ce; + struct i915_request *rq; + + err = intel_renderstate_init(&so, engine); + if (err) + goto out; + + /* We must be able to switch to something! */ + GEM_BUG_ON(!engine->kernel_context); + engine->serial++; /* force the kernel context switch */ + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + intel_context_put(ce); + goto out; + } + + err = intel_engine_emit_ctx_wa(rq); + if (err) + goto err_rq; + + err = intel_renderstate_emit(&so, rq); + if (err) + goto err_rq; + +err_rq: + requests[id] = i915_request_get(rq); + i915_request_add(rq); + intel_renderstate_fini(&so); + if (err) + goto out; + } + + /* Flush the default context image to memory, and enable powersaving. */ + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + err = -EIO; + goto out; + } + + for (id = 0; id < ARRAY_SIZE(requests); id++) { + struct i915_request *rq; + struct i915_vma *state; + void *vaddr; + + rq = requests[id]; + if (!rq) + continue; + + GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); + state = rq->context->state; + if (!state) + continue; + + /* Serialise with retirement on another CPU */ + GEM_BUG_ON(!i915_request_completed(rq)); + err = __intel_context_flush_retire(rq->context); + if (err) + goto out; + + /* We want to be able to unbind the state from the GGTT */ + GEM_BUG_ON(intel_context_is_pinned(rq->context)); + + /* + * As we will hold a reference to the logical state, it will + * not be torn down with the context, and importantly the + * object will hold onto its vma (making it possible for a + * stray GTT write to corrupt our defaults). Unmap the vma + * from the GTT to prevent such accidents and reclaim the + * space. + */ + err = i915_vma_unbind(state); + if (err) + goto out; + + i915_gem_object_lock(state->obj); + err = i915_gem_object_set_to_cpu_domain(state->obj, false); + i915_gem_object_unlock(state->obj); + if (err) + goto out; + + i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); + + /* Check we can acquire the image of the context state */ + vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out; + } + + rq->engine->default_state = i915_gem_object_get(state->obj); + i915_gem_object_unpin_map(state->obj); + } + +out: + /* + * If we have to abandon now, we expect the engines to be idle + * and ready to be torn-down. The quickest way we can accomplish + * this is by declaring ourselves wedged. + */ + if (err) + intel_gt_set_wedged(gt); + + for (id = 0; id < ARRAY_SIZE(requests); id++) { + struct intel_context *ce; + struct i915_request *rq; + + rq = requests[id]; + if (!rq) + continue; + + ce = rq->context; + i915_request_put(rq); + intel_context_put(ce); + } + return err; +} + +static int __engines_verify_workarounds(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return 0; + + for_each_engine(engine, gt, id) { + if (intel_engine_verify_workarounds(engine, "load")) + err = -EIO; + } + + return err; +} + +static void __intel_gt_disable(struct intel_gt *gt) +{ + intel_gt_set_wedged_on_init(gt); + + intel_gt_suspend_prepare(gt); + intel_gt_suspend_late(gt); + + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); +} + int intel_gt_init(struct intel_gt *gt) { int err; - err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); + err = i915_inject_probe_error(gt->i915, -ENODEV); if (err) return err; + /* + * This is just a security blanket to placate dragons. + * On some systems, we very sporadically observe that the first TLBs + * used by the CS may be stale, despite us poking the TLB reset. If + * we hold the forcewake during initialisation these problems + * just magically go away. + */ + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); + if (err) + goto out_fw; + intel_gt_pm_init(gt); - return 0; + gt->vm = kernel_vm(gt); + if (!gt->vm) { + err = -ENOMEM; + goto err_pm; + } + + err = intel_engines_init(gt); + if (err) + goto err_engines; + + intel_uc_init(>->uc); + + err = intel_gt_resume(gt); + if (err) + goto err_uc_init; + + err = __engines_record_defaults(gt); + if (err) + goto err_gt; + + err = __engines_verify_workarounds(gt); + if (err) + goto err_gt; + + err = i915_inject_probe_error(gt->i915, -EIO); + if (err) + goto err_gt; + + goto out_fw; +err_gt: + __intel_gt_disable(gt); + intel_uc_fini_hw(>->uc); +err_uc_init: + intel_uc_fini(>->uc); +err_engines: + intel_engines_release(gt); + i915_vm_put(fetch_and_zero(>->vm)); +err_pm: + intel_gt_pm_fini(gt); + intel_gt_fini_scratch(gt); +out_fw: + if (err) + intel_gt_set_wedged_on_init(gt); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + return err; } void intel_gt_driver_remove(struct intel_gt *gt) { - GEM_BUG_ON(gt->awake); - intel_gt_pm_disable(gt); + __intel_gt_disable(gt); + + intel_uc_fini_hw(>->uc); + intel_uc_fini(>->uc); + + intel_engines_release(gt); } void intel_gt_driver_unregister(struct intel_gt *gt) { - intel_gpu_ips_teardown(); + intel_rps_driver_unregister(>->rps); } void intel_gt_driver_release(struct intel_gt *gt) { - /* Paranoia: make sure we have disabled everything before we exit. */ - intel_gt_pm_disable(gt); - intel_gt_pm_fini(gt); + struct i915_address_space *vm; + vm = fetch_and_zero(>->vm); + if (vm) /* FIXME being called twice on error paths :( */ + i915_vm_put(vm); + + intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); } void intel_gt_driver_late_release(struct intel_gt *gt) { intel_uc_driver_late_release(>->uc); + intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); + intel_gt_fini_timelines(gt); + intel_engines_free(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index e6ab0bff0efb..2355cf129e9c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -12,6 +12,12 @@ struct drm_i915_private; +#define GT_TRACE(gt, fmt, ...) do { \ + const struct intel_gt *gt__ __maybe_unused = (gt); \ + GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ + ##__VA_ARGS__); \ +} while (0) + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); @@ -28,7 +34,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw_early(struct drm_i915_private *i915); +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); int __must_check intel_gt_init_hw(struct intel_gt *gt); int intel_gt_init(struct intel_gt *gt); void intel_gt_driver_register(struct intel_gt *gt); @@ -46,8 +52,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt, void intel_gt_flush_ggtt_writes(struct intel_gt *gt); void intel_gt_chipset_flush(struct intel_gt *gt); -void intel_gt_init_hangcheck(struct intel_gt *gt); - static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, enum intel_gt_scratch_field field) { @@ -59,6 +63,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt) return __intel_reset_failed(>->reset); } -void intel_gt_queue_hangcheck(struct intel_gt *gt); - #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 34a4fb624bf7..f796bdf1ed30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -11,6 +11,7 @@ #include "intel_gt.h" #include "intel_gt_irq.h" #include "intel_uncore.h" +#include "intel_rps.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -27,7 +28,7 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { - intel_engine_breadcrumbs_irq(engine); + intel_engine_signal_breadcrumbs(engine); tasklet |= intel_engine_needs_breadcrumb_tasklet(engine); } @@ -77,7 +78,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, return guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) - return gen11_rps_irq_handler(gt, iir); + return gen11_rps_irq_handler(>->rps, iir); WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); @@ -244,9 +245,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); } static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) @@ -270,11 +271,11 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); if (gt_iir & GT_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); if (gt_iir & GT_BLT_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[COPY_ENGINE_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -336,7 +337,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gen6_rps_irq_handler(gt->i915, gt_iir[2]); + gen6_rps_irq_handler(>->rps, gt_iir[2]); guc_irq_handler(>->uc.guc, gt_iir[2] >> 16); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index b866d5b1eee0..45b68a17da4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/suspend.h> + #include "i915_drv.h" #include "i915_globals.h" #include "i915_params.h" @@ -12,13 +14,28 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_llc.h" #include "intel_pm.h" #include "intel_rc6.h" +#include "intel_rps.h" #include "intel_wakeref.h" -static void pm_notify(struct intel_gt *gt, int state) +static void user_forcewake(struct intel_gt *gt, bool suspend) { - blocking_notifier_call_chain(>->pm_notifications, state, gt->i915); + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); } static int __gt_unpark(struct intel_wakeref *wf) @@ -26,7 +43,7 @@ static int __gt_unpark(struct intel_wakeref *wf) struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); struct drm_i915_private *i915 = gt->i915; - GEM_TRACE("\n"); + GT_TRACE(gt, "\n"); i915_globals_unpark(); @@ -44,19 +61,12 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); - intel_enable_gt_powersave(i915); - - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - + intel_rc6_unpark(>->rc6); + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); - intel_gt_queue_hangcheck(gt); intel_gt_unpark_requests(gt); - pm_notify(gt, INTEL_GT_UNPARK); - return 0; } @@ -66,20 +76,21 @@ static int __gt_park(struct intel_wakeref *wf) intel_wakeref_t wakeref = fetch_and_zero(>->awake); struct drm_i915_private *i915 = gt->i915; - GEM_TRACE("\n"); + GT_TRACE(gt, "\n"); - pm_notify(gt, INTEL_GT_PARK); intel_gt_park_requests(gt); + i915_vma_parked(gt); i915_pmu_gt_parked(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); + intel_rps_park(>->rps); + intel_rc6_park(>->rc6); /* Everything switched off, flush any residual interrupt just in case */ intel_synchronize_irq(i915); + /* Defer dropping the display power well for 100ms, it's slow! */ GEM_BUG_ON(!wakeref); - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); i915_globals_park(); @@ -89,14 +100,11 @@ static int __gt_park(struct intel_wakeref *wf) static const struct intel_wakeref_ops wf_ops = { .get = __gt_unpark, .put = __gt_park, - .flags = INTEL_WAKEREF_PUT_ASYNC, }; void intel_gt_pm_init_early(struct intel_gt *gt) { intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); - - BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } void intel_gt_pm_init(struct intel_gt *gt) @@ -107,6 +115,7 @@ void intel_gt_pm_init(struct intel_gt *gt) * user. */ intel_rc6_init(>->rc6); + intel_rps_init(>->rps); } static bool reset_engines(struct intel_gt *gt) @@ -131,8 +140,22 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; + + GT_TRACE(gt, "force:%s", yesno(force)); - GEM_TRACE("\n"); + /* Use a raw wakeref to avoid calling intel_display_power_get early */ + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + /* + * As we have just resumed the machine and woken the device up from + * deep PCI sleep (presumably D3_cold), assume the HW has been reset + * back to defaults, recovering from whatever wedged state we left it + * in and so worth trying to use the device once more. + */ + if (intel_gt_is_wedged(gt)) + intel_gt_unset_wedged(gt); intel_uc_sanitize(>->uc); @@ -140,6 +163,8 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) if (engine->reset.prepare) engine->reset.prepare(engine); + intel_uc_reset_prepare(>->uc); + if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) __intel_engine_reset(engine, false); @@ -148,12 +173,9 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) for_each_engine(engine, gt, id) if (engine->reset.finish) engine->reset.finish(engine); -} -void intel_gt_pm_disable(struct intel_gt *gt) -{ - if (!is_mock_gt(gt)) - intel_sanitize_gt_powersave(gt->i915); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } void intel_gt_pm_fini(struct intel_gt *gt) @@ -165,7 +187,9 @@ int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - int err = 0; + int err; + + GT_TRACE(gt, "\n"); /* * After resume, we may need to poke into the pinned kernel @@ -174,9 +198,22 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. */ intel_gt_pm_get(gt); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); + /* Only when the HW is re-initialised, can we replay the requests */ + err = intel_gt_init_hw(gt); + if (err) { + dev_err(gt->i915->drm.dev, + "Failed to initialize GPU, declaring it wedged!\n"); + intel_gt_set_wedged(gt); + goto err_fw; + } + + intel_rps_enable(>->rps); + intel_llc_enable(>->llc); + for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -185,9 +222,7 @@ int intel_gt_resume(struct intel_gt *gt) ce = engine->kernel_context; if (ce) { GEM_BUG_ON(!intel_context_is_pinned(ce)); - mutex_acquire(&ce->pin_mutex.dep_map, 0, 0, _THIS_IP_); ce->ops->reset(ce); - mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); } engine->serial++; /* kernel context lost */ @@ -203,43 +238,98 @@ int intel_gt_resume(struct intel_gt *gt) } intel_rc6_enable(>->rc6); + + intel_uc_resume(>->uc); + + user_forcewake(gt, false); + +err_fw: intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); return err; } -static void wait_for_idle(struct intel_gt *gt) +static void wait_for_suspend(struct intel_gt *gt) { + if (!intel_gt_pm_is_awake(gt)) + return; + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave * the gpu quiet. */ intel_gt_set_wedged(gt); + intel_gt_retire_requests(gt); } intel_gt_pm_wait_for_idle(gt); } -void intel_gt_suspend(struct intel_gt *gt) +void intel_gt_suspend_prepare(struct intel_gt *gt) +{ + user_forcewake(gt, true); + wait_for_suspend(gt); + + intel_uc_suspend(>->uc); +} + +static suspend_state_t pm_suspend_target(void) +{ +#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) + return pm_suspend_target_state; +#else + return PM_SUSPEND_TO_IDLE; +#endif +} + +void intel_gt_suspend_late(struct intel_gt *gt) { intel_wakeref_t wakeref; /* We expect to be idle already; but also want to be independent */ - wait_for_idle(gt); + wait_for_suspend(gt); + + if (is_mock_gt(gt)) + return; + + GEM_BUG_ON(gt->awake); + + /* + * On disabling the device, we want to turn off HW access to memory + * that we no longer own. + * + * However, not all suspend-states disable the device. S0 (s2idle) + * is effectively runtime-suspend, the device is left powered on + * but needs to be put into a low power state. We need to keep + * powermanagement enabled, but we also retain system state and so + * it remains safe to keep on using our allocated memory. + */ + if (pm_suspend_target() == PM_SUSPEND_TO_IDLE) + return; - with_intel_runtime_pm(gt->uncore->rpm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + intel_rps_disable(>->rps); intel_rc6_disable(>->rc6); + intel_llc_disable(>->llc); + } + + intel_gt_sanitize(gt, false); + + GT_TRACE(gt, "\n"); } void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(>->uc); + + GT_TRACE(gt, "\n"); } int intel_gt_runtime_resume(struct intel_gt *gt) { + GT_TRACE(gt, "\n"); intel_gt_init_swizzling(gt); return intel_uc_runtime_resume(>->uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 997770d3a968..4a9e48c12bd4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -12,11 +12,6 @@ #include "intel_gt_types.h" #include "intel_wakeref.h" -enum { - INTEL_GT_UNPARK, - INTEL_GT_PARK, -}; - static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) { return intel_wakeref_is_active(>->wakeref); @@ -27,6 +22,11 @@ static inline void intel_gt_pm_get(struct intel_gt *gt) intel_wakeref_get(>->wakeref); } +static inline void __intel_gt_pm_get(struct intel_gt *gt) +{ + __intel_wakeref_get(>->wakeref); +} + static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) { return intel_wakeref_get_if_active(>->wakeref); @@ -37,6 +37,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt) intel_wakeref_put(>->wakeref); } +static inline void intel_gt_pm_put_async(struct intel_gt *gt) +{ + intel_wakeref_put_async(>->wakeref); +} + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); @@ -44,13 +49,13 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) void intel_gt_pm_init_early(struct intel_gt *gt); void intel_gt_pm_init(struct intel_gt *gt); -void intel_gt_pm_disable(struct intel_gt *gt); void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); +void intel_gt_suspend_prepare(struct intel_gt *gt); +void intel_gt_suspend_late(struct intel_gt *gt); int intel_gt_resume(struct intel_gt *gt); -void intel_gt_suspend(struct intel_gt *gt); void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index b73229a84d85..b4f04614230e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,8 +4,11 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/workqueue.h> + #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" +#include "intel_engine_heartbeat.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -20,13 +23,88 @@ static void retire_requests(struct intel_timeline *tl) break; } -static void flush_submission(struct intel_gt *gt) +static bool flush_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + bool active = false; + + for_each_engine(engine, gt, id) { + active |= intel_engine_flush_submission(engine); + active |= flush_work(&engine->retire_work); + } + + return active; +} + +static void engine_retire(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), retire_work); + struct intel_timeline *tl = xchg(&engine->retire, NULL); + + do { + struct intel_timeline *next = xchg(&tl->retire, NULL); + + /* + * Our goal here is to retire _idle_ timelines as soon as + * possible (as they are idle, we do not expect userspace + * to be cleaning up anytime soon). + * + * If the timeline is currently locked, either it is being + * retired elsewhere or about to be! + */ + if (mutex_trylock(&tl->mutex)) { + retire_requests(tl); + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + + GEM_BUG_ON(!next); + tl = ptr_mask_bits(next, 1); + } while (tl); +} + +static bool add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ +#define STUB ((struct intel_timeline *)1) + struct intel_timeline *first; + + /* + * We open-code a llist here to include the additional tag [BIT(0)] + * so that we know when the timeline is already on a + * retirement queue: either this engine or another. + */ + + if (cmpxchg(&tl->retire, NULL, STUB)) /* already queued */ + return false; + + intel_timeline_get(tl); + first = READ_ONCE(engine->retire); + do + tl->retire = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->retire, &first, tl)); + + return !first; +} + +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + if (add_retire(engine, tl)) + schedule_work(&engine->retire_work); +} - for_each_engine(engine, gt, id) - intel_engine_flush_submission(engine); +void intel_engine_init_retire(struct intel_engine_cs *engine) +{ + INIT_WORK(&engine->retire_work, engine_retire); +} + +void intel_engine_fini_retire(struct intel_engine_cs *engine) +{ + flush_work(&engine->retire_work); + GEM_BUG_ON(engine->retire); } long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) @@ -34,7 +112,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl, *tn; unsigned long active_count = 0; - unsigned long flags; bool interruptible; LIST_HEAD(free); @@ -44,7 +121,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) flush_submission(gt); /* kick the ksoftirqd tasklets */ - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) { active_count++; /* report busy to caller, try again? */ @@ -52,9 +129,9 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ - spin_unlock_irqrestore(&timelines->lock, flags); + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ + spin_unlock(&timelines->lock); if (timeout > 0) { struct dma_fence *fence; @@ -70,28 +147,31 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) retire_requests(tl); - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (--tl->active_count) - active_count += !!rcu_access_pointer(tl->last_request.fence); - else + if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); + else + active_count += i915_active_fence_isset(&tl->last_request); mutex_unlock(&tl->mutex); /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); + GEM_BUG_ON(atomic_read(&tl->active_count)); list_add(&tl->link, &free); } } - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); + if (flush_submission(gt)) + active_count++; + return active_count ? timeout : 0; } @@ -115,9 +195,9 @@ static void retire_work_handler(struct work_struct *work) struct intel_gt *gt = container_of(work, typeof(*gt), requests.retire_work.work); - intel_gt_retire_requests(gt); schedule_delayed_work(>->requests.retire_work, round_jiffies_up_relative(HZ)); + intel_gt_retire_requests(gt); } void intel_gt_init_requests(struct intel_gt *gt) @@ -135,3 +215,9 @@ void intel_gt_unpark_requests(struct intel_gt *gt) schedule_delayed_work(>->requests.retire_work, round_jiffies_up_relative(HZ)); } + +void intel_gt_fini_requests(struct intel_gt *gt) +{ + /* Wait until the work is marked as finished before unloading! */ + cancel_delayed_work_sync(>->requests.retire_work); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index bd31cbce47e0..dbac53baf1cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -7,7 +7,9 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +struct intel_engine_cs; struct intel_gt; +struct intel_timeline; long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) @@ -15,10 +17,16 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt) intel_gt_retire_requests_timeout(gt, 0); } +void intel_engine_init_retire(struct intel_engine_cs *engine); +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl); +void intel_engine_fini_retire(struct intel_engine_cs *engine); + int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_init_requests(struct intel_gt *gt); void intel_gt_park_requests(struct intel_gt *gt); void intel_gt_unpark_requests(struct intel_gt *gt); +void intel_gt_fini_requests(struct intel_gt *gt); #endif /* INTEL_GT_REQUESTS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index ae4aaf75ac78..96890dd12b5f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -20,6 +20,7 @@ #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" +#include "intel_rps_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -27,14 +28,6 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; -struct intel_hangcheck { - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - - struct delayed_work work; -}; - struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -68,7 +61,6 @@ struct intel_gt { struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ - struct intel_hangcheck hangcheck; struct intel_reset reset; /** @@ -82,8 +74,7 @@ struct intel_gt { struct intel_llc llc; struct intel_rc6 rc6; - - struct blocking_notifier_head pm_notifications; + struct intel_rps rps; ktime_t last_init_time; @@ -99,6 +90,13 @@ struct intel_gt { struct intel_engine_cs *engine[I915_NUM_ENGINES]; struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] [MAX_ENGINE_INSTANCE + 1]; + + /* + * Default address space (either GGTT or ppGTT depending on arch). + * + * Reserved for exclusive use by the kernel. + */ + struct i915_address_space *vm; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c deleted file mode 100644 index 0fdef00af9e4..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "intel_engine.h" -#include "intel_gt.h" -#include "intel_reset.h" - -struct hangcheck { - u64 acthd; - u32 ring; - u32 head; - enum intel_engine_hangcheck_action action; - unsigned long action_timestamp; - int deadlock; - struct intel_instdone instdone; - bool wedged:1; - bool stalled:1; -}; - -static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) -{ - u32 tmp = current_instdone | *old_instdone; - bool unchanged; - - unchanged = tmp == *old_instdone; - *old_instdone |= tmp; - - return unchanged; -} - -static bool subunits_stuck(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; - struct intel_instdone instdone; - struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; - bool stuck; - int slice; - int subslice; - - intel_engine_get_instdone(engine, &instdone); - - /* There might be unstable subunit states even when - * actual head is not moving. Filter out the unstable ones by - * accumulating the undone -> done transitions and only - * consider those as progress. - */ - stuck = instdone_unchanged(instdone.instdone, - &accu_instdone->instdone); - stuck &= instdone_unchanged(instdone.slice_common, - &accu_instdone->slice_common); - - for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) { - stuck &= instdone_unchanged(instdone.sampler[slice][subslice], - &accu_instdone->sampler[slice][subslice]); - stuck &= instdone_unchanged(instdone.row[slice][subslice], - &accu_instdone->row[slice][subslice]); - } - - return stuck; -} - -static enum intel_engine_hangcheck_action -head_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - if (acthd != engine->hangcheck.acthd) { - - /* Clear subunit states on head movement */ - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - return ENGINE_ACTIVE_HEAD; - } - - if (!subunits_stuck(engine)) - return ENGINE_ACTIVE_SUBUNITS; - - return ENGINE_DEAD; -} - -static enum intel_engine_hangcheck_action -engine_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - enum intel_engine_hangcheck_action ha; - u32 tmp; - - ha = head_stuck(engine, acthd); - if (ha != ENGINE_DEAD) - return ha; - - if (IS_GEN(engine->i915, 2)) - return ENGINE_DEAD; - - /* Is the chip hanging on a WAIT_FOR_EVENT? - * If so we can simply poke the RB_WAIT bit - * and break the hang. This should work on - * all but the second generation chipsets. - */ - tmp = ENGINE_READ(engine, RING_CTL); - if (tmp & RING_WAIT) { - intel_gt_handle_error(engine->gt, engine->mask, 0, - "stuck wait on %s", engine->name); - ENGINE_WRITE(engine, RING_CTL, tmp); - return ENGINE_WAIT_KICK; - } - - return ENGINE_DEAD; -} - -static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - hc->acthd = intel_engine_get_active_head(engine); - hc->ring = ENGINE_READ(engine, RING_START); - hc->head = ENGINE_READ(engine, RING_HEAD); -} - -static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.last_ring = hc->ring; - engine->hangcheck.last_head = hc->head; -} - -static enum intel_engine_hangcheck_action -hangcheck_get_action(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - if (intel_engine_is_idle(engine)) - return ENGINE_IDLE; - - if (engine->hangcheck.last_ring != hc->ring) - return ENGINE_ACTIVE_SEQNO; - - if (engine->hangcheck.last_head != hc->head) - return ENGINE_ACTIVE_SEQNO; - - return engine_stuck(engine, hc->acthd); -} - -static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; - - hc->action = hangcheck_get_action(engine, hc); - - /* We always increment the progress - * if the engine is busy and still processing - * the same request, so that no single request - * can run indefinitely (such as a chain of - * batches). The only time we do not increment - * the hangcheck score on this ring, if this - * engine is in a legitimate wait for another - * engine. In that case the waiting engine is a - * victim and we want to be sure we catch the - * right culprit. Then every time we do kick - * the ring, make it as a progress as the seqno - * advancement might ensure and if not, it - * will catch the hanging engine. - */ - - switch (hc->action) { - case ENGINE_IDLE: - case ENGINE_ACTIVE_SEQNO: - /* Clear head and subunit states on seqno movement */ - hc->acthd = 0; - - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - /* Intentional fall through */ - case ENGINE_WAIT_KICK: - case ENGINE_WAIT: - engine->hangcheck.action_timestamp = jiffies; - break; - - case ENGINE_ACTIVE_HEAD: - case ENGINE_ACTIVE_SUBUNITS: - /* - * Seqno stuck with still active engine gets leeway, - * in hopes that it is just a long shader. - */ - timeout = I915_SEQNO_DEAD_TIMEOUT; - break; - - case ENGINE_DEAD: - break; - - default: - MISSING_CASE(hc->action); - } - - hc->stalled = time_after(jiffies, - engine->hangcheck.action_timestamp + timeout); - hc->wedged = time_after(jiffies, - engine->hangcheck.action_timestamp + - I915_ENGINE_WEDGED_TIMEOUT); -} - -static void hangcheck_declare_hang(struct intel_gt *gt, - intel_engine_mask_t hung, - intel_engine_mask_t stuck) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - char msg[80]; - int len; - - /* If some rings hung but others were still busy, only - * blame the hanging rings in the synopsis. - */ - if (stuck != hung) - hung &= ~stuck; - len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, gt, hung, tmp) - len += scnprintf(msg + len, sizeof(msg) - len, - "%s, ", engine->name); - msg[len-2] = '\0'; - - return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg); -} - -/* - * This is called when the chip hasn't reported back with completed - * batchbuffers in a long time. We keep track per ring seqno progress and - * if there are no progress, hangcheck score for that ring is increased. - * Further, acthd is inspected to see if the ring is stuck. On stuck case - * we kick the ring. If we see no progress on three subsequent calls - * we assume chip is wedged and try to fix it by resetting the chip. - */ -static void hangcheck_elapsed(struct work_struct *work) -{ - struct intel_gt *gt = - container_of(work, typeof(*gt), hangcheck.work.work); - intel_engine_mask_t hung = 0, stuck = 0, wedged = 0; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - if (!i915_modparams.enable_hangcheck) - return; - - if (!READ_ONCE(gt->awake)) - return; - - if (intel_gt_is_wedged(gt)) - return; - - wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm); - if (!wakeref) - return; - - /* As enabling the GPU requires fairly extensive mmio access, - * periodically arm the mmio checker to see if we are triggering - * any invalid access. - */ - intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - - for_each_engine(engine, gt, id) { - struct hangcheck hc; - - intel_engine_breadcrumbs_irq(engine); - - hangcheck_load_sample(engine, &hc); - hangcheck_accumulate_sample(engine, &hc); - hangcheck_store_sample(engine, &hc); - - if (hc.stalled) { - hung |= engine->mask; - if (hc.action != ENGINE_DEAD) - stuck |= engine->mask; - } - - if (hc.wedged) - wedged |= engine->mask; - } - - if (GEM_SHOW_DEBUG() && (hung | stuck)) { - struct drm_printer p = drm_debug_printer("hangcheck"); - - for_each_engine(engine, gt, id) { - if (intel_engine_is_idle(engine)) - continue; - - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - } - - if (wedged) { - dev_err(gt->i915->drm.dev, - "GPU recovery timed out," - " cancelling all in-flight rendering.\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - } - - if (hung) - hangcheck_declare_hang(gt, hung, stuck); - - intel_runtime_pm_put(gt->uncore->rpm, wakeref); - - /* Reset timer in case GPU hangs without another request being added */ - intel_gt_queue_hangcheck(gt); -} - -void intel_gt_queue_hangcheck(struct intel_gt *gt) -{ - unsigned long delay; - - if (unlikely(!i915_modparams.enable_hangcheck)) - return; - - /* - * Don't continually defer the hangcheck so that it is always run at - * least once after work has been scheduled on any ring. Otherwise, - * we will ignore a hung ring if a second ring is kept busy. - */ - - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - queue_delayed_work(system_long_wq, >->hangcheck.work, delay); -} - -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); - engine->hangcheck.action_timestamp = jiffies; -} - -void intel_gt_init_hangcheck(struct intel_gt *gt) -{ - INIT_DELAYED_WORK(>->hangcheck.work, hangcheck_elapsed); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftest_hangcheck.c" -#endif diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index 35093eb5f24e..ceb785b75c25 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -48,7 +48,7 @@ static bool get_ia_constants(struct intel_llc *llc, struct ia_constants *consts) { struct drm_i915_private *i915 = llc_to_gt(llc)->i915; - struct intel_rps *rps = &i915->gt_pm.rps; + struct intel_rps *rps = &llc_to_gt(llc)->rps; if (rps->max_freq <= rps->min_freq) return false; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d0088d020220..4fb70a7716e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -133,18 +133,19 @@ */ #include <linux/interrupt.h> -#include "gem/i915_gem_context.h" - #include "i915_drv.h" #include "i915_perf.h" #include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -234,16 +235,9 @@ static void execlists_init_reg_state(u32 *reg_state, const struct intel_engine_cs *engine, const struct intel_ring *ring, bool close); - -static void __context_pin_acquire(struct intel_context *ce) -{ - mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_); -} - -static void __context_pin_release(struct intel_context *ce) -{ - mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_); -} +static void +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); static void mark_eio(struct i915_request *rq) { @@ -256,6 +250,23 @@ static void mark_eio(struct i915_request *rq) i915_request_mark_complete(rq); } +static struct i915_request * +active_request(const struct intel_timeline * const tl, struct i915_request *rq) +{ + struct i915_request *active = rq; + + rcu_read_lock(); + list_for_each_entry_continue_reverse(rq, &tl->requests, link) { + if (i915_request_completed(rq)) + break; + + active = rq; + } + rcu_read_unlock(); + + return active; +} + static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -460,8 +471,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; - /* bits 12-31 */ + desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing @@ -834,12 +844,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) } } -static void unwind_wa_tail(struct i915_request *rq) -{ - rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); - assert_ring_tail_valid(rq->ring, rq->tail); -} - static struct i915_request * __unwind_incomplete_requests(struct intel_engine_cs *engine) { @@ -852,12 +856,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - if (i915_request_completed(rq)) continue; /* XXX */ __i915_request_unsubmit(rq); - unwind_wa_tail(rq); /* * Push the request back into the queue for later resubmission. @@ -877,7 +879,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { - struct intel_engine_cs *owner = rq->hw_context->engine; + struct intel_engine_cs *owner = rq->context->engine; /* * Decouple the virtual breadcrumb before moving it @@ -925,14 +927,180 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } +static void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. + */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} + +static void +execlists_check_context(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + const struct intel_ring *ring = ce->ring; + u32 *regs = ce->lrc_reg_state; + bool valid = true; + int x; + + if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { + pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_START], + i915_ggtt_offset(ring->vma)); + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); + valid = false; + } + + if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != + (RING_CTL_SIZE(ring->size) | RING_VALID)) { + pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_CTL], + (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + valid = false; + } + + x = lrc_ring_mi_mode(engine); + if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { + pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", + engine->name, regs[x + 1]); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; + valid = false; + } + + WARN_ONCE(!valid, "Invalid lrc state found before submission\n"); +} + +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. + */ + ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", + rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (i915_request_completed(rq)) + head = rq->tail; + else + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { struct intel_engine_cs * const engine = rq->engine; - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; intel_context_get(ce); + if (unlikely(intel_context_is_banned(ce))) + reset_active(rq, engine); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + execlists_check_context(ce, engine); + if (ce->tag) { /* Use a fixed tag for OA and friends */ ce->lrc_desc |= (u64)ce->tag << 32; @@ -945,7 +1113,7 @@ __execlists_schedule_in(struct i915_request *rq) BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); } - intel_gt_pm_get(engine->gt); + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -955,7 +1123,7 @@ __execlists_schedule_in(struct i915_request *rq) static inline struct i915_request * execlists_schedule_in(struct i915_request *rq, int idx) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; struct intel_engine_cs *old; GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); @@ -986,11 +1154,25 @@ static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; + + /* + * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. + */ + + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. + */ + if (list_is_last(&rq->link, &ce->timeline->requests) && + i915_request_completed(rq)) + intel_engine_add_retire(engine, ce->timeline); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - intel_gt_pm_put(engine->gt); + intel_gt_pm_put_async(engine->gt); /* * If this is part of a virtual engine, its next request may @@ -1010,7 +1192,7 @@ __execlists_schedule_out(struct i915_request *rq, static inline void execlists_schedule_out(struct i915_request *rq) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; trace_i915_request_out(rq); @@ -1025,13 +1207,29 @@ execlists_schedule_out(struct i915_request *rq) i915_request_put(rq); } -static u64 execlists_update_context(const struct i915_request *rq) +static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = rq->hw_context; - u64 desc; + struct intel_context *ce = rq->context; + u64 desc = ce->lrc_desc; + u32 tail; - ce->lrc_reg_state[CTX_RING_TAIL] = - intel_ring_set_tail(rq->ring, rq->tail); + /* + * WaIdleLiteRestore:bdw,skl + * + * We should never submit the context with the same RING_TAIL twice + * just in case we submit an empty ring, which confuses the HW. + * + * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of + * the normal request to be able to always advance the RING_TAIL on + * subsequent resubmissions (for lite restore). Should that fail us, + * and we try and submit the same tail again, force the context + * reload. + */ + tail = intel_ring_set_tail(rq->ring, rq->tail); + if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail)) + desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc_reg_state[CTX_RING_TAIL] = tail; + rq->tail = rq->wa_tail; /* * Make sure the context image is complete before we submit it to HW. @@ -1042,21 +1240,14 @@ static u64 execlists_update_context(const struct i915_request *rq) * may not be visible to the HW prior to the completion of the UC * register write and that we may begin execution from the context * before its image is complete leading to invalid PD chasing. - * - * Furthermore, Braswell, at least, wants a full mb to be sure that - * the writes are coherent in memory (visible to the GPU) prior to - * execution, and not just visible to other CPUs (as is the result of - * wmb). */ - mb(); - - desc = ce->lrc_desc; - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + wmb(); /* Wa_1607138340:tgl */ if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1082,15 +1273,14 @@ trace_ports(const struct intel_engine_execlists *execlists, if (!ports[0]) return; - GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", - engine->name, msg, - ports[0]->fence.context, - ports[0]->fence.seqno, - i915_request_completed(ports[0]) ? "!" : - i915_request_started(ports[0]) ? "*" : - "", - ports[1] ? ports[1]->fence.context : 0, - ports[1] ? ports[1]->fence.seqno : 0); + ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg, + ports[0]->fence.context, + ports[0]->fence.seqno, + i915_request_completed(ports[0]) ? "!" : + i915_request_started(ports[0]) ? "*" : + "", + ports[1] ? ports[1]->fence.context : 0, + ports[1] ? ports[1]->fence.seqno : 0); } static __maybe_unused bool @@ -1114,33 +1304,56 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } for (port = execlists->pending; (rq = *port); port++) { - if (ce == rq->hw_context) { - GEM_TRACE_ERR("Duplicate context in pending[%zd]\n", + unsigned long flags; + bool ok = true; + + GEM_BUG_ON(!kref_read(&rq->fence.refcount)); + GEM_BUG_ON(!i915_request_is_active(rq)); + + if (ce == rq->context) { + GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); return false; } + ce = rq->context; - ce = rq->hw_context; - if (i915_request_completed(rq)) + /* Hold tightly onto the lock to prevent concurrent retires! */ + if (!spin_trylock_irqsave(&rq->lock, flags)) continue; - if (i915_active_is_idle(&ce->active)) { - GEM_TRACE_ERR("Inactive context in pending[%zd]\n", + if (i915_request_completed(rq)) + goto unlock; + + if (i915_active_is_idle(&ce->active) && + !intel_context_is_barrier(ce)) { + GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); - return false; + ok = false; + goto unlock; } if (!i915_vma_is_pinned(ce->state)) { - GEM_TRACE_ERR("Unpinned context in pending[%zd]\n", + GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); - return false; + ok = false; + goto unlock; } if (!i915_vma_is_pinned(ce->ring->vma)) { - GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n", + GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); - return false; + ok = false; + goto unlock; } + +unlock: + spin_unlock_irqrestore(&rq->lock, flags); + if (!ok) + return false; } return ce; @@ -1185,7 +1398,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - i915_gem_context_force_single_submission(ce->gem_context)); + intel_context_force_single_submission(ce)); } static bool can_merge_ctx(const struct intel_context *prev, @@ -1221,7 +1434,7 @@ static bool can_merge_rq(const struct i915_request *prev, (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) return false; - if (!can_merge_ctx(prev->hw_context, next->hw_context)) + if (!can_merge_ctx(prev->context, next->context)) return false; return true; @@ -1269,7 +1482,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, if (!list_empty(&ve->context.signal_link)) { list_move_tail(&ve->context.signal_link, &engine->breadcrumbs.signalers); - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); } spin_unlock(&old->breadcrumbs.irq_lock); } @@ -1345,7 +1558,7 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) { int hint; - if (!intel_engine_has_semaphores(engine)) + if (!intel_engine_has_timeslices(engine)) return false; if (list_is_last(&rq->sched.link, &engine->active.requests)) @@ -1366,15 +1579,32 @@ switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) return rq_prio(list_next_entry(rq, sched.link)); } -static bool -enable_timeslice(const struct intel_engine_execlists *execlists) +static inline unsigned long +timeslice(const struct intel_engine_cs *engine) { - const struct i915_request *rq = *execlists->active; + return READ_ONCE(engine->props.timeslice_duration_ms); +} + +static unsigned long +active_timeslice(const struct intel_engine_cs *engine) +{ + const struct i915_request *rq = *engine->execlists.active; if (i915_request_completed(rq)) - return false; + return 0; + + if (engine->execlists.switch_priority_hint < effective_prio(rq)) + return 0; + + return timeslice(engine); +} - return execlists->switch_priority_hint >= effective_prio(rq); +static void set_timeslice(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_timeslices(engine)) + return; + + set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); } static void record_preemption(struct intel_engine_execlists *execlists) @@ -1382,6 +1612,30 @@ static void record_preemption(struct intel_engine_execlists *execlists) (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = last_active(&engine->execlists); + if (!rq) + return 0; + + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ + if (unlikely(intel_context_is_banned(rq->context))) + return 1; + + return READ_ONCE(engine->props.preempt_timeout_ms); +} + +static void set_preempt_timeout(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_preempt_reset(engine)) + return; + + set_timer_ms(&engine->execlists.preempt, + active_preempt_timeout(engine)); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1444,12 +1698,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = last_active(execlists); if (last) { if (need_preempt(engine, last, rb)) { - GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n", - engine->name, - last->fence.context, - last->fence.seqno, - last->sched.attr.priority, - execlists->queue_priority_hint); + ENGINE_TRACE(engine, + "preempting last=%llx:%lld, prio=%d, hint=%d\n", + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); record_preemption(execlists); /* @@ -1475,16 +1729,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * tendency to ignore us rewinding the TAIL to the * end of an earlier request. */ - last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; + last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && timer_expired(&engine->execlists.timer)) { - GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", - engine->name, - last->fence.context, - last->fence.seqno, - last->sched.attr.priority, - execlists->queue_priority_hint); + ENGINE_TRACE(engine, + "expired last=%llx:%lld, prio=%d, hint=%d\n", + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); ring_set_paused(engine, 1); defer_active(engine); @@ -1521,20 +1775,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (!execlists->timer.expires && need_timeslice(engine, last)) - mod_timer(&execlists->timer, - jiffies + 1); + set_timer_ms(&execlists->timer, + timeslice(engine)); + return; } - - /* - * WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_fini_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; } } @@ -1556,7 +1801,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq != ve->request); GEM_BUG_ON(rq->engine != &ve->base); - GEM_BUG_ON(rq->hw_context != &ve->context); + GEM_BUG_ON(rq->context != &ve->context); if (rq_prio(rq) >= queue_prio(execlists)) { if (!virtual_matches(ve, rq, engine)) { @@ -1570,14 +1815,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) return; /* leave this for another */ } - GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", - engine->name, - rq->fence.context, - rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - "", - yesno(engine != ve->siblings[0])); + ENGINE_TRACE(engine, + "virtual rq=%llx:%lld%s, new engine? %s\n", + rq->fence.context, + rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + yesno(engine != ve->siblings[0])); ve->request = NULL; ve->base.execlists.queue_priority_hint = INT_MIN; @@ -1675,7 +1920,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * same LRCA, i.e. we must submit 2 different * contexts if we submit 2 ELSP. */ - if (last->hw_context == rq->hw_context) + if (last->context == rq->context) goto done; if (i915_request_has_sentinel(last)) @@ -1688,8 +1933,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the same context (even though a different * request) to the second port. */ - if (ctx_single_port_submission(last->hw_context) || - ctx_single_port_submission(rq->hw_context)) + if (ctx_single_port_submission(last->context) || + ctx_single_port_submission(rq->context)) goto done; merge = false; @@ -1703,8 +1948,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } GEM_BUG_ON(last && - !can_merge_ctx(last->hw_context, - rq->hw_context)); + !can_merge_ctx(last->context, + rq->context)); submit = true; last = rq; @@ -1733,9 +1978,6 @@ done: * interrupt for secondary ports). */ execlists->queue_priority_hint = queue_prio(execlists); - GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n", - engine->name, execlists->queue_priority_hint, - yesno(submit)); if (submit) { *port = execlists_schedule_in(last, port - execlists->pending); @@ -1757,6 +1999,8 @@ done: memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); + + set_preempt_timeout(engine); } else { skip_submit: ring_set_paused(engine, 0); @@ -1766,16 +2010,17 @@ skip_submit: static void cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct i915_request * const *port, *rq; + struct i915_request * const *port; - for (port = execlists->pending; (rq = *port); port++) - execlists_schedule_out(rq); + for (port = execlists->pending; *port; port++) + execlists_schedule_out(*port); memset(execlists->pending, 0, sizeof(execlists->pending)); - for (port = execlists->active; (rq = *port); port++) - execlists_schedule_out(rq); - execlists->active = - memset(execlists->inflight, 0, sizeof(execlists->inflight)); + /* Mark the end of active before we overwrite *active */ + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + execlists_schedule_out(*port); + WRITE_ONCE(execlists->active, + memset(execlists->inflight, 0, sizeof(execlists->inflight))); } static inline void @@ -1867,7 +2112,7 @@ static void process_csb(struct intel_engine_cs *engine) */ GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && !reset_in_progress(execlists)); - GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); + GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. @@ -1881,7 +2126,7 @@ static void process_csb(struct intel_engine_cs *engine) */ head = execlists->csb_head; tail = READ_ONCE(*execlists->csb_write); - GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); + ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); if (unlikely(head == tail)) return; @@ -1919,35 +2164,35 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n", - engine->name, head, - buf[2 * head + 0], buf[2 * head + 1]); + ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", + head, buf[2 * head + 0], buf[2 * head + 1]); if (INTEL_GEN(engine->i915) >= 12) promote = gen12_csb_parse(execlists, buf + 2 * head); else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + struct i915_request * const *old = execlists->active; + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + set_timeslice(engine); + if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); /* cancel old inflight, prepare for switch */ - trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) - execlists_schedule_out(*execlists->active++); + trace_ports(execlists, "preempted", old); + while (*old) + execlists_schedule_out(*old++); /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - execlists->active = - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); - - if (enable_timeslice(execlists)) - mod_timer(&execlists->timer, jiffies + 1); - else - cancel_timer(&execlists->timer); + WRITE_ONCE(execlists->active, + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending))); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -1997,6 +2242,42 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } +static noinline void preempt_reset(struct intel_engine_cs *engine) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (i915_modparams.reset < 3) + return; + + if (test_and_set_bit(bit, lock)) + return; + + /* Mark this tasklet as disabled to avoid waiting for it to complete */ + tasklet_disable_nosync(&engine->execlists.tasklet); + + ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", + READ_ONCE(engine->props.preempt_timeout_ms), + jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); + intel_engine_reset(engine, "preemption time out"); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static bool preempt_timeout(const struct intel_engine_cs *const engine) +{ + const struct timer_list *t = &engine->execlists.preempt; + + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return false; + + if (!timer_expired(t)) + return false; + + return READ_ONCE(engine->execlists.pending[0]); +} + /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. @@ -2004,23 +2285,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - unsigned long flags; + bool timeout = preempt_timeout(engine); process_csb(engine); - if (!READ_ONCE(engine->execlists.pending[0])) { + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { + unsigned long flags; + spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); spin_unlock_irqrestore(&engine->active.lock, flags); + + /* Recheck after serialising with direct-submission */ + if (timeout && preempt_timeout(engine)) + preempt_reset(engine); } } -static void execlists_submission_timer(struct timer_list *timer) +static void __execlists_kick(struct intel_engine_execlists *execlists) { - struct intel_engine_cs *engine = - from_timer(engine, timer, execlists.timer); - /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&execlists->tasklet); +} + +#define execlists_kick(t, member) \ + __execlists_kick(container_of(t, struct intel_engine_execlists, member)) + +static void execlists_timeslice(struct timer_list *timer) +{ + execlists_kick(timer, timer); +} + +static void execlists_preempt(struct timer_list *timer) +{ + execlists_kick(timer, preempt); } static void queue_request(struct intel_engine_cs *engine, @@ -2100,7 +2397,6 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); @@ -2112,7 +2408,6 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) @@ -2140,7 +2435,7 @@ __execlists_update_reg_state(const struct intel_context *ce, GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); regs[CTX_RING_HEAD] = ring->head; regs[CTX_RING_TAIL] = ring->tail; @@ -2268,7 +2563,7 @@ static int execlists_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); /* * Flush enough space to reduce the likelihood of waiting after @@ -2669,8 +2964,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; unsigned long flags; - GEM_TRACE("%s: depth<-%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth<-%d\n", + atomic_read(&execlists->tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -2723,41 +3018,28 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ + /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. + */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + invalidate_csb_entries(&execlists->csb_status[0], &execlists->csb_status[reset_value]); } -static struct i915_request *active_request(struct i915_request *rq) -{ - const struct intel_context * const ce = rq->hw_context; - struct i915_request *active = NULL; - struct list_head *list; - - if (!i915_request_is_active(rq)) /* unwound, but incomplete! */ - return rq; - - list = &i915_request_active_timeline(rq)->requests; - list_for_each_entry_from_reverse(rq, list, link) { - if (i915_request_completed(rq)) - break; - - if (rq->hw_context != ce) - break; - - active = rq; - } - - return active; -} - static void __execlists_reset_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine) { u32 *regs = ce->lrc_reg_state; + int x; - if (INTEL_GEN(engine->i915) >= 9) { - regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING; - regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16; + x = lrc_ring_mi_mode(engine); + if (x != -1) { + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; } } @@ -2766,7 +3048,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; struct i915_request *rq; - u32 *regs; mb(); /* paranoia: read the CSB pointers from after the reset */ clflush(execlists->csb_write); @@ -2789,22 +3070,20 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) /* We still have requests in-flight; the engine should be active */ GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - ce = rq->hw_context; + ce = rq->context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - /* Proclaim we have exclusive access to the context image! */ - __context_pin_acquire(ce); - - rq = active_request(rq); - if (!rq) { + if (i915_request_completed(rq)) { /* Idle context; tidy up the ring so we can restart afresh */ - ce->ring->head = ce->ring->tail; + ce->ring->head = intel_ring_wrap(ce->ring, rq->tail); goto out_replay; } /* Context has requests still in-flight; it should not be idle! */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); + rq = active_request(ce->timeline, rq); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); + GEM_BUG_ON(ce->ring->head == ce->ring->tail); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -2845,22 +3124,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * to recreate its own state. */ GEM_BUG_ON(!intel_context_is_pinned(ce)); - regs = ce->lrc_reg_state; - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring, false); + restore_default_state(ce, engine); out_replay: - GEM_TRACE("%s replay {head:%04x, tail:%04x\n", - engine->name, ce->ring->head, ce->ring->tail); + ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", + ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ - __context_pin_release(ce); unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -2868,11 +3140,11 @@ unwind: __unwind_incomplete_requests(engine); } -static void execlists_reset(struct intel_engine_cs *engine, bool stalled) +static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) { unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); spin_lock_irqsave(&engine->active.lock, flags); @@ -2886,14 +3158,14 @@ static void nop_submission_tasklet(unsigned long data) /* The driver is wedged; don't process any more events. */ } -static void execlists_cancel_requests(struct intel_engine_cs *engine) +static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -2980,13 +3252,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) if (__tasklet_enable(&execlists->tasklet)) /* And kick in case we missed a new request submission. */ tasklet_hi_schedule(&execlists->tasklet); - GEM_TRACE("%s: depth->%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth->%d\n", + atomic_read(&execlists->tasklet.count)); } -static int gen8_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) +static int gen8_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) { u32 *cs; @@ -3020,7 +3292,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, return 0; } -static int gen9_emit_bb_start(struct i915_request *rq, +static int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) { @@ -3469,17 +3741,18 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.preempt); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; - engine->cancel_requests = execlists_cancel_requests; engine->schedule = i915_schedule; engine->execlists.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; + engine->reset.rewind = execlists_reset_rewind; + engine->reset.cancel = execlists_reset_cancel; engine->reset.finish = execlists_reset_finish; engine->park = execlists_park; @@ -3494,13 +3767,27 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; + + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen8_emit_bb_start_noarb; } -static void execlists_destroy(struct intel_engine_cs *engine) +static void execlists_shutdown(struct intel_engine_cs *engine) { + /* Synchronise with residual timers and any softirq they raise */ + del_timer_sync(&engine->execlists.timer); + del_timer_sync(&engine->execlists.preempt); + tasklet_kill(&engine->execlists.tasklet); +} + +static void execlists_release(struct intel_engine_cs *engine) +{ + execlists_shutdown(engine); + intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); - kfree(engine); } static void @@ -3508,13 +3795,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ - engine->destroy = execlists_destroy; + engine->release = execlists_release; engine->resume = execlists_resume; - engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; - engine->reset.finish = execlists_reset_finish; - engine->cops = &execlists_context_ops; engine->request_alloc = execlists_request_alloc; @@ -3537,10 +3820,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) * until a more refined solution exists. */ } - if (IS_GEN(engine->i915, 8)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen9_emit_bb_start; } static inline void @@ -3584,9 +3863,15 @@ static void rcs_submission_override(struct intel_engine_cs *engine) int intel_execlists_submission_setup(struct intel_engine_cs *engine) { + struct intel_engine_execlists * const execlists = &engine->execlists; + struct drm_i915_private *i915 = engine->i915; + struct intel_uncore *uncore = engine->uncore; + u32 base = engine->mmio_base; + tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); - timer_setup(&engine->execlists.timer, execlists_submission_timer, 0); + timer_setup(&engine->execlists.timer, execlists_timeslice, 0); + timer_setup(&engine->execlists.preempt, execlists_preempt, 0); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); @@ -3594,21 +3879,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) if (engine->class == RENDER_CLASS) rcs_submission_override(engine); - return 0; -} - -int intel_execlists_submission_init(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - u32 base = engine->mmio_base; - int ret; - - ret = intel_engine_init_common(engine); - if (ret) - return ret; - if (intel_init_workaround_bb(engine)) /* * We continue even if we fail to initialize WA batch @@ -3689,7 +3959,7 @@ static void init_common_reg_state(u32 * const regs, _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); - regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; regs[CTX_BB_STATE] = RING_BB_PPGTT; } @@ -3796,12 +4066,6 @@ populate_lr_context(struct intel_context *ce, set_redzone(vaddr, engine); if (engine->default_state) { - /* - * We only want to copy over the template context state; - * skipping over the headers reserved for GuC communication, - * leaving those as zero. - */ - const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; void *defaults; defaults = i915_gem_object_pin_map(engine->default_state, @@ -3811,8 +4075,9 @@ populate_lr_context(struct intel_context *ce, goto err_unpin_ctx; } - memcpy(vaddr + start, defaults + start, engine->context_size); + memcpy(vaddr, defaults, engine->context_size); i915_gem_object_unpin_map(engine->default_state); + __set_bit(CONTEXT_VALID_BIT, &ce->flags); inhibit = false; } @@ -3826,9 +4091,7 @@ populate_lr_context(struct intel_context *ce, ret = 0; err_unpin_ctx: - __i915_gem_object_flush_map(ctx_obj, - LRC_HEADER_PAGES * PAGE_SIZE, - engine->context_size); + __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); i915_gem_object_unpin_map(ctx_obj); return ret; } @@ -3845,11 +4108,6 @@ static int __execlists_context_alloc(struct intel_context *ce, GEM_BUG_ON(ce->state); context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - /* - * Before the actual start of the context image, we insert a few pages - * for our own use and for sharing with the GuC. - */ - context_size += LRC_HEADER_PAGES * PAGE_SIZE; if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ @@ -3917,17 +4175,18 @@ static void virtual_context_destroy(struct kref *kref) for (n = 0; n < ve->num_siblings; n++) { struct intel_engine_cs *sibling = ve->siblings[n]; struct rb_node *node = &ve->nodes[sibling->id].rb; + unsigned long flags; if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->active.lock); + spin_lock_irqsave(&sibling->active.lock, flags); /* Detachment is lazily performed in the execlists tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irqrestore(&sibling->active.lock, flags); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); @@ -3966,6 +4225,13 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) ve->siblings[0]); } +static int virtual_context_alloc(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + + return __execlists_context_alloc(ce, ve->siblings[0]); +} + static int virtual_context_pin(struct intel_context *ce) { struct virtual_engine *ve = container_of(ce, typeof(*ve), context); @@ -4003,6 +4269,8 @@ static void virtual_context_exit(struct intel_context *ce) } static const struct intel_context_ops virtual_context_ops = { + .alloc = virtual_context_alloc, + .pin = virtual_context_pin, .unpin = execlists_context_unpin, @@ -4029,10 +4297,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) mask = ve->siblings[0]->mask; } - GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n", - ve->base.name, - rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", + rq->fence.context, rq->fence.seqno, + mask, ve->base.execlists.queue_priority_hint); return mask; } @@ -4123,10 +4390,9 @@ static void virtual_submit_request(struct i915_request *rq) struct i915_request *old; unsigned long flags; - GEM_TRACE("%s: rq=%llx:%lld\n", - ve->base.name, - rq->fence.context, - rq->fence.seqno); + ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", + rq->fence.context, + rq->fence.seqno); GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); @@ -4194,8 +4460,7 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) } struct intel_context * -intel_execlists_create_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs **siblings, +intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) { struct virtual_engine *ve; @@ -4206,13 +4471,13 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, return ERR_PTR(-EINVAL); if (count == 1) - return intel_context_create(ctx, siblings[0]); + return intel_context_create(siblings[0]); ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); if (!ve) return ERR_PTR(-ENOMEM); - ve->base.i915 = ctx->i915; + ve->base.i915 = siblings[0]->i915; ve->base.gt = siblings[0]->gt; ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; @@ -4239,7 +4504,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_breadcrumbs(&ve->base); - intel_engine_init_execlists(&ve->base); ve->base.cops = &virtual_context_ops; @@ -4255,7 +4519,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, virtual_submission_tasklet, (unsigned long)ve); - intel_context_init(&ve->context, ctx, &ve->base); + intel_context_init(&ve->context, &ve->base); for (n = 0; n < count; n++) { struct intel_engine_cs *sibling = siblings[n]; @@ -4322,12 +4586,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.flags |= I915_ENGINE_IS_VIRTUAL; - err = __execlists_context_alloc(&ve->context, siblings[0]); - if (err) - goto err_put; - - __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags); - return &ve->context; err_put: @@ -4336,14 +4594,12 @@ err_put: } struct intel_context * -intel_execlists_clone_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs *src) +intel_execlists_clone_virtual(struct intel_engine_cs *src) { struct virtual_engine *se = to_virtual_engine(src); struct intel_context *dst; - dst = intel_execlists_create_virtual(ctx, - se->siblings, + dst = intel_execlists_create_virtual(se->siblings, se->num_siblings); if (IS_ERR(dst)) return dst; @@ -4502,7 +4758,6 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, bool scrub) { GEM_BUG_ON(!intel_context_is_pinned(ce)); - __context_pin_acquire(ce); /* * We want a simple context + ring to execute the breadcrumb update. @@ -4512,23 +4767,21 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - if (scrub) { - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring, false); - } + if (scrub) + restore_default_state(ce, engine); /* Rerun the request; its payload has been neutered (if guilty). */ ce->ring->head = head; intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); - __context_pin_release(ce); +} + +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == + intel_execlists_set_default_submission; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 99dc576a4e25..dfbc214e14f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) @@ -82,34 +83,14 @@ enum { void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int intel_execlists_submission_setup(struct intel_engine_cs *engine); -int intel_execlists_submission_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ - -/* - * We allocate a header at the start of the context image for our own - * use, therefore the actual location of the logical state is offset - * from the start of the VMA. The layout is - * - * | [guc] | [hwsp] [logical state] | - * |<- our header ->|<- context image ->| - * - */ -/* The first page is used for sharing data with the GuC */ -#define LRC_GUCSHR_PN (0) -#define LRC_GUCSHR_SZ (1) /* At the start of the context image is its per-process HWS page */ -#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) +#define LRC_PPHWSP_PN (0) #define LRC_PPHWSP_SZ (1) -/* Finally we have the logical state for the context */ +/* After the PPHWSP we have the logical state for the context */ #define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) -/* - * Currently we include the PPHWSP in __intel_engine_context_size() so - * the size of the header is synonymous with the start of the PPHWSP. - */ -#define LRC_HEADER_PAGES LRC_PPHWSP_PN - /* Space within PPHWSP reserved to be used as scratch */ #define LRC_PPHWSP_SCRATCH 0x34 #define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) @@ -129,13 +110,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max); struct intel_context * -intel_execlists_create_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs **siblings, +intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); struct intel_context * -intel_execlists_clone_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs *src); +intel_execlists_clone_virtual(struct intel_engine_cs *src); int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, @@ -145,4 +124,7 @@ struct intel_engine_cs * intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling); +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 06ab0276e10e..08a3be65f700 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -13,8 +13,8 @@ #define CTX_CONTEXT_CONTROL (0x02 + 1) #define CTX_RING_HEAD (0x04 + 1) #define CTX_RING_TAIL (0x06 + 1) -#define CTX_RING_BUFFER_START (0x08 + 1) -#define CTX_RING_BUFFER_CONTROL (0x0a + 1) +#define CTX_RING_START (0x08 + 1) +#define CTX_RING_CTL (0x0a + 1) #define CTX_BB_STATE (0x10 + 1) #define CTX_BB_PER_CTX_PTR (0x18 + 1) #define CTX_PDP3_UDW (0x24 + 1) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 5bac3966906b..893249ea48d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -26,6 +26,7 @@ #include "intel_gt.h" #include "intel_mocs.h" #include "intel_lrc.h" +#include "intel_ring.h" /* structures required */ struct drm_i915_mocs_entry { @@ -199,14 +200,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { MOCS_ENTRY(15, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ L3_3_WB), \ - /* Bypass LLC - Uncached (EHL+) */ \ - MOCS_ENTRY(16, \ - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ - L3_1_UC), \ - /* Bypass LLC - L3 (Read-Only) (EHL+) */ \ - MOCS_ENTRY(17, \ - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ - L3_3_WB), \ /* Self-Snoop - L3 + LLC */ \ MOCS_ENTRY(18, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ @@ -270,7 +263,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = { L3_1_UC), /* HW Special Case (Displayable) */ MOCS_ENTRY(61, - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), + LE_1_UC | LE_TC_1_LLC, L3_3_WB), }; @@ -290,65 +283,42 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = { static bool get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { - bool result = false; - if (INTEL_GEN(i915) >= 12) { table->size = ARRAY_SIZE(tigerlake_mocs_table); table->table = tigerlake_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; - result = true; } else if (IS_GEN(i915, 11)) { table->size = ARRAY_SIZE(icelake_mocs_table); table->table = icelake_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; - result = true; } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; - result = true; } else if (IS_GEN9_LP(i915)) { table->size = ARRAY_SIZE(broxton_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; - result = true; } else { WARN_ONCE(INTEL_GEN(i915) >= 9, "Platform that should have a MOCS table does not.\n"); + return false; } + if (GEM_DEBUG_WARN_ON(table->size > table->n_entries)) + return false; + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN(i915, 9)) { int i; for (i = 0; i < table->size; i++) - if (WARN_ON(table->table[i].l3cc_value & - (L3_ESC(1) | L3_SCC(0x7)))) + if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value & + (L3_ESC(1) | L3_SCC(0x7)))) return false; } - return result; -} - -static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) -{ - switch (engine->id) { - case RCS0: - return GEN9_GFX_MOCS(index); - case VCS0: - return GEN9_MFX0_MOCS(index); - case BCS0: - return GEN9_BLT_MOCS(index); - case VECS0: - return GEN9_VEBOX_MOCS(index); - case VCS1: - return GEN9_MFX1_MOCS(index); - case VCS2: - return GEN11_MFX2_MOCS(index); - default: - MISSING_CASE(engine->id); - return INVALID_MMIO_REG; - } + return true; } /* @@ -358,29 +328,47 @@ static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].control_value; return table->table[I915_MOCS_PTE].control_value; } -static void init_mocs_table(struct intel_engine_cs *engine, - const struct drm_i915_mocs_table *table) +#define for_each_mocs(mocs, t, i) \ + for (i = 0; \ + i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\ + i++) + +static void __init_mocs_table(struct intel_uncore *uncore, + const struct drm_i915_mocs_table *table, + u32 addr) { - struct intel_uncore *uncore = engine->uncore; - u32 unused_value = table->table[I915_MOCS_PTE].control_value; unsigned int i; + u32 mocs; + + for_each_mocs(mocs, table, i) + intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); +} - for (i = 0; i < table->size; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - get_entry_control(table, i)); +static u32 mocs_offset(const struct intel_engine_cs *engine) +{ + static const u32 offset[] = { + [RCS0] = __GEN9_RCS0_MOCS0, + [VCS0] = __GEN9_VCS0_MOCS0, + [VCS1] = __GEN9_VCS1_MOCS0, + [VECS0] = __GEN9_VECS0_MOCS0, + [BCS0] = __GEN9_BCS0_MOCS0, + [VCS2] = __GEN11_VCS2_MOCS0, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset)); + return offset[engine->id]; +} - /* All remaining entries are unused */ - for (; i < table->n_entries; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - unused_value); +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) +{ + __init_mocs_table(engine->uncore, table, mocs_offset(engine)); } /* @@ -390,51 +378,34 @@ static void init_mocs_table(struct intel_engine_cs *engine, static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; return table->table[I915_MOCS_PTE].l3cc_value; } -static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, - u16 low, - u16 high) +static inline u32 l3cc_combine(u16 low, u16 high) { return low | (u32)high << 16; } +#define for_each_l3cc(l3cc, t, i) \ + for (i = 0; \ + i < ((t)->n_entries + 1) / 2 ? \ + (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \ + get_entry_l3cc((t), 2 * i + 1))), 1 : \ + 0; \ + i++) + static void init_l3cc_table(struct intel_engine_cs *engine, const struct drm_i915_mocs_table *table) { struct intel_uncore *uncore = engine->uncore; - u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value; unsigned int i; + u32 l3cc; - for (i = 0; i < table->size / 2; i++) { - u16 low = get_entry_l3cc(table, 2 * i); - u16 high = get_entry_l3cc(table, 2 * i + 1); - - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(table, low, high)); - } - - /* Odd table size - 1 left over */ - if (table->size & 1) { - u16 low = get_entry_l3cc(table, 2 * i); - - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(table, low, unused_value)); - i++; - } - - /* All remaining entries are also unused */ - for (; i < table->n_entries / 2; i++) - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(table, unused_value, - unused_value)); + for_each_l3cc(l3cc, table, i) + intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc); } void intel_mocs_init_engine(struct intel_engine_cs *engine) @@ -455,38 +426,33 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_l3cc_table(engine, &table); } -static void intel_mocs_init_global(struct intel_gt *gt) +static u32 global_mocs_offset(void) { - struct intel_uncore *uncore = gt->uncore; - struct drm_i915_mocs_table table; - unsigned int index; + return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); +} - GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); +static void init_global_mocs(struct intel_gt *gt) +{ + struct drm_i915_mocs_table table; - if (!get_mocs_settings(gt->i915, &table)) + /* + * LLC and eDRAM control values are not applicable to dgfx + */ + if (IS_DGFX(gt->i915)) return; - if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) + if (!get_mocs_settings(gt->i915, &table)) return; - for (index = 0; index < table.size; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), - table.table[index].control_value); - - /* - * Ok, now set the unused entries to the invalid entry (index 0). These - * entries are officially undefined and no contract for the contents and - * settings is given for these entries. - */ - for (; index < table.n_entries; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), - table.table[0].control_value); + __init_mocs_table(gt->uncore, &table, global_mocs_offset()); } void intel_mocs_init(struct intel_gt *gt) { if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) - intel_mocs_init_global(gt); + init_global_mocs(gt); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_mocs.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 70f0e01a38b9..9e303c29d6e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -88,21 +88,18 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) * do not want the enable hysteresis to less than the wakeup latency. * * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). + * service latency, and puts it under 10us for Icelake, similar to + * Broadwell+, To be conservative, we want to factor in a context + * switch on top (due to ksoftirqd). */ - set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); /* 3a: Enable RC6 */ - set(uncore, GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1)); + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); set(uncore, GEN9_PG_ENABLE, GEN9_RENDER_PG_ENABLE | @@ -173,13 +170,18 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) else rc6_mode = GEN6_RC_CTL_EI_MODE(1); - set(uncore, GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - rc6_mode); + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode; - set(uncore, GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); + /* + * WaRsDisableCoarsePowerGating:skl,cnl + * - Render/Media PG need to be disabled with RC6. + */ + if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); } static void gen8_rc6_enable(struct intel_rc6 *rc6) @@ -198,10 +200,10 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ - set(uncore, GEN6_RC_CONTROL, + rc6->ctl_enable = GEN6_RC_CTL_HW_ENABLE | GEN7_RC_CTL_TO_MODE | - GEN6_RC_CTL_RC6_ENABLE); + GEN6_RC_CTL_RC6_ENABLE; } static void gen6_rc6_enable(struct intel_rc6 *rc6) @@ -237,10 +239,10 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; if (HAS_RC6pp(i915)) rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - set(uncore, GEN6_RC_CONTROL, + rc6->ctl_enable = rc6_mask | GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); + GEN6_RC_CTL_HW_ENABLE; rc6vids = 0; ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, @@ -358,7 +360,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6) VLV_RENDER_RC6_COUNT_EN)); /* 3: Enable RC6 */ - set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE); + rc6->ctl_enable = GEN7_RC_CTL_TO_MODE; } static void vlv_rc6_enable(struct intel_rc6 *rc6) @@ -384,8 +386,8 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6) VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); - set(uncore, GEN6_RC_CONTROL, - GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); + rc6->ctl_enable = + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; } static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) @@ -486,6 +488,21 @@ static void rpm_put(struct intel_rc6 *rc6) rc6->wakeref = false; } +static bool pctx_corrupted(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return false; + + if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO)) + return false; + + dev_notice(i915->drm.dev, + "RC6 context corruption, disabling runtime power management\n"); + return true; +} + static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); @@ -525,6 +542,11 @@ void intel_rc6_init(struct intel_rc6 *rc6) void intel_rc6_sanitize(struct intel_rc6 *rc6) { + if (rc6->enabled) { /* unbalanced suspend/resume */ + rpm_get(rc6); + rc6->enabled = false; + } + if (rc6->supported) __intel_rc6_disable(rc6); } @@ -554,13 +576,51 @@ void intel_rc6_enable(struct intel_rc6 *rc6) else if (INTEL_GEN(i915) >= 6) gen6_rc6_enable(rc6); + rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE; + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + rc6->ctl_enable = 0; + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + if (unlikely(pctx_corrupted(rc6))) + return; + /* rc6 is ready, runtime-pm is go! */ rpm_put(rc6); rc6->enabled = true; } +void intel_rc6_unpark(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->enabled) + return; + + /* Restore HW timers for automatic RC6 entry while busy */ + set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable); +} + +void intel_rc6_park(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->enabled) + return; + + if (unlikely(pctx_corrupted(rc6))) { + intel_rc6_disable(rc6); + return; + } + + if (!rc6->manual) + return; + + /* Turn off the HW timers and go directly to rc6 */ + set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); + set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT); +} + void intel_rc6_disable(struct intel_rc6 *rc6) { if (!rc6->enabled) @@ -710,3 +770,7 @@ u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) { return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_rc6.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index 5e6711f36457..9f0f23fca8af 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -15,6 +15,9 @@ struct intel_rc6; void intel_rc6_init(struct intel_rc6 *rc6); void intel_rc6_fini(struct intel_rc6 *rc6); +void intel_rc6_unpark(struct intel_rc6 *rc6); +void intel_rc6_park(struct intel_rc6 *rc6); + void intel_rc6_sanitize(struct intel_rc6 *rc6); void intel_rc6_enable(struct intel_rc6 *rc6); void intel_rc6_disable(struct intel_rc6 *rc6); diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index 214f354d6ae4..bfbb623f7a4f 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -18,10 +18,13 @@ struct intel_rc6 { u64 prev_hw_residency[4]; u64 cur_residency[4]; + u32 ctl_enable; + struct drm_i915_gem_object *pctx; bool supported : 1; bool enabled : 1; + bool manual : 1; bool wakeref : 1; }; diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 6d05f9c64178..5954ecc3207f 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,16 +27,7 @@ #include "i915_drv.h" #include "intel_renderstate.h" - -struct intel_renderstate { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 batch_offset; - u32 batch_size; - u32 aux_offset; - u32 aux_size; -}; +#include "intel_ring.h" static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) @@ -83,11 +74,11 @@ static int render_state_setup(struct intel_renderstate *so, u32 *d; int ret; - ret = i915_gem_object_prepare_write(so->obj, &needs_clflush); + ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush); if (ret) return ret; - d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); + d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -165,7 +156,7 @@ static int render_state_setup(struct intel_renderstate *so, ret = 0; out: - i915_gem_object_finish_access(so->obj); + i915_gem_object_finish_access(so->vma->obj); return ret; err: @@ -176,61 +167,84 @@ err: #undef OUT_BATCH -int intel_renderstate_emit(struct i915_request *rq) +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = rq->engine; - struct intel_renderstate so = {}; /* keep the compiler happy */ + struct drm_i915_gem_object *obj; int err; - so.rodata = render_state_get_rodata(engine); - if (!so.rodata) + memset(so, 0, sizeof(*so)); + + so->rodata = render_state_get_rodata(engine); + if (!so->rodata) return 0; - if (so.rodata->batch_items * 4 > PAGE_SIZE) + if (so->rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(so.obj)) - return PTR_ERR(so.obj); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); - so.vma = i915_vma_instance(so.obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(so.vma)) { - err = PTR_ERR(so.vma); + so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(so->vma)) { + err = PTR_ERR(so->vma); goto err_obj; } - err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) goto err_vma; - err = render_state_setup(&so, rq->i915); + err = render_state_setup(so, engine->i915); if (err) goto err_unpin; + return 0; + +err_unpin: + i915_vma_unpin(so->vma); +err_vma: + i915_vma_close(so->vma); +err_obj: + i915_gem_object_put(obj); + so->vma = NULL; + return err; +} + +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + int err; + + if (!so->vma) + return 0; + err = engine->emit_bb_start(rq, - so.batch_offset, so.batch_size, + so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; - if (so.aux_size > 8) { + if (so->aux_size > 8) { err = engine->emit_bb_start(rq, - so.aux_offset, so.aux_size, + so->aux_offset, so->aux_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; } - i915_vma_lock(so.vma); - err = i915_request_await_object(rq, so.vma->obj, false); + i915_vma_lock(so->vma); + err = i915_request_await_object(rq, so->vma->obj, false); if (err == 0) - err = i915_vma_move_to_active(so.vma, rq, 0); - i915_vma_unlock(so.vma); -err_unpin: - i915_vma_unpin(so.vma); -err_vma: - i915_vma_close(so.vma); -err_obj: - i915_gem_object_put(so.obj); + err = i915_vma_move_to_active(so->vma, rq, 0); + i915_vma_unlock(so->vma); + return err; } + +void intel_renderstate_fini(struct intel_renderstate *so) +{ + i915_vma_unpin_and_release(&so->vma, 0); +} diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 8d5079145054..5700be69a05a 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -27,6 +27,8 @@ #include <linux/types.h> struct i915_request; +struct intel_engine_cs; +struct i915_vma; struct intel_renderstate_rodata { const u32 *reloc; @@ -46,6 +48,19 @@ extern const struct intel_renderstate_rodata gen7_null_state; extern const struct intel_renderstate_rodata gen8_null_state; extern const struct intel_renderstate_rodata gen9_null_state; -int intel_renderstate_emit(struct i915_request *rq); +struct intel_renderstate { + const struct intel_renderstate_rodata *rodata; + struct i915_vma *vma; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; +}; + +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine); +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq); +void intel_renderstate_fini(struct intel_renderstate *so); #endif /* _INTEL_RENDERSTATE_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index bf8d1ed4b1d8..1c51296646e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -21,6 +21,7 @@ #include "intel_reset.h" #include "uc/intel_guc.h" +#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -40,27 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *hung_ctx = rq->gem_context; + struct intel_context *hung_ctx = rq->context; if (!i915_request_is_active(rq)) return; lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) - if (rq->gem_context == hung_ctx) + if (rq->context == hung_ctx) i915_request_skip(rq, -EIO); } -static void client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) +static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { - unsigned int score; + struct drm_i915_file_private *file_priv = ctx->file_priv; unsigned long prev_hang; + unsigned int score; + + if (IS_ERR_OR_NULL(file_priv)) + return; - if (i915_gem_context_is_banned(ctx)) + score = 0; + if (banned) score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; prev_hang = xchg(&file_priv->hang_timestamp, jiffies); if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) @@ -75,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static bool context_mark_guilty(struct i915_gem_context *ctx) +static bool mark_guilty(struct i915_request *rq) { + struct i915_gem_context *ctx; unsigned long prev_hang; bool banned; int i; + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (!ctx) + return false; + + if (i915_gem_context_is_closed(ctx)) { + intel_context_set_banned(rq->context); + banned = true; + goto out; + } + atomic_inc(&ctx->guilty_count); /* Cool contexts are too cool to be banned! (Used for reset testing.) */ - if (!i915_gem_context_is_bannable(ctx)) - return false; + if (!i915_gem_context_is_bannable(ctx)) { + banned = false; + goto out; + } + + dev_notice(ctx->i915->drm.dev, + "%s context reset due to GPU hang\n", + ctx->name); /* Record the timestamp for the last N hangs */ prev_hang = ctx->hang_timestamp[0]; @@ -100,18 +124,25 @@ static bool context_mark_guilty(struct i915_gem_context *ctx) if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - i915_gem_context_set_banned(ctx); + intel_context_set_banned(rq->context); } - if (!IS_ERR_OR_NULL(ctx->file_priv)) - client_mark_guilty(ctx->file_priv, ctx); + client_mark_guilty(ctx, banned); +out: + i915_gem_context_put(ctx); return banned; } -static void context_mark_innocent(struct i915_gem_context *ctx) +static void mark_innocent(struct i915_request *rq) { - atomic_inc(&ctx->active_count); + struct i915_gem_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx) + atomic_inc(&ctx->active_count); + rcu_read_unlock(); } void __i915_request_reset(struct i915_request *rq, bool guilty) @@ -124,14 +155,16 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) GEM_BUG_ON(i915_request_completed(rq)); + rcu_read_lock(); /* protect the GEM context */ if (guilty) { i915_request_skip(rq, -EIO); - if (context_mark_guilty(rq->gem_context)) + if (mark_guilty(rq)) engine_skip_context(rq); } else { dma_fence_set_error(&rq->fence, -EAGAIN); - context_mark_innocent(rq->gem_context); + mark_innocent(rq); } + rcu_read_unlock(); } static bool i915_in_reset(struct pci_dev *pdev) @@ -647,7 +680,8 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * GPU state upon resume, i.e. fail to restart after a reset. */ intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - engine->reset.prepare(engine); + if (engine->reset.prepare) + engine->reset.prepare(engine); } static void revoke_mmaps(struct intel_gt *gt) @@ -667,8 +701,13 @@ static void revoke_mmaps(struct intel_gt *gt) continue; GEM_BUG_ON(vma->fence != >->ggtt->fence_regs[i]); - node = &vma->obj->base.vma_node; + + if (!vma->mmo) + continue; + + node = &vma->mmo->vma_node; vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, vma->size, @@ -722,10 +761,11 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) static void reset_finish_engine(struct intel_engine_cs *engine) { - engine->reset.finish(engine); + if (engine->reset.finish) + engine->reset.finish(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - intel_engine_breadcrumbs_irq(engine); + intel_engine_signal_breadcrumbs(engine); } static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) @@ -754,7 +794,7 @@ static void nop_submit_request(struct i915_request *request) i915_request_mark_complete(request); spin_unlock_irqrestore(&engine->active.lock, flags); - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); } static void __intel_gt_set_wedged(struct intel_gt *gt) @@ -799,7 +839,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) /* Mark all executing requests as skipped */ for_each_engine(engine, gt, id) - engine->cancel_requests(engine); + if (engine->reset.cancel) + engine->reset.cancel(engine); reset_finish(gt, awake); @@ -820,7 +861,6 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) { struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl; - unsigned long flags; bool ok; if (!test_bit(I915_WEDGED, >->reset.flags)) @@ -842,7 +882,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * * No more can be submitted until we reset the wedged bit. */ - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); list_for_each_entry(tl, &timelines->active_list, link) { struct dma_fence *fence; @@ -850,7 +890,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (!fence) continue; - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); /* * All internal dependencies (i915_requests) will have @@ -863,10 +903,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) dma_fence_put(fence); /* Restart iteration after droping lock */ - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); tl = list_entry(&timelines->active_list, typeof(*tl), link); } - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); /* We must reset pending GPU events before restoring our submission */ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ @@ -1024,8 +1064,6 @@ void intel_gt_reset(struct intel_gt *gt, if (ret) goto taint; - intel_gt_queue_hangcheck(gt); - finish: reset_finish(gt, awake); unlock: @@ -1072,9 +1110,10 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; + bool uses_guc = intel_engine_in_guc_submission_mode(engine); int ret; - GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); + ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)); if (!intel_engine_pm_get_if_awake(engine)) @@ -1087,14 +1126,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!engine->gt->uc.guc.execbuf_client) + if (!uses_guc) ret = intel_gt_reset_engine(engine); else ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->gt->uc.guc.execbuf_client ? "GuC " : "", + uses_guc ? "GuC " : "", engine->name, ret); goto out; } @@ -1116,7 +1155,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); return ret; } @@ -1353,4 +1392,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" +#include "selftest_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c new file mode 100644 index 000000000000..374b28f13ca0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -0,0 +1,318 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_object.h" +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_engine.h" +#include "intel_ring.h" +#include "intel_timeline.h" + +unsigned int intel_ring_update_space(struct intel_ring *ring) +{ + unsigned int space; + + space = __intel_ring_space(ring->head, ring->emit, ring->size); + + ring->space = space; + return space; +} + +int intel_ring_pin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + unsigned int flags; + void *addr; + int ret; + + if (atomic_fetch_inc(&ring->pin_count)) + return 0; + + flags = PIN_GLOBAL; + + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + + if (vma->obj->stolen) + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + ret = i915_vma_pin(vma, 0, 0, flags); + if (unlikely(ret)) + goto err_unpin; + + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto err_ring; + } + + i915_vma_make_unshrinkable(vma); + + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + + ring->vaddr = addr; + return 0; + +err_ring: + i915_vma_unpin(vma); +err_unpin: + atomic_dec(&ring->pin_count); + return ret; +} + +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + tail = intel_ring_wrap(ring, tail); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + +void intel_ring_unpin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + + if (!atomic_dec_and_test(&ring->pin_count)) + return; + + i915_vma_unset_ggtt_write(vma); + if (i915_vma_is_map_and_fenceable(vma)) + i915_vma_unpin_iomap(vma); + else + i915_gem_object_unpin_map(vma->obj); + + i915_vma_make_purgeable(vma); + i915_vma_unpin(vma); +} + +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +{ + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = ERR_PTR(-ENODEV); + if (i915_ggtt_has_aperture(ggtt)) + obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. + */ + if (vm->has_read_only) + i915_gem_object_set_readonly(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size) +{ + struct drm_i915_private *i915 = engine->i915; + struct intel_ring *ring; + struct i915_vma *vma; + + GEM_BUG_ON(!is_power_of_2(size)); + GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) + return ERR_PTR(-ENOMEM); + + kref_init(&ring->ref); + ring->size = size; + + /* + * Workaround an erratum on the i830 which causes a hang if + * the TAIL pointer points to within the last 2 cachelines + * of the buffer. + */ + ring->effective_size = size; + if (IS_I830(i915) || IS_I845G(i915)) + ring->effective_size -= 2 * CACHELINE_BYTES; + + intel_ring_update_space(ring); + + vma = create_ring_vma(engine->gt->ggtt, size); + if (IS_ERR(vma)) { + kfree(ring); + return ERR_CAST(vma); + } + ring->vma = vma; + + return ring; +} + +void intel_ring_free(struct kref *ref) +{ + struct intel_ring *ring = container_of(ref, typeof(*ring), ref); + + i915_vma_put(ring->vma); + kfree(ring); +} + +static noinline int +wait_for_space(struct intel_ring *ring, + struct intel_timeline *tl, + unsigned int bytes) +{ + struct i915_request *target; + long timeout; + + if (intel_ring_update_space(ring) >= bytes) + return 0; + + GEM_BUG_ON(list_empty(&tl->requests)); + list_for_each_entry(target, &tl->requests, link) { + if (target->ring != ring) + continue; + + /* Would completion of this request free enough space? */ + if (bytes <= __intel_ring_space(target->postfix, + ring->emit, ring->size)) + break; + } + + if (GEM_WARN_ON(&target->link == &tl->requests)) + return -ENOSPC; + + timeout = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + return timeout; + + i915_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; +} + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) +{ + struct intel_ring *ring = rq->ring; + const unsigned int remain_usable = ring->effective_size - ring->emit; + const unsigned int bytes = num_dwords * sizeof(u32); + unsigned int need_wrap = 0; + unsigned int total_bytes; + u32 *cs; + + /* Packets must be qword aligned. */ + GEM_BUG_ON(num_dwords & 1); + + total_bytes = bytes + rq->reserved_space; + GEM_BUG_ON(total_bytes > ring->effective_size); + + if (unlikely(total_bytes > remain_usable)) { + const int remain_actual = ring->size - ring->emit; + + if (bytes > remain_usable) { + /* + * Not enough space for the basic request. So need to + * flush out the remainder and then wait for + * base + reserved. + */ + total_bytes += remain_actual; + need_wrap = remain_actual | 1; + } else { + /* + * The base request will fit but the reserved space + * falls off the end. So we don't need an immediate + * wrap and only need to effectively wait for the + * reserved size from the start of ringbuffer. + */ + total_bytes = rq->reserved_space + remain_actual; + } + } + + if (unlikely(total_bytes > ring->space)) { + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). + * + * See also i915_request_alloc() and i915_request_add(). + */ + GEM_BUG_ON(!rq->reserved_space); + + ret = wait_for_space(ring, + i915_request_timeline(rq), + total_bytes); + if (unlikely(ret)) + return ERR_PTR(ret); + } + + if (unlikely(need_wrap)) { + need_wrap &= ~1; + GEM_BUG_ON(need_wrap > ring->space); + GEM_BUG_ON(ring->emit + need_wrap > ring->size); + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); + + /* Fill the tail with MI_NOOP */ + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); + ring->space -= need_wrap; + ring->emit = 0; + } + + GEM_BUG_ON(ring->emit > ring->size - bytes); + GEM_BUG_ON(ring->space < bytes); + cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); + ring->emit += bytes; + ring->space -= bytes; + + return cs; +} + +/* Align the ring tail to a cacheline boundary */ +int intel_ring_cacheline_align(struct i915_request *rq) +{ + int num_dwords; + void *cs; + + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); + if (num_dwords == 0) + return 0; + + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + + cs = intel_ring_begin(rq, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); + intel_ring_advance(rq, cs + num_dwords); + + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h new file mode 100644 index 000000000000..ea2839d9e044 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_H +#define INTEL_RING_H + +#include "i915_gem.h" /* GEM_BUG_ON */ +#include "i915_request.h" +#include "intel_ring_types.h" + +struct intel_engine_cs; + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); +int intel_ring_cacheline_align(struct i915_request *rq); + +unsigned int intel_ring_update_space(struct intel_ring *ring); + +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_reset(struct intel_ring *ring, u32 tail); + +void intel_ring_free(struct kref *ref); + +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) +{ + kref_get(&ring->ref); + return ring; +} + +static inline void intel_ring_put(struct intel_ring *ring) +{ + kref_put(&ring->ref, intel_ring_free); +} + +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) +{ + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). + */ + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); +} + +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline bool +intel_ring_offset_valid(const struct intel_ring *ring, + unsigned int pos) +{ + if (pos & -ring->size) /* must be strictly within the ring */ + return false; + + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ + return false; + + return true; +} + +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); +} + +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline +} + +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} + +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + +#endif /* INTEL_RING_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index bf631f15aa78..81f872f9ef03 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -40,6 +40,7 @@ #include "intel_gt_irq.h" #include "intel_gt_pm_irq.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, @@ -47,16 +48,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -unsigned int intel_ring_update_space(struct intel_ring *ring) -{ - unsigned int space; - - space = __intel_ring_space(ring->head, ring->emit, ring->size); - - ring->space = space; - return space; -} - static int gen2_render_ring_flush(struct i915_request *rq, u32 mode) { @@ -371,6 +362,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) */ flags |= PIPE_CONTROL_CS_STALL; + /* + * CS_STALL suggests at least a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + /* Just flush everything. Experiments have shown that reducing the * number of bits based on the write domains has little performance * impact. @@ -389,13 +386,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; - /* - * TLB invalidate requires a post-sync write. - */ - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache @@ -463,7 +453,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; @@ -505,14 +496,13 @@ static void set_hwstam(struct intel_engine_cs *engine, u32 mask) static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) { - struct drm_i915_private *dev_priv = engine->i915; u32 addr; addr = lower_32_bits(phys); - if (INTEL_GEN(dev_priv) >= 4) + if (INTEL_GEN(engine->i915) >= 4) addr |= (phys >> 28) & 0xf0; - I915_WRITE(HWS_PGA, addr); + intel_uncore_write(engine->uncore, HWS_PGA, addr); } static struct page *status_page(struct intel_engine_cs *engine) @@ -531,14 +521,13 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine) static void set_hwsp(struct intel_engine_cs *engine, u32 offset) { - struct drm_i915_private *dev_priv = engine->i915; i915_reg_t hwsp; /* * The ring status page addresses are no longer next to the rest of * the ring registers as of gen7. */ - if (IS_GEN(dev_priv, 7)) { + if (IS_GEN(engine->i915, 7)) { switch (engine->id) { /* * No more rings exist on Gen7. Default case is only to shut up @@ -560,14 +549,14 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) hwsp = VEBOX_HWS_PGA_GEN7; break; } - } else if (IS_GEN(dev_priv, 6)) { + } else if (IS_GEN(engine->i915, 6)) { hwsp = RING_HWS_PGA_GEN6(engine->mmio_base); } else { hwsp = RING_HWS_PGA(engine->mmio_base); } - I915_WRITE(hwsp, offset); - POSTING_READ(hwsp); + intel_uncore_write(engine->uncore, hwsp, offset); + intel_uncore_posting_read(engine->uncore, hwsp); } static void flush_cs_tlb(struct intel_engine_cs *engine) @@ -642,8 +631,8 @@ static int xcs_resume(struct intel_engine_cs *engine) struct intel_ring *ring = engine->legacy.ring; int ret = 0; - GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n", - engine->name, ring->head, ring->tail); + ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", + ring->head, ring->tail); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); @@ -730,7 +719,7 @@ static int xcs_resume(struct intel_engine_cs *engine) } /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); out: intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); @@ -756,10 +745,10 @@ static void reset_prepare(struct intel_engine_cs *engine) * * FIXME: Wa for more modern gens needs to be validated */ - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); if (intel_engine_stop_cs(engine)) - GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); + ENGINE_TRACE(engine, "timed out on STOP_RING\n"); intel_uncore_write_fw(uncore, RING_HEAD(base), @@ -775,12 +764,11 @@ static void reset_prepare(struct intel_engine_cs *engine) /* Check acts as a post */ if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - GEM_TRACE("%s: ring head [%x] not parked\n", - engine->name, - intel_uncore_read_fw(uncore, RING_HEAD(base))); + ENGINE_TRACE(engine, "ring head [%x] not parked\n", + intel_uncore_read_fw(uncore, RING_HEAD(base))); } -static void reset_ring(struct intel_engine_cs *engine, bool stalled) +static void reset_rewind(struct intel_engine_cs *engine, bool stalled) { struct i915_request *pos, *rq; unsigned long flags; @@ -851,7 +839,8 @@ static void reset_finish(struct intel_engine_cs *engine) static int rcs_resume(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *i915 = engine->i915; + struct intel_uncore *uncore = engine->uncore; /* * Disable CONSTANT_BUFFER before it is loaded from the context @@ -863,13 +852,14 @@ static int rcs_resume(struct intel_engine_cs *engine) * they are already accustomed to from before contexts were * enabled. */ - if (IS_GEN(dev_priv, 4)) - I915_WRITE(ECOSKPD, + if (IS_GEN(i915, 4)) + intel_uncore_write(uncore, ECOSKPD, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE)); /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ - if (IS_GEN_RANGE(dev_priv, 4, 6)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); + if (IS_GEN_RANGE(i915, 4, 6)) + intel_uncore_write(uncore, MI_MODE, + _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be @@ -877,38 +867,40 @@ static int rcs_resume(struct intel_engine_cs *engine) * * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv */ - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + if (IS_GEN_RANGE(i915, 6, 7)) + intel_uncore_write(uncore, MI_MODE, + _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); /* Required for the hardware to program scanline values for waiting */ /* WaEnableFlushTlbInvalidationMode:snb */ - if (IS_GEN(dev_priv, 6)) - I915_WRITE(GFX_MODE, + if (IS_GEN(i915, 6)) + intel_uncore_write(uncore, GFX_MODE, _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ - if (IS_GEN(dev_priv, 7)) - I915_WRITE(GFX_MODE_GEN7, + if (IS_GEN(i915, 7)) + intel_uncore_write(uncore, GFX_MODE_GEN7, _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(i915, 6)) { /* From the Sandybridge PRM, volume 1 part 3, page 24: * "If this bit is set, STCunit will have LRA as replacement * policy. [...] This bit must be reset. LRA replacement * policy is not supported." */ - I915_WRITE(CACHE_MODE_0, + intel_uncore_write(uncore, CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); } - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + if (IS_GEN_RANGE(i915, 6, 7)) + intel_uncore_write(uncore, INSTPM, + _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); return xcs_resume(engine); } -static void cancel_requests(struct intel_engine_cs *engine) +static void reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; unsigned long flags; @@ -1186,162 +1178,6 @@ i915_emit_bb_start(struct i915_request *rq, return 0; } -int intel_ring_pin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - unsigned int flags; - void *addr; - int ret; - - if (atomic_fetch_inc(&ring->pin_count)) - return 0; - - flags = PIN_GLOBAL; - - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - - if (vma->obj->stolen) - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - - ret = i915_vma_pin(vma, 0, 0, flags); - if (unlikely(ret)) - goto err_unpin; - - if (i915_vma_is_map_and_fenceable(vma)) - addr = (void __force *)i915_vma_pin_iomap(vma); - else - addr = i915_gem_object_pin_map(vma->obj, - i915_coherent_map_type(vma->vm->i915)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_ring; - } - - i915_vma_make_unshrinkable(vma); - - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; - - return 0; - -err_ring: - i915_vma_unpin(vma); -err_unpin: - atomic_dec(&ring->pin_count); - return ret; -} - -void intel_ring_reset(struct intel_ring *ring, u32 tail) -{ - tail = intel_ring_wrap(ring, tail); - ring->tail = tail; - ring->head = tail; - ring->emit = tail; - intel_ring_update_space(ring); -} - -void intel_ring_unpin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - - if (!atomic_dec_and_test(&ring->pin_count)) - return; - - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->emit); - - i915_vma_unset_ggtt_write(vma); - if (i915_vma_is_map_and_fenceable(vma)) - i915_vma_unpin_iomap(vma); - else - i915_gem_object_unpin_map(vma->obj); - - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); - i915_vma_make_purgeable(vma); -} - -static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) -{ - struct i915_address_space *vm = &ggtt->vm; - struct drm_i915_private *i915 = vm->i915; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = i915_gem_object_create_stolen(i915, size); - if (IS_ERR(obj)) - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - /* - * Mark ring buffers as read-only from GPU side (so no stray overwrites) - * if supported by the platform's GGTT. - */ - if (vm->has_read_only) - i915_gem_object_set_readonly(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) -{ - struct drm_i915_private *i915 = engine->i915; - struct intel_ring *ring; - struct i915_vma *vma; - - GEM_BUG_ON(!is_power_of_2(size)); - GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - return ERR_PTR(-ENOMEM); - - kref_init(&ring->ref); - - ring->size = size; - /* Workaround an erratum on the i830 which causes a hang if - * the TAIL pointer points to within the last 2 cachelines - * of the buffer. - */ - ring->effective_size = size; - if (IS_I830(i915) || IS_I845G(i915)) - ring->effective_size -= 2 * CACHELINE_BYTES; - - intel_ring_update_space(ring); - - vma = create_ring_vma(engine->gt->ggtt, size); - if (IS_ERR(vma)) { - kfree(ring); - return ERR_CAST(vma); - } - ring->vma = vma; - - return ring; -} - -void intel_ring_free(struct kref *ref) -{ - struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - - i915_vma_put(ring->vma); - kfree(ring); -} - static void __ring_context_fini(struct intel_context *ce) { i915_vma_put(ce->state); @@ -1483,6 +1319,8 @@ static int ring_context_alloc(struct intel_context *ce) return PTR_ERR(vma); ce->state = vma; + if (engine->default_state) + __set_bit(CONTEXT_VALID_BIT, &ce->flags); } return 0; @@ -1525,45 +1363,37 @@ static const struct intel_context_ops ring_context_ops = { .destroy = ring_context_destroy, }; -static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) +static int load_pd_dir(struct i915_request *rq, + const struct i915_ppgtt *ppgtt, + u32 valid) { const struct intel_engine_cs * const engine = rq->engine; u32 *cs; - cs = intel_ring_begin(rq, 6); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); - *cs++ = PP_DIR_DCLV_2G; + *cs++ = valid; *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; - intel_ring_advance(rq, cs); - - return 0; -} - -static int flush_pd_dir(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Stall until the page table load is complete */ + /* Stall until the page table load is complete? */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = intel_gt_scratch_offset(rq->engine->gt, + *cs++ = intel_gt_scratch_offset(engine->gt, INTEL_GT_SCRATCH_FIELD_DEFAULT); - *cs++ = MI_NOOP; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE); intel_ring_advance(rq, cs); + return 0; } @@ -1650,7 +1480,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; + *cs++ = i915_ggtt_offset(rq->context->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1720,10 +1550,10 @@ static int remap_l3_slice(struct i915_request *rq, int slice) static int remap_l3(struct i915_request *rq) { - struct i915_gem_context *ctx = rq->gem_context; + struct i915_gem_context *ctx = i915_request_gem_context(rq); int i, err; - if (!ctx->remap_slice) + if (!ctx || !ctx->remap_slice) return 0; for (i = 0; i < MAX_L3_SLICES; i++) { @@ -1739,34 +1569,50 @@ static int remap_l3(struct i915_request *rq) return 0; } +static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) +{ + int ret; + + if (!vm) + return 0; + + ret = rq->engine->emit_flush(rq, EMIT_FLUSH); + if (ret) + return ret; + + /* + * Not only do we need a full barrier (post-sync write) after + * invalidating the TLBs, but we need to wait a little bit + * longer. Whether this is merely delaying us, or the + * subsequent flush is a key part of serialising with the + * post-sync op, this extra pass appears vital before a + * mm switch! + */ + ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G); + if (ret) + return ret; + + return rq->engine->emit_flush(rq, EMIT_FLUSH); +} + static int switch_context(struct i915_request *rq) { - struct intel_context *ce = rq->hw_context; - struct i915_address_space *vm = vm_alias(ce); + struct intel_context *ce = rq->context; int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - if (vm) { - ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm)); - if (ret) - return ret; - } + ret = switch_mm(rq, vm_alias(ce)); + if (ret) + return ret; if (ce->state) { u32 hw_flags; GEM_BUG_ON(rq->engine->id != RCS0); - /* - * The kernel context(s) is treated as pure scratch and is not - * expected to retain any state (as we sacrifice it during - * suspend and on resume it may be corrupted). This is ok, - * as nothing actually executes using the kernel context; it - * is purely used for flushing user contexts. - */ hw_flags = 0; - if (i915_gem_context_is_kernel(rq->gem_context)) + if (!test_bit(CONTEXT_VALID_BIT, &ce->flags)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); @@ -1774,34 +1620,6 @@ static int switch_context(struct i915_request *rq) return ret; } - if (vm) { - struct intel_engine_cs *engine = rq->engine; - - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - return ret; - - ret = flush_pd_dir(rq); - if (ret) - return ret; - - /* - * Not only do we need a full barrier (post-sync write) after - * invalidating the TLBs, but we need to wait a little bit - * longer. Whether this is merely delaying us, or the - * subsequent flush is a key part of serialising with the - * post-sync op, this extra pass appears vital before a - * mm switch! - */ - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - return ret; - - ret = engine->emit_flush(rq, EMIT_FLUSH); - if (ret) - return ret; - } - ret = remap_l3(rq); if (ret) return ret; @@ -1813,7 +1631,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* @@ -1836,148 +1654,6 @@ static int ring_request_alloc(struct i915_request *request) return 0; } -static noinline int -wait_for_space(struct intel_ring *ring, - struct intel_timeline *tl, - unsigned int bytes) -{ - struct i915_request *target; - long timeout; - - if (intel_ring_update_space(ring) >= bytes) - return 0; - - GEM_BUG_ON(list_empty(&tl->requests)); - list_for_each_entry(target, &tl->requests, link) { - if (target->ring != ring) - continue; - - /* Would completion of this request free enough space? */ - if (bytes <= __intel_ring_space(target->postfix, - ring->emit, ring->size)) - break; - } - - if (GEM_WARN_ON(&target->link == &tl->requests)) - return -ENOSPC; - - timeout = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) - return timeout; - - i915_request_retire_upto(target); - - intel_ring_update_space(ring); - GEM_BUG_ON(ring->space < bytes); - return 0; -} - -u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) -{ - struct intel_ring *ring = rq->ring; - const unsigned int remain_usable = ring->effective_size - ring->emit; - const unsigned int bytes = num_dwords * sizeof(u32); - unsigned int need_wrap = 0; - unsigned int total_bytes; - u32 *cs; - - /* Packets must be qword aligned. */ - GEM_BUG_ON(num_dwords & 1); - - total_bytes = bytes + rq->reserved_space; - GEM_BUG_ON(total_bytes > ring->effective_size); - - if (unlikely(total_bytes > remain_usable)) { - const int remain_actual = ring->size - ring->emit; - - if (bytes > remain_usable) { - /* - * Not enough space for the basic request. So need to - * flush out the remainder and then wait for - * base + reserved. - */ - total_bytes += remain_actual; - need_wrap = remain_actual | 1; - } else { - /* - * The base request will fit but the reserved space - * falls off the end. So we don't need an immediate - * wrap and only need to effectively wait for the - * reserved size from the start of ringbuffer. - */ - total_bytes = rq->reserved_space + remain_actual; - } - } - - if (unlikely(total_bytes > ring->space)) { - int ret; - - /* - * Space is reserved in the ringbuffer for finalising the - * request, as that cannot be allowed to fail. During request - * finalisation, reserved_space is set to 0 to stop the - * overallocation and the assumption is that then we never need - * to wait (which has the risk of failing with EINTR). - * - * See also i915_request_alloc() and i915_request_add(). - */ - GEM_BUG_ON(!rq->reserved_space); - - ret = wait_for_space(ring, - i915_request_timeline(rq), - total_bytes); - if (unlikely(ret)) - return ERR_PTR(ret); - } - - if (unlikely(need_wrap)) { - need_wrap &= ~1; - GEM_BUG_ON(need_wrap > ring->space); - GEM_BUG_ON(ring->emit + need_wrap > ring->size); - GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); - - /* Fill the tail with MI_NOOP */ - memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); - ring->space -= need_wrap; - ring->emit = 0; - } - - GEM_BUG_ON(ring->emit > ring->size - bytes); - GEM_BUG_ON(ring->space < bytes); - cs = ring->vaddr + ring->emit; - GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); - ring->emit += bytes; - ring->space -= bytes; - - return cs; -} - -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - static void gen6_bsd_submit_request(struct i915_request *request) { struct intel_uncore *uncore = request->engine->uncore; @@ -2111,7 +1787,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode) static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; - engine->cancel_requests = cancel_requests; engine->park = NULL; engine->unpark = NULL; @@ -2123,7 +1798,7 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = gen6_bsd_submit_request; } -static void ring_destroy(struct intel_engine_cs *engine) +static void ring_release(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -2137,8 +1812,6 @@ static void ring_destroy(struct intel_engine_cs *engine) intel_timeline_unpin(engine->legacy.timeline); intel_timeline_put(engine->legacy.timeline); - - kfree(engine); } static void setup_irq(struct intel_engine_cs *engine) @@ -2169,11 +1842,12 @@ static void setup_common(struct intel_engine_cs *engine) setup_irq(engine); - engine->destroy = ring_destroy; + engine->release = ring_release; engine->resume = xcs_resume; engine->reset.prepare = reset_prepare; - engine->reset.reset = reset_ring; + engine->reset.rewind = reset_rewind; + engine->reset.cancel = reset_cancel; engine->reset.finish = reset_finish; engine->cops = &ring_context_ops; @@ -2284,6 +1958,10 @@ static void setup_vecs(struct intel_engine_cs *engine) int intel_ring_submission_setup(struct intel_engine_cs *engine) { + struct intel_timeline *timeline; + struct intel_ring *ring; + int err; + setup_common(engine); switch (engine->class) { @@ -2304,15 +1982,6 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) return -ENODEV; } - return 0; -} - -int intel_ring_submission_init(struct intel_engine_cs *engine) -{ - struct intel_timeline *timeline; - struct intel_ring *ring; - int err; - timeline = intel_timeline_create(engine->gt, engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); @@ -2338,16 +2007,10 @@ int intel_ring_submission_init(struct intel_engine_cs *engine) engine->legacy.ring = ring; engine->legacy.timeline = timeline; - err = intel_engine_init_common(engine); - if (err) - goto err_ring_unpin; - GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); return 0; -err_ring_unpin: - intel_ring_unpin(ring); err_ring: intel_ring_put(ring); err_timeline_unpin: diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h new file mode 100644 index 000000000000..d9f17f38e0cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_TYPES_H +#define INTEL_RING_TYPES_H + +#include <linux/atomic.h> +#include <linux/kref.h> +#include <linux/types.h> + +/* + * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) + +struct i915_vma; + +struct intel_ring { + struct kref ref; + struct i915_vma *vma; + void *vaddr; + + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + +#endif /* INTEL_RING_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c new file mode 100644 index 000000000000..f232036c3c7a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -0,0 +1,1903 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" +#include "intel_rps.h" +#include "intel_sideband.h" +#include "../../../platform/x86/intel_ips.h" + +/* + * Lock protecting IPS related data structures + */ +static DEFINE_SPINLOCK(mchdev_lock); + +static struct intel_gt *rps_to_gt(struct intel_rps *rps) +{ + return container_of(rps, struct intel_gt, rps); +} + +static struct drm_i915_private *rps_to_i915(struct intel_rps *rps) +{ + return rps_to_gt(rps)->i915; +} + +static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) +{ + return rps_to_gt(rps)->uncore; +} + +static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) +{ + return mask & ~rps->pm_intrmsk_mbz; +} + +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + +static u32 rps_pm_mask(struct intel_rps *rps, u8 val) +{ + u32 mask = 0; + + /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ + if (val > rps->min_freq_softlimit) + mask |= (GEN6_PM_RP_UP_EI_EXPIRED | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + if (val < rps->max_freq_softlimit) + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; + + mask &= rps->pm_events; + + return rps_pm_sanitize_mask(rps, ~mask); +} + +static void rps_reset_ei(struct intel_rps *rps) +{ + memset(&rps->ei, 0, sizeof(rps->ei)); +} + +static void rps_enable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps_reset_ei(rps); + + if (IS_VALLEYVIEW(gt->i915)) + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + else + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_enable_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); + + set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq)); +} + +static void gen6_rps_reset_interrupts(struct intel_rps *rps) +{ + gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); +} + +static void gen11_rps_reset_interrupts(struct intel_rps *rps) +{ + while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) + ; +} + +static void rps_reset_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + spin_lock_irq(>->irq_lock); + if (INTEL_GEN(gt->i915) >= 11) + gen11_rps_reset_interrupts(rps); + else + gen6_rps_reset_interrupts(rps); + + rps->pm_iir = 0; + spin_unlock_irq(>->irq_lock); +} + +static void rps_disable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps->pm_events = 0; + + set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); + spin_unlock_irq(>->irq_lock); + + intel_synchronize_irq(gt->i915); + + /* + * Now that we will not be generating any more work, flush any + * outstanding tasks. As we are called on the RPS idle path, + * we will reset the GPU to minimum frequencies, so the current + * state of the worker can be discarded. + */ + cancel_work_sync(&rps->work); + + rps_reset_interrupts(rps); +} + +static const struct cparams { + u16 i; + u16 t; + u16 m; + u16 c; +} cparams[] = { + { 1, 1333, 301, 28664 }, + { 1, 1066, 294, 24460 }, + { 1, 800, 294, 25192 }, + { 0, 1333, 276, 27605 }, + { 0, 1066, 276, 27605 }, + { 0, 800, 231, 23784 }, +}; + +static void gen5_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fmax, fmin, fstart; + u32 rgvmodectl; + int c_m, i; + + if (i915->fsb_freq <= 3200) + c_m = 0; + else if (i915->fsb_freq <= 4800) + c_m = 1; + else + c_m = 2; + + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { + rps->ips.m = cparams[i].m; + rps->ips.c = cparams[i].c; + break; + } + } + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); + + rps->min_freq = fmax; + rps->max_freq = fmin; + + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; +} + +static unsigned long +__ips_chipset_val(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + unsigned long now = jiffies_to_msecs(jiffies), dt; + unsigned long result; + u64 total, delta; + + lockdep_assert_held(&mchdev_lock); + + /* + * Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. + */ + dt = now - ips->last_time1; + if (dt <= 10) + return ips->chipset_power; + + /* FIXME: handle per-counter overflow */ + total = intel_uncore_read(uncore, DMIEC); + total += intel_uncore_read(uncore, DDREC); + total += intel_uncore_read(uncore, CSIEC); + + delta = total - ips->last_count1; + + result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); + + ips->last_count1 = total; + ips->last_time1 = now; + + ips->chipset_power = result; + + return result; +} + +static unsigned long ips_mch_val(struct intel_uncore *uncore) +{ + unsigned int m, x, b; + u32 tsfs; + + tsfs = intel_uncore_read(uncore, TSFS); + x = intel_uncore_read8(uncore, TR1); + + b = tsfs & TSFS_INTR_MASK; + m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; + + return m * x / 127 - b; +} + +static int _pxvid_to_vd(u8 pxvid) +{ + if (pxvid == 0) + return 0; + + if (pxvid >= 8 && pxvid < 31) + pxvid = 31; + + return (pxvid + 2) * 125; +} + +static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) +{ + const int vd = _pxvid_to_vd(pxvid); + + if (INTEL_INFO(i915)->is_mobile) + return max(vd - 1125, 0); + + return vd; +} + +static void __gen5_ips_update(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + u64 now, delta, dt; + u32 count; + + lockdep_assert_held(&mchdev_lock); + + now = ktime_get_raw_ns(); + dt = now - ips->last_time2; + do_div(dt, NSEC_PER_MSEC); + + /* Don't divide by 0 */ + if (dt <= 10) + return; + + count = intel_uncore_read(uncore, GFXEC); + delta = count - ips->last_count2; + + ips->last_count2 = count; + ips->last_time2 = now; + + /* More magic constants... */ + ips->gfx_power = div_u64(delta * 1181, dt * 10); +} + +static void gen5_rps_update(struct intel_rps *rps) +{ + spin_lock_irq(&mchdev_lock); + __gen5_ips_update(&rps->ips); + spin_unlock_irq(&mchdev_lock); +} + +static bool gen5_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + lockdep_assert_held(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ + } + + /* Invert the frequency bin into an ips delay */ + val = rps->max_freq - val; + val = rps->min_freq + val; + + rgvswctl = + (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | + MEMCTL_SFCAVM; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + intel_uncore_posting_read16(uncore, MEMSWCTL); + + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + + return true; +} + +static unsigned long intel_pxfreq(u32 vidfreq) +{ + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); + + if (!pre) + return 0; + + return div * 133333 / (pre << post); +} + +static unsigned int init_emon(struct intel_uncore *uncore) +{ + u8 pxw[16]; + int i; + + /* Disable to program */ + intel_uncore_write(uncore, ECR, 0); + intel_uncore_posting_read(uncore, ECR); + + /* Program energy weights for various events */ + intel_uncore_write(uncore, SDEW, 0x15040d00); + intel_uncore_write(uncore, CSIEW0, 0x007f0000); + intel_uncore_write(uncore, CSIEW1, 0x1e220004); + intel_uncore_write(uncore, CSIEW2, 0x04000004); + + for (i = 0; i < 5; i++) + intel_uncore_write(uncore, PEW(i), 0); + for (i = 0; i < 3; i++) + intel_uncore_write(uncore, DEW(i), 0); + + /* Program P-state weights to account for frequency power adjustment */ + for (i = 0; i < 16; i++) { + u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); + unsigned int freq = intel_pxfreq(pxvidfreq); + unsigned int vid = + (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + unsigned int val; + + val = vid * vid * freq / 1000 * 255; + val /= 127 * 127 * 900; + + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; + + for (i = 0; i < 4; i++) { + intel_uncore_write(uncore, PXW(i), + pxw[i * 4 + 0] << 24 | + pxw[i * 4 + 1] << 16 | + pxw[i * 4 + 2] << 8 | + pxw[i * 4 + 3] << 0); + } + + /* Adjust magic regs to magic values (more experimental results) */ + intel_uncore_write(uncore, OGW0, 0); + intel_uncore_write(uncore, OGW1, 0); + intel_uncore_write(uncore, EG0, 0x00007f00); + intel_uncore_write(uncore, EG1, 0x0000000e); + intel_uncore_write(uncore, EG2, 0x000e0000); + intel_uncore_write(uncore, EG3, 0x68000300); + intel_uncore_write(uncore, EG4, 0x42000000); + intel_uncore_write(uncore, EG5, 0x00140031); + intel_uncore_write(uncore, EG6, 0); + intel_uncore_write(uncore, EG7, 0); + + for (i = 0; i < 8; i++) + intel_uncore_write(uncore, PXWL(i), 0); + + /* Enable PMON + select events */ + intel_uncore_write(uncore, ECR, 0x80000019); + + return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; +} + +static bool gen5_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fstart, vstart; + u32 rgvmodectl; + + spin_lock_irq(&mchdev_lock); + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Enable temp reporting */ + intel_uncore_write16(uncore, PMMISC, + intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); + intel_uncore_write16(uncore, TSC1, + intel_uncore_read16(uncore, TSC1) | TSE); + + /* 100ms RC evaluation intervals */ + intel_uncore_write(uncore, RCUPEI, 100000); + intel_uncore_write(uncore, RCDNEI, 100000); + + /* Set max/min thresholds to 90ms and 80ms respectively */ + intel_uncore_write(uncore, RCBMAXAVG, 90000); + intel_uncore_write(uncore, RCBMINAVG, 80000); + + intel_uncore_write(uncore, MEMIHYST, 1); + + /* Set up min, max, and cur for interrupt handling */ + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + + vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & + PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + + intel_uncore_write(uncore, + MEMINTREN, + MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + + intel_uncore_write(uncore, VIDSTART, vstart); + intel_uncore_posting_read(uncore, VIDSTART); + + rgvmodectl |= MEMMODE_SWMODE_EN; + intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); + + if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & + MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + gen5_rps_set(rps, rps->cur_freq); + + rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); + rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); + rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); + rps->ips.last_time1 = jiffies_to_msecs(jiffies); + + rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); + rps->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); + + rps->ips.corr = init_emon(uncore); + + return true; +} + +static void gen5_rps_disable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + spin_lock_irq(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + + /* Ack interrupts, disable EFC interrupt */ + intel_uncore_write(uncore, MEMINTREN, + intel_uncore_read(uncore, MEMINTREN) & + ~MEMINT_EVAL_CHG_EN); + intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + intel_uncore_write(uncore, DEIER, + intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); + intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); + intel_uncore_write(uncore, DEIMR, + intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); + + /* Go back to the starting frequency */ + gen5_rps_set(rps, rps->idle_freq); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write(uncore, MEMSWCTL, rgvswctl); + mdelay(1); + + spin_unlock_irq(&mchdev_lock); +} + +static u32 rps_limits(struct intel_rps *rps, u8 val) +{ + u32 limits; + + /* + * Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 9) { + limits = rps->max_freq_softlimit << 23; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 14; + } else { + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; + } + + return limits; +} + +static void rps_set_power(struct intel_rps *rps, int new_power) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 threshold_up = 0, threshold_down = 0; /* in % */ + u32 ei_up = 0, ei_down = 0; + + lockdep_assert_held(&rps->power.mutex); + + if (new_power == rps->power.mode) + return; + + /* Note the units here are not exactly 1us, but 1280ns. */ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + ei_up = 16000; + threshold_up = 95; + + /* Downclock if less than 85% busy over 32ms */ + ei_down = 32000; + threshold_down = 85; + break; + + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + ei_up = 13000; + threshold_up = 90; + + /* Downclock if less than 75% busy over 32ms */ + ei_down = 32000; + threshold_down = 75; + break; + + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + ei_up = 10000; + threshold_up = 85; + + /* Downclock if less than 60% busy over 32ms */ + ei_down = 32000; + threshold_down = 60; + break; + } + + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(i915)) + goto skip_hw_write; + + set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up)); + set(uncore, GEN6_RP_UP_THRESHOLD, + GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100)); + + set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down)); + set(uncore, GEN6_RP_DOWN_THRESHOLD, + GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100)); + + set(uncore, GEN6_RP_CONTROL, + (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + +skip_hw_write: + rps->power.mode = new_power; + rps->power.up_threshold = threshold_up; + rps->power.down_threshold = threshold_down; +} + +static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) +{ + int new_power; + + new_power = rps->power.mode; + switch (rps->power.mode) { + case LOW_POWER: + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) + new_power = BETWEEN; + break; + + case BETWEEN: + if (val <= rps->efficient_freq && + val < rps->cur_freq) + new_power = LOW_POWER; + else if (val >= rps->rp0_freq && + val > rps->cur_freq) + new_power = HIGH_POWER; + break; + + case HIGH_POWER: + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val <= rps->min_freq_softlimit) + new_power = LOW_POWER; + if (val >= rps->max_freq_softlimit) + new_power = HIGH_POWER; + + mutex_lock(&rps->power.mutex); + if (rps->power.interactive) + new_power = HIGH_POWER; + rps_set_power(rps, new_power); + mutex_unlock(&rps->power.mutex); +} + +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) +{ + mutex_lock(&rps->power.mutex); + if (interactive) { + if (!rps->power.interactive++ && rps->active) + rps_set_power(rps, HIGH_POWER); + } else { + GEM_BUG_ON(!rps->power.interactive); + rps->power.interactive--; + } + mutex_unlock(&rps->power.mutex); +} + +static int gen6_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 swreq; + + if (INTEL_GEN(i915) >= 9) + swreq = GEN9_FREQUENCY(val); + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + swreq = HSW_FREQUENCY(val); + else + swreq = (GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + set(uncore, GEN6_RPNSWREQ, swreq); + + return 0; +} + +static int vlv_rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + vlv_punit_get(i915); + err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(i915); + + return err; +} + +static int rps_set(struct intel_rps *rps, u8 val, bool update) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + if (INTEL_GEN(i915) < 6) + return 0; + + if (val == rps->last_freq) + return 0; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_rps_set(rps, val); + else + err = gen6_rps_set(rps, val); + if (err) + return err; + + if (update) + gen6_rps_set_thresholds(rps, val); + rps->last_freq = val; + + return 0; +} + +void intel_rps_unpark(struct intel_rps *rps) +{ + u8 freq; + + if (!rps->enabled) + return; + + /* + * Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. + */ + mutex_lock(&rps->lock); + rps->active = true; + freq = max(rps->cur_freq, rps->efficient_freq), + freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); + intel_rps_set(rps, freq); + rps->last_adj = 0; + mutex_unlock(&rps->lock); + + if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps_enable_interrupts(rps); + + if (IS_GEN(rps_to_i915(rps), 5)) + gen5_rps_update(rps); +} + +void intel_rps_park(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (!rps->enabled) + return; + + if (INTEL_GEN(i915) >= 6) + rps_disable_interrupts(rps); + + rps->active = false; + if (rps->last_freq <= rps->idle_freq) + return; + + /* + * The punit delays the write of the frequency and voltage until it + * determines the GPU is awake. During normal usage we don't want to + * waste power changing the frequency if the GPU is sleeping (rc6). + * However, the GPU and driver is now idle and we do not want to delay + * switching to minimum voltage (reducing power whilst idle) as we do + * not expect to be woken in the near future and so must flush the + * change by waking the device. + * + * We choose to take the media powerwell (either would do to trick the + * punit into committing the voltage change) as that takes a lot less + * power than the render powerwell. + */ + intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); + rps_set(rps, rps->idle_freq, false); + intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); +} + +void intel_rps_boost(struct i915_request *rq) +{ + struct intel_rps *rps = &rq->engine->gt->rps; + unsigned long flags; + + if (i915_request_signaled(rq) || !rps->active) + return; + + /* Serializes with i915_request_retire() */ + spin_lock_irqsave(&rq->lock, flags); + if (!i915_request_has_waitboost(rq) && + !dma_fence_is_signaled_locked(&rq->fence)) { + rq->flags |= I915_REQUEST_WAITBOOST; + + if (!atomic_fetch_inc(&rps->num_waiters) && + READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); + + atomic_inc(&rps->boosts); + } + spin_unlock_irqrestore(&rq->lock, flags); +} + +int intel_rps_set(struct intel_rps *rps, u8 val) +{ + int err; + + lockdep_assert_held(&rps->lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); + + if (rps->active) { + err = rps_set(rps, val, true); + if (err) + return err; + + /* + * Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + struct intel_uncore *uncore = rps_to_uncore(rps); + + set(uncore, + GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); + + set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); + } + } + + rps->cur_freq = val; + return 0; +} + +static void gen6_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* All of these values are in units of 50MHz */ + + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ + if (IS_GEN9_LP(i915)) { + u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; + } else { + u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; + } + + /* hw_max = RP0 until we check for overclocking */ + rps->max_freq = rps->rp0_freq; + + rps->efficient_freq = rps->rp1_freq; + if (IS_HASWELL(i915) || IS_BROADWELL(i915) || + IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(i915, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status, NULL) == 0) + rps->efficient_freq = + clamp_t(u8, + (ddcc_status >> 8) & 0xff, + rps->min_freq, + rps->max_freq); + } + + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Store the frequency values in 16.66 MHZ units, which is + * the natural hardware unit for SKL + */ + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; + } +} + +static bool rps_reset(struct intel_rps *rps) +{ + /* force a reset */ + rps->power.mode = -1; + rps->last_freq = -1; + + if (rps_set(rps, rps->min_freq, true)) { + DRM_ERROR("Failed to reset RPS to initial values\n"); + return false; + } + + rps->cur_freq = rps->min_freq; + return true; +} + +/* See the Gen9_GT_PM_Programming_Guide doc for the below */ +static bool gen9_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Program defaults and thresholds for RPS */ + if (IS_GEN(i915, 9)) + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(rps->rp1_freq)); + + /* 1 second timeout */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + GT_INTERVAL_FROM_US(i915, 1000000)); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + + return rps_reset(rps); +} + +static bool gen8_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(rps->rp1_freq)); + + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + 100000000 / 128); /* 1 second timeout */ + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static bool gen6_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Power down if completely idle for over 50ms */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static int chv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + switch (RUNTIME_INFO(i915)->sseu.eu_total) { + case 8: + /* (2 * 4) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; + break; + case 12: + /* (2 * 6) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; + break; + case 16: + /* (2 * 8) config */ + default: + /* Setting (2 * 8) Min RP0 for any other combination */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; + break; + } + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static int chv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); + val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; + + return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; +} + +static int chv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static u32 chv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); + val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static bool chv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + /* 1: Program defaults and thresholds for RPS*/ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 2: Enable RPS */ + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + /* Setting Fixed Bias */ + vlv_punit_get(i915); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static int vlv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp1; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; + rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + + return rp1; +} + +static int vlv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp0; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); + + return rp0; +} + +static int vlv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rpe; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + + return rpe; +} + +static int vlv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; + /* + * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value + * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on + * a BYT-M B0 the above register contains 0xbf. Moreover when setting + * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 + * to make sure it matches what Punit accepts. + */ + return max_t(u32, val, 0xc0); +} + +static bool vlv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + vlv_punit_get(i915); + + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static unsigned long __ips_gfx_val(struct intel_ips *ips) +{ + struct intel_rps *rps = container_of(ips, typeof(*rps), ips); + struct intel_uncore *uncore = rps_to_uncore(rps); + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; + + lockdep_assert_held(&mchdev_lock); + + pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); + + state1 = ext_v; + + /* Revel in the empirically derived constants */ + + /* Correction factor in 1/100000 units */ + t = ips_mch_val(uncore); + if (t > 80) + corr = t * 2349 + 135940; + else if (t >= 50) + corr = t * 964 + 29317; + else /* < 50 */ + corr = t * 301 + 1004; + + corr = corr * 150142 * state1 / 10000 - 78642; + corr /= 100000; + corr2 = corr * ips->corr; + + state2 = corr2 * state1 / 10000; + state2 /= 100; /* convert to mW */ + + __gen5_ips_update(ips); + + return ips->gfx_power + state2; +} + +void intel_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (IS_CHERRYVIEW(i915)) + rps->enabled = chv_rps_enable(rps); + else if (IS_VALLEYVIEW(i915)) + rps->enabled = vlv_rps_enable(rps); + else if (INTEL_GEN(i915) >= 9) + rps->enabled = gen9_rps_enable(rps); + else if (INTEL_GEN(i915) >= 8) + rps->enabled = gen8_rps_enable(rps); + else if (INTEL_GEN(i915) >= 6) + rps->enabled = gen6_rps_enable(rps); + else if (IS_IRONLAKE_M(i915)) + rps->enabled = gen5_rps_enable(rps); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + if (!rps->enabled) + return; + + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); + + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); +} + +static void gen6_rps_disable(struct intel_rps *rps) +{ + set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); +} + +void intel_rps_disable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->enabled = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_disable(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_disable(rps); +} + +static int byt_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val - 0xb7 + * Slow = Fast = GPLL ref * N + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); +} + +static int byt_freq_opcode(struct intel_rps *rps, int val) +{ + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; +} + +static int chv_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val / 2 + * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); +} + +static int chv_freq_opcode(struct intel_rps *rps, int val) +{ + /* CHV needs even values */ + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; +} + +int intel_gpu_freq(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); + else if (IS_CHERRYVIEW(i915)) + return chv_gpu_freq(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_gpu_freq(rps, val); + else + return val * GT_FREQUENCY_MULTIPLIER; +} + +int intel_freq_opcode(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); + else if (IS_CHERRYVIEW(i915)) + return chv_freq_opcode(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_freq_opcode(rps, val); + else + return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); +} + +static void vlv_init_gpll_ref_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->gpll_ref_freq = + vlv_get_cck_clock(i915, "GPLL ref", + CCK_GPLL_CLOCK_CONTROL, + i915->czclk_freq); + + DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); +} + +static void vlv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + i915->mem_freq = 800; + break; + case 2: + i915->mem_freq = 1066; + break; + case 3: + i915->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = vlv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = vlv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = vlv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = vlv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); +} + +static void chv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_cck_read(i915, CCK_FUSE_REG); + + switch ((val >> 2) & 0x7) { + case 3: + i915->mem_freq = 2000; + break; + default: + i915->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = chv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = chv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = chv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = chv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, + "Odd GPU freq values\n"); +} + +static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) +{ + ei->ktime = ktime_get_raw(); + ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); + ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); +} + +static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + const struct intel_rps_ei *prev = &rps->ei; + struct intel_rps_ei now; + u32 events = 0; + + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) + return 0; + + vlv_c0_read(uncore, &now); + + if (prev->ktime) { + u64 time, c0; + u32 render, media; + + time = ktime_us_delta(now.ktime, prev->ktime); + + time *= rps_to_i915(rps)->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ + + if (c0 > time * rps->power.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * rps->power.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; + } + + rps->ei = now; + return events; +} + +static void rps_work(struct work_struct *work) +{ + struct intel_rps *rps = container_of(work, typeof(*rps), work); + struct intel_gt *gt = rps_to_gt(rps); + bool client_boost = false; + int new_freq, adj, min, max; + u32 pm_iir = 0; + + spin_lock_irq(>->irq_lock); + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); + spin_unlock_irq(>->irq_lock); + + /* Make sure we didn't queue anything we're not going to process. */ + if ((pm_iir & rps->pm_events) == 0 && !client_boost) + goto out; + + mutex_lock(&rps->lock); + + pm_iir |= vlv_wa_c0_ei(rps, pm_iir); + + adj = rps->last_adj; + new_freq = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (client_boost) + max = rps->max_freq; + if (client_boost && new_freq < rps->boost_freq) { + new_freq = rps->boost_freq; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + if (adj > 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; + + if (new_freq >= rps->max_freq_softlimit) + adj = 0; + } else if (client_boost) { + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { + if (rps->cur_freq > rps->efficient_freq) + new_freq = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_freq = rps->min_freq_softlimit; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; + + if (new_freq <= rps->min_freq_softlimit) + adj = 0; + } else { /* unknown event */ + adj = 0; + } + + rps->last_adj = adj; + + /* + * Limit deboosting and boosting to keep ourselves at the extremes + * when in the respective power modes (i.e. slowly decrease frequencies + * while in the HIGH_POWER zone and slowly increase frequencies while + * in the LOW_POWER zone). On idle, we will hit the timeout and drop + * to the next level quickly, and conversely if busy we expect to + * hit a waitboost and rapidly switch into max power. + */ + if ((adj < 0 && rps->power.mode == HIGH_POWER) || + (adj > 0 && rps->power.mode == LOW_POWER)) + rps->last_adj = 0; + + /* sysfs frequency interfaces may have snuck in while servicing the + * interrupt + */ + new_freq += adj; + new_freq = clamp_t(int, new_freq, min, max); + + if (intel_rps_set(rps, new_freq)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + rps->last_adj = 0; + } + + mutex_unlock(&rps->lock); + +out: + spin_lock_irq(>->irq_lock); + gen6_gt_pm_unmask_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); +} + +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + const u32 events = rps->pm_events & pm_iir; + + lockdep_assert_held(>->irq_lock); + + if (unlikely(!events)) + return; + + gen6_gt_pm_mask_irq(gt, events); + + rps->pm_iir |= events; + schedule_work(&rps->work); +} + +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + + if (pm_iir & rps->pm_events) { + spin_lock(>->irq_lock); + gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); + rps->pm_iir |= pm_iir & rps->pm_events; + schedule_work(&rps->work); + spin_unlock(>->irq_lock); + } + + if (INTEL_GEN(gt->i915) >= 8) + return; + + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(gt->engine[VECS0]); + + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); +} + +void gen5_rps_irq_handler(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 busy_up, busy_down, max_avg, min_avg; + u8 new_freq; + + spin_lock(&mchdev_lock); + + intel_uncore_write16(uncore, + MEMINTRSTS, + intel_uncore_read(uncore, MEMINTRSTS)); + + intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); + busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); + max_avg = intel_uncore_read(uncore, RCBMAXAVG); + min_avg = intel_uncore_read(uncore, RCBMINAVG); + + /* Handle RCS change request from hw */ + new_freq = rps->cur_freq; + if (busy_up > max_avg) + new_freq++; + else if (busy_down < min_avg) + new_freq--; + new_freq = clamp(new_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) + rps->cur_freq = new_freq; + + spin_unlock(&mchdev_lock); +} + +void intel_rps_init_early(struct intel_rps *rps) +{ + mutex_init(&rps->lock); + mutex_init(&rps->power.mutex); + + INIT_WORK(&rps->work, rps_work); + + atomic_set(&rps->num_waiters, 0); +} + +void intel_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (IS_CHERRYVIEW(i915)) + chv_rps_init(rps); + else if (IS_VALLEYVIEW(i915)) + vlv_rps_init(rps); + else if (INTEL_GEN(i915) >= 6) + gen6_rps_init(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_init(rps); + + /* Derive initial user preferences/limits from the hardware limits */ + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { + u32 params = 0; + + sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, + ¶ms, NULL); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); + rps->max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + rps->boost_freq = rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; + + rps->pm_intrmsk_mbz = 0; + + /* + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_GEN(i915) <= 7) + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11) + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} + +u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 cagf; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + cagf = (rpstat >> 8) & 0xff; + else if (INTEL_GEN(i915) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; +} + +static u32 read_cagf(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 freq; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + vlv_punit_get(i915); + freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + } else { + freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1); + } + + return intel_rps_get_cagf(rps, freq); +} + +u32 intel_rps_read_actual_frequency(struct intel_rps *rps) +{ + struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; + + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_gpu_freq(rps, read_cagf(rps)); + + return freq; +} + +/* External interface for intel_ips.ko */ + +static struct drm_i915_private __rcu *ips_mchdev; + +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); + + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } +} + +void intel_rps_driver_register(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + /* + * We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. + */ + if (IS_GEN(gt->i915, 5)) { + GEM_BUG_ON(ips_mchdev); + rcu_assign_pointer(ips_mchdev, gt->i915); + ips_ping_for_i915_load(); + } +} + +void intel_rps_driver_unregister(struct intel_rps *rps) +{ + if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) + rcu_assign_pointer(ips_mchdev, NULL); +} + +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = rcu_dereference(ips_mchdev); + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; +} + +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. + */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; + + i915 = mchdev_get(); + if (!i915) + return 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + struct intel_ips *ips = &i915->gt.rps.ips; + + spin_lock_irq(&mchdev_lock); + chipset_val = __ips_chipset_val(ips); + graphics_val = __ips_gfx_val(ips); + spin_unlock_irq(&mchdev_lock); + } + + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); + +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. + */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit < rps->max_freq) + rps->max_freq_softlimit++; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_raise); + +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. + */ +bool i915_gpu_lower(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit > rps->min_freq) + rps->max_freq_softlimit--; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); + +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_busy); + +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. + */ +bool i915_gpu_turbo_disable(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + rps->max_freq_softlimit = rps->min_freq; + ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h new file mode 100644 index 000000000000..dfa98194f3b2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_H +#define INTEL_RPS_H + +#include "intel_rps_types.h" + +struct i915_request; + +void intel_rps_init_early(struct intel_rps *rps); +void intel_rps_init(struct intel_rps *rps); + +void intel_rps_driver_register(struct intel_rps *rps); +void intel_rps_driver_unregister(struct intel_rps *rps); + +void intel_rps_enable(struct intel_rps *rps); +void intel_rps_disable(struct intel_rps *rps); + +void intel_rps_park(struct intel_rps *rps); +void intel_rps_unpark(struct intel_rps *rps); +void intel_rps_boost(struct i915_request *rq); + +int intel_rps_set(struct intel_rps *rps, u8 val); +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); + +int intel_gpu_freq(struct intel_rps *rps, int val); +int intel_freq_opcode(struct intel_rps *rps, int val); +u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); +u32 intel_rps_read_actual_frequency(struct intel_rps *rps); + +void gen5_rps_irq_handler(struct intel_rps *rps); +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); + +#endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h new file mode 100644 index 000000000000..c2e279154bd5 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_TYPES_H +#define INTEL_RPS_TYPES_H + +#include <linux/atomic.h> +#include <linux/ktime.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct intel_ips { + u64 last_count1; + unsigned long last_time1; + unsigned long chipset_power; + u64 last_count2; + u64 last_time2; + unsigned long gfx_power; + u8 corr; + + int c, m; +}; + +struct intel_rps_ei { + ktime_t ktime; + u32 render_c0; + u32 media_c0; +}; + +struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + + /* + * work, interrupts_enabled and pm_iir are protected by + * dev_priv->irq_lock + */ + struct work_struct work; + bool enabled; + bool active; + u32 pm_iir; + + /* PM interrupt bits that should never be masked */ + u32 pm_intrmsk_mbz; + u32 pm_events; + + /* Frequencies are stored in potentially platform dependent multiples. + * In other words, *_freq needs to be multiplied by X to be interesting. + * Soft limits are those which are used for the dynamic reclocking done + * by the driver (raise frequencies under heavy loads, and lower for + * lighter loads). Hard limits are those imposed by the hardware. + * + * A distinction is made for overclocking, which is never enabled by + * default, and is considered to be above the hard limit if it's + * possible at all. + */ + u8 cur_freq; /* Current frequency (cached, may not == HW) */ + u8 last_freq; /* Last SWREQ frequency */ + u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ + u8 max_freq_softlimit; /* Max frequency permitted by the driver */ + u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ + u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ + u8 idle_freq; /* Frequency to request when we are idle */ + u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ + u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp0_freq; /* Non-overclocked max frequency. */ + u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ + + int last_adj; + + struct { + struct mutex mutex; + + enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; + unsigned int interactive; + + u8 up_threshold; /* Current %busy required to uplock */ + u8 down_threshold; /* Current %busy required to downclock */ + } power; + + atomic_t num_waiters; + atomic_t boosts; + + /* manual wa residency calculations */ + struct intel_rps_ei ei; + struct intel_ips ips; +}; + +#endif /* INTEL_RPS_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 0f959694303c..ee5dc4fbdeb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -4,17 +4,20 @@ * Copyright © 2016-2018 Intel Corporation */ -#include "gt/intel_gt_types.h" - #include "i915_drv.h" #include "i915_active.h" #include "i915_syncmap.h" -#include "gt/intel_timeline.h" +#include "intel_gt.h" +#include "intel_ring.h" +#include "intel_timeline.h" #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS + struct intel_timeline_hwsp { struct intel_gt *gt; struct intel_gt_timelines *gt_timelines; @@ -23,14 +26,6 @@ struct intel_timeline_hwsp { u64 free_bitmap; }; -struct intel_timeline_cacheline { - struct i915_active active; - struct intel_timeline_hwsp *hwsp; - void *vaddr; -#define CACHELINE_BITS 6 -#define CACHELINE_FREE CACHELINE_BITS -}; - static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); i915_active_fini(&cl->active); - kfree(cl); + kfree_rcu(cl, rcu); } __i915_active_call @@ -254,7 +249,7 @@ int intel_timeline_init(struct intel_timeline *timeline, mutex_init(&timeline->mutex); - INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -262,7 +257,7 @@ int intel_timeline_init(struct intel_timeline *timeline, return 0; } -static void timelines_init(struct intel_gt *gt) +void intel_gt_init_timelines(struct intel_gt *gt) { struct intel_gt_timelines *timelines = >->timelines; @@ -273,15 +268,11 @@ static void timelines_init(struct intel_gt *gt) INIT_LIST_HEAD(&timelines->hwsp_free_list); } -void intel_timelines_init(struct drm_i915_private *i915) -{ - timelines_init(&i915->gt); -} - void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(timeline->retire); if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); @@ -337,34 +328,52 @@ int intel_timeline_pin(struct intel_timeline *tl) void intel_timeline_enter(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; - unsigned long flags; + /* + * Pretend we are serialised by the timeline->mutex. + * + * While generally true, there are a few exceptions to the rule + * for the engine->kernel_context being used to manage power + * transitions. As the engine_park may be called from under any + * timeline, it uses the power mutex as a global serialisation + * lock to prevent any other request entering its timeline. + * + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. + * + * However, intel_gt_retire_request() does not know which engine + * it is retiring along and so cannot partake in the engine-pm + * barrier, and there we use the tl->active_count as a means to + * pin the timeline in the active_list while the locks are dropped. + * Ergo, as that is outside of the engine-pm barrier, we need to + * use atomic to manipulate tl->active_count. + */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); - if (tl->active_count++) + + if (atomic_add_unless(&tl->active_count, 1, 0)) return; - GEM_BUG_ON(!tl->active_count); /* overflow? */ - spin_lock_irqsave(&timelines->lock, flags); - list_add(&tl->link, &timelines->active_list); - spin_unlock_irqrestore(&timelines->lock, flags); + spin_lock(&timelines->lock); + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + spin_unlock(&timelines->lock); } void intel_timeline_exit(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; - unsigned long flags; + /* See intel_timeline_enter() */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!tl->active_count); - if (--tl->active_count) + GEM_BUG_ON(!atomic_read(&tl->active_count)); + if (atomic_add_unless(&tl->active_count, -1, 1)) return; - spin_lock_irqsave(&timelines->lock, flags); - list_del(&tl->link); - spin_unlock_irqrestore(&timelines->lock, flags); + spin_lock(&timelines->lock); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); + spin_unlock(&timelines->lock); /* * Since this timeline is idle, all bariers upon which we were waiting @@ -500,46 +509,35 @@ int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *to, u32 *hwsp) { - struct intel_timeline *tl; + struct intel_timeline_cacheline *cl; int err; + GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline)); + rcu_read_lock(); - tl = rcu_dereference(from->timeline); - if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref)) - tl = NULL; + cl = rcu_dereference(from->hwsp_cacheline); + if (unlikely(!i915_active_acquire_if_busy(&cl->active))) + goto unlock; /* seqno wrapped and completed! */ + if (unlikely(i915_request_completed(from))) + goto release; rcu_read_unlock(); - if (!tl) /* already completed */ - return 1; - GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl); - - err = -EBUSY; - if (mutex_trylock(&tl->mutex)) { - struct intel_timeline_cacheline *cl = from->hwsp_cacheline; - - if (i915_request_completed(from)) { - err = 1; - goto unlock; - } + err = cacheline_ref(cl, to); + if (err) + goto out; - err = cacheline_ref(cl, to); - if (err) - goto unlock; + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; - if (likely(cl == tl->hwsp_cacheline)) { - *hwsp = tl->hwsp_offset; - } else { /* across a seqno wrap, recover the original offset */ - *hwsp = i915_ggtt_offset(cl->hwsp->vma) + - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * - CACHELINE_BYTES; - } +out: + i915_active_release(&cl->active); + return err; +release: + i915_active_release(&cl->active); unlock: - mutex_unlock(&tl->mutex); - } - intel_timeline_put(tl); - - return err; + rcu_read_unlock(); + return 1; } void intel_timeline_unpin(struct intel_timeline *tl) @@ -562,7 +560,7 @@ void __intel_timeline_free(struct kref *kref) kfree_rcu(timeline, rcu); } -static void timelines_fini(struct intel_gt *gt) +void intel_gt_fini_timelines(struct intel_gt *gt) { struct intel_gt_timelines *timelines = >->timelines; @@ -570,11 +568,6 @@ static void timelines_fini(struct intel_gt *gt) GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); } -void intel_timelines_fini(struct drm_i915_private *i915) -{ - timelines_fini(&i915->gt); -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "gt/selftests/mock_timeline.c" #include "gt/selftest_timeline.c" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index f583af1ba18d..f5b7eade3809 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -88,7 +88,7 @@ int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *until, u32 *hwsp_offset); -void intel_timelines_init(struct drm_i915_private *i915); -void intel_timelines_fini(struct drm_i915_private *i915); +void intel_gt_init_timelines(struct intel_gt *gt); +void intel_gt_fini_timelines(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 98d9ee166379..02181c5020db 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -10,14 +10,15 @@ #include <linux/list.h> #include <linux/kref.h> #include <linux/mutex.h> +#include <linux/rcupdate.h> #include <linux/types.h> #include "i915_active_types.h" -struct drm_i915_private; struct i915_vma; -struct intel_timeline_cacheline; struct i915_syncmap; +struct intel_gt; +struct intel_timeline_hwsp; struct intel_timeline { u64 fence_context; @@ -42,7 +43,7 @@ struct intel_timeline { * from the intel_context caller plus internal atomicity. */ atomic_t pin_count; - unsigned int active_count; + atomic_t active_count; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; @@ -66,6 +67,9 @@ struct intel_timeline { */ struct i915_active_fence last_request; + /** A chain of completed timelines ready for early retirement. */ + struct intel_timeline *retire; + /** * We track the most recent seqno that we wait on in every context so * that we only have to emit a new await and dependency on a more @@ -84,4 +88,13 @@ struct intel_timeline { struct rcu_head rcu; }; +struct intel_timeline_cacheline { + struct i915_active active; + + struct intel_timeline_hwsp *hwsp; + void *vaddr; + + struct rcu_head rcu; +}; + #endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index af8a8183154a..195ccf7db272 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -6,7 +6,9 @@ #include "i915_drv.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_ring.h" #include "intel_workarounds.h" /** @@ -145,21 +147,27 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } } -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) +static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val, u32 read_mask) { struct i915_wa wa = { .reg = reg, .mask = mask, .val = val, - .read = mask, + .read = read_mask, }; _wa_add(wal, &wa); } static void +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) +{ + wa_add(wal, reg, mask, val, mask); +} + +static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); @@ -567,9 +575,24 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + u32 val; + /* Wa_1409142259:tgl */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + + /* Wa_1604555607:tgl */ + val = intel_uncore_read(engine->uncore, FF_MODE2); + val &= ~FF_MODE2_TDS_TIMER_MASK; + val |= FF_MODE2_TDS_TIMER_128; + /* + * FIXME: FF_MODE2 register is not readable till TGL B0. We can + * enable verification of WA from the later steppings, which enables + * the read of FF_MODE2. + */ + wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val, + IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 : + FF_MODE2_TDS_TIMER_MASK); } static void @@ -1215,6 +1238,26 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) static void tgl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + + switch (engine->class) { + case RENDER_CLASS: + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl + * + * This covers 4 registers which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); + break; + default: + break; + } } void intel_engine_init_whitelist(struct intel_engine_cs *engine) @@ -1294,6 +1337,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_RC_SLEEP_PSMI_CONTROL, GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | GEN8_RC_SEMA_IDLE_MSG_DISABLE); + + /* + * Wa_1606679103:tgl + * (see also Wa_1606682166:icl) + */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); } if (IS_GEN(i915, 11)) { @@ -1553,7 +1604,9 @@ static int engine_wa_list_verify(struct intel_context *ce, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_engine_pm_get(ce->engine); rq = intel_context_create_request(ce); + intel_engine_pm_put(ce->engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_vma; @@ -1563,16 +1616,17 @@ static int engine_wa_list_verify(struct intel_context *ce, if (err) goto err_vma; + i915_request_get(rq); i915_request_add(rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { err = -ETIME; - goto err_vma; + goto err_rq; } results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(results)) { err = PTR_ERR(results); - goto err_vma; + goto err_rq; } err = 0; @@ -1586,6 +1640,8 @@ static int engine_wa_list_verify(struct intel_context *ce, i915_gem_object_unpin_map(vma->obj); +err_rq: + i915_request_put(rq); err_vma: i915_vma_unpin(vma); i915_vma_put(vma); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 123db2c3f956..4e1eafa94be9 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -23,6 +23,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "intel_context.h" @@ -76,7 +77,7 @@ static void advance(struct i915_request *request) i915_request_mark_complete(request); GEM_BUG_ON(!i915_request_completed(request)); - intel_engine_queue_breadcrumbs(request->engine); + intel_engine_signal_breadcrumbs(request->engine); } static void hw_delay_complete(struct timer_list *t) @@ -206,16 +207,12 @@ static void mock_reset_prepare(struct intel_engine_cs *engine) { } -static void mock_reset(struct intel_engine_cs *engine, bool stalled) +static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled) { GEM_BUG_ON(stalled); } -static void mock_reset_finish(struct intel_engine_cs *engine) -{ -} - -static void mock_cancel_requests(struct intel_engine_cs *engine) +static void mock_reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; unsigned long flags; @@ -233,6 +230,24 @@ static void mock_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->active.lock, flags); } +static void mock_reset_finish(struct intel_engine_cs *engine) +{ +} + +static void mock_engine_release(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); + + intel_engine_fini_retire(engine); + intel_engine_fini_breadcrumbs(engine); +} + struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, const char *name, int id) @@ -264,9 +279,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.submit_request = mock_submit_request; engine->base.reset.prepare = mock_reset_prepare; - engine->base.reset.reset = mock_reset; + engine->base.reset.rewind = mock_reset_rewind; + engine->base.reset.cancel = mock_reset_cancel; engine->base.reset.finish = mock_reset_finish; - engine->base.cancel_requests = mock_cancel_requests; + + engine->base.release = mock_engine_release; i915->gt.engine[id] = &engine->base; i915->gt.engine_class[0][id] = &engine->base; @@ -289,6 +306,7 @@ int mock_engine_init(struct intel_engine_cs *engine) intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); ce = create_kernel_context(engine); @@ -320,18 +338,3 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { } - -void mock_engine_free(struct intel_engine_cs *engine) -{ - struct mock_engine *mock = - container_of(engine, typeof(*mock), base); - - GEM_BUG_ON(timer_pending(&mock->hw_delay)); - - intel_context_unpin(engine->kernel_context); - intel_context_put(engine->kernel_context); - - intel_engine_fini_breadcrumbs(engine); - - kfree(engine); -} diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index f63a26a3e620..e874dfaa5316 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -5,6 +5,7 @@ */ #include "i915_selftest.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_gt.h" @@ -47,35 +48,36 @@ static int context_sync(struct intel_context *ce) mutex_lock(&tl->mutex); do { - struct dma_fence *fence; + struct i915_request *rq; long timeout; - fence = i915_active_fence_get(&tl->last_request); - if (!fence) + if (list_empty(&tl->requests)) break; - timeout = dma_fence_wait_timeout(fence, false, HZ / 10); + rq = list_last_entry(&tl->requests, typeof(*rq), link); + i915_request_get(rq); + + timeout = i915_request_wait(rq, 0, HZ / 10); if (timeout < 0) err = timeout; else - i915_request_retire_upto(to_request(fence)); + i915_request_retire_upto(rq); - dma_fence_put(fence); + i915_request_put(rq); } while (!err); mutex_unlock(&tl->mutex); return err; } -static int __live_context_size(struct intel_engine_cs *engine, - struct i915_gem_context *fixme) +static int __live_context_size(struct intel_engine_cs *engine) { struct intel_context *ce; struct i915_request *rq; void *vaddr; int err; - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); @@ -103,9 +105,6 @@ static int __live_context_size(struct intel_engine_cs *engine, * * TLDR; this overlaps with the execlists redzone. */ - if (HAS_EXECLISTS(engine->i915)) - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; - vaddr += engine->context_size - I915_GTT_PAGE_SIZE; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); @@ -121,7 +120,7 @@ static int __live_context_size(struct intel_engine_cs *engine, goto err_unpin; /* Force the context switch */ - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -146,7 +145,6 @@ static int live_context_size(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; enum intel_engine_id id; int err = 0; @@ -155,10 +153,6 @@ static int live_context_size(void *arg) * HW tries to write past the end of one. */ - fixme = kernel_context(gt->i915); - if (IS_ERR(fixme)) - return PTR_ERR(fixme); - for_each_engine(engine, gt, id) { struct { struct drm_i915_gem_object *state; @@ -183,7 +177,7 @@ static int live_context_size(void *arg) /* Overlaps with the execlists redzone */ engine->context_size += I915_GTT_PAGE_SIZE; - err = __live_context_size(engine, fixme); + err = __live_context_size(engine); engine->context_size -= I915_GTT_PAGE_SIZE; @@ -196,13 +190,12 @@ static int live_context_size(void *arg) break; } - kernel_context_close(fixme); return err; } -static int __live_active_context(struct intel_engine_cs *engine, - struct i915_gem_context *fixme) +static int __live_active_context(struct intel_engine_cs *engine) { + unsigned long saved_heartbeat; struct intel_context *ce; int pass; int err; @@ -226,40 +219,55 @@ static int __live_active_context(struct intel_engine_cs *engine, return -EINVAL; } - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); + saved_heartbeat = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + for (pass = 0; pass <= 2; pass++) { struct i915_request *rq; + intel_engine_pm_get(engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err; + goto out_engine; } err = request_sync(rq); if (err) - goto err; + goto out_engine; /* Context will be kept active until after an idle-barrier. */ if (i915_active_is_idle(&ce->active)) { pr_err("context is not active; expected idle-barrier (%s pass %d)\n", engine->name, pass); err = -EINVAL; - goto err; + goto out_engine; } if (!intel_engine_pm_is_awake(engine)) { pr_err("%s is asleep before idle-barrier\n", engine->name); err = -EINVAL; - goto err; + goto out_engine; } + +out_engine: + intel_engine_pm_put(engine); + if (err) + goto err; } /* Now make sure our idle-barriers are flushed */ + err = intel_engine_flush_barriers(engine); + if (err) + goto err; + + /* Wait for the barrier and in the process wait for engine to park */ err = context_sync(engine->kernel_context); if (err) goto err; @@ -269,12 +277,15 @@ static int __live_active_context(struct intel_engine_cs *engine, err = -EINVAL; } + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { struct drm_printer p = drm_debug_printer(__func__); intel_engine_dump(engine, &p, - "%s is still awake after idle-barriers\n", - engine->name); + "%s is still awake:%d after idle-barriers\n", + engine->name, + atomic_read(&engine->wakeref.count)); GEM_TRACE_DUMP(); err = -EINVAL; @@ -282,6 +293,7 @@ static int __live_active_context(struct intel_engine_cs *engine, } err: + engine->props.heartbeat_interval_ms = saved_heartbeat; intel_context_put(ce); return err; } @@ -290,23 +302,11 @@ static int live_active_context(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; enum intel_engine_id id; - struct drm_file *file; int err = 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - fixme = live_context(gt->i915, file); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); - goto out_file; - } - for_each_engine(engine, gt, id) { - err = __live_active_context(engine, fixme); + err = __live_active_context(engine); if (err) break; @@ -315,8 +315,6 @@ static int live_active_context(void *arg) break; } -out_file: - mock_file_free(gt->i915, file); return err; } @@ -348,10 +346,10 @@ unpin: return err; } -static int __live_remote_context(struct intel_engine_cs *engine, - struct i915_gem_context *fixme) +static int __live_remote_context(struct intel_engine_cs *engine) { struct intel_context *local, *remote; + unsigned long saved_heartbeat; int pass; int err; @@ -363,16 +361,26 @@ static int __live_remote_context(struct intel_engine_cs *engine, * clobber the idle-barrier. */ - remote = intel_context_create(fixme, engine); + if (intel_engine_pm_is_awake(engine)) { + pr_err("%s is awake before starting %s!\n", + engine->name, __func__); + return -EINVAL; + } + + remote = intel_context_create(engine); if (IS_ERR(remote)) return PTR_ERR(remote); - local = intel_context_create(fixme, engine); + local = intel_context_create(engine); if (IS_ERR(local)) { err = PTR_ERR(local); goto err_remote; } + saved_heartbeat = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + intel_engine_pm_get(engine); + for (pass = 0; pass <= 2; pass++) { err = __remote_sync(local, remote); if (err) @@ -390,6 +398,9 @@ static int __live_remote_context(struct intel_engine_cs *engine, } } + intel_engine_pm_put(engine); + engine->props.heartbeat_interval_ms = saved_heartbeat; + intel_context_put(local); err_remote: intel_context_put(remote); @@ -400,23 +411,11 @@ static int live_remote_context(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; enum intel_engine_id id; - struct drm_file *file; int err = 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - fixme = live_context(gt->i915, file); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); - goto out_file; - } - for_each_engine(engine, gt, id) { - err = __live_remote_context(engine, fixme); + err = __live_remote_context(engine); if (err) break; @@ -425,8 +424,6 @@ static int live_remote_context(void *arg) break; } -out_file: - mock_file_free(gt->i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 3880f07c29b8..f88e445a1cae 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -4,7 +4,365 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_selftest.h" +#include <linux/sort.h> + +#include "intel_gt_pm.h" +#include "intel_rps.h" + +#include "i915_selftest.h" +#include "selftests/igt_flush_test.h" + +#define COUNT 5 + +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + return *a - *b; +} + +static void perf_begin(struct intel_gt *gt) +{ + intel_gt_pm_get(gt); + + /* Boost gpufreq to max [waitboost] and keep it fixed */ + atomic_inc(>->rps.num_waiters); + schedule_work(>->rps.work); + flush_work(>->rps.work); +} + +static int perf_end(struct intel_gt *gt) +{ + atomic_dec(>->rps.num_waiters); + intel_gt_pm_put(gt); + + return igt_flush_test(gt->i915); +} + +static int write_timestamp(struct i915_request *rq, int slot) +{ + u32 cmd; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; + if (INTEL_GEN(rq->i915) >= 8) + cmd++; + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32); + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_vma *create_empty_batch(struct intel_context *ce) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_put; + } + + cs[0] = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_unpin; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_unpin; + + i915_gem_object_unpin_map(obj); + return vma; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static u32 trifilter(u32 *a) +{ + u64 sum; + + sort(a, COUNT, sizeof(*a), cmp_u32, NULL); + + sum = mul_u32_u32(a[2], 2); + sum += a[1]; + sum += a[3]; + + return sum >> 2; +} + +static int perf_mi_bb_start(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + return 0; + + perf_begin(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce = engine->kernel_context; + struct i915_vma *batch; + u32 cycles[COUNT]; + int i; + + intel_engine_pm_get(engine); + + batch = create_empty_batch(ce); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(batch); + if (err) { + intel_engine_pm_put(engine); + i915_vma_put(batch); + break; + } + + for (i = 0; i < ARRAY_SIZE(cycles); i++) { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = write_timestamp(rq, 2); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, 8, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 3); + if (err) + goto out; + +out: + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + if (err) + break; + + cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; + } + i915_vma_put(batch); + intel_engine_pm_put(engine); + if (err) + break; + + pr_info("%s: MI_BB_START cycles: %u\n", + engine->name, trifilter(cycles)); + } + if (perf_end(gt)) + err = -EIO; + + return err; +} + +static struct i915_vma *create_nop_batch(struct intel_context *ce) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_put; + } + + memset(cs, 0, SZ_64K); + cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_unpin; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_unpin; + + i915_gem_object_unpin_map(obj); + return vma; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int perf_mi_noop(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + return 0; + + perf_begin(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce = engine->kernel_context; + struct i915_vma *base, *nop; + u32 cycles[COUNT]; + int i; + + intel_engine_pm_get(engine); + + base = create_empty_batch(ce); + if (IS_ERR(base)) { + err = PTR_ERR(base); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(base); + if (err) { + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + nop = create_nop_batch(ce); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(nop); + if (err) { + i915_vma_put(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + for (i = 0; i < ARRAY_SIZE(cycles); i++) { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = write_timestamp(rq, 2); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + base->node.start, 8, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 3); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + nop->node.start, + nop->node.size, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 4); + if (err) + goto out; + +out: + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + if (err) + break; + + cycles[i] = + (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - + (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); + } + i915_vma_put(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + if (err) + break; + + pr_info("%s: 16K MI_NOOP cycles: %u\n", + engine->name, trifilter(cycles)); + } + if (perf_end(gt)) + err = -EIO; + + return err; +} + +int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_mi_bb_start), + SUBTEST(perf_mi_noop), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} static int intel_mmio_bases_check(void *arg) { diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c new file mode 100644 index 000000000000..43d4d589749f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -0,0 +1,374 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include <linux/sort.h> + +#include "i915_drv.h" + +#include "intel_gt_requests.h" +#include "i915_selftest.h" + +static int timeline_sync(struct intel_timeline *tl) +{ + struct dma_fence *fence; + long timeout; + + fence = i915_active_fence_get(&tl->last_request); + if (!fence) + return 0; + + timeout = dma_fence_wait_timeout(fence, true, HZ / 2); + dma_fence_put(fence); + if (timeout < 0) + return timeout; + + return 0; +} + +static int engine_sync_barrier(struct intel_engine_cs *engine) +{ + return timeline_sync(engine->kernel_context->timeline); +} + +struct pulse { + struct i915_active active; + struct kref kref; +}; + +static int pulse_active(struct i915_active *active) +{ + kref_get(&container_of(active, struct pulse, active)->kref); + return 0; +} + +static void pulse_free(struct kref *kref) +{ + kfree(container_of(kref, struct pulse, kref)); +} + +static void pulse_put(struct pulse *p) +{ + kref_put(&p->kref, pulse_free); +} + +static void pulse_retire(struct i915_active *active) +{ + pulse_put(container_of(active, struct pulse, active)); +} + +static struct pulse *pulse_create(void) +{ + struct pulse *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return p; + + kref_init(&p->kref); + i915_active_init(&p->active, pulse_active, pulse_retire); + + return p; +} + +static void pulse_unlock_wait(struct pulse *p) +{ + i915_active_unlock_wait(&p->active); +} + +static int __live_idle_pulse(struct intel_engine_cs *engine, + int (*fn)(struct intel_engine_cs *cs)) +{ + struct pulse *p; + int err; + + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + + p = pulse_create(); + if (!p) + return -ENOMEM; + + err = i915_active_acquire(&p->active); + if (err) + goto out; + + err = i915_active_acquire_preallocate_barrier(&p->active, engine); + if (err) { + i915_active_release(&p->active); + goto out; + } + + i915_active_acquire_barrier(&p->active); + i915_active_release(&p->active); + + GEM_BUG_ON(i915_active_is_idle(&p->active)); + GEM_BUG_ON(llist_empty(&engine->barrier_tasks)); + + err = fn(engine); + if (err) + goto out; + + GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); + + if (engine_sync_barrier(engine)) { + struct drm_printer m = drm_err_printer("pulse"); + + pr_err("%s: no heartbeat pulse?\n", engine->name); + intel_engine_dump(engine, &m, "%s", engine->name); + + err = -ETIME; + goto out; + } + + GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial); + + pulse_unlock_wait(p); /* synchronize with the retirement callback */ + + if (!i915_active_is_idle(&p->active)) { + struct drm_printer m = drm_err_printer("pulse"); + + pr_err("%s: heartbeat pulse did not flush idle tasks\n", + engine->name); + i915_active_print(&p->active, &m); + + err = -EINVAL; + goto out; + } + +out: + pulse_put(p); + return err; +} + +static int live_idle_flush(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that we can flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_flush_barriers); + intel_engine_pm_put(engine); + if (err) + break; + } + + return err; +} + +static int live_idle_pulse(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that heartbeat pulses flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_pulse); + intel_engine_pm_put(engine); + if (err && err != -ENODEV) + break; + + err = 0; + } + + return err; +} + +static int cmp_u32(const void *_a, const void *_b) +{ + const u32 *a = _a, *b = _b; + + return *a - *b; +} + +static int __live_heartbeat_fast(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + ktime_t t0, t1; + u32 times[5]; + int err; + int i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + intel_engine_pm_get(engine); + + err = intel_engine_set_heartbeat(engine, 1); + if (err) + goto err_pm; + + for (i = 0; i < ARRAY_SIZE(times); i++) { + /* Manufacture a tick */ + do { + while (READ_ONCE(engine->heartbeat.systole)) + flush_delayed_work(&engine->heartbeat.work); + + engine->serial++; /* quick, pretend we are not idle! */ + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat did not start\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + rq = i915_request_get_rcu(rq); + rcu_read_unlock(); + } while (!rq); + + t0 = ktime_get(); + while (rq == READ_ONCE(engine->heartbeat.systole)) + yield(); /* work is on the local cpu! */ + t1 = ktime_get(); + + i915_request_put(rq); + times[i] = ktime_us_delta(t1, t0); + } + + sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); + + pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + times[0], + times[ARRAY_SIZE(times) - 1]); + + /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */ + if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) { + pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + jiffies_to_usecs(6)); + err = -EINVAL; + } + + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + intel_context_put(ce); + return err; +} + +static int live_heartbeat_fast(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that the heartbeat ticks at the desired rate. */ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + err = __live_heartbeat_fast(engine); + if (err) + break; + } + + return err; +} + +static int __live_heartbeat_off(struct intel_engine_cs *engine) +{ + int err; + + intel_engine_pm_get(engine); + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat not running\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + err = intel_engine_set_heartbeat(engine, 0); + if (err) + goto err_pm; + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat still running\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + + if (READ_ONCE(engine->heartbeat.systole)) { + pr_err("%s: heartbeat still allocated\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + +err_beat: + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + return err; +} + +static int live_heartbeat_off(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that we can turn off heartbeat and not interrupt VIP */ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; + + err = __live_heartbeat_off(engine); + if (err) + break; + } + + return err; +} + +int intel_heartbeat_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_idle_flush), + SUBTEST(live_idle_pulse), + SUBTEST(live_heartbeat_fast), + SUBTEST(live_heartbeat_off), + }; + int saved_hangcheck; + int err; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + saved_hangcheck = i915_modparams.enable_hangcheck; + i915_modparams.enable_hangcheck = INT_MAX; + + err = intel_gt_live_subtests(tests, &i915->gt); + + i915_modparams.enable_hangcheck = saved_hangcheck; + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 20b9c83f43ad..cbf6b0735272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg) pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", engine->name, p->name); else - intel_engine_pm_put(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); + intel_engine_pm_put_async(engine); p->critical_section_end(); - /* engine wakeref is sync (instant) */ + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { pr_err("%s is still awake after flushing pm\n", engine->name); diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 5d429037cdad..09ff8e4f88af 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -6,6 +6,7 @@ */ #include "selftest_llc.h" +#include "selftest_rc6.h" static int live_gt_resume(void *arg) { @@ -15,7 +16,8 @@ static int live_gt_resume(void *arg) /* Do several suspend/resume cycles to check we don't explode! */ do { - intel_gt_suspend(gt); + intel_gt_suspend_prepare(gt); + intel_gt_suspend_late(gt); if (gt->rc6.enabled) { pr_err("rc6 still enabled after suspend!\n"); @@ -49,6 +51,7 @@ static int live_gt_resume(void *arg) int intel_gt_pm_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(live_rc6_manual), SUBTEST(live_gt_resume), }; @@ -57,3 +60,20 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915) return intel_gt_live_subtests(tests, &i915->gt); } + +int intel_gt_pm_late_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + /* + * These tests may leave the system in an undesirable state. + * They are intended to be run last in CI and the system + * rebooted afterwards. + */ + SUBTEST(live_rc6_ctx_wa), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 8e0016464325..5dbda2a74272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -25,7 +25,9 @@ #include <linux/kthread.h> #include "gem/i915_gem_context.h" -#include "gt/intel_gt.h" + +#include "intel_gt.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "i915_selftest.h" @@ -308,6 +310,24 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int igt_hang_sanitycheck(void *arg) { struct intel_gt *gt = arg; @@ -377,36 +397,30 @@ static int igt_reset_nop(void *arg) struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; unsigned int reset_count, count; enum intel_engine_id id; - struct drm_file *file; IGT_TIMEOUT(end_time); int err = 0; /* Check that we can reset during non-user portions of requests */ - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); reset_count = i915_reset_count(global); count = 0; do { for_each_engine(engine, gt, id) { + struct intel_context *ce; int i; + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -414,6 +428,8 @@ static int igt_reset_nop(void *arg) i915_request_add(rq); } + + intel_context_put(ce); } igt_global_reset_lock(gt); @@ -437,10 +453,7 @@ static int igt_reset_nop(void *arg) } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - err = igt_flush_test(gt->i915); -out: - mock_file_free(gt->i915, file); - if (intel_gt_is_wedged(gt)) + if (igt_flush_test(gt->i915)) err = -EIO; return err; } @@ -450,36 +463,29 @@ static int igt_reset_nop_engine(void *arg) struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; - struct drm_file *file; - int err = 0; /* Check that we can engine-reset during non-user portions */ if (!intel_has_reset_engine(gt)) return 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); for_each_engine(engine, gt, id) { - unsigned int reset_count, reset_engine_count; - unsigned int count; + unsigned int reset_count, reset_engine_count, count; + struct intel_context *ce; + unsigned long heartbeat; IGT_TIMEOUT(end_time); + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); count = 0; + engine_heartbeat_disable(engine, &heartbeat); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { int i; @@ -494,7 +500,7 @@ static int igt_reset_nop_engine(void *arg) for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -523,22 +529,18 @@ static int igt_reset_nop_engine(void *arg) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - pr_info("%s(%s): %d resets\n", __func__, engine->name, count); + engine_heartbeat_enable(engine, heartbeat); - if (err) - break; + pr_info("%s(%s): %d resets\n", __func__, engine->name, count); - err = igt_flush_test(gt->i915); + intel_context_put(ce); + if (igt_flush_test(gt->i915)) + err = -EIO; if (err) - break; + return err; } - err = igt_flush_test(gt->i915); -out: - mock_file_free(gt->i915, file); - if (intel_gt_is_wedged(gt)) - err = -EIO; - return err; + return 0; } static int __igt_reset_engine(struct intel_gt *gt, bool active) @@ -562,6 +564,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; + unsigned long heartbeat; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -577,7 +580,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); - intel_engine_pm_get(engine); + engine_heartbeat_disable(engine, &heartbeat); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { @@ -629,7 +632,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); if (err) break; @@ -699,43 +702,43 @@ static int active_engine(void *data) struct active_engine *arg = data; struct intel_engine_cs *engine = arg->engine; struct i915_request *rq[8] = {}; - struct i915_gem_context *ctx[ARRAY_SIZE(rq)]; - struct drm_file *file; - unsigned long count = 0; + struct intel_context *ce[ARRAY_SIZE(rq)]; + unsigned long count; int err = 0; - file = mock_file(engine->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - for (count = 0; count < ARRAY_SIZE(ctx); count++) { - ctx[count] = live_context(engine->i915, file); - if (IS_ERR(ctx[count])) { - err = PTR_ERR(ctx[count]); + for (count = 0; count < ARRAY_SIZE(ce); count++) { + ce[count] = intel_context_create(engine); + if (IS_ERR(ce[count])) { + err = PTR_ERR(ce[count]); while (--count) - i915_gem_context_put(ctx[count]); - goto err_file; + intel_context_put(ce[count]); + return err; } } + count = 0; while (!kthread_should_stop()) { unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; - new = igt_request_alloc(ctx[idx], engine); + new = intel_context_create_request(ce[idx]); if (IS_ERR(new)) { err = PTR_ERR(new); break; } - if (arg->flags & TEST_PRIORITY) - ctx[idx]->sched.priority = - i915_prandom_u32_max_state(512, &prng); - rq[idx] = i915_request_get(new); i915_request_add(new); + if (engine->schedule && arg->flags & TEST_PRIORITY) { + struct i915_sched_attr attr = { + .priority = + i915_prandom_u32_max_state(512, &prng), + }; + engine->schedule(rq[idx], &attr); + } + err = active_request_put(old); if (err) break; @@ -749,10 +752,10 @@ static int active_engine(void *data) /* Keep the first error */ if (!err) err = err__; + + intel_context_put(ce[count]); } -err_file: - mock_file_free(engine->i915, file); return err; } @@ -786,6 +789,7 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; + unsigned long heartbeat; IGT_TIMEOUT(end_time); if (flags & TEST_ACTIVE && @@ -826,7 +830,9 @@ static int __igt_reset_engines(struct intel_gt *gt, get_task_struct(tsk); } - intel_engine_pm_get(engine); + yield(); /* start all threads before we begin */ + + engine_heartbeat_disable(engine, &heartbeat); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; @@ -900,7 +906,8 @@ static int __igt_reset_engines(struct intel_gt *gt, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); + pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); @@ -1016,7 +1023,7 @@ static int igt_reset_wait(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct i915_request *rq; unsigned int reset_count; struct hang h; @@ -1143,14 +1150,18 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, int (*fn)(void *), unsigned int flags) { - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; struct evict_vma arg; struct hang h; + unsigned int pin_flags; int err; + if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) + return 0; + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1186,10 +1197,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, goto out_obj; } - err = i915_vma_pin(arg.vma, 0, 0, - i915_vma_is_ggtt(arg.vma) ? - PIN_GLOBAL | PIN_MAPPABLE : - PIN_USER); + pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER; + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + pin_flags |= PIN_MAPPABLE; + + err = i915_vma_pin(arg.vma, 0, 0, pin_flags); if (err) { i915_request_add(rq); goto out_obj; @@ -1292,32 +1305,21 @@ static int igt_reset_evict_ggtt(void *arg) static int igt_reset_evict_ppgtt(void *arg) { struct intel_gt *gt = arg; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct drm_file *file; + struct i915_ppgtt *ppgtt; int err; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); + /* aliasing == global gtt locking, covered above */ + if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) + return 0; - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } + ppgtt = i915_ppgtt_create(gt->i915); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); - err = 0; - vm = i915_gem_context_get_vm_rcu(ctx); - if (!i915_is_ggtt(vm)) { - /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(gt, vm, - evict_vma, EXEC_OBJECT_WRITE); - } - i915_vm_put(vm); + err = __igt_reset_evict_vma(gt, &ppgtt->vm, + evict_vma, EXEC_OBJECT_WRITE); + i915_vm_put(&ppgtt->vm); -out: - mock_file_free(gt->i915, file); return err; } @@ -1493,7 +1495,7 @@ static int igt_handle_error(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; @@ -1563,7 +1565,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", engine->name, mode, p->name); - tasklet_disable_nosync(t); + tasklet_disable(t); p->critical_section_begin(); err = intel_engine_reset(engine, NULL); @@ -1686,7 +1688,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) }; struct intel_gt *gt = &i915->gt; intel_wakeref_t wakeref; - bool saved_hangcheck; int err; if (!intel_has_gpu_reset(gt)) @@ -1696,12 +1697,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) return -EIO; /* we're long past hope of a successful reset */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); - saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); - drain_delayed_work(>->hangcheck.work); /* flush param */ err = intel_gt_live_subtests(tests, gt); - i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c index a7057785e420..fd3770e48ac7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.c +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -6,6 +6,7 @@ #include "intel_pm.h" /* intel_gpu_freq() */ #include "selftest_llc.h" +#include "intel_rps.h" static int gen6_verify_ring_freq(struct intel_llc *llc) { @@ -25,6 +26,8 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) for (gpu_freq = consts.min_gpu_freq; gpu_freq <= consts.max_gpu_freq; gpu_freq++) { + struct intel_rps *rps = &llc_to_gt(llc)->rps; + unsigned int ia_freq, ring_freq, found; u32 val; @@ -44,7 +47,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ia_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ia_freq); err = -EINVAL; break; @@ -54,7 +57,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ring_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ring_freq); err = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 5dc679781a08..9ec9833c9c7b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -49,14 +50,31 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) return vma; } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int live_sanitycheck(void *arg) { struct intel_gt *gt = arg; - struct i915_gem_engines_iter it; - struct i915_gem_context *ctx; - struct intel_context *ce; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct igt_spinner spin; - int err = -ENOMEM; + int err = 0; if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) return 0; @@ -64,17 +82,20 @@ static int live_sanitycheck(void *arg) if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ctx = kernel_context(gt->i915); - if (!ctx) - goto err_spin; - - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + for_each_engine(engine, gt, id) { + struct intel_context *ce; struct i915_request *rq; + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_ctx; + goto out_ctx; } i915_request_add(rq); @@ -83,21 +104,21 @@ static int live_sanitycheck(void *arg) GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - goto err_ctx; + goto out_ctx; } igt_spinner_end(&spin); if (igt_flush_test(gt->i915)) { err = -EIO; - goto err_ctx; + goto out_ctx; } + +out_ctx: + intel_context_put(ce); + if (err) + break; } - err = 0; -err_ctx: - i915_gem_context_unlock_engines(ctx); - kernel_context_close(ctx); -err_spin: igt_spinner_fini(&spin); return err; } @@ -105,7 +126,6 @@ err_spin: static int live_unlite_restore(struct intel_gt *gt, int prio) { struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; struct igt_spinner spin; int err = -ENOMEM; @@ -118,15 +138,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) if (igt_spinner_init(&spin, gt)) return err; - ctx = kernel_context(gt->i915); - if (!ctx) - goto err_spin; - err = 0; for_each_engine(engine, gt, id) { struct intel_context *ce[2] = {}; struct i915_request *rq[2]; struct igt_live_test t; + unsigned long saved; int n; if (prio && !intel_engine_has_preemption(engine)) @@ -139,11 +156,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) err = -EIO; break; } + engine_heartbeat_disable(engine, &saved); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; - tmp = intel_context_create(ctx, engine); + tmp = intel_context_create(engine); if (IS_ERR(tmp)) { err = PTR_ERR(tmp); goto err_ce; @@ -168,12 +186,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) } GEM_BUG_ON(!ce[1]->ring->size); intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); - - local_irq_disable(); /* appease lockdep */ - __context_pin_acquire(ce[1]); __execlists_update_reg_state(ce[1], engine); - __context_pin_release(ce[1]); - local_irq_enable(); rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); if (IS_ERR(rq[0])) { @@ -251,14 +264,13 @@ err_ce: intel_context_put(ce[n]); } + engine_heartbeat_enable(engine, saved); if (igt_live_test_end(&t)) err = -EIO; if (err) break; } - kernel_context_close(ctx); -err_spin: igt_spinner_fini(&spin); return err; } @@ -313,17 +325,17 @@ emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) static struct i915_request * semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) { - struct i915_gem_context *ctx; + struct intel_context *ce; struct i915_request *rq; int err; - ctx = kernel_context(engine->i915); - if (!ctx) - return ERR_PTR(-ENOMEM); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) - goto out_ctx; + goto out_ce; err = 0; if (rq->engine->emit_init_breadcrumb) @@ -336,8 +348,8 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) if (err) rq = ERR_PTR(err); -out_ctx: - kernel_context_close(ctx); +out_ce: + intel_context_put(ce); return rq; } @@ -352,7 +364,7 @@ release_queue(struct intel_engine_cs *engine, struct i915_request *rq; u32 *cs; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -444,6 +456,8 @@ static int live_timeslice_preempt(void *arg) * need to preempt the current task and replace it with another * ready task. */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) @@ -470,12 +484,16 @@ static int live_timeslice_preempt(void *arg) enum intel_engine_id id; for_each_engine(engine, gt, id) { + unsigned long saved; + if (!intel_engine_has_preemption(engine)) continue; memset(vaddr, 0, PAGE_SIZE); + engine_heartbeat_disable(engine, &saved); err = slice_semaphore_queue(engine, vma, count); + engine_heartbeat_enable(engine, saved); if (err) goto err_pin; @@ -499,7 +517,7 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine) { struct i915_request *rq; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) return rq; @@ -518,6 +536,11 @@ static void wait_for_submit(struct intel_engine_cs *engine, } while (!i915_request_is_active(rq)); } +static long timeslice_threshold(const struct intel_engine_cs *engine) +{ + return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; +} + static int live_timeslice_queue(void *arg) { struct intel_gt *gt = arg; @@ -535,6 +558,8 @@ static int live_timeslice_queue(void *arg) * ELSP[1] is already occupied, so must rely on timeslicing to * eject ELSP[0] in favour of the queue.) */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) @@ -561,17 +586,19 @@ static int live_timeslice_queue(void *arg) .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; struct i915_request *rq, *nop; + unsigned long saved; if (!intel_engine_has_preemption(engine)) continue; + engine_heartbeat_disable(engine, &saved); memset(vaddr, 0, PAGE_SIZE); /* ELSP[0]: semaphore wait */ rq = semaphore_queue(engine, vma, 0); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_pin; + goto err_heartbeat; } engine->schedule(rq, &attr); wait_for_submit(engine, rq); @@ -580,8 +607,7 @@ static int live_timeslice_queue(void *arg) nop = nop_request(engine); if (IS_ERR(nop)) { err = PTR_ERR(nop); - i915_request_put(rq); - goto err_pin; + goto err_rq; } wait_for_submit(engine, nop); i915_request_put(nop); @@ -591,10 +617,8 @@ static int live_timeslice_queue(void *arg) /* Queue: semaphore signal, matching priority as semaphore */ err = release_queue(engine, vma, 1, effective_prio(rq)); - if (err) { - i915_request_put(rq); - goto err_pin; - } + if (err) + goto err_rq; intel_engine_flush_submission(engine); if (!READ_ONCE(engine->execlists.timer.expires) && @@ -612,8 +636,8 @@ static int live_timeslice_queue(void *arg) err = -EINVAL; } - /* Timeslice every jiffie, so within 2 we should signal */ - if (i915_request_wait(rq, 0, 3) < 0) { + /* Timeslice every jiffy, so within 2 we should signal */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -625,12 +649,14 @@ static int live_timeslice_queue(void *arg) memset(vaddr, 0xff, PAGE_SIZE); err = -EIO; } +err_rq: i915_request_put(rq); +err_heartbeat: + engine_heartbeat_enable(engine, saved); if (err) break; } -err_pin: i915_vma_unpin(vma); err_map: i915_gem_object_unpin_map(obj); @@ -743,15 +769,19 @@ static int live_busywait_preempt(void *arg) *cs++ = 0; intel_ring_advance(lo, cs); + + i915_request_get(lo); i915_request_add(lo); if (wait_for(READ_ONCE(*map), 10)) { + i915_request_put(lo); err = -ETIMEDOUT; goto err_vma; } /* Low priority request should be busywaiting now */ if (i915_request_wait(lo, 0, 1) != -ETIME) { + i915_request_put(lo); pr_err("%s: Busywaiting request did not!\n", engine->name); err = -EIO; @@ -761,6 +791,7 @@ static int live_busywait_preempt(void *arg) hi = igt_request_alloc(ctx_hi, engine); if (IS_ERR(hi)) { err = PTR_ERR(hi); + i915_request_put(lo); goto err_vma; } @@ -768,6 +799,7 @@ static int live_busywait_preempt(void *arg) if (IS_ERR(cs)) { err = PTR_ERR(cs); i915_request_add(hi); + i915_request_put(lo); goto err_vma; } @@ -788,11 +820,13 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); + i915_request_put(lo); intel_gt_set_wedged(gt); err = -EIO; goto err_vma; } GEM_BUG_ON(READ_ONCE(*map)); + i915_request_put(lo); if (igt_live_test_end(&t)) { err = -EIO; @@ -1165,6 +1199,325 @@ err_wedged: goto err_client_b; } +struct live_preempt_cancel { + struct intel_engine_cs *engine; + struct preempt_client a, b; +}; + +static int __cancel_active0(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP0 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + clear_bit(CONTEXT_BANNED, &rq->context->flags); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + intel_context_set_banned(rq->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_active1(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[2] = {}; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP1 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* no preemption */ + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + rq[1] = spinner_create_request(&arg->b.spin, + arg->b.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + intel_context_set_banned(rq[1]->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + igt_spinner_end(&arg->a.spin); + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != 0) { + pr_err("Normal inflight0 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != -EIO) { + pr_err("Cancelled inflight1 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_queued(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[3] = {}; + struct igt_live_test t; + int err; + + /* Full ELSP and one in the wings */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + rq[2] = spinner_create_request(&arg->b.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[2])) { + err = PTR_ERR(rq[2]); + goto out; + } + + i915_request_get(rq[2]); + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); + i915_request_add(rq[2]); + if (err) + goto out; + + intel_context_set_banned(rq[2]->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != 0) { + pr_err("Normal inflight1 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[2]->fence.error != -EIO) { + pr_err("Cancelled queued request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[2]); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_hostile(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + int err; + + /* Preempt cancel non-preemptible spinner in ELSP0 */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) + return PTR_ERR(rq); + + clear_bit(CONTEXT_BANNED, &rq->context->flags); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + intel_context_set_banned(rq->context); + err = intel_engine_pulse(arg->engine); /* force reset */ + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_flush_test(arg->engine->i915)) + err = -EIO; + return err; +} + +static int live_preempt_cancel(void *arg) +{ + struct intel_gt *gt = arg; + struct live_preempt_cancel data; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * To cancel an inflight context, we need to first remove it from the + * GPU. That sounds like preemption! Plus a little bit of bookkeeping. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (preempt_client_init(gt, &data.a)) + return -ENOMEM; + if (preempt_client_init(gt, &data.b)) + goto err_client_a; + + for_each_engine(data.engine, gt, id) { + if (!intel_engine_has_preemption(data.engine)) + continue; + + err = __cancel_active0(&data); + if (err) + goto err_wedged; + + err = __cancel_active1(&data); + if (err) + goto err_wedged; + + err = __cancel_queued(&data); + if (err) + goto err_wedged; + + err = __cancel_hostile(&data); + if (err) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&data.b); +err_client_a: + preempt_client_fini(&data.a); + return err; + +err_wedged: + GEM_TRACE_DUMP(); + igt_spinner_end(&data.b.spin); + igt_spinner_end(&data.a.spin); + intel_gt_set_wedged(gt); + goto err_client_b; +} + static int live_suppress_self_preempt(void *arg) { struct intel_gt *gt = arg; @@ -1341,6 +1694,7 @@ static int live_suppress_wait_preempt(void *arg) { struct intel_gt *gt = arg; struct preempt_client client[4]; + struct i915_request *rq[ARRAY_SIZE(client)] = {}; struct intel_engine_cs *engine; enum intel_engine_id id; int err = -ENOMEM; @@ -1374,7 +1728,6 @@ static int live_suppress_wait_preempt(void *arg) continue; for (depth = 0; depth < ARRAY_SIZE(client); depth++) { - struct i915_request *rq[ARRAY_SIZE(client)]; struct i915_request *dummy; engine->execlists.preempt_hang.count = 0; @@ -1384,18 +1737,22 @@ static int live_suppress_wait_preempt(void *arg) goto err_client_3; for (i = 0; i < ARRAY_SIZE(client); i++) { - rq[i] = spinner_create_request(&client[i].spin, - client[i].ctx, engine, - MI_NOOP); - if (IS_ERR(rq[i])) { - err = PTR_ERR(rq[i]); + struct i915_request *this; + + this = spinner_create_request(&client[i].spin, + client[i].ctx, engine, + MI_NOOP); + if (IS_ERR(this)) { + err = PTR_ERR(this); goto err_wedged; } /* Disable NEWCLIENT promotion */ - __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request, + __i915_active_fence_set(&i915_request_timeline(this)->last_request, &dummy->fence); - i915_request_add(rq[i]); + + rq[i] = i915_request_get(this); + i915_request_add(this); } dummy_request_free(dummy); @@ -1416,8 +1773,11 @@ static int live_suppress_wait_preempt(void *arg) goto err_wedged; } - for (i = 0; i < ARRAY_SIZE(client); i++) + for (i = 0; i < ARRAY_SIZE(client); i++) { igt_spinner_end(&client[i].spin); + i915_request_put(rq[i]); + rq[i] = NULL; + } if (igt_flush_test(gt->i915)) goto err_wedged; @@ -1445,8 +1805,10 @@ err_client_0: return err; err_wedged: - for (i = 0; i < ARRAY_SIZE(client); i++) + for (i = 0; i < ARRAY_SIZE(client); i++) { igt_spinner_end(&client[i].spin); + i915_request_put(rq[i]); + } intel_gt_set_wedged(gt); err = -EIO; goto err_client_3; @@ -1491,6 +1853,8 @@ static int live_chain_preempt(void *arg) MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); ring_size = rq->wa_tail - rq->head; @@ -1503,8 +1867,10 @@ static int live_chain_preempt(void *arg) igt_spinner_end(&lo.spin); if (i915_request_wait(rq, 0, HZ / 2) < 0) { pr_err("Timed out waiting to flush %s\n", engine->name); + i915_request_put(rq); goto err_wedged; } + i915_request_put(rq); if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; @@ -1538,6 +1904,8 @@ static int live_chain_preempt(void *arg) rq = igt_request_alloc(hi.ctx, engine); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); engine->schedule(rq, &attr); @@ -1550,14 +1918,19 @@ static int live_chain_preempt(void *arg) count); intel_engine_dump(engine, &p, "%s\n", engine->name); + i915_request_put(rq); goto err_wedged; } igt_spinner_end(&lo.spin); + i915_request_put(rq); rq = igt_request_alloc(lo.ctx, engine); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1566,8 +1939,11 @@ static int live_chain_preempt(void *arg) count); intel_engine_dump(engine, &p, "%s\n", engine->name); + + i915_request_put(rq); goto err_wedged; } + i915_request_put(rq); } if (igt_live_test_end(&t)) { @@ -1591,6 +1967,201 @@ err_wedged: goto err_client_lo; } +static int create_gang(struct intel_engine_cs *engine, + struct i915_request **prev) +{ + struct drm_i915_gem_object *obj; + struct intel_context *ce; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_ce; + } + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_obj; + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) + goto err_obj; + + /* Semaphore target: spin until zero */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = lower_32_bits(vma->node.start); + *cs++ = upper_32_bits(vma->node.start); + + if (*prev) { + u64 offset = (*prev)->batch->node.start; + + /* Terminate the spinner in the next lower priority batch. */ + *cs++ = MI_STORE_DWORD_IMM_GEN4; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = 0; + } + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + goto err_obj; + + rq->batch = vma; + i915_request_get(rq); + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_rq; + + i915_gem_object_put(obj); + intel_context_put(ce); + + rq->client_link.next = &(*prev)->client_link; + *prev = rq; + return 0; + +err_rq: + i915_request_put(rq); +err_obj: + i915_gem_object_put(obj); +err_ce: + intel_context_put(ce); + return err; +} + +static int live_preempt_gang(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + /* + * Build as long a chain of preempters as we can, with each + * request higher priority than the last. Once we are ready, we release + * the last batch which then precolates down the chain, each releasing + * the next oldest in turn. The intent is to simply push as hard as we + * can with the number of preemptions, trying to exceed narrow HW + * limits. At a minimum, we insist that we can sort all the user + * high priority levels into execution order. + */ + + for_each_engine(engine, gt, id) { + struct i915_request *rq = NULL; + struct igt_live_test t; + IGT_TIMEOUT(end_time); + int prio = 0; + int err = 0; + u32 *cs; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) + return -EIO; + + do { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(prio++), + }; + + err = create_gang(engine, &rq); + if (err) + break; + + /* Submit each spinner at increasing priority */ + engine->schedule(rq, &attr); + + if (prio <= I915_PRIORITY_MAX) + continue; + + if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) + break; + + if (__igt_timeout(end_time, NULL)) + break; + } while (1); + pr_debug("%s: Preempt chain of %d requests\n", + engine->name, prio); + + /* + * Such that the last spinner is the highest priority and + * should execute first. When that spinner completes, + * it will terminate the next lowest spinner until there + * are no more spinners and the gang is complete. + */ + cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); + if (!IS_ERR(cs)) { + *cs = 0; + i915_gem_object_unpin_map(rq->batch->obj); + } else { + err = PTR_ERR(cs); + intel_gt_set_wedged(gt); + } + + while (rq) { /* wait for each rq from highest to lowest prio */ + struct i915_request *n = + list_next_entry(rq, client_link); + + if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(engine->i915->drm.dev); + + pr_err("Failed to flush chain of %d requests, at %d\n", + prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -ETIME; + } + + i915_request_put(rq); + rq = n; + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + static int live_preempt_hang(void *arg) { struct intel_gt *gt = arg; @@ -1702,6 +2273,105 @@ err_spin_hi: return err; } +static int live_preempt_timeout(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Check that we force preemption to occur by cancelling the previous + * context if it refuses to yield the GPU. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin_lo, gt)) + return -ENOMEM; + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + for_each_engine(engine, gt, id) { + unsigned long saved_timeout; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_request_alloc(ctx_hi, engine); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + /* Flush the previous CS ack before changing timeouts */ + while (READ_ONCE(engine->execlists.pending[0])) + cpu_relax(); + + saved_timeout = engine->props.preempt_timeout_ms; + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + engine->props.preempt_timeout_ms = saved_timeout; + + if (i915_request_wait(rq, 0, HZ / 10) < 0) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_lo); + i915_request_put(rq); + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); + return err; +} + static int random_range(struct rnd_state *rnd, int min, int max) { return i915_prandom_u32_max_state(max - min, rnd) + min; @@ -1829,6 +2499,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) get_task_struct(tsk[id]); } + yield(); /* start all threads before we kthread_stop() */ + count = 0; for_each_engine(engine, smoke->gt, id) { int status; @@ -1966,28 +2638,18 @@ static int nop_virtual_engine(struct intel_gt *gt, #define CHAIN BIT(0) { IGT_TIMEOUT(end_time); - struct i915_request *request[16]; - struct i915_gem_context *ctx[16]; + struct i915_request *request[16] = {}; struct intel_context *ve[16]; unsigned long n, prime, nc; struct igt_live_test t; ktime_t times[2] = {}; int err; - GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx)); + GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ctx[n] = kernel_context(gt->i915); - if (!ctx[n]) { - err = -ENOMEM; - nctx = n; - goto out; - } - - ve[n] = intel_execlists_create_virtual(ctx[n], - siblings, nsibling); + ve[n] = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve[n])) { - kernel_context_close(ctx[n]); err = PTR_ERR(ve[n]); nctx = n; goto out; @@ -1996,7 +2658,6 @@ static int nop_virtual_engine(struct intel_gt *gt, err = intel_context_pin(ve[n]); if (err) { intel_context_put(ve[n]); - kernel_context_close(ctx[n]); nctx = n; goto out; } @@ -2012,27 +2673,35 @@ static int nop_virtual_engine(struct intel_gt *gt, if (flags & CHAIN) { for (nc = 0; nc < nctx; nc++) { for (n = 0; n < prime; n++) { - request[nc] = - i915_request_create(ve[nc]); - if (IS_ERR(request[nc])) { - err = PTR_ERR(request[nc]); + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto out; } - i915_request_add(request[nc]); + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); } } } else { for (n = 0; n < prime; n++) { for (nc = 0; nc < nctx; nc++) { - request[nc] = - i915_request_create(ve[nc]); - if (IS_ERR(request[nc])) { - err = PTR_ERR(request[nc]); + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto out; } - i915_request_add(request[nc]); + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); } } } @@ -2058,6 +2727,11 @@ static int nop_virtual_engine(struct intel_gt *gt, if (prime == 1) times[0] = times[1]; + for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); + request[nc] = NULL; + } + if (__igt_timeout(end_time, NULL)) break; } @@ -2075,9 +2749,9 @@ out: err = -EIO; for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); intel_context_unpin(ve[nc]); intel_context_put(ve[nc]); - kernel_context_close(ctx[nc]); } return err; } @@ -2136,7 +2810,6 @@ static int mask_virtual_engine(struct intel_gt *gt, unsigned int nsibling) { struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; - struct i915_gem_context *ctx; struct intel_context *ve; struct igt_live_test t; unsigned int n; @@ -2147,11 +2820,7 @@ static int mask_virtual_engine(struct intel_gt *gt, * restrict it to our desired engine within the virtual engine. */ - ctx = kernel_context(gt->i915); - if (!ctx) - return -ENOMEM; - - ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -2219,7 +2888,6 @@ out_unpin: out_put: intel_context_put(ve); out_close: - kernel_context_close(ctx); return err; } @@ -2259,7 +2927,6 @@ static int preserved_virtual_engine(struct intel_gt *gt, unsigned int nsibling) { struct i915_request *last = NULL; - struct i915_gem_context *ctx; struct intel_context *ve; struct i915_vma *scratch; struct igt_live_test t; @@ -2267,17 +2934,11 @@ static int preserved_virtual_engine(struct intel_gt *gt, int err = 0; u32 *cs; - ctx = kernel_context(gt->i915); - if (!ctx) - return -ENOMEM; - scratch = create_scratch(siblings[0]->gt); - if (IS_ERR(scratch)) { - err = PTR_ERR(scratch); - goto out_close; - } + if (IS_ERR(scratch)) + return PTR_ERR(scratch); - ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_scratch; @@ -2360,8 +3021,6 @@ out_put: intel_context_put(ve); out_scratch: i915_vma_unpin_and_release(&scratch, 0); -out_close: - kernel_context_close(ctx); return err; } @@ -2413,16 +3072,54 @@ static int bond_virtual_engine(struct intel_gt *gt, #define BOND_SCHEDULE BIT(0) { struct intel_engine_cs *master; - struct i915_gem_context *ctx; struct i915_request *rq[16]; enum intel_engine_id id; + struct igt_spinner spin; unsigned long n; int err; + /* + * A set of bonded requests is intended to be run concurrently + * across a number of engines. We use one request per-engine + * and a magic fence to schedule each of the bonded requests + * at the same time. A consequence of our current scheduler is that + * we only move requests to the HW ready queue when the request + * becomes ready, that is when all of its prerequisite fences have + * been signaled. As one of those fences is the master submit fence, + * there is a delay on all secondary fences as the HW may be + * currently busy. Equally, as all the requests are independent, + * they may have other fences that delay individual request + * submission to HW. Ergo, we do not guarantee that all requests are + * immediately submitted to HW at the same time, just that if the + * rules are abided by, they are ready at the same time as the + * first is submitted. Userspace can embed semaphores in its batch + * to ensure parallel execution of its phases as it requires. + * Though naturally it gets requested that perhaps the scheduler should + * take care of parallel execution, even across preemption events on + * different HW. (The proper answer is of course "lalalala".) + * + * With the submit-fence, we have identified three possible phases + * of synchronisation depending on the master fence: queued (not + * ready), executing, and signaled. The first two are quite simple + * and checked below. However, the signaled master fence handling is + * contentious. Currently we do not distinguish between a signaled + * fence and an expired fence, as once signaled it does not convey + * any information about the previous execution. It may even be freed + * and hence checking later it may not exist at all. Ergo we currently + * do not apply the bonding constraint for an already signaled fence, + * as our expectation is that it should not constrain the secondaries + * and is outside of the scope of the bonded request API (i.e. all + * userspace requests are meant to be running in parallel). As + * it imposes no constraint, and is effectively a no-op, we do not + * check below as normal execution flows are checked extensively above. + * + * XXX Is the degenerate handling of signaled submit fences the + * expected behaviour for userpace? + */ + GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - ctx = kernel_context(gt->i915); - if (!ctx) + if (igt_spinner_init(&spin, gt)) return -ENOMEM; err = 0; @@ -2435,7 +3132,9 @@ static int bond_virtual_engine(struct intel_gt *gt, memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - rq[0] = igt_request_alloc(ctx, master); + rq[0] = igt_spinner_create_request(&spin, + master->kernel_context, + MI_NOOP); if (IS_ERR(rq[0])) { err = PTR_ERR(rq[0]); goto out; @@ -2448,16 +3147,21 @@ static int bond_virtual_engine(struct intel_gt *gt, &fence, GFP_KERNEL); } + i915_request_add(rq[0]); if (err < 0) goto out; + if (!(flags & BOND_SCHEDULE) && + !igt_wait_for_spinner(&spin, rq[0])) { + err = -EIO; + goto out; + } + for (n = 0; n < nsibling; n++) { struct intel_context *ve; - ve = intel_execlists_create_virtual(ctx, - siblings, - nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); onstack_fence_fini(&fence); @@ -2499,6 +3203,8 @@ static int bond_virtual_engine(struct intel_gt *gt, } } onstack_fence_fini(&fence); + intel_engine_flush_submission(master); + igt_spinner_end(&spin); if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { pr_err("Master request did not execute (on %s)!\n", @@ -2535,7 +3241,7 @@ out: if (igt_flush_test(gt->i915)) err = -EIO; - kernel_context_close(ctx); + igt_spinner_fini(&spin); return err; } @@ -2599,10 +3305,13 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt), SUBTEST(live_late_preempt), SUBTEST(live_nopreempt), + SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), + SUBTEST(live_preempt_gang), SUBTEST(live_preempt_hang), + SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), @@ -2749,8 +3458,101 @@ static int live_lrc_layout(void *arg) return err; } -static int __live_lrc_state(struct i915_gem_context *fixme, - struct intel_engine_cs *engine, +static int find_offset(const u32 *lri, u32 offset) +{ + int i; + + for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) + if (lri[i] == offset) + return i; + + return -1; +} + +static int live_lrc_fixed(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check the assumed register offsets match the actual locations in + * the context image. + */ + + for_each_engine(engine, gt, id) { + const struct { + u32 reg; + u32 offset; + const char *name; + } tbl[] = { + { + i915_mmio_reg_offset(RING_START(engine->mmio_base)), + CTX_RING_START - 1, + "RING_START" + }, + { + i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), + CTX_RING_CTL - 1, + "RING_CTL" + }, + { + i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), + CTX_RING_HEAD - 1, + "RING_HEAD" + }, + { + i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), + CTX_RING_TAIL - 1, + "RING_TAIL" + }, + { + i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), + lrc_ring_mi_mode(engine), + "RING_MI_MODE" + }, + { + i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), + CTX_BB_STATE - 1, + "BB_STATE" + }, + { }, + }, *t; + u32 *hw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + for (t = tbl; t->name; t++) { + int dw = find_offset(hw, t->reg); + + if (dw != t->offset) { + pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", + engine->name, + t->name, + t->reg, + dw, + t->offset); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(engine->default_state); + } + + return err; +} + +static int __live_lrc_state(struct intel_engine_cs *engine, struct i915_vma *scratch) { struct intel_context *ce; @@ -2765,7 +3567,7 @@ static int __live_lrc_state(struct i915_gem_context *fixme, int err; int n; - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); @@ -2839,7 +3641,6 @@ static int live_lrc_state(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; struct i915_vma *scratch; enum intel_engine_id id; int err = 0; @@ -2849,18 +3650,12 @@ static int live_lrc_state(void *arg) * intel_context. */ - fixme = kernel_context(gt->i915); - if (!fixme) - return -ENOMEM; - scratch = create_scratch(gt); - if (IS_ERR(scratch)) { - err = PTR_ERR(scratch); - goto out_close; - } + if (IS_ERR(scratch)) + return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - err = __live_lrc_state(fixme, engine, scratch); + err = __live_lrc_state(engine, scratch); if (err) break; } @@ -2869,8 +3664,6 @@ static int live_lrc_state(void *arg) err = -EIO; i915_vma_unpin_and_release(&scratch, 0); -out_close: - kernel_context_close(fixme); return err; } @@ -2880,7 +3673,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine) u32 *cs; int n; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -2903,8 +3696,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine) return 0; } -static int __live_gpr_clear(struct i915_gem_context *fixme, - struct intel_engine_cs *engine, +static int __live_gpr_clear(struct intel_engine_cs *engine, struct i915_vma *scratch) { struct intel_context *ce; @@ -2920,7 +3712,7 @@ static int __live_gpr_clear(struct i915_gem_context *fixme, if (err) return err; - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); @@ -2982,7 +3774,6 @@ static int live_gpr_clear(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; struct i915_vma *scratch; enum intel_engine_id id; int err = 0; @@ -2992,18 +3783,12 @@ static int live_gpr_clear(void *arg) * to avoid leaking any information from previous contexts. */ - fixme = kernel_context(gt->i915); - if (!fixme) - return -ENOMEM; - scratch = create_scratch(gt); - if (IS_ERR(scratch)) { - err = PTR_ERR(scratch); - goto out_close; - } + if (IS_ERR(scratch)) + return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - err = __live_gpr_clear(fixme, engine, scratch); + err = __live_gpr_clear(engine, scratch); if (err) break; } @@ -3012,8 +3797,6 @@ static int live_gpr_clear(void *arg) err = -EIO; i915_vma_unpin_and_release(&scratch, 0); -out_close: - kernel_context_close(fixme); return err; } @@ -3021,6 +3804,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_fixed), SUBTEST(live_lrc_state), SUBTEST(live_gpr_clear), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c new file mode 100644 index 000000000000..de1f83100fb6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -0,0 +1,419 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gt/intel_engine_pm.h" +#include "i915_selftest.h" + +#include "gem/selftests/mock_context.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" + +struct live_mocs { + struct drm_i915_mocs_table table; + struct i915_vma *scratch; + void *vaddr; +}; + +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) +{ + int err; + + if (!get_mocs_settings(gt->i915, &arg->table)) + return -EINVAL; + + arg->scratch = create_scratch(gt); + if (IS_ERR(arg->scratch)) + return PTR_ERR(arg->scratch); + + arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB); + if (IS_ERR(arg->vaddr)) { + err = PTR_ERR(arg->vaddr); + goto err_scratch; + } + + return 0; + +err_scratch: + i915_vma_unpin_and_release(&arg->scratch, 0); + return err; +} + +static void live_mocs_fini(struct live_mocs *arg) +{ + i915_vma_unpin_and_release(&arg->scratch, I915_VMA_RELEASE_MAP); +} + +static int read_regs(struct i915_request *rq, + u32 addr, unsigned int count, + uint32_t *offset) +{ + unsigned int i; + u32 *cs; + + GEM_BUG_ON(!IS_ALIGNED(*offset, sizeof(u32))); + + cs = intel_ring_begin(rq, 4 * count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0; i < count; i++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = *offset; + *cs++ = 0; + + addr += sizeof(u32); + *offset += sizeof(u32); + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static int read_mocs_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table, + uint32_t *offset) +{ + u32 addr; + + if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915)) + addr = global_mocs_offset(); + else + addr = mocs_offset(rq->engine); + + return read_regs(rq, addr, table->n_entries, offset); +} + +static int read_l3cc_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table, + uint32_t *offset) +{ + u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); + + return read_regs(rq, addr, (table->n_entries + 1) / 2, offset); +} + +static int check_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table, + uint32_t **vaddr) +{ + unsigned int i; + u32 expect; + + for_each_mocs(expect, table, i) { + if (**vaddr != expect) { + pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n", + engine->name, i, **vaddr, expect); + return -EINVAL; + } + ++*vaddr; + } + + return 0; +} + +static bool mcr_range(struct drm_i915_private *i915, u32 offset) +{ + /* + * Registers in this range are affected by the MCR selector + * which only controls CPU initiated MMIO. Routing does not + * work for CS access so we cannot verify them on this path. + */ + return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff; +} + +static int check_l3cc_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table, + uint32_t **vaddr) +{ + /* Can we read the MCR range 0xb00 directly? See intel_workarounds! */ + u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); + unsigned int i; + u32 expect; + + for_each_l3cc(expect, table, i) { + if (!mcr_range(engine->i915, reg) && **vaddr != expect) { + pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n", + engine->name, i, **vaddr, expect); + return -EINVAL; + } + ++*vaddr; + reg += 4; + } + + return 0; +} + +static int check_mocs_engine(struct live_mocs *arg, + struct intel_context *ce) +{ + struct i915_vma *vma = arg->scratch; + struct i915_request *rq; + u32 offset; + u32 *vaddr; + int err; + + memset32(arg->vaddr, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (!err) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + + /* Read the mocs tables back using SRM */ + offset = i915_ggtt_offset(vma); + if (!err) + err = read_mocs_table(rq, &arg->table, &offset); + if (!err && ce->engine->class == RENDER_CLASS) + err = read_l3cc_table(rq, &arg->table, &offset); + offset -= i915_ggtt_offset(vma); + GEM_BUG_ON(offset > PAGE_SIZE); + + err = request_add_sync(rq, err); + if (err) + return err; + + /* Compare the results against the expected tables */ + vaddr = arg->vaddr; + if (!err) + err = check_mocs_table(ce->engine, &arg->table, &vaddr); + if (!err && ce->engine->class == RENDER_CLASS) + err = check_l3cc_table(ce->engine, &arg->table, &vaddr); + if (err) + return err; + + GEM_BUG_ON(arg->vaddr + offset != vaddr); + return 0; +} + +static int live_mocs_kernel(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err; + + /* Basic check the system is configured with the expected mocs table */ + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = check_mocs_engine(&mocs, engine->kernel_context); + intel_engine_pm_put(engine); + if (err) + break; + } + + live_mocs_fini(&mocs); + return err; +} + +static int live_mocs_clean(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err; + + /* Every new context should see the same mocs table */ + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + err = check_mocs_engine(&mocs, ce); + intel_context_put(ce); + if (err) + break; + } + + live_mocs_fini(&mocs); + return err; +} + +static int active_engine_reset(struct intel_context *ce, + const char *reason) +{ + struct igt_spinner spin; + struct i915_request *rq; + int err; + + err = igt_spinner_init(&spin, ce->engine->gt); + if (err) + return err; + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + if (IS_ERR(rq)) { + igt_spinner_fini(&spin); + return PTR_ERR(rq); + } + + err = request_add_spin(rq, &spin); + if (err == 0) + err = intel_engine_reset(ce->engine, reason); + + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + + return err; +} + +static int __live_mocs_reset(struct live_mocs *mocs, + struct intel_context *ce) +{ + int err; + + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + err = active_engine_reset(ce, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs"); + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + return 0; +} + +static int live_mocs_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err = 0; + + /* Check the mocs setup is retained over per-engine and global resets */ + + if (!intel_has_reset_engine(gt)) + return 0; + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + igt_global_reset_lock(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + intel_engine_pm_get(engine); + err = __live_mocs_reset(&mocs, ce); + intel_engine_pm_put(engine); + + intel_context_put(ce); + if (err) + break; + } + igt_global_reset_unlock(gt); + + live_mocs_fini(&mocs); + return err; +} + +int intel_mocs_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_mocs_kernel), + SUBTEST(live_mocs_clean), + SUBTEST(live_mocs_reset), + }; + struct drm_i915_mocs_table table; + + if (!get_mocs_settings(i915, &table)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c new file mode 100644 index 000000000000..8cc55a0e9e06 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -0,0 +1,203 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_context.h" +#include "intel_engine_pm.h" +#include "intel_gt_requests.h" +#include "intel_ring.h" +#include "selftest_rc6.h" + +#include "selftests/i915_random.h" + +int live_rc6_manual(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rc6 *rc6 = >->rc6; + intel_wakeref_t wakeref; + u64 res[2]; + int err = 0; + + /* + * Our claim is that we can "encourage" the GPU to enter rc6 at will. + * Let's try it! + */ + + if (!rc6->enabled) + return 0; + + /* bsw/byt use a PCU and decouple RC6 from our manual control */ + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) + return 0; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + /* Force RC6 off for starters */ + __intel_rc6_disable(rc6); + msleep(1); /* wakeup is not immediate, takes about 100us on icl */ + + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + msleep(250); + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + if ((res[1] - res[0]) >> 10) { + pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", + (res[1] - res[0]) >> 10); + err = -EINVAL; + goto out_unlock; + } + + /* Manually enter RC6 */ + intel_rc6_park(rc6); + + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + msleep(100); + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + + if (res[1] == res[0]) { + pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n", + intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE), + intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL)); + err = -EINVAL; + } + + /* Restore what should have been the original state! */ + intel_rc6_unpark(rc6); + +out_unlock: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + return err; +} + +static const u32 *__live_rc6_ctx(struct intel_context *ce) +{ + struct i915_request *rq; + const u32 *result; + u32 cmd; + u32 *cs; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return ERR_CAST(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return cs; + } + + cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; + if (INTEL_GEN(rq->i915) >= 8) + cmd++; + + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO); + *cs++ = ce->timeline->hwsp_offset + 8; + *cs++ = 0; + intel_ring_advance(rq, cs); + + result = rq->hwsp_seqno + 2; + i915_request_add(rq); + + return result; +} + +static struct intel_engine_cs ** +randomised_engines(struct intel_gt *gt, + struct rnd_state *prng, + unsigned int *count) +{ + struct intel_engine_cs *engine, **engines; + enum intel_engine_id id; + int n; + + n = 0; + for_each_engine(engine, gt, id) + n++; + if (!n) + return NULL; + + engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL); + if (!engines) + return NULL; + + n = 0; + for_each_engine(engine, gt, id) + engines[n++] = engine; + + i915_prandom_shuffle(engines, sizeof(*engines), n, prng); + + *count = n; + return engines; +} + +int live_rc6_ctx_wa(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs **engines; + unsigned int n, count; + I915_RND_STATE(prng); + int err = 0; + + /* A read of CTX_INFO upsets rc6. Poke the bear! */ + if (INTEL_GEN(gt->i915) < 8) + return 0; + + engines = randomised_engines(gt, &prng, &count); + if (!engines) + return 0; + + for (n = 0; n < count; n++) { + struct intel_engine_cs *engine = engines[n]; + int pass; + + for (pass = 0; pass < 2; pass++) { + struct intel_context *ce; + unsigned int resets = + i915_reset_engine_count(>->i915->gpu_error, + engine); + const u32 *res; + + /* Use a sacrifical context */ + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + intel_engine_pm_get(engine); + res = __live_rc6_ctx(ce); + intel_engine_pm_put(engine); + intel_context_put(ce); + if (IS_ERR(res)) { + err = PTR_ERR(res); + goto out; + } + + if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + intel_gt_pm_wait_for_idle(gt); + pr_debug("%s: CTX_INFO=%0x\n", + engine->name, READ_ONCE(*res)); + + if (resets != + i915_reset_engine_count(>->i915->gpu_error, + engine)) { + pr_err("%s: GPU reset required\n", + engine->name); + add_taint_for_CI(TAINT_WARN); + err = -EIO; + goto out; + } + } + } + +out: + kfree(engines); + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h new file mode 100644 index 000000000000..762fd442d7b2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_RC6_H +#define SELFTEST_RC6_H + +int live_rc6_ctx_wa(void *arg); +int live_rc6_manual(void *arg); + +#endif /* SELFTEST_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 6efb9221b7fa..6ad6aca315f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -126,7 +126,7 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - tasklet_disable_nosync(&engine->execlists.tasklet); + tasklet_disable(&engine->execlists.tasklet); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index dac86f699a4c..e2d78cc22fb4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -9,6 +9,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_requests.h" +#include "intel_ring.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -457,7 +458,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) goto out; } - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) goto out_unpin; @@ -674,9 +675,7 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - intel_engine_pm_get(engine); - rq = i915_request_create(engine->kernel_context); - intel_engine_pm_put(engine); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index ef02920cec29..ac1921854cbf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -264,22 +264,15 @@ static int switch_to_scratch_context(struct intel_engine_cs *engine, struct igt_spinner *spin) { - struct i915_gem_context *ctx; struct intel_context *ce; struct i915_request *rq; int err = 0; - ctx = kernel_context(engine->i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - GEM_BUG_ON(i915_gem_context_is_bannable(ctx)); - - ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); - GEM_BUG_ON(IS_ERR(ce)); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); rq = igt_spinner_create_request(spin, ce, MI_NOOP); - intel_context_put(ce); if (IS_ERR(rq)) { @@ -293,7 +286,6 @@ err: if (err && spin) igt_spinner_end(spin); - kernel_context_close(ctx); return err; } @@ -367,20 +359,17 @@ out_ctx: return err; } -static struct i915_vma *create_batch(struct i915_gem_context *ctx) +static struct i915_vma *create_batch(struct i915_address_space *vm) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm; struct i915_vma *vma; int err; - obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, 16 * PAGE_SIZE); if (IS_ERR(obj)) return ERR_CAST(obj); - vm = i915_gem_context_get_vm_rcu(ctx); vma = i915_vma_instance(obj, vm, NULL); - i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -452,8 +441,7 @@ static int whitelist_writable_count(struct intel_engine_cs *engine) return count; } -static int check_dirty_whitelist(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int check_dirty_whitelist(struct intel_context *ce) { const u32 values[] = { 0x00000000, @@ -481,19 +469,17 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, 0xffff00ff, 0xffffffff, }; - struct i915_address_space *vm; + struct intel_engine_cs *engine = ce->engine; struct i915_vma *scratch; struct i915_vma *batch; int err = 0, i, v; u32 *cs, *results; - vm = i915_gem_context_get_vm_rcu(ctx); - scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1); - i915_vm_put(vm); + scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1); if (IS_ERR(scratch)) return PTR_ERR(scratch); - batch = create_batch(ctx); + batch = create_batch(ce->vm); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_scratch; @@ -513,9 +499,12 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, ro_reg = ro_register(reg); + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; - if (INTEL_GEN(ctx->i915) >= 8) + if (INTEL_GEN(engine->i915) >= 8) lrm++, srm++; pr_debug("%s: Writing garbage to %x\n", @@ -574,7 +563,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, i915_gem_object_unpin_map(batch->obj); intel_gt_chipset_flush(engine->gt); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -693,7 +682,7 @@ out_unpin: break; } - if (igt_flush_test(ctx->i915)) + if (igt_flush_test(engine->i915)) err = -EIO; out_batch: i915_vma_unpin_and_release(&batch, 0); @@ -706,38 +695,31 @@ static int live_dirty_whitelist(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; - struct drm_file *file; - int err = 0; /* Can the user write to the whitelisted registers? */ if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - for_each_engine(engine, gt, id) { + struct intel_context *ce; + int err; + if (engine->whitelist.count == 0) continue; - err = check_dirty_whitelist(ctx, engine); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = check_dirty_whitelist(ce); + intel_context_put(ce); if (err) - goto out_file; + return err; } -out_file: - mock_file_free(gt->i915, file); - return err; + return 0; } static int live_reset_whitelist(void *arg) @@ -810,8 +792,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, u64 offset = results->node.start + sizeof(u32) * i; u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - /* Clear access permission field */ - reg &= ~RING_FORCE_TO_NONPRIV_ACCESS_MASK; + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; *cs++ = srm; *cs++ = reg; @@ -827,12 +809,15 @@ err_req: static int scrub_whitelisted_registers(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *batch; int i, err = 0; u32 *cs; - batch = create_batch(ctx); + vm = i915_gem_context_get_vm_rcu(ctx); + batch = create_batch(vm); + i915_vm_put(vm); if (IS_ERR(batch)) return PTR_ERR(batch); @@ -849,6 +834,9 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, if (ro_register(reg)) continue; + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + *cs++ = reg; *cs++ = 0xffffffff; } diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 2a77c051f36a..aeb1d1f616e8 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context) mutex_init(&timeline->mutex); - INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h index 689efc66c908..d2bcc3df6183 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h @@ -7,6 +7,8 @@ #ifndef __MOCK_TIMELINE__ #define __MOCK_TIMELINE__ +#include <linux/types.h> + struct intel_timeline; void mock_timeline_init(struct intel_timeline *timeline, u64 context); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 37f7bcbf7dac..5d00a3b2d914 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -4,6 +4,8 @@ */ #include "gt/intel_gt.h" +#include "gt/intel_gt_irq.h" +#include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" @@ -30,18 +32,17 @@ * just the HuC, but more are expected to land in the future). */ -static void gen8_guc_raise_irq(struct intel_guc *guc) +void intel_guc_notify(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - intel_uncore_write(gt->uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); -} - -static void gen11_guc_raise_irq(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - intel_uncore_write(gt->uncore, GEN11_GUC_HOST_INTERRUPT, 0); + /* + * On Gen11+, the value written to the register is passes as a payload + * to the FW. However, the FW currently treats all values the same way + * (H2G interrupt), so we can just write the value that the HW expects + * on older gens. + */ + intel_uncore_write(gt->uncore, guc->notify_reg, GUC_SEND_TRIGGER); } static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) @@ -77,6 +78,93 @@ void intel_guc_init_send_regs(struct intel_guc *guc) guc->send_regs.fw_domains = fw_domains; } +static void gen9_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(>->i915->runtime_pm); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); + spin_unlock_irq(>->irq_lock); +} + +static void gen9_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(>->i915->runtime_pm); + + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & + gt->pm_guc_events); + guc->interrupts.enabled = true; + gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); + } + spin_unlock_irq(>->irq_lock); +} + +static void gen9_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(>->i915->runtime_pm); + + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; + + gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); + + gen9_reset_guc_interrupts(guc); +} + +static void gen11_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); + spin_unlock_irq(>->irq_lock); +} + +static void gen11_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_ENABLE, events); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_MASK, ~events); + guc->interrupts.enabled = true; + } + spin_unlock_irq(>->irq_lock); +} + +static void gen11_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; + + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); + + gen11_reset_guc_interrupts(guc); +} + void intel_guc_init_early(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; @@ -88,47 +176,19 @@ void intel_guc_init_early(struct intel_guc *guc) mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; if (INTEL_GEN(i915) >= 11) { - guc->notify = gen11_guc_raise_irq; + guc->notify_reg = GEN11_GUC_HOST_INTERRUPT; guc->interrupts.reset = gen11_reset_guc_interrupts; guc->interrupts.enable = gen11_enable_guc_interrupts; guc->interrupts.disable = gen11_disable_guc_interrupts; } else { - guc->notify = gen8_guc_raise_irq; + guc->notify_reg = GUC_SEND_INTERRUPT; guc->interrupts.reset = gen9_reset_guc_interrupts; guc->interrupts.enable = gen9_enable_guc_interrupts; guc->interrupts.disable = gen9_disable_guc_interrupts; } } -static int guc_shared_data_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } - - guc->shared_data = vma; - guc->shared_data_vaddr = vaddr; - - return 0; -} - -static void guc_shared_data_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP); -} - static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); @@ -275,14 +335,9 @@ int intel_guc_init(struct intel_guc *guc) if (ret) goto err_fetch; - ret = guc_shared_data_create(guc); - if (ret) - goto err_fw; - GEM_BUG_ON(!guc->shared_data); - ret = intel_guc_log_create(&guc->log); if (ret) - goto err_shared; + goto err_fw; ret = intel_guc_ads_create(guc); if (ret) @@ -317,8 +372,6 @@ err_ads: intel_guc_ads_destroy(guc); err_log: intel_guc_log_destroy(&guc->log); -err_shared: - guc_shared_data_destroy(guc); err_fw: intel_uc_fw_fini(&guc->fw); err_fetch: @@ -343,21 +396,10 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); - guc_shared_data_destroy(guc); intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); -} - -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size) -{ - WARN(1, "Unexpected send: action=%#x\n", *action); - return -ENODEV; -} -void intel_guc_to_host_event_handler_nop(struct intel_guc *guc) -{ - WARN(1, "Unexpected event: no suitable handler\n"); + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED); } /* @@ -499,6 +541,13 @@ int intel_guc_suspend(struct intel_guc *guc) }; /* + * If GuC communication is enabled but submission is not supported, + * we do not need to suspend the GuC. + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + + /* * The ENTER_S_STATE action queues the save/restore operation in GuC FW * and then returns, so waiting on the H2G is not enough to guarantee * GuC is done. When all the processing is done, GuC writes @@ -539,19 +588,9 @@ int intel_guc_suspend(struct intel_guc *guc) int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine) { - u32 data[7]; - - GEM_BUG_ON(!guc->execbuf_client); - - data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; - data[1] = engine->guc_id; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = guc->execbuf_client->stage_id; - data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); + /* XXX: to be implemented with submission interface rework */ - return intel_guc_send(guc, data, ARRAY_SIZE(data)); + return -ENODEV; } /** @@ -565,6 +604,14 @@ int intel_guc_resume(struct intel_guc *guc) GUC_POWER_D0, }; + /* + * If GuC communication is enabled but submission is not supported, + * we do not need to resume the GuC but we do need to enable the + * GuC communication on resume (above). + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } @@ -644,3 +691,37 @@ err: i915_gem_object_put(obj); return vma; } + +/** + * intel_guc_allocate_and_map_vma() - Allocate and map VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) + * @out_vma: return variable for the allocated vma pointer + * @out_vaddr: return variable for the obj mapping + * + * This wrapper calls intel_guc_allocate_vma() and then maps the allocated + * object with I915_MAP_WB. + * + * Return: 0 if successful, a negative errno code otherwise. + */ +int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, + struct i915_vma **out_vma, void **out_vaddr) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, size); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + *out_vma = vma; + *out_vaddr = vaddr; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b2f046d3cc3..910d49590068 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -20,8 +20,8 @@ struct __guc_ads_blob; /* * Top level structure of GuC. It handles firmware loading and manages client - * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy - * ExecList submission. + * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList + * submission. */ struct intel_guc { struct intel_uc_fw fw; @@ -46,15 +46,13 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; - struct ida stage_ids; - struct i915_vma *shared_data; - void *shared_data_vaddr; - struct intel_guc_client *execbuf_client; + struct i915_vma *workqueue; + void *workqueue_vaddr; + spinlock_t wq_lock; - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - /* Cyclic counter mod pagesize */ - u32 db_cacheline; + struct i915_vma *proc_desc; + void *proc_desc_vaddr; /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; @@ -66,44 +64,33 @@ struct intel_guc { enum forcewake_domains fw_domains; } send_regs; + /* register used to send interrupts to the GuC FW */ + i915_reg_t notify_reg; + /* Store msg (e.g. log flush) that we see while CTBs are disabled */ u32 mmio_msg; /* To serialize the intel_guc_send actions */ struct mutex send_mutex; - - /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len, - u32 *response_buf, u32 response_buf_size); - - /* GuC's FW specific event handler function */ - void (*handler)(struct intel_guc *guc); - - /* GuC's FW specific notify function */ - void (*notify)(struct intel_guc *guc); }; static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return guc->send(guc, action, len, NULL, 0); + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); } static inline int intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { - return guc->send(guc, action, len, response_buf, response_buf_size); -} - -static inline void intel_guc_notify(struct intel_guc *guc) -{ - guc->notify(guc); + return intel_guc_ct_send(&guc->ct, action, len, + response_buf, response_buf_size); } static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) { - guc->handler(guc); + intel_guc_ct_event_handler(&guc->ct); } /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ @@ -138,12 +125,9 @@ void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_write_params(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); void intel_guc_fini(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size); +void intel_guc_notify(struct intel_guc *guc); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size); -void intel_guc_to_host_event_handler(struct intel_guc *guc); -void intel_guc_to_host_event_handler_nop(struct intel_guc *guc); int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, const u32 *payload, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); @@ -151,6 +135,8 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); +int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, + struct i915_vma **out_vma, void **out_vaddr); static inline bool intel_guc_is_supported(struct intel_guc *guc) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index ca6674b8e00c..101728006ae9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -93,7 +93,8 @@ static void __guc_ads_init(struct intel_guc *guc) */ blob->ads.golden_context_lrca[engine_class] = 0; blob->ads.eng_state_size[engine_class] = - intel_engine_context_size(dev_priv, engine_class) - + intel_engine_context_size(guc_to_gt(guc), + engine_class) - skipped_size; } @@ -135,32 +136,19 @@ static void __guc_ads_init(struct intel_guc *guc) int intel_guc_ads_create(struct intel_guc *guc) { const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob)); - struct i915_vma *vma; - void *blob; int ret; GEM_BUG_ON(guc->ads_vma); - vma = intel_guc_allocate_vma(guc, size); - if (IS_ERR(vma)) - return PTR_ERR(vma); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, + (void **)&guc->ads_blob); - blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(blob)) { - ret = PTR_ERR(blob); - goto err_vma; - } - - guc->ads_vma = vma; - guc->ads_blob = blob; + if (ret) + return ret; __guc_ads_init(guc); return 0; - -err_vma: - i915_vma_unpin_and_release(&guc->ads_vma, 0); - return ret; } void intel_guc_ads_destroy(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index b49115517510..c6f971a049f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -37,13 +37,10 @@ static void ct_incoming_request_worker_func(struct work_struct *w); */ void intel_guc_ct_init_early(struct intel_guc_ct *ct) { - /* we're using static channel owners */ - ct->host_channel.owner = CTB_OWNER_HOST; - - spin_lock_init(&ct->lock); - INIT_LIST_HEAD(&ct->pending_requests); - INIT_LIST_HEAD(&ct->incoming_requests); - INIT_WORK(&ct->worker, ct_incoming_request_worker_func); + spin_lock_init(&ct->requests.lock); + INIT_LIST_HEAD(&ct->requests.pending); + INIT_LIST_HEAD(&ct->requests.incoming); + INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); } static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) @@ -64,14 +61,13 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type) } static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, - u32 cmds_addr, u32 size, u32 owner) + u32 cmds_addr, u32 size) { - CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", - desc, cmds_addr, size, owner); + CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size); memset(desc, 0, sizeof(*desc)); desc->addr = cmds_addr; desc->size = size; - desc->owner = owner; + desc->owner = CTB_OWNER_HOST; } static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) @@ -104,12 +100,11 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, } static int guc_action_deregister_ct_buffer(struct intel_guc *guc, - u32 owner, u32 type) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, - owner, + CTB_OWNER_HOST, type }; int err; @@ -117,20 +112,27 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, /* Can't use generic send(), CT deregistration must go over MMIO */ err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (err) - DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n", - guc_ct_buffer_type_to_str(type), owner, err); + DRM_ERROR("CT: deregister %s buffer failed; err=%d\n", + guc_ct_buffer_type_to_str(type), err); return err; } -static int ctch_init(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_init - Init buffer-based communication + * @ct: pointer to CT struct + * + * Allocate memory required for buffer-based communication. + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_init(struct intel_guc_ct *ct) { - struct i915_vma *vma; + struct intel_guc *guc = ct_to_guc(ct); void *blob; int err; int i; - GEM_BUG_ON(ctch->vma); + GEM_BUG_ON(ct->vma); /* We allocate 1 page to hold both descriptors and both buffers. * ___________..................... @@ -154,71 +156,65 @@ static int ctch_init(struct intel_guc *guc, * other code will need updating as well. */ - /* allocate vma */ - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_out; + err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob); + if (err) { + DRM_ERROR("CT: channel allocation failed; err=%d\n", err); + return err; } - ctch->vma = vma; - /* map first page */ - blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(blob)) { - err = PTR_ERR(blob); - goto err_vma; - } CT_DEBUG_DRIVER("CT: vma base=%#x\n", - intel_guc_ggtt_offset(guc, ctch->vma)); + intel_guc_ggtt_offset(guc, ct->vma)); /* store pointers to desc and cmds */ - for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { - GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i; - ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; + for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i; + ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; } return 0; - -err_vma: - i915_vma_unpin_and_release(&ctch->vma, 0); -err_out: - CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", - ctch->owner, err); - return err; } -static void ctch_fini(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_fini - Fini buffer-based communication + * @ct: pointer to CT struct + * + * Deallocate memory required for buffer-based communication. + */ +void intel_guc_ct_fini(struct intel_guc_ct *ct) { - GEM_BUG_ON(ctch->enabled); + GEM_BUG_ON(ct->enabled); - i915_vma_unpin_and_release(&ctch->vma, I915_VMA_RELEASE_MAP); + i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP); } -static int ctch_enable(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_enable - Enable buffer based command transport. + * @ct: pointer to CT struct + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_enable(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); u32 base; int err; int i; - GEM_BUG_ON(!ctch->vma); - - GEM_BUG_ON(ctch->enabled); + GEM_BUG_ON(ct->enabled); /* vma should be already allocated and map'ed */ - base = intel_guc_ggtt_offset(guc, ctch->vma); + GEM_BUG_ON(!ct->vma); + base = intel_guc_ggtt_offset(guc, ct->vma); /* (re)initialize descriptors * cmds buffers are in the second half of the blob page */ - for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - guc_ct_buffer_desc_init(ctch->ctbs[i].desc, + guc_ct_buffer_desc_init(ct->ctbs[i].desc, base + PAGE_SIZE/4 * i + PAGE_SIZE/2, - PAGE_SIZE/4, - ctch->owner); + PAGE_SIZE/4); } /* register buffers, starting wirh RECV buffer @@ -236,38 +232,42 @@ static int ctch_enable(struct intel_guc *guc, if (unlikely(err)) goto err_deregister; - ctch->enabled = true; + ct->enabled = true; return 0; err_deregister: guc_action_deregister_ct_buffer(guc, - ctch->owner, INTEL_GUC_CT_BUFFER_TYPE_RECV); err_out: - DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err); + DRM_ERROR("CT: can't open channel; err=%d\n", err); return err; } -static void ctch_disable(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_disable - Disable buffer based command transport. + * @ct: pointer to CT struct + */ +void intel_guc_ct_disable(struct intel_guc_ct *ct) { - GEM_BUG_ON(!ctch->enabled); + struct intel_guc *guc = ct_to_guc(ct); - ctch->enabled = false; + GEM_BUG_ON(!ct->enabled); - guc_action_deregister_ct_buffer(guc, - ctch->owner, - INTEL_GUC_CT_BUFFER_TYPE_SEND); - guc_action_deregister_ct_buffer(guc, - ctch->owner, - INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct->enabled = false; + + if (intel_guc_is_running(guc)) { + guc_action_deregister_ct_buffer(guc, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + guc_action_deregister_ct_buffer(guc, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + } } -static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) +static u32 ct_get_next_fence(struct intel_guc_ct *ct) { /* For now it's trivial */ - return ++ctch->next_fence; + return ++ct->requests.next_fence; } /** @@ -440,35 +440,34 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) return err; } -static int ctch_send(struct intel_guc_ct *ct, - struct intel_guc_ct_channel *ctch, - const u32 *action, - u32 len, - u32 *response_buf, - u32 response_buf_size, - u32 *status) +static int ct_send(struct intel_guc_ct *ct, + const u32 *action, + u32 len, + u32 *response_buf, + u32 response_buf_size, + u32 *status) { - struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND]; struct guc_ct_buffer_desc *desc = ctb->desc; struct ct_request request; unsigned long flags; u32 fence; int err; - GEM_BUG_ON(!ctch->enabled); + GEM_BUG_ON(!ct->enabled); GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(!response_buf && response_buf_size); - fence = ctch_get_next_fence(ctch); + fence = ct_get_next_fence(ct); request.fence = fence; request.status = 0; request.response_len = response_buf_size; request.response_buf = response_buf; - spin_lock_irqsave(&ct->lock, flags); - list_add_tail(&request.link, &ct->pending_requests); - spin_unlock_irqrestore(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); + list_add_tail(&request.link, &ct->requests.pending); + spin_unlock_irqrestore(&ct->requests.lock, flags); err = ctb_write(ctb, action, len, fence, !!response_buf); if (unlikely(err)) @@ -501,9 +500,9 @@ static int ctch_send(struct intel_guc_ct *ct, } unlink: - spin_lock_irqsave(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); list_del(&request.link); - spin_unlock_irqrestore(&ct->lock, flags); + spin_unlock_irqrestore(&ct->requests.lock, flags); return err; } @@ -511,18 +510,21 @@ unlink: /* * Command Transport (CT) buffer based GuC send function. */ -int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, +int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { - struct intel_guc_ct *ct = &guc->ct; - struct intel_guc_ct_channel *ctch = &ct->host_channel; + struct intel_guc *guc = ct_to_guc(ct); u32 status = ~0; /* undefined */ int ret; + if (unlikely(!ct->enabled)) { + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; + } + mutex_lock(&guc->send_mutex); - ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size, - &status); + ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", action[0], ret, status); @@ -653,8 +655,8 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status); - spin_lock(&ct->lock); - list_for_each_entry(req, &ct->pending_requests, link) { + spin_lock(&ct->requests.lock); + list_for_each_entry(req, &ct->requests.pending, link) { if (unlikely(fence != req->fence)) { CT_DEBUG_DRIVER("CT: request %u awaits response\n", req->fence); @@ -672,7 +674,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) found = true; break; } - spin_unlock(&ct->lock); + spin_unlock(&ct->requests.lock); if (!found) DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg); @@ -710,13 +712,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) u32 *payload; bool done; - spin_lock_irqsave(&ct->lock, flags); - request = list_first_entry_or_null(&ct->incoming_requests, + spin_lock_irqsave(&ct->requests.lock, flags); + request = list_first_entry_or_null(&ct->requests.incoming, struct ct_incoming_request, link); if (request) list_del(&request->link); - done = !!list_empty(&ct->incoming_requests); - spin_unlock_irqrestore(&ct->lock, flags); + done = !!list_empty(&ct->requests.incoming); + spin_unlock_irqrestore(&ct->requests.lock, flags); if (!request) return true; @@ -734,12 +736,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) static void ct_incoming_request_worker_func(struct work_struct *w) { - struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker); + struct intel_guc_ct *ct = + container_of(w, struct intel_guc_ct, requests.worker); bool done; done = ct_process_incoming_requests(ct); if (!done) - queue_work(system_unbound_wq, &ct->worker); + queue_work(system_unbound_wq, &ct->requests.worker); } /** @@ -777,23 +780,28 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) } memcpy(request->msg, msg, 4 * msglen); - spin_lock_irqsave(&ct->lock, flags); - list_add_tail(&request->link, &ct->incoming_requests); - spin_unlock_irqrestore(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); + list_add_tail(&request->link, &ct->requests.incoming); + spin_unlock_irqrestore(&ct->requests.lock, flags); - queue_work(system_unbound_wq, &ct->worker); + queue_work(system_unbound_wq, &ct->requests.worker); return 0; } -static void ct_process_host_channel(struct intel_guc_ct *ct) +/* + * When we're communicating with the GuC over CT, GuC uses events + * to notify us about new messages being posted on the RECV buffer. + */ +void intel_guc_ct_event_handler(struct intel_guc_ct *ct) { - struct intel_guc_ct_channel *ctch = &ct->host_channel; - struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV]; + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV]; u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ int err = 0; - if (!ctch->enabled) + if (unlikely(!ct->enabled)) { + WARN(1, "Unexpected GuC event received while CT disabled!\n"); return; + } do { err = ctb_read(ctb, msg); @@ -812,86 +820,3 @@ static void ct_process_host_channel(struct intel_guc_ct *ct) } } -/* - * When we're communicating with the GuC over CT, GuC uses events - * to notify us about new messages being posted on the RECV buffer. - */ -void intel_guc_to_host_event_handler_ct(struct intel_guc *guc) -{ - struct intel_guc_ct *ct = &guc->ct; - - ct_process_host_channel(ct); -} - -/** - * intel_guc_ct_init - Init CT communication - * @ct: pointer to CT struct - * - * Allocate memory required for communication via - * the CT channel. - * - * Return: 0 on success, a negative errno code on failure. - */ -int intel_guc_ct_init(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - int err; - - err = ctch_init(guc, ctch); - if (unlikely(err)) { - DRM_ERROR("CT: can't open channel %d; err=%d\n", - ctch->owner, err); - return err; - } - - GEM_BUG_ON(!ctch->vma); - return 0; -} - -/** - * intel_guc_ct_fini - Fini CT communication - * @ct: pointer to CT struct - * - * Deallocate memory required for communication via - * the CT channel. - */ -void intel_guc_ct_fini(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - ctch_fini(guc, ctch); -} - -/** - * intel_guc_ct_enable - Enable buffer based command transport. - * @ct: pointer to CT struct - * - * Return: 0 on success, a negative errno code on failure. - */ -int intel_guc_ct_enable(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - if (ctch->enabled) - return 0; - - return ctch_enable(guc, ctch); -} - -/** - * intel_guc_ct_disable - Disable buffer based command transport. - * @ct: pointer to CT struct - */ -void intel_guc_ct_disable(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - if (!ctch->enabled) - return; - - ctch_disable(guc, ctch); -} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 7c24d83f5c24..3e7fe237cfa5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -35,44 +35,28 @@ struct intel_guc_ct_buffer { u32 *cmds; }; -/** Represents pair of command transport buffers. - * - * Buffers go in pairs to allow bi-directional communication. - * To simplify the code we place both of them in the same vma. - * Buffers from the same pair must share unique owner id. - * - * @vma: pointer to the vma with pair of CT buffers - * @ctbs: buffers for sending(0) and receiving(1) commands - * @owner: unique identifier - * @next_fence: fence to be used with next send command - */ -struct intel_guc_ct_channel { - struct i915_vma *vma; - struct intel_guc_ct_buffer ctbs[2]; - u32 owner; - u32 next_fence; - bool enabled; -}; -/** Holds all command transport channels. +/** Top-level structure for Command Transport related data * - * @host_channel: main channel used by the host + * Includes a pair of CT buffers for bi-directional communication and tracking + * for the H2G and G2H requests sent and received through the buffers. */ struct intel_guc_ct { - struct intel_guc_ct_channel host_channel; - /* other channels are tbd */ + struct i915_vma *vma; + bool enabled; - /** @lock: protects pending requests list */ - spinlock_t lock; + /* buffers for sending(0) and receiving(1) commands */ + struct intel_guc_ct_buffer ctbs[2]; - /** @pending_requests: list of requests waiting for response */ - struct list_head pending_requests; + struct { + u32 next_fence; /* fence to be used with next request to send */ - /** @incoming_requests: list of incoming requests */ - struct list_head incoming_requests; + spinlock_t lock; /* protects pending requests list */ + struct list_head pending; /* requests waiting for response */ - /** @worker: worker for handling incoming requests */ - struct work_struct worker; + struct list_head incoming; /* incoming requests */ + struct work_struct worker; /* handler for incoming requests */ + } requests; }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); @@ -81,13 +65,13 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct); int intel_guc_ct_enable(struct intel_guc_ct *ct); void intel_guc_ct_disable(struct intel_guc_ct *ct); -static inline void intel_guc_ct_stop(struct intel_guc_ct *ct) +static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct) { - ct->host_channel.enabled = false; + return ct->enabled; } -int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, +int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size); -void intel_guc_to_host_event_handler_ct(struct intel_guc *guc); +void intel_guc_ct_event_handler(struct intel_guc_ct *ct); #endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 5528224448f6..3a1c47d600ea 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -149,7 +149,7 @@ int intel_guc_fw_upload(struct intel_guc *guc) * Current uCode expects the code to be loaded at 8k; locations below * this are used for the stack. */ - ret = intel_uc_fw_upload(&guc->fw, gt, 0x2000, UOS_MOVE); + ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 1d3cdd67ca2f..a6b733c146c9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -31,7 +31,6 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_DB_SIZE (PAGE_SIZE) #define GUC_WQ_SIZE (PAGE_SIZE * 2) /* Work queue item header definitions */ @@ -548,6 +547,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, @@ -556,7 +556,6 @@ enum intel_guc_action { INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, - INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 2cf2d3314f62..caed0d57e704 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -226,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (WARN_ON(!intel_guc_log_relay_enabled(log))) + if (WARN_ON(!intel_guc_log_relay_created(log))) goto out_unlock; /* Get the pointer to shared GuC log buffer */ @@ -361,6 +361,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log) { mutex_init(&log->relay.lock); INIT_WORK(&log->relay.flush_work, capture_logs_work); + log->relay.started = false; } static int guc_log_relay_create(struct intel_guc_log *log) @@ -546,7 +547,7 @@ out_unlock: return ret; } -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) +bool intel_guc_log_relay_created(const struct intel_guc_log *log) { return log->relay.buf_addr; } @@ -560,7 +561,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (intel_guc_log_relay_enabled(log)) { + if (intel_guc_log_relay_created(log)) { ret = -EEXIST; goto out_unlock; } @@ -585,6 +586,21 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_unlock(&log->relay.lock); + return 0; + +out_relay: + guc_log_relay_destroy(log); +out_unlock: + mutex_unlock(&log->relay.lock); + + return ret; +} + +int intel_guc_log_relay_start(struct intel_guc_log *log) +{ + if (log->relay.started) + return -EEXIST; + guc_log_enable_flush_events(log); /* @@ -594,14 +610,9 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) */ queue_work(system_highpri_wq, &log->relay.flush_work); - return 0; - -out_relay: - guc_log_relay_destroy(log); -out_unlock: - mutex_unlock(&log->relay.lock); + log->relay.started = true; - return ret; + return 0; } void intel_guc_log_relay_flush(struct intel_guc_log *log) @@ -609,6 +620,9 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) struct intel_guc *guc = log_to_guc(log); intel_wakeref_t wakeref; + if (!log->relay.started) + return; + /* * Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. @@ -622,18 +636,33 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) guc_log_capture_logs(log); } -void intel_guc_log_relay_close(struct intel_guc_log *log) +/* + * Stops the relay log. Called from intel_guc_log_relay_close(), so no + * possibility of race with start/flush since relay_write cannot race + * relay_close. + */ +static void guc_log_relay_stop(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + if (!log->relay.started) + return; + guc_log_disable_flush_events(log); intel_synchronize_irq(i915); flush_work(&log->relay.flush_work); + log->relay.started = false; +} + +void intel_guc_log_relay_close(struct intel_guc_log *log) +{ + guc_log_relay_stop(log); + mutex_lock(&log->relay.lock); - GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); + GEM_BUG_ON(!intel_guc_log_relay_created(log)); guc_log_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 6f764879acb1..c252c022c5fc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -47,6 +47,7 @@ struct intel_guc_log { struct i915_vma *vma; struct { void *buf_addr; + bool started; struct work_struct flush_work; struct rchan *channel; struct mutex lock; @@ -65,8 +66,9 @@ int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); int intel_guc_log_set_level(struct intel_guc_log *log, u32 level); -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); +bool intel_guc_log_relay_created(const struct intel_guc_log *log); int intel_guc_log_relay_open(struct intel_guc_log *log); +int intel_guc_log_relay_start(struct intel_guc_log *log); void intel_guc_log_relay_flush(struct intel_guc_log *log); void intel_guc_log_relay_close(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 009e54a3764f..9e42324fdecd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -6,26 +6,18 @@ #include <linux/circ_buf.h> #include "gem/i915_gem_context.h" - #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" + #include "intel_guc_submission.h" #include "i915_drv.h" #include "i915_trace.h" -enum { - GUC_PREEMPT_NONE = 0, - GUC_PREEMPT_INPROGRESS, - GUC_PREEMPT_FINISHED, -}; -#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 -#define GUC_PREEMPT_BREADCRUMB_BYTES \ - (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS) - /** * DOC: GuC-based command submission * @@ -35,25 +27,14 @@ enum { * code) matches the old submission model and will be updated as part of the * upgrade to the new flow. * - * GuC client: - * A intel_guc_client refers to a submission path through GuC. Currently, there - * is only one client, which is charged with all submissions to the GuC. This - * struct is the owner of a doorbell, a process descriptor and a workqueue (all - * of them inside a single gem object that contains all required pages for these - * elements). - * * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. - * Currently, there exists a 1:1 mapping between a intel_guc_client and a - * guc_stage_desc (via the client's stage_id), so effectively only one - * gets used. This stage descriptor lets the GuC know about the doorbell, - * workqueue and process descriptor. Theoretically, it also lets the GuC - * know about our HW contexts (context ID, etc...), but we actually - * employ a kind of submission where the GuC uses the LRCA sent via the work - * item instead (the single guc_stage_desc associated to execbuf client - * contains information about the default kernel context only, but this is - * essentially unused). This is called a "proxy" submission. + * descriptors, and shares them with the GuC. Currently, we only use one + * descriptor. This stage descriptor lets the GuC know about the workqueue and + * process descriptor. Theoretically, it also lets the GuC know about our HW + * contexts (context ID, etc...), but we actually employ a kind of submission + * where the GuC uses the LRCA sent via the work item instead. This is called + * a "proxy" submission. * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes @@ -62,11 +43,6 @@ enum { * Firmware writes a success/fail code back to the action register after * processes the request. The kernel driver polls waiting for this update and * then proceeds. - * See intel_guc_send() - * - * Doorbells: - * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) - * mapped into process space. * * Work Items: * There are several types of work items that the host may place into a @@ -83,213 +59,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static inline bool is_high_priority(struct intel_guc_client *client) -{ - return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || - client->priority == GUC_CLIENT_PRIORITY_HIGH); -} - -static int reserve_doorbell(struct intel_guc_client *client) -{ - unsigned long offset; - unsigned long end; - u16 id; - - GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID); - - /* - * The bitmap tracks which doorbell registers are currently in use. - * It is split into two halves; the first half is used for normal - * priority contexts, the second half for high-priority ones. - */ - offset = 0; - end = GUC_NUM_DOORBELLS / 2; - if (is_high_priority(client)) { - offset = end; - end += offset; - } - - id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset); - if (id == end) - return -ENOSPC; - - __set_bit(id, client->guc->doorbell_bitmap); - client->doorbell_id = id; - DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", - client->stage_id, yesno(is_high_priority(client)), - id); - return 0; -} - -static bool has_doorbell(struct intel_guc_client *client) -{ - if (client->doorbell_id == GUC_DOORBELL_INVALID) - return false; - - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); -} - -static void unreserve_doorbell(struct intel_guc_client *client) -{ - GEM_BUG_ON(!has_doorbell(client)); - - __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); - client->doorbell_id = GUC_DOORBELL_INVALID; -} - -/* - * Tell the GuC to allocate or deallocate a specific doorbell - */ - -static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id) +static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) { - u32 action[] = { - INTEL_GUC_ACTION_ALLOCATE_DOORBELL, - stage_id - }; + struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return &base[id]; } -static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) +static int guc_workqueue_create(struct intel_guc *guc) { - u32 action[] = { - INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, - stage_id - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue, + &guc->workqueue_vaddr); } -static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) +static void guc_workqueue_destroy(struct intel_guc *guc) { - struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; - - return &base[client->stage_id]; + i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP); } /* - * Initialise, update, or clear doorbell data shared with the GuC - * - * These functions modify shared data and so need access to the mapped - * client object which contains the page being used for the doorbell + * Initialise the process descriptor shared with the GuC firmware. */ - -static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id) +static int guc_proc_desc_create(struct intel_guc *guc) { - struct guc_stage_desc *desc; + const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc)); - /* Update the GuC's idea of the doorbell ID */ - desc = __get_stage_desc(client); - desc->db_id = new_id; + return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc, + &guc->proc_desc_vaddr); } -static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) +static void guc_proc_desc_destroy(struct intel_guc *guc) { - return client->vaddr + client->doorbell_offset; -} - -static bool __doorbell_valid(struct intel_guc *guc, u16 db_id) -{ - struct intel_uncore *uncore = guc_to_gt(guc)->uncore; - - GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); - return intel_uncore_read(uncore, GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID; + i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP); } -static void __init_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *doorbell; - - doorbell = __get_doorbell(client); - doorbell->db_status = GUC_DOORBELL_ENABLED; - doorbell->cookie = 0; -} - -static void __fini_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *doorbell; - u16 db_id = client->doorbell_id; - - doorbell = __get_doorbell(client); - doorbell->db_status = GUC_DOORBELL_DISABLED; - - /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit - * to go to zero after updating db_status before we call the GuC to - * release the doorbell - */ - if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10)) - WARN_ONCE(true, "Doorbell never became invalid after disable\n"); -} - -static int create_doorbell(struct intel_guc_client *client) -{ - int ret; - - if (WARN_ON(!has_doorbell(client))) - return -ENODEV; /* internal setup error, should never happen */ - - __update_doorbell_desc(client, client->doorbell_id); - __init_doorbell(client); - - ret = __guc_allocate_doorbell(client->guc, client->stage_id); - if (ret) { - __fini_doorbell(client); - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n", - client->stage_id, ret); - return ret; - } - - return 0; -} - -static int destroy_doorbell(struct intel_guc_client *client) -{ - int ret; - - GEM_BUG_ON(!has_doorbell(client)); - - __fini_doorbell(client); - ret = __guc_deallocate_doorbell(client->guc, client->stage_id); - if (ret) - DRM_ERROR("Couldn't destroy client %u doorbell: %d\n", - client->stage_id, ret); - - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - - return ret; -} - -static unsigned long __select_cacheline(struct intel_guc *guc) -{ - unsigned long offset; - - /* Doorbell uses a single cache line within a page */ - offset = offset_in_page(guc->db_cacheline); - - /* Moving to next cache line to reduce contention */ - guc->db_cacheline += cache_line_size(); - - DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", - offset, guc->db_cacheline, cache_line_size()); - return offset; -} - -static inline struct guc_process_desc * -__get_process_desc(struct intel_guc_client *client) -{ - return client->vaddr + client->proc_desc_offset; -} - -/* - * Initialise the process descriptor shared with the GuC firmware. - */ -static void guc_proc_desc_init(struct intel_guc_client *client) +static void guc_proc_desc_init(struct intel_guc *guc) { struct guc_process_desc *desc; - desc = memset(__get_process_desc(client), 0, sizeof(*desc)); + desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc)); /* * XXX: pDoorbell and WQVBaseAddress are pointers in process address @@ -300,47 +108,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client) desc->wq_base_addr = 0; desc->db_base_addr = 0; - desc->stage_id = client->stage_id; desc->wq_size_bytes = GUC_WQ_SIZE; desc->wq_status = WQ_STATUS_ACTIVE; - desc->priority = client->priority; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; } -static void guc_proc_desc_fini(struct intel_guc_client *client) +static void guc_proc_desc_fini(struct intel_guc *guc) { - struct guc_process_desc *desc; - - desc = __get_process_desc(client); - memset(desc, 0, sizeof(*desc)); + memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc)); } static int guc_stage_desc_pool_create(struct intel_guc *guc) { - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, - PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS)); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } + u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS); - guc->stage_desc_pool = vma; - guc->stage_desc_pool_vaddr = vaddr; - ida_init(&guc->stage_ids); - - return 0; + return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, + &guc->stage_desc_pool_vaddr); } static void guc_stage_desc_pool_destroy(struct intel_guc *guc) { - ida_destroy(&guc->stage_ids); i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); } @@ -348,63 +136,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) * Initialise/clear the stage descriptor shared with the GuC firmware. * * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures relating to this client (doorbell, process descriptor, - * write queue, etc). + * data structures related to work submission (process descriptor, write queue, + * etc). */ -static void guc_stage_desc_init(struct intel_guc_client *client) +static void guc_stage_desc_init(struct intel_guc *guc) { - struct intel_guc *guc = client->guc; struct guc_stage_desc *desc; - u32 gfx_addr; - desc = __get_stage_desc(client); + /* we only use 1 stage desc, so hardcode it to 0 */ + desc = __get_stage_desc(guc, 0); memset(desc, 0, sizeof(*desc)); desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; - if (is_high_priority(client)) - desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; - desc->stage_id = client->stage_id; - desc->priority = client->priority; - desc->db_id = client->doorbell_id; - /* - * The doorbell, process descriptor, and workqueue are all parts - * of the client object, which the GuC will reference via the GGTT - */ - gfx_addr = intel_guc_ggtt_offset(guc, client->vma); - desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + - client->doorbell_offset; - desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); - desc->db_trigger_uk = gfx_addr + client->doorbell_offset; - desc->process_desc = gfx_addr + client->proc_desc_offset; - desc->wq_addr = gfx_addr + GUC_DB_SIZE; - desc->wq_size = GUC_WQ_SIZE; + desc->stage_id = 0; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; - desc->desc_private = ptr_to_u64(client); + desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc); + desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue); + desc->wq_size = GUC_WQ_SIZE; } -static void guc_stage_desc_fini(struct intel_guc_client *client) +static void guc_stage_desc_fini(struct intel_guc *guc) { struct guc_stage_desc *desc; - desc = __get_stage_desc(client); + desc = __get_stage_desc(guc, 0); memset(desc, 0, sizeof(*desc)); } /* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct intel_guc_client *client, +static void guc_wq_item_append(struct intel_guc *guc, u32 target_engine, u32 context_desc, u32 ring_tail, u32 fence_id) { /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size / sizeof(u32) - 1; - struct guc_process_desc *desc = __get_process_desc(client); + struct guc_process_desc *desc = guc->proc_desc_vaddr; struct guc_wq_item *wqi; u32 wq_off; - lockdep_assert_held(&client->wq_lock); + lockdep_assert_held(&guc->wq_lock); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither @@ -424,58 +198,30 @@ static void guc_wq_item_append(struct intel_guc_client *client, GUC_WQ_SIZE) < wqi_size); GEM_BUG_ON(wq_off & (wqi_size - 1)); - /* WQ starts from the page after doorbell / process_desc */ - wqi = client->vaddr + wq_off + GUC_DB_SIZE; - - if (I915_SELFTEST_ONLY(client->use_nop_wqi)) { - wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT); - } else { - /* Now fill in the 4-word work queue item */ - wqi->header = WQ_TYPE_INORDER | - (wqi_len << WQ_LEN_SHIFT) | - (target_engine << WQ_TARGET_SHIFT) | - WQ_NO_WCFLUSH_WAIT; - wqi->context_desc = context_desc; - wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; - GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); - wqi->fence_id = fence_id; - } + wqi = guc->workqueue_vaddr + wq_off; + + /* Now fill in the 4-word work queue item */ + wqi->header = WQ_TYPE_INORDER | + (wqi_len << WQ_LEN_SHIFT) | + (target_engine << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + wqi->context_desc = context_desc; + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); + wqi->fence_id = fence_id; /* Make the update visible to GuC */ WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); } -static void guc_ring_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *db; - u32 cookie; - - lockdep_assert_held(&client->wq_lock); - - /* pointer of current doorbell cacheline */ - db = __get_doorbell(client); - - /* - * We're not expecting the doorbell cookie to change behind our back, - * we also need to treat 0 as a reserved value. - */ - cookie = READ_ONCE(db->cookie); - WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie); - - /* XXX: doorbell was lost and need to acquire it again */ - GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); -} - static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); + u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - guc_wq_item_append(client, engine->guc_id, ctx_desc, + guc_wq_item_append(guc, engine->guc_id, ctx_desc, ring_tail, rq->fence.seqno); - guc_ring_doorbell(client); } /* @@ -487,10 +233,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) */ static void flush_ggtt_writes(struct i915_vma *vma) { - struct drm_i915_private *i915 = vma->vm->i915; - if (i915_vma_is_map_and_fenceable(vma)) - intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS); + intel_uncore_posting_read_fw(vma->vm->gt->uncore, + GUC_STATUS); } static void guc_submit(struct intel_engine_cs *engine, @@ -498,9 +243,8 @@ static void guc_submit(struct intel_engine_cs *engine, struct i915_request **end) { struct intel_guc *guc = &engine->gt->uc.guc; - struct intel_guc_client *client = guc->execbuf_client; - spin_lock(&client->wq_lock); + spin_lock(&guc->wq_lock); do { struct i915_request *rq = *out++; @@ -509,7 +253,7 @@ static void guc_submit(struct intel_engine_cs *engine, guc_add_request(guc, rq); } while (out != end); - spin_unlock(&client->wq_lock); + spin_unlock(&guc->wq_lock); } static inline int rq_prio(const struct i915_request *rq) @@ -528,7 +272,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx) * required if we generalise the inflight tracking. */ - intel_gt_pm_get(rq->engine->gt); + __intel_gt_pm_get(rq->engine->gt); return i915_request_get(rq); } @@ -536,7 +280,7 @@ static void schedule_out(struct i915_request *rq) { trace_i915_request_out(rq); - intel_gt_pm_put(rq->engine->gt); + intel_gt_pm_put_async(rq->engine->gt); i915_request_put(rq); } @@ -571,7 +315,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - if (last && rq->hw_context != last->hw_context) { + if (last && rq->context != last->context) { if (port == last_port) goto done; @@ -630,7 +374,7 @@ static void guc_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * Prevent request submission to the hardware until we have @@ -657,7 +401,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) memset(execlists->inflight, 0, sizeof(execlists->inflight)); } -static void guc_reset(struct intel_engine_cs *engine, bool stalled) +static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq; @@ -676,20 +420,20 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled) stalled = false; __i915_request_reset(rq, stalled); - intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); + intel_lr_context_reset(engine, rq->context, rq->head, stalled); out_unlock: spin_unlock_irqrestore(&engine->active.lock, flags); } -static void guc_cancel_requests(struct intel_engine_cs *engine) +static void guc_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -750,8 +494,8 @@ static void guc_reset_finish(struct intel_engine_cs *engine) /* And kick in case we missed a new request submission. */ tasklet_hi_schedule(&execlists->tasklet); - GEM_TRACE("%s: depth->%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth->%d\n", + atomic_read(&execlists->tasklet.count)); } /* @@ -760,213 +504,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine) * path of guc_submit() above. */ -/* Check that a doorbell register is in the expected state */ -static bool doorbell_ok(struct intel_guc *guc, u16 db_id) -{ - bool valid; - - GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); - - valid = __doorbell_valid(guc, db_id); - - if (test_bit(db_id, guc->doorbell_bitmap) == valid) - return true; - - DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n", - db_id, yesno(valid)); - - return false; -} - -static bool guc_verify_doorbells(struct intel_guc *guc) -{ - bool doorbells_ok = true; - u16 db_id; - - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) - if (!doorbell_ok(guc, db_id)) - doorbells_ok = false; - - return doorbells_ok; -} - -/** - * guc_client_alloc() - Allocate an intel_guc_client - * @guc: the intel_guc structure - * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW - * The kernel client to replace ExecList submission is created with - * NORMAL priority. Priority of a client for scheduler can be HIGH, - * while a preemption context can use CRITICAL. - * - * Return: An intel_guc_client object if success, else NULL. - */ -static struct intel_guc_client * -guc_client_alloc(struct intel_guc *guc, u32 priority) -{ - struct intel_guc_client *client; - struct i915_vma *vma; - void *vaddr; - int ret; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return ERR_PTR(-ENOMEM); - - client->guc = guc; - client->priority = priority; - client->doorbell_id = GUC_DOORBELL_INVALID; - spin_lock_init(&client->wq_lock); - - ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, - GFP_KERNEL); - if (ret < 0) - goto err_client; - - client->stage_id = ret; - - /* The first page is doorbell/proc_desc. Two followed pages are wq. */ - vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_id; - } - - /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ - client->vma = vma; - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_vma; - } - client->vaddr = vaddr; - - ret = reserve_doorbell(client); - if (ret) - goto err_vaddr; - - client->doorbell_offset = __select_cacheline(guc); - - /* - * Since the doorbell only requires a single cacheline, we can save - * space by putting the application process descriptor in the same - * page. Use the half of the page that doesn't include the doorbell. - */ - if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) - client->proc_desc_offset = 0; - else - client->proc_desc_offset = (GUC_DB_SIZE / 2); - - DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n", - priority, client, client->stage_id); - DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", - client->doorbell_id, client->doorbell_offset); - - return client; - -err_vaddr: - i915_gem_object_unpin_map(client->vma->obj); -err_vma: - i915_vma_unpin_and_release(&client->vma, 0); -err_id: - ida_simple_remove(&guc->stage_ids, client->stage_id); -err_client: - kfree(client); - return ERR_PTR(ret); -} - -static void guc_client_free(struct intel_guc_client *client) -{ - unreserve_doorbell(client); - i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP); - ida_simple_remove(&client->guc->stage_ids, client->stage_id); - kfree(client); -} - -static inline bool ctx_save_restore_disabled(struct intel_context *ce) -{ - u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1]; - -#define SR_DISABLED \ - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \ - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) - - return (sr & SR_DISABLED) == SR_DISABLED; - -#undef SR_DISABLED -} - -static int guc_clients_create(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - GEM_BUG_ON(guc->execbuf_client); - - client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for submission!\n"); - return PTR_ERR(client); - } - guc->execbuf_client = client; - - return 0; -} - -static void guc_clients_destroy(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - client = fetch_and_zero(&guc->execbuf_client); - if (client) - guc_client_free(client); -} - -static int __guc_client_enable(struct intel_guc_client *client) -{ - int ret; - - guc_proc_desc_init(client); - guc_stage_desc_init(client); - - ret = create_doorbell(client); - if (ret) - goto fail; - - return 0; - -fail: - guc_stage_desc_fini(client); - guc_proc_desc_fini(client); - return ret; -} - -static void __guc_client_disable(struct intel_guc_client *client) -{ - /* - * By the time we're here, GuC may have already been reset. if that is - * the case, instead of trying (in vain) to communicate with it, let's - * just cleanup the doorbell HW and our internal state. - */ - if (intel_guc_is_running(client->guc)) - destroy_doorbell(client); - else - __fini_doorbell(client); - - guc_stage_desc_fini(client); - guc_proc_desc_fini(client); -} - -static int guc_clients_enable(struct intel_guc *guc) -{ - return __guc_client_enable(guc->execbuf_client); -} - -static void guc_clients_disable(struct intel_guc *guc) -{ - if (guc->execbuf_client) - __guc_client_disable(guc->execbuf_client); -} - /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -987,13 +524,20 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - WARN_ON(!guc_verify_doorbells(guc)); - ret = guc_clients_create(guc); + ret = guc_workqueue_create(guc); if (ret) goto err_pool; + ret = guc_proc_desc_create(guc); + if (ret) + goto err_workqueue; + + spin_lock_init(&guc->wq_lock); + return 0; +err_workqueue: + guc_workqueue_destroy(guc); err_pool: guc_stage_desc_pool_destroy(guc); return ret; @@ -1001,83 +545,37 @@ err_pool: void intel_guc_submission_fini(struct intel_guc *guc) { - guc_clients_destroy(guc); - WARN_ON(!guc_verify_doorbells(guc)); - - if (guc->stage_desc_pool) + if (guc->stage_desc_pool) { + guc_proc_desc_destroy(guc); + guc_workqueue_destroy(guc); guc_stage_desc_pool_destroy(guc); + } } static void guc_interrupts_capture(struct intel_gt *gt) { - struct intel_rps *rps = >->i915->gt_pm.rps; struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; + u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; + u32 dmask = irqs << 16 | irqs; - /* tell all command streamers to forward interrupts (but not vblank) - * to GuC - */ - irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt, id) - ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); - - /* route USER_INTERRUPT to Host, all others are sent to GuC. */ - irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - /* These three registers have the same bit definitions */ - intel_uncore_write(uncore, GUC_BCS_RCS_IER, ~irqs); - intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, ~irqs); - intel_uncore_write(uncore, GUC_WD_VECS_IER, ~irqs); + GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); - /* - * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all - * (unmasked) PM interrupts to the GuC. All other bits of this - * register *disable* generation of a specific interrupt. - * - * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when - * writing to the PM interrupt mask register, i.e. interrupts - * that must not be disabled. - * - * If the GuC is handling these interrupts, then we must not let - * the PM code disable ANY interrupt that the GuC is expecting. - * So for each ENABLED (0) bit in this register, we must SET the - * bit in pm_intrmsk_mbz so that it's left enabled for the GuC. - * GuC needs ARAT expired interrupt unmasked hence it is set in - * pm_intrmsk_mbz. - * - * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will - * result in the register bit being left SET! - */ - rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; - rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + /* Don't handle the ctx switch interrupt in GuC submission mode */ + intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0); + intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0); } static void guc_interrupts_release(struct intel_gt *gt) { - struct intel_rps *rps = >->i915->gt_pm.rps; struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; + u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; + u32 dmask = irqs << 16 | irqs; - /* - * tell all command streamers NOT to forward interrupts or vblank - * to GuC. - */ - irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); - irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt, id) - ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); - - /* route all GT interrupts to the host */ - intel_uncore_write(uncore, GUC_BCS_RCS_IER, 0); - intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, 0); - intel_uncore_write(uncore, GUC_WD_VECS_IER, 0); - - rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; + GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); + + /* Handle ctx switch interrupts again */ + intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask); + intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask); } static void guc_set_default_submission(struct intel_engine_cs *engine) @@ -1101,11 +599,10 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) engine->park = engine->unpark = NULL; engine->reset.prepare = guc_reset_prepare; - engine->reset.reset = guc_reset; + engine->reset.rewind = guc_reset_rewind; + engine->reset.cancel = guc_reset_cancel; engine->reset.finish = guc_reset_finish; - engine->cancel_requests = guc_cancel_requests; - engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; @@ -1118,16 +615,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) GEM_BUG_ON(engine->irq_enable || engine->irq_disable); } -int intel_guc_submission_enable(struct intel_guc *guc) +void intel_guc_submission_enable(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; enum intel_engine_id id; - int err; - - err = i915_inject_load_error(gt->i915, -ENXIO); - if (err) - return err; /* * We're using GuC work items for submitting work through GuC. Since @@ -1142,11 +634,8 @@ int intel_guc_submission_enable(struct intel_guc *guc) sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); - GEM_BUG_ON(!guc->execbuf_client); - - err = guc_clients_enable(guc); - if (err) - return err; + guc_proc_desc_init(guc); + guc_stage_desc_init(guc); /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(gt); @@ -1155,8 +644,6 @@ int intel_guc_submission_enable(struct intel_guc *guc) engine->set_default_submission = guc_set_default_submission; engine->set_default_submission(engine); } - - return 0; } void intel_guc_submission_disable(struct intel_guc *guc) @@ -1165,8 +652,12 @@ void intel_guc_submission_disable(struct intel_guc *guc) GEM_BUG_ON(gt->awake); /* GT should be parked first */ + /* Note: By the time we're here, GuC may have already been reset */ + guc_interrupts_release(gt); - guc_clients_disable(guc); + + guc_stage_desc_fini(guc); + guc_proc_desc_fini(guc); } static bool __guc_submission_support(struct intel_guc *guc) @@ -1185,6 +676,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc) guc->submission_supported = __guc_submission_support(guc); } -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftest_guc.c" -#endif +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == guc_set_default_submission; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 54d716828352..e402a2932592 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -6,62 +6,18 @@ #ifndef _INTEL_GUC_SUBMISSION_H_ #define _INTEL_GUC_SUBMISSION_H_ -#include <linux/spinlock.h> +#include <linux/types.h> -#include "gt/intel_engine_types.h" - -#include "i915_gem.h" -#include "i915_selftest.h" - -struct drm_i915_private; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. - */ -struct intel_guc_client { - struct i915_vma *vma; - void *vaddr; - struct intel_guc *guc; - - /* bitmap of (host) engine ids */ - u32 priority; - u32 stage_id; - u32 proc_desc_offset; - - u16 doorbell_id; - unsigned long doorbell_offset; - - /* Protects GuC client's WQ access */ - spinlock_t wq_lock; - - /* For testing purposes, use nop WQ items instead of real ones */ - I915_SELFTEST_DECLARE(bool use_nop_wqi); -}; +struct intel_guc; +struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); int intel_guc_submission_init(struct intel_guc *guc); -int intel_guc_submission_enable(struct intel_guc *guc); +void intel_guc_submission_enable(struct intel_guc *guc); void intel_guc_submission_disable(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 8be515c8d0f0..32a069841c14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -63,7 +63,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) void *vaddr; int err; - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; @@ -161,7 +161,7 @@ int intel_huc_auth(struct intel_huc *huc) if (!intel_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; - ret = i915_inject_load_error(gt->i915, -ENXIO); + ret = i915_inject_probe_error(gt->i915, -ENXIO); if (ret) goto fail; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index d654340d4d03..eee193bf2cc4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -39,5 +39,5 @@ void intel_huc_fw_init_early(struct intel_huc *huc) int intel_huc_fw_upload(struct intel_huc *huc) { /* HW doesn't look at destination address for HuC, so set it to 0 */ - return intel_uc_fw_upload(&huc->fw, huc_to_gt(huc), 0, HUC_UKERNEL); + return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 3fdbc935d155..3ffc6267f96e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -20,7 +20,7 @@ static int __intel_uc_reset_hw(struct intel_uc *uc) int ret; u32 guc_status; - ret = i915_inject_load_error(gt->i915, -ENXIO); + ret = i915_inject_probe_error(gt->i915, -ENXIO); if (ret) return ret; @@ -123,6 +123,11 @@ static void __uc_free_load_err_log(struct intel_uc *uc) i915_gem_object_put(log); } +static inline bool guc_communication_enabled(struct intel_guc *guc) +{ + return intel_guc_ct_enabled(&guc->ct); +} + /* * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 * register using the same bits used in the CT message payload. Since our @@ -158,7 +163,7 @@ static void guc_handle_mmio_msg(struct intel_guc *guc) struct drm_i915_private *i915 = guc_to_gt(guc)->i915; /* we need communication to be enabled to reply to GuC */ - GEM_BUG_ON(guc->handler == intel_guc_to_host_event_handler_nop); + GEM_BUG_ON(!guc_communication_enabled(guc)); if (!guc->mmio_msg) return; @@ -185,11 +190,6 @@ static void guc_disable_interrupts(struct intel_guc *guc) guc->interrupts.disable(guc); } -static inline bool guc_communication_enabled(struct intel_guc *guc) -{ - return guc->send != intel_guc_send_nop; -} - static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; @@ -197,7 +197,7 @@ static int guc_enable_communication(struct intel_guc *guc) GEM_BUG_ON(guc_communication_enabled(guc)); - ret = i915_inject_load_error(i915, -ENXIO); + ret = i915_inject_probe_error(i915, -ENXIO); if (ret) return ret; @@ -205,9 +205,6 @@ static int guc_enable_communication(struct intel_guc *guc) if (ret) return ret; - guc->send = intel_guc_send_ct; - guc->handler = intel_guc_to_host_event_handler_ct; - /* check for mmio messages received before/during the CT enable */ guc_get_mmio_msg(guc); guc_handle_mmio_msg(guc); @@ -216,7 +213,7 @@ static int guc_enable_communication(struct intel_guc *guc) /* check for CT messages received before we enabled interrupts */ spin_lock_irq(&i915->irq_lock); - intel_guc_to_host_event_handler_ct(guc); + intel_guc_ct_event_handler(&guc->ct); spin_unlock_irq(&i915->irq_lock); DRM_INFO("GuC communication enabled\n"); @@ -224,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc) return 0; } -static void __guc_stop_communication(struct intel_guc *guc) +static void guc_disable_communication(struct intel_guc *guc) { /* * Events generated during or after CT disable are logged by guc in @@ -235,23 +232,6 @@ static void __guc_stop_communication(struct intel_guc *guc) guc_disable_interrupts(guc); - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; -} - -static void guc_stop_communication(struct intel_guc *guc) -{ - intel_guc_ct_stop(&guc->ct); - - __guc_stop_communication(guc); - - DRM_INFO("GuC communication stopped\n"); -} - -static void guc_disable_communication(struct intel_guc *guc) -{ - __guc_stop_communication(guc); - intel_guc_ct_disable(&guc->ct); /* @@ -267,28 +247,22 @@ static void guc_disable_communication(struct intel_guc *guc) void intel_uc_fetch_firmwares(struct intel_uc *uc) { - struct drm_i915_private *i915 = uc_to_gt(uc)->i915; int err; if (!intel_uc_uses_guc(uc)) return; - err = intel_uc_fw_fetch(&uc->guc.fw, i915); + err = intel_uc_fw_fetch(&uc->guc.fw); if (err) return; if (intel_uc_uses_huc(uc)) - intel_uc_fw_fetch(&uc->huc.fw, i915); + intel_uc_fw_fetch(&uc->huc.fw); } void intel_uc_cleanup_firmwares(struct intel_uc *uc) { - if (!intel_uc_uses_guc(uc)) - return; - - if (intel_uc_uses_huc(uc)) - intel_uc_fw_cleanup_fetch(&uc->huc.fw); - + intel_uc_fw_cleanup_fetch(&uc->huc.fw); intel_uc_fw_cleanup_fetch(&uc->guc.fw); } @@ -316,15 +290,8 @@ void intel_uc_init(struct intel_uc *uc) void intel_uc_fini(struct intel_uc *uc) { - struct intel_guc *guc = &uc->guc; - - if (!intel_uc_uses_guc(uc)) - return; - - if (intel_uc_uses_huc(uc)) - intel_huc_fini(&uc->huc); - - intel_guc_fini(guc); + intel_huc_fini(&uc->huc); + intel_guc_fini(&uc->guc); __uc_free_load_err_log(uc); } @@ -372,7 +339,7 @@ static int uc_init_wopcm(struct intel_uc *uc) GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; @@ -486,11 +453,8 @@ int intel_uc_init_hw(struct intel_uc *uc) if (ret) goto err_communication; - if (intel_uc_supports_guc_submission(uc)) { - ret = intel_guc_submission_enable(guc); - if (ret) - goto err_communication; - } + if (intel_uc_supports_guc_submission(uc)) + intel_guc_submission_enable(guc); dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, @@ -560,7 +524,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc) if (!intel_guc_is_running(guc)) return; - guc_stop_communication(guc); + guc_disable_communication(guc); __uc_sanitize(uc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index bb4889d2346d..8ee0a0c7f447 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -11,7 +11,6 @@ #include "intel_uc_fw_abi.h" #include "i915_drv.h" -#ifdef CONFIG_DRM_I915_DEBUG_GUC static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) { GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); @@ -22,6 +21,7 @@ static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) return container_of(uc_fw, struct intel_gt, uc.huc.fw); } +#ifdef CONFIG_DRM_I915_DEBUG_GUC void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, enum intel_uc_fw_status status) { @@ -37,8 +37,13 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, /* * List of required GuC and HuC binaries per-platform. * Must be ordered based on platform + revid, from newer to older. + * + * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas + * between 33.0 and 35.2 are only related to new additions to support new Gen12 + * features. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ @@ -214,35 +219,36 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); } -static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, - struct drm_i915_private *i915, - int e) +static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) { + struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; bool user = e == -EINVAL; - if (i915_inject_load_error(i915, e)) { + if (i915_inject_probe_error(i915, e)) { /* non-existing blob */ uc_fw->path = "<invalid>"; uc_fw->user_overridden = user; - } else if (i915_inject_load_error(i915, e)) { + } else if (i915_inject_probe_error(i915, e)) { /* require next major version */ uc_fw->major_ver_wanted += 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (i915_inject_load_error(i915, e)) { + } else if (i915_inject_probe_error(i915, e)) { /* require next minor version */ uc_fw->minor_ver_wanted += 1; uc_fw->user_overridden = user; - } else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) { + } else if (uc_fw->major_ver_wanted && + i915_inject_probe_error(i915, e)) { /* require prev major version */ uc_fw->major_ver_wanted -= 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) { + } else if (uc_fw->minor_ver_wanted && + i915_inject_probe_error(i915, e)) { /* require prev minor version - hey, this should work! */ uc_fw->minor_ver_wanted -= 1; uc_fw->user_overridden = user; - } else if (user && i915_inject_load_error(i915, e)) { + } else if (user && i915_inject_probe_error(i915, e)) { /* officially unsupported platform */ uc_fw->major_ver_wanted = 0; uc_fw->minor_ver_wanted = 0; @@ -253,14 +259,14 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware - * @i915: device private * * Fetch uC firmware into GEM obj. * * Return: 0 on success, a negative errno code on failure. */ -int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) { + struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; @@ -271,12 +277,12 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) GEM_BUG_ON(!i915->wopcm.size); GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); - err = i915_inject_load_error(i915, -ENXIO); + err = i915_inject_probe_error(i915, -ENXIO); if (err) return err; - __force_fw_fetch_failures(uc_fw, i915, -EINVAL); - __force_fw_fetch_failures(uc_fw, i915, -ESTALE); + __force_fw_fetch_failures(uc_fw, -EINVAL); + __force_fw_fetch_failures(uc_fw, -ESTALE); err = request_firmware(&fw, uc_fw->path, dev); if (err) @@ -383,8 +389,9 @@ fail: return err; } -static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt) +static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) { + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; struct drm_mm_node *node = &ggtt->uc_fw; GEM_BUG_ON(!drm_mm_node_allocated(node)); @@ -394,13 +401,12 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt) return lower_32_bits(node->start); } -static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw, - struct intel_gt *gt) +static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; - struct i915_ggtt *ggtt = gt->ggtt; + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; struct i915_vma dummy = { - .node.start = uc_fw_ggtt_offset(uc_fw, ggtt), + .node.start = uc_fw_ggtt_offset(uc_fw), .node.size = obj->base.size, .pages = obj->mm.pages, .vm = &ggtt->vm, @@ -415,37 +421,36 @@ static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw, ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); } -static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw, - struct intel_gt *gt) +static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; - struct i915_ggtt *ggtt = gt->ggtt; - u64 start = uc_fw_ggtt_offset(uc_fw, ggtt); + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; + u64 start = uc_fw_ggtt_offset(uc_fw); ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); } -static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, - u32 wopcm_offset, u32 dma_flags) +static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) { + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); struct intel_uncore *uncore = gt->uncore; u64 offset; int ret; - ret = i915_inject_load_error(gt->i915, -ETIMEDOUT); + ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT); if (ret) return ret; intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = uc_fw_ggtt_offset(uc_fw, gt->ggtt); + offset = uc_fw_ggtt_offset(uc_fw); GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000); intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset)); /* Set the DMA destination */ - intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, wopcm_offset); + intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, dst_offset); intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); /* @@ -477,23 +482,22 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, /** * intel_uc_fw_upload - load uC firmware using custom loader * @uc_fw: uC firmware - * @gt: the intel_gt structure - * @wopcm_offset: destination offset in wopcm + * @dst_offset: destination offset * @dma_flags: flags for flags for dma ctrl * * Loads uC firmware and updates internal flags. * * Return: 0 on success, non-zero on failure. */ -int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, - u32 wopcm_offset, u32 dma_flags) +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) { + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); int err; /* make sure the status was cleared the last time we reset the uc */ GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); - err = i915_inject_load_error(gt->i915, -ENOEXEC); + err = i915_inject_probe_error(gt->i915, -ENOEXEC); if (err) return err; @@ -501,9 +505,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, return -ENOEXEC; /* Call custom loader */ - intel_uc_fw_ggtt_bind(uc_fw, gt); - err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags); - intel_uc_fw_ggtt_unbind(uc_fw, gt); + uc_fw_bind_ggtt(uc_fw); + err = uc_fw_xfer(uc_fw, dst_offset, dma_flags); + uc_fw_unbind_ggtt(uc_fw); if (err) goto fail; @@ -540,10 +544,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) { - if (!intel_uc_fw_is_available(uc_fw)) - return; - - i915_gem_object_unpin_pages(uc_fw->obj); + intel_uc_fw_cleanup_fetch(uc_fw); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 7a0a5989afc9..1f30543d0d2d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -229,10 +229,9 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type, bool supported, enum intel_platform platform, u8 rev); -int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915); +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); -int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, - u32 wopcm_offset, u32 dma_flags); +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags); int intel_uc_fw_init(struct intel_uc_fw *uc_fw); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len); diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c deleted file mode 100644 index d8a80388bd31..000000000000 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ /dev/null @@ -1,299 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2017 Intel Corporation - */ - -#include "i915_selftest.h" -#include "gem/i915_gem_pm.h" - -/* max doorbell number + negative test for each client type */ -#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) - -static struct intel_guc_client *clients[ATTEMPTS]; - -static bool available_dbs(struct intel_guc *guc, u32 priority) -{ - unsigned long offset; - unsigned long end; - u16 id; - - /* first half is used for normal priority, second half for high */ - offset = 0; - end = GUC_NUM_DOORBELLS / 2; - if (priority <= GUC_CLIENT_PRIORITY_HIGH) { - offset = end; - end += offset; - } - - id = find_next_zero_bit(guc->doorbell_bitmap, end, offset); - if (id < end) - return true; - - return false; -} - -static int check_all_doorbells(struct intel_guc *guc) -{ - u16 db_id; - - pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS); - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) { - if (!doorbell_ok(guc, db_id)) { - pr_err("doorbell %d, not ok\n", db_id); - return -EIO; - } - } - - return 0; -} - -static int ring_doorbell_nop(struct intel_guc_client *client) -{ - struct guc_process_desc *desc = __get_process_desc(client); - int err; - - client->use_nop_wqi = true; - - spin_lock_irq(&client->wq_lock); - - guc_wq_item_append(client, 0, 0, 0, 0); - guc_ring_doorbell(client); - - spin_unlock_irq(&client->wq_lock); - - client->use_nop_wqi = false; - - /* if there are no issues GuC will update the WQ head and keep the - * WQ in active status - */ - err = wait_for(READ_ONCE(desc->head) == READ_ONCE(desc->tail), 10); - if (err) { - pr_err("doorbell %u ring failed!\n", client->doorbell_id); - return -EIO; - } - - if (desc->wq_status != WQ_STATUS_ACTIVE) { - pr_err("doorbell %u ring put WQ in bad state (%u)!\n", - client->doorbell_id, desc->wq_status); - return -EIO; - } - - return 0; -} - -/* - * Basic client sanity check, handy to validate create_clients. - */ -static int validate_client(struct intel_guc_client *client, int client_priority) -{ - if (client->priority != client_priority || - client->doorbell_id == GUC_DOORBELL_INVALID) - return -EINVAL; - else - return 0; -} - -static bool client_doorbell_in_sync(struct intel_guc_client *client) -{ - return !client || doorbell_ok(client->guc, client->doorbell_id); -} - -/* - * Check that we're able to synchronize guc_clients with their doorbells - * - * We're creating clients and reserving doorbells once, at module load. During - * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to - * GuC being reset. In other words - GuC clients are still around, but the - * status of their doorbells may be incorrect. This is the reason behind - * validating that the doorbells status expected by the driver matches what the - * GuC/HW have. - */ -static int igt_guc_clients(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_guc *guc = >->uc.guc; - intel_wakeref_t wakeref; - int err = 0; - - GEM_BUG_ON(!HAS_GT_UC(gt->i915)); - wakeref = intel_runtime_pm_get(gt->uncore->rpm); - - err = check_all_doorbells(guc); - if (err) - goto unlock; - - /* - * Get rid of clients created during driver load because the test will - * recreate them. - */ - guc_clients_disable(guc); - guc_clients_destroy(guc); - if (guc->execbuf_client) { - pr_err("guc_clients_destroy lied!\n"); - err = -EINVAL; - goto unlock; - } - - err = guc_clients_create(guc); - if (err) { - pr_err("Failed to create clients\n"); - goto unlock; - } - GEM_BUG_ON(!guc->execbuf_client); - - err = validate_client(guc->execbuf_client, - GUC_CLIENT_PRIORITY_KMD_NORMAL); - if (err) { - pr_err("execbug client validation failed\n"); - goto out; - } - - /* the client should now have reserved a doorbell */ - if (!has_doorbell(guc->execbuf_client)) { - pr_err("guc_clients_create didn't reserve doorbells\n"); - err = -EINVAL; - goto out; - } - - /* Now enable the clients */ - guc_clients_enable(guc); - - /* each client should now have received a doorbell */ - if (!client_doorbell_in_sync(guc->execbuf_client)) { - pr_err("failed to initialize the doorbells\n"); - err = -EINVAL; - goto out; - } - - /* - * Basic test - an attempt to reallocate a valid doorbell to the - * client it is currently assigned should not cause a failure. - */ - err = create_doorbell(guc->execbuf_client); - -out: - /* - * Leave clean state for other test, plus the driver always destroy the - * clients during unload. - */ - guc_clients_disable(guc); - guc_clients_destroy(guc); - guc_clients_create(guc); - guc_clients_enable(guc); -unlock: - intel_runtime_pm_put(gt->uncore->rpm, wakeref); - return err; -} - -/* - * Create as many clients as number of doorbells. Note that there's already - * client(s)/doorbell(s) created during driver load, but this test creates - * its own and do not interact with the existing ones. - */ -static int igt_guc_doorbells(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_guc *guc = >->uc.guc; - intel_wakeref_t wakeref; - int i, err = 0; - u16 db_id; - - GEM_BUG_ON(!HAS_GT_UC(gt->i915)); - wakeref = intel_runtime_pm_get(gt->uncore->rpm); - - err = check_all_doorbells(guc); - if (err) - goto unlock; - - for (i = 0; i < ATTEMPTS; i++) { - clients[i] = guc_client_alloc(guc, i % GUC_CLIENT_PRIORITY_NUM); - - if (!clients[i]) { - pr_err("[%d] No guc client\n", i); - err = -EINVAL; - goto out; - } - - if (IS_ERR(clients[i])) { - if (PTR_ERR(clients[i]) != -ENOSPC) { - pr_err("[%d] unexpected error\n", i); - err = PTR_ERR(clients[i]); - goto out; - } - - if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) { - pr_err("[%d] non-db related alloc fail\n", i); - err = -EINVAL; - goto out; - } - - /* expected, ran out of dbs for this client type */ - continue; - } - - /* - * The check below is only valid because we keep a doorbell - * assigned during the whole life of the client. - */ - if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) { - pr_err("[%d] more clients than doorbells (%d >= %d)\n", - i, clients[i]->stage_id, GUC_NUM_DOORBELLS); - err = -EINVAL; - goto out; - } - - err = validate_client(clients[i], i % GUC_CLIENT_PRIORITY_NUM); - if (err) { - pr_err("[%d] client_alloc sanity check failed!\n", i); - err = -EINVAL; - goto out; - } - - db_id = clients[i]->doorbell_id; - - err = __guc_client_enable(clients[i]); - if (err) { - pr_err("[%d] Failed to create a doorbell\n", i); - goto out; - } - - /* doorbell id shouldn't change, we are holding the mutex */ - if (db_id != clients[i]->doorbell_id) { - pr_err("[%d] doorbell id changed (%d != %d)\n", - i, db_id, clients[i]->doorbell_id); - err = -EINVAL; - goto out; - } - - err = check_all_doorbells(guc); - if (err) - goto out; - - err = ring_doorbell_nop(clients[i]); - if (err) - goto out; - } - -out: - for (i = 0; i < ATTEMPTS; i++) - if (!IS_ERR_OR_NULL(clients[i])) { - __guc_client_disable(clients[i]); - guc_client_free(clients[i]); - } -unlock: - intel_runtime_pm_put(gt->uncore->rpm, wakeref); - return err; -} - -int intel_guc_live_selftest(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_guc_clients), - SUBTEST(igt_guc_doorbells), - }; - - if (!USES_GUC_SUBMISSION(i915)) - return 0; - - return intel_gt_live_subtests(tests, &i915->gt); -} |